Merge pull request #6392 from dstftw/generalized-fragmented-fd
Generalized fragmented media file downloader
This commit is contained in:
		@@ -7,8 +7,7 @@ import os
 | 
			
		||||
import time
 | 
			
		||||
import xml.etree.ElementTree as etree
 | 
			
		||||
 | 
			
		||||
from .common import FileDownloader
 | 
			
		||||
from .http import HttpFD
 | 
			
		||||
from .fragment import FragmentFD
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    compat_urllib_error,
 | 
			
		||||
@@ -16,8 +15,6 @@ from ..compat import (
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    struct_pack,
 | 
			
		||||
    struct_unpack,
 | 
			
		||||
    encodeFilename,
 | 
			
		||||
    sanitize_open,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -226,16 +223,13 @@ def _add_ns(prop):
 | 
			
		||||
    return '{http://ns.adobe.com/f4m/1.0}%s' % prop
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class HttpQuietDownloader(HttpFD):
 | 
			
		||||
    def to_screen(self, *args, **kargs):
 | 
			
		||||
        pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class F4mFD(FileDownloader):
 | 
			
		||||
class F4mFD(FragmentFD):
 | 
			
		||||
    """
 | 
			
		||||
    A downloader for f4m manifests or AdobeHDS.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    FD_NAME = 'f4m'
 | 
			
		||||
 | 
			
		||||
    def _get_unencrypted_media(self, doc):
 | 
			
		||||
        media = doc.findall(_add_ns('media'))
 | 
			
		||||
        if not media:
 | 
			
		||||
@@ -288,7 +282,7 @@ class F4mFD(FileDownloader):
 | 
			
		||||
    def real_download(self, filename, info_dict):
 | 
			
		||||
        man_url = info_dict['url']
 | 
			
		||||
        requested_bitrate = info_dict.get('tbr')
 | 
			
		||||
        self.to_screen('[download] Downloading f4m manifest')
 | 
			
		||||
        self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
 | 
			
		||||
        manifest = self.ydl.urlopen(man_url).read()
 | 
			
		||||
 | 
			
		||||
        doc = etree.fromstring(manifest)
 | 
			
		||||
@@ -320,67 +314,20 @@ class F4mFD(FileDownloader):
 | 
			
		||||
        # For some akamai manifests we'll need to add a query to the fragment url
 | 
			
		||||
        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
 | 
			
		||||
 | 
			
		||||
        self.report_destination(filename)
 | 
			
		||||
        http_dl = HttpQuietDownloader(
 | 
			
		||||
            self.ydl,
 | 
			
		||||
            {
 | 
			
		||||
                'continuedl': True,
 | 
			
		||||
                'quiet': True,
 | 
			
		||||
                'noprogress': True,
 | 
			
		||||
                'ratelimit': self.params.get('ratelimit', None),
 | 
			
		||||
                'test': self.params.get('test', False),
 | 
			
		||||
            }
 | 
			
		||||
        )
 | 
			
		||||
        tmpfilename = self.temp_name(filename)
 | 
			
		||||
        (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
 | 
			
		||||
        ctx = {
 | 
			
		||||
            'filename': filename,
 | 
			
		||||
            'total_frags': total_frags,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        self._prepare_frag_download(ctx)
 | 
			
		||||
 | 
			
		||||
        dest_stream = ctx['dest_stream']
 | 
			
		||||
 | 
			
		||||
        write_flv_header(dest_stream)
 | 
			
		||||
        if not live:
 | 
			
		||||
            write_metadata_tag(dest_stream, metadata)
 | 
			
		||||
 | 
			
		||||
        # This dict stores the download progress, it's updated by the progress
 | 
			
		||||
        # hook
 | 
			
		||||
        state = {
 | 
			
		||||
            'status': 'downloading',
 | 
			
		||||
            'downloaded_bytes': 0,
 | 
			
		||||
            'frag_index': 0,
 | 
			
		||||
            'frag_count': total_frags,
 | 
			
		||||
            'filename': filename,
 | 
			
		||||
            'tmpfilename': tmpfilename,
 | 
			
		||||
        }
 | 
			
		||||
        start = time.time()
 | 
			
		||||
 | 
			
		||||
        def frag_progress_hook(s):
 | 
			
		||||
            if s['status'] not in ('downloading', 'finished'):
 | 
			
		||||
                return
 | 
			
		||||
 | 
			
		||||
            frag_total_bytes = s.get('total_bytes', 0)
 | 
			
		||||
            if s['status'] == 'finished':
 | 
			
		||||
                state['downloaded_bytes'] += frag_total_bytes
 | 
			
		||||
                state['frag_index'] += 1
 | 
			
		||||
 | 
			
		||||
            estimated_size = (
 | 
			
		||||
                (state['downloaded_bytes'] + frag_total_bytes) /
 | 
			
		||||
                (state['frag_index'] + 1) * total_frags)
 | 
			
		||||
            time_now = time.time()
 | 
			
		||||
            state['total_bytes_estimate'] = estimated_size
 | 
			
		||||
            state['elapsed'] = time_now - start
 | 
			
		||||
 | 
			
		||||
            if s['status'] == 'finished':
 | 
			
		||||
                progress = self.calc_percent(state['frag_index'], total_frags)
 | 
			
		||||
            else:
 | 
			
		||||
                frag_downloaded_bytes = s['downloaded_bytes']
 | 
			
		||||
                frag_progress = self.calc_percent(frag_downloaded_bytes,
 | 
			
		||||
                                                  frag_total_bytes)
 | 
			
		||||
                progress = self.calc_percent(state['frag_index'], total_frags)
 | 
			
		||||
                progress += frag_progress / float(total_frags)
 | 
			
		||||
 | 
			
		||||
                state['eta'] = self.calc_eta(
 | 
			
		||||
                    start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
 | 
			
		||||
                state['speed'] = s.get('speed')
 | 
			
		||||
            self._hook_progress(state)
 | 
			
		||||
 | 
			
		||||
        http_dl.add_progress_hook(frag_progress_hook)
 | 
			
		||||
        self._start_frag_download(ctx)
 | 
			
		||||
 | 
			
		||||
        frags_filenames = []
 | 
			
		||||
        while fragments_list:
 | 
			
		||||
@@ -391,9 +338,9 @@ class F4mFD(FileDownloader):
 | 
			
		||||
                url += '?' + akamai_pv.strip(';')
 | 
			
		||||
            if info_dict.get('extra_param_to_segment_url'):
 | 
			
		||||
                url += info_dict.get('extra_param_to_segment_url')
 | 
			
		||||
            frag_filename = '%s-%s' % (tmpfilename, name)
 | 
			
		||||
            frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
 | 
			
		||||
            try:
 | 
			
		||||
                success = http_dl.download(frag_filename, {'url': url})
 | 
			
		||||
                success = ctx['dl'].download(frag_filename, {'url': url})
 | 
			
		||||
                if not success:
 | 
			
		||||
                    return False
 | 
			
		||||
                with open(frag_filename, 'rb') as down:
 | 
			
		||||
@@ -425,20 +372,9 @@ class F4mFD(FileDownloader):
 | 
			
		||||
                    msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
 | 
			
		||||
                    self.report_warning(msg)
 | 
			
		||||
 | 
			
		||||
        dest_stream.close()
 | 
			
		||||
        self._finish_frag_download(ctx)
 | 
			
		||||
 | 
			
		||||
        elapsed = time.time() - start
 | 
			
		||||
        self.try_rename(tmpfilename, filename)
 | 
			
		||||
        for frag_file in frags_filenames:
 | 
			
		||||
            os.remove(frag_file)
 | 
			
		||||
 | 
			
		||||
        fsize = os.path.getsize(encodeFilename(filename))
 | 
			
		||||
        self._hook_progress({
 | 
			
		||||
            'downloaded_bytes': fsize,
 | 
			
		||||
            'total_bytes': fsize,
 | 
			
		||||
            'filename': filename,
 | 
			
		||||
            'status': 'finished',
 | 
			
		||||
            'elapsed': elapsed,
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
        return True
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										110
									
								
								youtube_dl/downloader/fragment.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								youtube_dl/downloader/fragment.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,110 @@
 | 
			
		||||
from __future__ import division, unicode_literals
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from .common import FileDownloader
 | 
			
		||||
from .http import HttpFD
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    encodeFilename,
 | 
			
		||||
    sanitize_open,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class HttpQuietDownloader(HttpFD):
    """HttpFD variant whose ``to_screen`` discards every message."""

    def to_screen(self, *args, **kargs):
        # Deliberately a no-op: any arguments are accepted and ignored.
        return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FragmentFD(FileDownloader):
    """
    A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).

    Subclasses are expected to provide an ``FD_NAME`` attribute (it is used
    as the prefix of the messages printed here) and to drive the download
    through a shared ``ctx`` dict.

    Keys the caller must supply before calling the helpers:
        filename     name of the final destination file
        total_frags  number of fragments that are going to be downloaded

    Keys added by the helpers:
        dl           HttpQuietDownloader used to fetch the fragments
        dest_stream  binary stream opened on the temporary destination file
        tmpfilename  name of the temporary destination file
        started      time.time() value taken when the download was started
    """

    def _prepare_and_start_frag_download(self, ctx):
        """Run _prepare_frag_download() and then _start_frag_download()."""
        self._prepare_frag_download(ctx)
        self._start_frag_download(ctx)

    def _prepare_frag_download(self, ctx):
        """
        Announce the download, create the per-fragment downloader and open
        the temporary destination file.

        Reads ctx['filename'] and ctx['total_frags']; stores 'dl',
        'dest_stream' and 'tmpfilename' into ctx.
        """
        self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
        self.report_destination(ctx['filename'])
        dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit', None),
                'test': self.params.get('test', False),
            }
        )
        tmpfilename = self.temp_name(ctx['filename'])
        # Keep the name returned by sanitize_open rather than the requested
        # one, so later steps operate on the file that was actually opened.
        dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb')
        ctx.update({
            'dl': dl,
            'dest_stream': dest_stream,
            'tmpfilename': tmpfilename,
        })

    def _start_frag_download(self, ctx):
        """
        Record the start time in ctx['started'] and register a progress hook
        on ctx['dl'] that converts per-fragment progress into progress for
        the whole download.

        Returns the start time (a time.time() value).
        """
        total_frags = ctx['total_frags']
        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'status': 'downloading',
            'downloaded_bytes': 0,
            'frag_index': 0,
            'frag_count': total_frags,
            'filename': ctx['filename'],
            'tmpfilename': ctx['tmpfilename'],
        }
        start = time.time()
        ctx['started'] = start

        def frag_progress_hook(s):
            if s['status'] not in ('downloading', 'finished'):
                return

            # 'total_bytes' may be missing or None when the size of the
            # fragment is unknown; treat both as 0 so that the arithmetic
            # below cannot fail with a TypeError.
            frag_total_bytes = s.get('total_bytes') or 0
            if s['status'] == 'finished':
                state['downloaded_bytes'] += frag_total_bytes
                state['frag_index'] += 1

            # Extrapolate the total size from the average size of the
            # fragments seen so far (including the one in progress).
            estimated_size = (
                (state['downloaded_bytes'] + frag_total_bytes) /
                (state['frag_index'] + 1) * total_frags)
            time_now = time.time()
            state['total_bytes_estimate'] = estimated_size
            state['elapsed'] = time_now - start

            if s['status'] != 'finished':
                # ETA and speed are only refreshed while a fragment is still
                # being downloaded.
                frag_downloaded_bytes = s['downloaded_bytes']
                state['eta'] = self.calc_eta(
                    start, time_now, estimated_size,
                    state['downloaded_bytes'] + frag_downloaded_bytes)
                state['speed'] = s.get('speed')
            self._hook_progress(state)

        ctx['dl'].add_progress_hook(frag_progress_hook)

        return start

    def _finish_frag_download(self, ctx):
        """
        Close the destination stream, rename the temporary file to its final
        name and report the 'finished' status together with the final size.
        """
        ctx['dest_stream'].close()
        elapsed = time.time() - ctx['started']
        self.try_rename(ctx['tmpfilename'], ctx['filename'])
        fsize = os.path.getsize(encodeFilename(ctx['filename']))

        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': ctx['filename'],
            'status': 'finished',
            'elapsed': elapsed,
        })
 | 
			
		||||
@@ -4,12 +4,11 @@ import os
 | 
			
		||||
import re
 | 
			
		||||
import subprocess
 | 
			
		||||
 | 
			
		||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
 | 
			
		||||
from .common import FileDownloader
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
)
 | 
			
		||||
from .fragment import FragmentFD
 | 
			
		||||
 | 
			
		||||
from ..compat import compat_urlparse
 | 
			
		||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    encodeArgument,
 | 
			
		||||
    encodeFilename,
 | 
			
		||||
@@ -51,54 +50,50 @@ class HlsFD(FileDownloader):
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NativeHlsFD(FileDownloader):
 | 
			
		||||
class NativeHlsFD(FragmentFD):
 | 
			
		||||
    """ A more limited implementation that does not require ffmpeg """
 | 
			
		||||
 | 
			
		||||
    def real_download(self, filename, info_dict):
 | 
			
		||||
        url = info_dict['url']
 | 
			
		||||
        self.report_destination(filename)
 | 
			
		||||
        tmpfilename = self.temp_name(filename)
 | 
			
		||||
    FD_NAME = 'hlsnative'
 | 
			
		||||
 | 
			
		||||
        self.to_screen(
 | 
			
		||||
            '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
 | 
			
		||||
        data = self.ydl.urlopen(url).read()
 | 
			
		||||
        s = data.decode('utf-8', 'ignore')
 | 
			
		||||
        segment_urls = []
 | 
			
		||||
    def real_download(self, filename, info_dict):
 | 
			
		||||
        man_url = info_dict['url']
 | 
			
		||||
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
 | 
			
		||||
        manifest = self.ydl.urlopen(man_url).read()
 | 
			
		||||
 | 
			
		||||
        s = manifest.decode('utf-8', 'ignore')
 | 
			
		||||
        fragment_urls = []
 | 
			
		||||
        for line in s.splitlines():
 | 
			
		||||
            line = line.strip()
 | 
			
		||||
            if line and not line.startswith('#'):
 | 
			
		||||
                segment_url = (
 | 
			
		||||
                    line
 | 
			
		||||
                    if re.match(r'^https?://', line)
 | 
			
		||||
                    else compat_urlparse.urljoin(url, line))
 | 
			
		||||
                segment_urls.append(segment_url)
 | 
			
		||||
 | 
			
		||||
        is_test = self.params.get('test', False)
 | 
			
		||||
        remaining_bytes = self._TEST_FILE_SIZE if is_test else None
 | 
			
		||||
        byte_counter = 0
 | 
			
		||||
        with open(tmpfilename, 'wb') as outf:
 | 
			
		||||
            for i, segurl in enumerate(segment_urls):
 | 
			
		||||
                self.to_screen(
 | 
			
		||||
                    '[hlsnative] %s: Downloading segment %d / %d' %
 | 
			
		||||
                    (info_dict['id'], i + 1, len(segment_urls)))
 | 
			
		||||
                seg_req = compat_urllib_request.Request(segurl)
 | 
			
		||||
                if remaining_bytes is not None:
 | 
			
		||||
                    seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
 | 
			
		||||
 | 
			
		||||
                segment = self.ydl.urlopen(seg_req).read()
 | 
			
		||||
                if remaining_bytes is not None:
 | 
			
		||||
                    segment = segment[:remaining_bytes]
 | 
			
		||||
                    remaining_bytes -= len(segment)
 | 
			
		||||
                outf.write(segment)
 | 
			
		||||
                byte_counter += len(segment)
 | 
			
		||||
                if remaining_bytes is not None and remaining_bytes <= 0:
 | 
			
		||||
                    else compat_urlparse.urljoin(man_url, line))
 | 
			
		||||
                fragment_urls.append(segment_url)
 | 
			
		||||
                # We only download the first fragment during the test
 | 
			
		||||
                if self.params.get('test', False):
 | 
			
		||||
                    break
 | 
			
		||||
 | 
			
		||||
        self._hook_progress({
 | 
			
		||||
            'downloaded_bytes': byte_counter,
 | 
			
		||||
            'total_bytes': byte_counter,
 | 
			
		||||
        ctx = {
 | 
			
		||||
            'filename': filename,
 | 
			
		||||
            'status': 'finished',
 | 
			
		||||
        })
 | 
			
		||||
        self.try_rename(tmpfilename, filename)
 | 
			
		||||
            'total_frags': len(fragment_urls),
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        self._prepare_and_start_frag_download(ctx)
 | 
			
		||||
 | 
			
		||||
        frags_filenames = []
 | 
			
		||||
        for i, frag_url in enumerate(fragment_urls):
 | 
			
		||||
            frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
 | 
			
		||||
            success = ctx['dl'].download(frag_filename, {'url': frag_url})
 | 
			
		||||
            if not success:
 | 
			
		||||
                return False
 | 
			
		||||
            with open(frag_filename, 'rb') as down:
 | 
			
		||||
                ctx['dest_stream'].write(down.read())
 | 
			
		||||
            frags_filenames.append(frag_filename)
 | 
			
		||||
 | 
			
		||||
        self._finish_frag_download(ctx)
 | 
			
		||||
 | 
			
		||||
        for frag_file in frags_filenames:
 | 
			
		||||
            os.remove(frag_file)
 | 
			
		||||
 | 
			
		||||
        return True
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user