[youtube] Improve cache and add an option to print the extracted signatures
This commit is contained in:
		@@ -40,7 +40,7 @@ class FileDownloader(object):
 | 
				
			|||||||
    min_filesize:      Skip files smaller than this size
 | 
					    min_filesize:      Skip files smaller than this size
 | 
				
			||||||
    max_filesize:      Skip files larger than this size
 | 
					    max_filesize:      Skip files larger than this size
 | 
				
			||||||
    cachedir:          Location of the cache files in the filesystem.
 | 
					    cachedir:          Location of the cache files in the filesystem.
 | 
				
			||||||
                       False to disable filesystem cache.
 | 
					                       "NONE" to disable filesystem cache.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    params = None
 | 
					    params = None
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -167,6 +167,7 @@ def parseOpts(overrideArguments=None):
 | 
				
			|||||||
            help='Output descriptions of all supported extractors', default=False)
 | 
					            help='Output descriptions of all supported extractors', default=False)
 | 
				
			||||||
    general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
 | 
					    general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
 | 
				
			||||||
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
 | 
					    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
 | 
				
			||||||
 | 
					    general.add_option('--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache', help='Location in the filesystem where youtube-dl can store downloaded information permanently. NONE to disable filesystem caching, %default by default')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    selection.add_option('--playlist-start',
 | 
					    selection.add_option('--playlist-start',
 | 
				
			||||||
@@ -272,6 +273,10 @@ def parseOpts(overrideArguments=None):
 | 
				
			|||||||
    verbosity.add_option('--dump-intermediate-pages',
 | 
					    verbosity.add_option('--dump-intermediate-pages',
 | 
				
			||||||
            action='store_true', dest='dump_intermediate_pages', default=False,
 | 
					            action='store_true', dest='dump_intermediate_pages', default=False,
 | 
				
			||||||
            help='print downloaded pages to debug problems(very verbose)')
 | 
					            help='print downloaded pages to debug problems(very verbose)')
 | 
				
			||||||
 | 
					    verbosity.add_option('--youtube-print-sig-code',
 | 
				
			||||||
 | 
					            action='store_true', dest='youtube_print_sig_code', default=False,
 | 
				
			||||||
 | 
					            help=optparse.SUPPRESS_HELP)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    filesystem.add_option('-t', '--title',
 | 
					    filesystem.add_option('-t', '--title',
 | 
				
			||||||
            action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
 | 
					            action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
 | 
				
			||||||
@@ -613,6 +618,7 @@ def _real_main(argv=None):
 | 
				
			|||||||
        'min_filesize': opts.min_filesize,
 | 
					        'min_filesize': opts.min_filesize,
 | 
				
			||||||
        'max_filesize': opts.max_filesize,
 | 
					        'max_filesize': opts.max_filesize,
 | 
				
			||||||
        'daterange': date,
 | 
					        'daterange': date,
 | 
				
			||||||
 | 
					        'youtube_print_sig_code': opts.youtube_print_sig_code
 | 
				
			||||||
        })
 | 
					        })
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if opts.verbose:
 | 
					    if opts.verbose:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,13 +1,13 @@
 | 
				
			|||||||
# coding: utf-8
 | 
					# coding: utf-8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import collections
 | 
					import collections
 | 
				
			||||||
 | 
					import errno
 | 
				
			||||||
import itertools
 | 
					import itertools
 | 
				
			||||||
import io
 | 
					import io
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
import operator
 | 
					import operator
 | 
				
			||||||
import os.path
 | 
					import os.path
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import shutil
 | 
					 | 
				
			||||||
import socket
 | 
					import socket
 | 
				
			||||||
import string
 | 
					import string
 | 
				
			||||||
import struct
 | 
					import struct
 | 
				
			||||||
@@ -17,6 +17,7 @@ import zlib
 | 
				
			|||||||
from .common import InfoExtractor, SearchInfoExtractor
 | 
					from .common import InfoExtractor, SearchInfoExtractor
 | 
				
			||||||
from .subtitles import SubtitlesInfoExtractor
 | 
					from .subtitles import SubtitlesInfoExtractor
 | 
				
			||||||
from ..utils import (
 | 
					from ..utils import (
 | 
				
			||||||
 | 
					    compat_chr,
 | 
				
			||||||
    compat_http_client,
 | 
					    compat_http_client,
 | 
				
			||||||
    compat_parse_qs,
 | 
					    compat_parse_qs,
 | 
				
			||||||
    compat_urllib_error,
 | 
					    compat_urllib_error,
 | 
				
			||||||
@@ -30,6 +31,7 @@ from ..utils import (
 | 
				
			|||||||
    unescapeHTML,
 | 
					    unescapeHTML,
 | 
				
			||||||
    unified_strdate,
 | 
					    unified_strdate,
 | 
				
			||||||
    orderedSet,
 | 
					    orderedSet,
 | 
				
			||||||
 | 
					    write_json_file,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
 | 
					class YoutubeBaseInfoExtractor(InfoExtractor):
 | 
				
			||||||
@@ -433,18 +435,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 | 
				
			|||||||
        # Read from filesystem cache
 | 
					        # Read from filesystem cache
 | 
				
			||||||
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
 | 
					        func_id = '%s_%s_%d' % (player_type, player_id, slen)
 | 
				
			||||||
        assert os.path.basename(func_id) == func_id
 | 
					        assert os.path.basename(func_id) == func_id
 | 
				
			||||||
        cache_dir = self.downloader.params.get('cachedir',
 | 
					        cache_dir = self._downloader.params.get('cachedir',
 | 
				
			||||||
                                               u'~/.youtube-dl/cache')
 | 
					                                                u'~/.youtube-dl/cache')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if cache_dir is not False:
 | 
					        if cache_dir != u'NONE':
 | 
				
			||||||
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
 | 
					            cache_fn = os.path.join(os.path.expanduser(cache_dir),
 | 
				
			||||||
                                    u'youtube-sigfuncs',
 | 
					                                    u'youtube-sigfuncs',
 | 
				
			||||||
                                    func_id + '.json')
 | 
					                                    func_id + '.json')
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
                with io.open(cache_fn, '', encoding='utf-8') as cachef:
 | 
					                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
 | 
				
			||||||
                    cache_spec = json.load(cachef)
 | 
					                    cache_spec = json.load(cachef)
 | 
				
			||||||
                return lambda s: u''.join(s[i] for i in cache_spec)
 | 
					                return lambda s: u''.join(s[i] for i in cache_spec)
 | 
				
			||||||
            except OSError:
 | 
					            except IOError:
 | 
				
			||||||
                pass  # No cache available
 | 
					                pass  # No cache available
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if player_type == 'js':
 | 
					        if player_type == 'js':
 | 
				
			||||||
@@ -464,13 +466,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 | 
				
			|||||||
            assert False, 'Invalid player type %r' % player_type
 | 
					            assert False, 'Invalid player type %r' % player_type
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if cache_dir is not False:
 | 
					        if cache_dir is not False:
 | 
				
			||||||
            cache_res = res(map(compat_chr, range(slen)))
 | 
					            try:
 | 
				
			||||||
            cache_spec = [ord(c) for c in cache_res]
 | 
					                cache_res = res(map(compat_chr, range(slen)))
 | 
				
			||||||
            shutil.makedirs(os.path.dirname(cache_fn))
 | 
					                cache_spec = [ord(c) for c in cache_res]
 | 
				
			||||||
            write_json_file(cache_spec, cache_fn)
 | 
					                try:
 | 
				
			||||||
 | 
					                    os.makedirs(os.path.dirname(cache_fn))
 | 
				
			||||||
 | 
					                except OSError as ose:
 | 
				
			||||||
 | 
					                    if ose.errno != errno.EEXIST:
 | 
				
			||||||
 | 
					                        raise
 | 
				
			||||||
 | 
					                write_json_file(cache_spec, cache_fn)
 | 
				
			||||||
 | 
					            except Exception as e:
 | 
				
			||||||
 | 
					                tb = traceback.format_exc()
 | 
				
			||||||
 | 
					                self._downloader.report_warning(
 | 
				
			||||||
 | 
					                    u'Writing cache to %r failed: %s' % (cache_fn, tb))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return res
 | 
					        return res
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _print_sig_code(self, func, slen):
 | 
				
			||||||
 | 
					        def gen_sig_code(idxs):
 | 
				
			||||||
 | 
					            def _genslice(start, end, step):
 | 
				
			||||||
 | 
					                starts = u'' if start == 0 else str(start)
 | 
				
			||||||
 | 
					                ends = u':%d' % (end+step)
 | 
				
			||||||
 | 
					                steps = u'' if step == 1 else (':%d' % step)
 | 
				
			||||||
 | 
					                return u's[%s%s%s]' % (starts, ends, steps)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            step = None
 | 
				
			||||||
 | 
					            for i, prev in zip(idxs[1:], idxs[:-1]):
 | 
				
			||||||
 | 
					                if step is not None:
 | 
				
			||||||
 | 
					                    if i - prev == step:
 | 
				
			||||||
 | 
					                        continue
 | 
				
			||||||
 | 
					                    yield _genslice(start, prev, step)
 | 
				
			||||||
 | 
					                    step = None
 | 
				
			||||||
 | 
					                    continue
 | 
				
			||||||
 | 
					                if i - prev in [-1, 1]:
 | 
				
			||||||
 | 
					                    step = i - prev
 | 
				
			||||||
 | 
					                    start = prev
 | 
				
			||||||
 | 
					                    continue
 | 
				
			||||||
 | 
					                else:
 | 
				
			||||||
 | 
					                    yield u's[%d]' % prev
 | 
				
			||||||
 | 
					            if step is None:
 | 
				
			||||||
 | 
					                yield u's[%d]' % i
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                yield _genslice(start, i, step)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        cache_res = func(map(compat_chr, range(slen)))
 | 
				
			||||||
 | 
					        cache_spec = [ord(c) for c in cache_res]
 | 
				
			||||||
 | 
					        expr_code = u' + '.join(gen_sig_code(cache_spec))
 | 
				
			||||||
 | 
					        code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
 | 
				
			||||||
 | 
					        self.to_screen(u'Extracted signature:\n' + code)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _parse_sig_js(self, jscode):
 | 
					    def _parse_sig_js(self, jscode):
 | 
				
			||||||
        funcname = self._search_regex(
 | 
					        funcname = self._search_regex(
 | 
				
			||||||
            r'signature=([a-zA-Z]+)', jscode,
 | 
					            r'signature=([a-zA-Z]+)', jscode,
 | 
				
			||||||
@@ -1007,7 +1051,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 | 
				
			|||||||
                        video_id, player_url, len(s)
 | 
					                        video_id, player_url, len(s)
 | 
				
			||||||
                    )
 | 
					                    )
 | 
				
			||||||
                    self._player_cache[player_url] = func
 | 
					                    self._player_cache[player_url] = func
 | 
				
			||||||
                return self._player_cache[player_url](s)
 | 
					                func = self._player_cache[player_url]
 | 
				
			||||||
 | 
					                if self._downloader.params.get('youtube_print_sig_code'):
 | 
				
			||||||
 | 
					                    self._print_sig_code(func, len(s))
 | 
				
			||||||
 | 
					                return func(s)
 | 
				
			||||||
            except Exception as e:
 | 
					            except Exception as e:
 | 
				
			||||||
                tb = traceback.format_exc()
 | 
					                tb = traceback.format_exc()
 | 
				
			||||||
                self._downloader.report_warning(
 | 
					                self._downloader.report_warning(
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user