Compare commits
	
		
			91 Commits
		
	
	
		
			2013.10.23
			...
			2013.11.02
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | aa2484e390 | ||
|  | 8eddf3e91d | ||
|  | 60d142aa8d | ||
|  | 66cf3ac342 | ||
|  | 5f1ea943ab | ||
|  | 0ef7ad5cd4 | ||
|  | 9f1109a564 | ||
|  | 33b1d9595d | ||
|  | 7193498811 | ||
|  | 72321ead7b | ||
|  | b5d0d817bc | ||
|  | 94badb2599 | ||
|  | b9a836515f | ||
|  | 21c924f406 | ||
|  | e54fd4b23b | ||
|  | 57dd9a8f2f | ||
|  | 912cbf5d4e | ||
|  | 43d7895ea0 | ||
|  | f7ff55aa78 | ||
|  | 795f28f871 | ||
|  | f6cc16f5d8 | ||
|  | 321a01f971 | ||
|  | 646e17a53d | ||
|  | dd508b7c4f | ||
|  | 2563bcc85c | ||
|  | 702665c085 | ||
|  | 369a759acc | ||
|  | 79b3f61228 | ||
|  | 216d71d001 | ||
|  | 78a3a9f89e | ||
|  | a7685f3bf4 | ||
|  | f088ea5486 | ||
|  | 1003d108d5 | ||
|  | 8abeeb9449 | ||
|  | c1002e96e9 | ||
|  | 77d0a82fef | ||
|  | ebc14f251c | ||
|  | d41e6efc85 | ||
|  | 8ffa13e03e | ||
|  | db477d3a37 | ||
|  | 750e9833b8 | ||
|  | 82f0ac657c | ||
|  | eb6a2277a2 | ||
|  | f8778fb0fa | ||
|  | e2f9de207c | ||
|  | a93cc0d943 | ||
|  | 7d8c2e07f2 | ||
|  | efb4c36b18 | ||
|  | 29526d0d2b | ||
|  | 198e370f23 | ||
|  | c19f7764a5 | ||
|  | bc63d9d329 | ||
|  | aa929c37d5 | ||
|  | af4d506eb3 | ||
|  | 5da0549581 | ||
|  | 749a4fd2fd | ||
|  | 6f71ef580c | ||
|  | 67874aeffa | ||
|  | 3e6a330d38 | ||
|  | aee5e18c8f | ||
|  | 5b11143d05 | ||
|  | 7b2212e954 | ||
|  | 71865091ab | ||
|  | 125cfd78e8 | ||
|  | 8cb57d9b91 | ||
|  | 14e10b2b6e | ||
|  | 6e76104d66 | ||
|  | 1d45a23b74 | ||
|  | 7df286540f | ||
|  | 5d0c97541a | ||
|  | 49a25557b0 | ||
|  | b5936c0059 | ||
|  | 600cc1a4f0 | ||
|  | ea32fbacc8 | ||
|  | 00fe14fc75 | ||
|  | fcc28edb2f | ||
|  | fac6be2dd5 | ||
|  | 1cf64ee468 | ||
|  | cdec0190c4 | ||
|  | 2450bcb28b | ||
|  | 3126050c0f | ||
|  | 93b22c7828 | ||
|  | 0a89b2852e | ||
|  | 55b3e45bba | ||
|  | 365bcf6d97 | ||
|  | 71907db3ba | ||
|  | 6803655ced | ||
|  | b0505eb611 | ||
|  | f6f1fc9286 | ||
|  | 16f36a6fc9 | ||
|  | f44415360e | 
							
								
								
									
										23
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										23
									
								
								README.md
									
									
									
									
									
								
							| @@ -79,16 +79,17 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                different, %(autonumber)s to get an automatically | ||||
|                                incremented number, %(ext)s for the filename | ||||
|                                extension, %(format)s for the format description | ||||
|                                (like "22 - 1280x720" or "HD")%(upload_date)s for | ||||
|                                the upload date (YYYYMMDD), %(extractor)s for the | ||||
|                                provider (youtube, metacafe, etc), %(id)s for the | ||||
|                                video id , %(playlist)s for the playlist the | ||||
|                                video is in, %(playlist_index)s for the position | ||||
|                                in the playlist and %% for a literal percent. Use | ||||
|                                - to output to stdout. Can also be used to | ||||
|                                download to a different directory, for example | ||||
|                                with -o '/my/downloads/%(uploader)s/%(title)s-%(i | ||||
|                                d)s.%(ext)s' . | ||||
|                                (like "22 - 1280x720" or "HD"),%(format_id)s for | ||||
|                                the unique id of the format (like Youtube's | ||||
|                                itags: "137"),%(upload_date)s for the upload date | ||||
|                                (YYYYMMDD), %(extractor)s for the provider | ||||
|                                (youtube, metacafe, etc), %(id)s for the video id | ||||
|                                , %(playlist)s for the playlist the video is in, | ||||
|                                %(playlist_index)s for the position in the | ||||
|                                playlist and %% for a literal percent. Use - to | ||||
|                                output to stdout. Can also be used to download to | ||||
|                                a different directory, for example with -o '/my/d | ||||
|                                ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' . | ||||
|     --autonumber-size NUMBER   Specifies the number of digits in %(autonumber)s | ||||
|                                when it is present in output filename template or | ||||
|                                --autonumber option is given | ||||
| @@ -126,6 +127,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     -v, --verbose              print various debugging information | ||||
|     --dump-intermediate-pages  print downloaded pages to debug problems(very | ||||
|                                verbose) | ||||
|     --write-pages              Write downloaded pages to files in the current | ||||
|                                directory | ||||
|  | ||||
| ## Video Format Options: | ||||
|     -f, --format FORMAT        video format code, specifiy the order of | ||||
|   | ||||
							
								
								
									
										39
									
								
								devscripts/check-porn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								devscripts/check-porn.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| """ | ||||
| This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check | ||||
| if we are not 'age_limit' tagging some porn site | ||||
| """ | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| import sys | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import get_testcases | ||||
| from youtube_dl.utils import compat_urllib_request | ||||
|  | ||||
| for test in get_testcases(): | ||||
|     try: | ||||
|         webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read() | ||||
|     except: | ||||
|         print('\nFail: {0}'.format(test['name'])) | ||||
|         continue | ||||
|  | ||||
|     webpage = webpage.decode('utf8', 'replace') | ||||
|  | ||||
|     if 'porn' in webpage.lower() and ('info_dict' not in test | ||||
|                                       or 'age_limit' not in test['info_dict'] | ||||
|                                       or test['info_dict']['age_limit'] != 18): | ||||
|         print('\nPotential missing age_limit check: {0}'.format(test['name'])) | ||||
|  | ||||
|     elif 'porn' not in webpage.lower() and ('info_dict' in test and | ||||
|                                             'age_limit' in test['info_dict'] and | ||||
|                                             test['info_dict']['age_limit'] == 18): | ||||
|         print('\nPotential false negative: {0}'.format(test['name'])) | ||||
|  | ||||
|     else: | ||||
|         sys.stdout.write('.') | ||||
|     sys.stdout.flush() | ||||
|  | ||||
| print() | ||||
							
								
								
									
										7
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								setup.py
									
									
									
									
									
								
							| @@ -8,8 +8,10 @@ import sys | ||||
|  | ||||
| try: | ||||
|     from setuptools import setup | ||||
|     setuptools_available = True | ||||
| except ImportError: | ||||
|     from distutils.core import setup | ||||
|     setuptools_available = False | ||||
|  | ||||
| try: | ||||
|     # This will create an exe that needs Microsoft Visual C++ 2008 | ||||
| @@ -43,13 +45,16 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': | ||||
|     params = py2exe_params | ||||
| else: | ||||
|     params = { | ||||
|         'scripts': ['bin/youtube-dl'], | ||||
|         'data_files': [  # Installing system-wide would require sudo... | ||||
|             ('etc/bash_completion.d', ['youtube-dl.bash-completion']), | ||||
|             ('share/doc/youtube_dl', ['README.txt']), | ||||
|             ('share/man/man1/', ['youtube-dl.1']) | ||||
|         ] | ||||
|     } | ||||
|     if setuptools_available: | ||||
|         params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']} | ||||
|     else: | ||||
|         params['scripts'] = ['bin/youtube-dl'] | ||||
|  | ||||
| # Get the version from youtube_dl/version.py without importing the package | ||||
| exec(compile(open('youtube_dl/version.py').read(), | ||||
|   | ||||
| @@ -5,9 +5,11 @@ import json | ||||
| import os.path | ||||
| import re | ||||
| import types | ||||
| import sys | ||||
|  | ||||
| import youtube_dl.extractor | ||||
| from youtube_dl import YoutubeDL | ||||
| from youtube_dl.utils import preferredencoding | ||||
|  | ||||
|  | ||||
| def global_setup(): | ||||
| @@ -33,6 +35,21 @@ def try_rm(filename): | ||||
|             raise | ||||
|  | ||||
|  | ||||
| def report_warning(message): | ||||
|     ''' | ||||
|     Print the message to stderr, it will be prefixed with 'WARNING:' | ||||
|     If stderr is a tty file the 'WARNING:' will be colored | ||||
|     ''' | ||||
|     if sys.stderr.isatty() and os.name != 'nt': | ||||
|         _msg_header = u'\033[0;33mWARNING:\033[0m' | ||||
|     else: | ||||
|         _msg_header = u'WARNING:' | ||||
|     output = u'%s %s\n' % (_msg_header, message) | ||||
|     if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3: | ||||
|         output = output.encode(preferredencoding()) | ||||
|     sys.stderr.write(output) | ||||
|  | ||||
|  | ||||
| class FakeYDL(YoutubeDL): | ||||
|     def __init__(self, override=None): | ||||
|         # Different instances of the downloader can't share the same dictionary | ||||
|   | ||||
| @@ -62,10 +62,10 @@ class TestFormatSelection(unittest.TestCase): | ||||
|  | ||||
|     def test_format_limit(self): | ||||
|         formats = [ | ||||
|             {u'format_id': u'meh'}, | ||||
|             {u'format_id': u'good'}, | ||||
|             {u'format_id': u'great'}, | ||||
|             {u'format_id': u'excellent'}, | ||||
|             {u'format_id': u'meh', u'url': u'http://example.com/meh'}, | ||||
|             {u'format_id': u'good', u'url': u'http://example.com/good'}, | ||||
|             {u'format_id': u'great', u'url': u'http://example.com/great'}, | ||||
|             {u'format_id': u'excellent', u'url': u'http://example.com/exc'}, | ||||
|         ] | ||||
|         info_dict = { | ||||
|             u'formats': formats, u'extractor': u'test', 'id': 'testvid'} | ||||
|   | ||||
| @@ -22,7 +22,7 @@ class TestDailymotionSubtitles(unittest.TestCase): | ||||
|         return info_dict | ||||
|     def getSubtitles(self): | ||||
|         info_dict = self.getInfoDict() | ||||
|         return info_dict[0]['subtitles'] | ||||
|         return info_dict['subtitles'] | ||||
|     def test_no_writesubtitles(self): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|   | ||||
| @@ -6,7 +6,14 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import get_params, get_testcases, global_setup, try_rm, md5 | ||||
| from test.helper import ( | ||||
|     get_params, | ||||
|     get_testcases, | ||||
|     global_setup, | ||||
|     try_rm, | ||||
|     md5, | ||||
|     report_warning | ||||
| ) | ||||
| global_setup() | ||||
|  | ||||
|  | ||||
| @@ -19,6 +26,7 @@ import youtube_dl.YoutubeDL | ||||
| from youtube_dl.utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_HTTPError, | ||||
|     DownloadError, | ||||
|     ExtractorError, | ||||
|     UnavailableVideoError, | ||||
| @@ -60,9 +68,12 @@ def generator(test_case): | ||||
|         if not ie._WORKING: | ||||
|             print_skipping('IE marked as not _WORKING') | ||||
|             return | ||||
|         if 'playlist' not in test_case and not test_case['file']: | ||||
|             print_skipping('No output file specified') | ||||
|             return | ||||
|         if 'playlist' not in test_case: | ||||
|             info_dict = test_case.get('info_dict', {}) | ||||
|             if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')): | ||||
|                 print_skipping('The output file cannot be know, the "file" ' | ||||
|                     'key is missing or the info_dict is incomplete') | ||||
|                 return | ||||
|         if 'skip' in test_case: | ||||
|             print_skipping(test_case['skip']) | ||||
|             return | ||||
| @@ -77,35 +88,47 @@ def generator(test_case): | ||||
|                 finished_hook_called.add(status['filename']) | ||||
|         ydl.fd.add_progress_hook(_hook) | ||||
|  | ||||
|         def get_tc_filename(tc): | ||||
|             return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) | ||||
|  | ||||
|         test_cases = test_case.get('playlist', [test_case]) | ||||
|         for tc in test_cases: | ||||
|             try_rm(tc['file']) | ||||
|             try_rm(tc['file'] + '.part') | ||||
|             try_rm(tc['file'] + '.info.json') | ||||
|         def try_rm_tcs_files(): | ||||
|             for tc in test_cases: | ||||
|                 tc_filename = get_tc_filename(tc) | ||||
|                 try_rm(tc_filename) | ||||
|                 try_rm(tc_filename + '.part') | ||||
|                 try_rm(tc_filename + '.info.json') | ||||
|         try_rm_tcs_files() | ||||
|         try: | ||||
|             for retry in range(1, RETRIES + 1): | ||||
|             try_num = 1 | ||||
|             while True: | ||||
|                 try: | ||||
|                     ydl.download([test_case['url']]) | ||||
|                 except (DownloadError, ExtractorError) as err: | ||||
|                     if retry == RETRIES: raise | ||||
|  | ||||
|                     # Check if the exception is not a network related one | ||||
|                     if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): | ||||
|                     if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): | ||||
|                         raise | ||||
|  | ||||
|                     print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry)) | ||||
|                     if try_num == RETRIES: | ||||
|                         report_warning(u'Failed due to network errors, skipping...') | ||||
|                         return | ||||
|  | ||||
|                     print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num)) | ||||
|  | ||||
|                     try_num += 1 | ||||
|                 else: | ||||
|                     break | ||||
|  | ||||
|             for tc in test_cases: | ||||
|                 tc_filename = get_tc_filename(tc) | ||||
|                 if not test_case.get('params', {}).get('skip_download', False): | ||||
|                     self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file']) | ||||
|                     self.assertTrue(tc['file'] in finished_hook_called) | ||||
|                 self.assertTrue(os.path.exists(tc['file'] + '.info.json')) | ||||
|                     self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) | ||||
|                     self.assertTrue(tc_filename in finished_hook_called) | ||||
|                 self.assertTrue(os.path.exists(tc_filename + '.info.json')) | ||||
|                 if 'md5' in tc: | ||||
|                     md5_for_file = _file_md5(tc['file']) | ||||
|                     md5_for_file = _file_md5(tc_filename) | ||||
|                     self.assertEqual(md5_for_file, tc['md5']) | ||||
|                 with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof: | ||||
|                 with io.open(tc_filename + '.info.json', encoding='utf-8') as infof: | ||||
|                     info_dict = json.load(infof) | ||||
|                 for (info_field, expected) in tc.get('info_dict', {}).items(): | ||||
|                     if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||
| @@ -126,10 +149,7 @@ def generator(test_case): | ||||
|                 for key in ('id', 'url', 'title', 'ext'): | ||||
|                     self.assertTrue(key in info_dict.keys() and info_dict[key]) | ||||
|         finally: | ||||
|             for tc in test_cases: | ||||
|                 try_rm(tc['file']) | ||||
|                 try_rm(tc['file'] + '.part') | ||||
|                 try_rm(tc['file'] + '.info.json') | ||||
|             try_rm_tcs_files() | ||||
|  | ||||
|     return test_template | ||||
|  | ||||
|   | ||||
| @@ -272,7 +272,7 @@ class YoutubeDL(object): | ||||
|                 autonumber_size = 5 | ||||
|             autonumber_templ = u'%0' + str(autonumber_size) + u'd' | ||||
|             template_dict['autonumber'] = autonumber_templ % self._num_downloads | ||||
|             if template_dict['playlist_index'] is not None: | ||||
|             if template_dict.get('playlist_index') is not None: | ||||
|                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] | ||||
|  | ||||
|             sanitize = lambda k, v: sanitize_filename( | ||||
| @@ -462,7 +462,7 @@ class YoutubeDL(object): | ||||
|             info_dict['playlist_index'] = None | ||||
|  | ||||
|         # This extractors handle format selection themselves | ||||
|         if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']: | ||||
|         if info_dict['extractor'] in [u'youtube', u'Youku']: | ||||
|             if download: | ||||
|                 self.process_info(info_dict) | ||||
|             return info_dict | ||||
| @@ -482,8 +482,11 @@ class YoutubeDL(object): | ||||
|                 format['format'] = u'{id} - {res}{note}'.format( | ||||
|                     id=format['format_id'], | ||||
|                     res=self.format_resolution(format), | ||||
|                     note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '', | ||||
|                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', | ||||
|                 ) | ||||
|             # Automatically determine file extension if missing | ||||
|             if 'ext' not in format: | ||||
|                 format['ext'] = determine_ext(format['url']) | ||||
|  | ||||
|         if self.params.get('listformats', None): | ||||
|             self.list_formats(info_dict) | ||||
| @@ -521,7 +524,8 @@ class YoutubeDL(object): | ||||
|                     formats_to_download = [selected_format] | ||||
|                     break | ||||
|         if not formats_to_download: | ||||
|             raise ExtractorError(u'requested format not available') | ||||
|             raise ExtractorError(u'requested format not available', | ||||
|                                  expected=True) | ||||
|  | ||||
|         if download: | ||||
|             if len(formats_to_download) > 1: | ||||
| @@ -571,9 +575,9 @@ class YoutubeDL(object): | ||||
|         if self.params.get('forceurl', False): | ||||
|             # For RTMP URLs, also include the playpath | ||||
|             compat_print(info_dict['url'] + info_dict.get('play_path', u'')) | ||||
|         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: | ||||
|         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: | ||||
|             compat_print(info_dict['thumbnail']) | ||||
|         if self.params.get('forcedescription', False) and 'description' in info_dict: | ||||
|         if self.params.get('forcedescription', False) and info_dict.get('description') is not None: | ||||
|             compat_print(info_dict['description']) | ||||
|         if self.params.get('forcefilename', False) and filename is not None: | ||||
|             compat_print(filename) | ||||
| @@ -754,30 +758,36 @@ class YoutubeDL(object): | ||||
|             archive_file.write(vid_id + u'\n') | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_resolution(format): | ||||
|     def format_resolution(format, default='unknown'): | ||||
|         if format.get('_resolution') is not None: | ||||
|             return format['_resolution'] | ||||
|         if format.get('height') is not None: | ||||
|             if format.get('width') is not None: | ||||
|                 res = u'%sx%s' % (format['width'], format['height']) | ||||
|             else: | ||||
|                 res = u'%sp' % format['height'] | ||||
|         else: | ||||
|             res = '???' | ||||
|             res = default | ||||
|         return res | ||||
|  | ||||
|     def list_formats(self, info_dict): | ||||
|         formats_s = [] | ||||
|         for format in info_dict.get('formats', [info_dict]): | ||||
|             formats_s.append(u'%-15s: %-5s     %-15s[%s]' % ( | ||||
|         def line(format): | ||||
|             return (u'%-15s%-10s%-12s%s' % ( | ||||
|                 format['format_id'], | ||||
|                 format['ext'], | ||||
|                 format.get('format_note') or '-', | ||||
|                 self.format_resolution(format), | ||||
|                 format.get('format_note', ''), | ||||
|                 ) | ||||
|             ) | ||||
|         if len(formats_s) != 1: | ||||
|             formats_s[0] += ' (worst)' | ||||
|             formats_s[-1] += ' (best)' | ||||
|         formats_s = "\n".join(formats_s) | ||||
|         self.to_screen(u'[info] Available formats for %s:\n' | ||||
|             u'format code    extension   note           resolution\n%s' % ( | ||||
|                 info_dict['id'], formats_s)) | ||||
|  | ||||
|         formats = info_dict.get('formats', [info_dict]) | ||||
|         formats_s = list(map(line, formats)) | ||||
|         if len(formats) > 1: | ||||
|             formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)' | ||||
|             formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)' | ||||
|  | ||||
|         header_line = line({ | ||||
|             'format_id': u'format code', 'ext': u'extension', | ||||
|             '_resolution': u'resolution', 'format_note': u'note'}) | ||||
|         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' % | ||||
|                        (info_dict['id'], header_line, u"\n".join(formats_s))) | ||||
|   | ||||
| @@ -133,7 +133,7 @@ def parseOpts(overrideArguments=None): | ||||
|  | ||||
|     def _hide_login_info(opts): | ||||
|         opts = list(opts) | ||||
|         for private_opt in ['-p', '--password', '-u', '--username']: | ||||
|         for private_opt in ['-p', '--password', '-u', '--username', '--video-password']: | ||||
|             try: | ||||
|                 i = opts.index(private_opt) | ||||
|                 opts[i+1] = '<PRIVATE>' | ||||
| @@ -316,6 +316,9 @@ def parseOpts(overrideArguments=None): | ||||
|     verbosity.add_option('--dump-intermediate-pages', | ||||
|             action='store_true', dest='dump_intermediate_pages', default=False, | ||||
|             help='print downloaded pages to debug problems(very verbose)') | ||||
|     verbosity.add_option('--write-pages', | ||||
|             action='store_true', dest='write_pages', default=False, | ||||
|             help='Write downloaded pages to files in the current directory') | ||||
|     verbosity.add_option('--youtube-print-sig-code', | ||||
|             action='store_true', dest='youtube_print_sig_code', default=False, | ||||
|             help=optparse.SUPPRESS_HELP) | ||||
| @@ -336,7 +339,8 @@ def parseOpts(overrideArguments=None): | ||||
|                   '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' | ||||
|                   '%(autonumber)s to get an automatically incremented number, ' | ||||
|                   '%(ext)s for the filename extension, ' | ||||
|                   '%(format)s for the format description (like "22 - 1280x720" or "HD")' | ||||
|                   '%(format)s for the format description (like "22 - 1280x720" or "HD"),' | ||||
|                   '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"),' | ||||
|                   '%(upload_date)s for the upload date (YYYYMMDD), ' | ||||
|                   '%(extractor)s for the provider (youtube, metacafe, etc), ' | ||||
|                   '%(id)s for the video id , %(playlist)s for the playlist the video is in, ' | ||||
| @@ -651,6 +655,7 @@ def _real_main(argv=None): | ||||
|         'prefer_free_formats': opts.prefer_free_formats, | ||||
|         'verbose': opts.verbose, | ||||
|         'dump_intermediate_pages': opts.dump_intermediate_pages, | ||||
|         'write_pages': opts.write_pages, | ||||
|         'test': opts.test, | ||||
|         'keepvideo': opts.keepvideo, | ||||
|         'min_filesize': opts.min_filesize, | ||||
|   | ||||
| @@ -72,6 +72,7 @@ from .jeuxvideo import JeuxVideoIE | ||||
| from .jukebox import JukeboxIE | ||||
| from .justintv import JustinTVIE | ||||
| from .kankan import KankanIE | ||||
| from .keezmovies import KeezMoviesIE | ||||
| from .kickstarter import KickStarterIE | ||||
| from .keek import KeekIE | ||||
| from .liveleak import LiveLeakIE | ||||
| @@ -82,6 +83,7 @@ from .mit import TechTVMITIE, MITIE | ||||
| from .mixcloud import MixcloudIE | ||||
| from .mtv import MTVIE | ||||
| from .muzu import MuzuTVIE | ||||
| from .myspace import MySpaceIE | ||||
| from .myspass import MySpassIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .naver import NaverIE | ||||
| @@ -94,6 +96,7 @@ from .ooyala import OoyalaIE | ||||
| from .orf import ORFIE | ||||
| from .pbs import PBSIE | ||||
| from .photobucket import PhotobucketIE | ||||
| from .pornhub import PornHubIE | ||||
| from .pornotube import PornotubeIE | ||||
| from .rbmaradio import RBMARadioIE | ||||
| from .redtube import RedTubeIE | ||||
| @@ -109,6 +112,7 @@ from .slideshare import SlideshareIE | ||||
| from .sohu import SohuIE | ||||
| from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE | ||||
| from .southparkstudios import SouthParkStudiosIE | ||||
| from .spankwire import SpankwireIE | ||||
| from .spiegel import SpiegelIE | ||||
| from .stanfordoc import StanfordOpenClassroomIE | ||||
| from .statigram import StatigramIE | ||||
| @@ -121,6 +125,7 @@ from .tf1 import TF1IE | ||||
| from .thisav import ThisAVIE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| from .trilulilu import TriluliluIE | ||||
| from .tube8 import Tube8IE | ||||
| from .tudou import TudouIE | ||||
| from .tumblr import TumblrIE | ||||
| from .tutv import TutvIE | ||||
| @@ -137,6 +142,7 @@ from .videofyme import VideofyMeIE | ||||
| from .videopremium import VideoPremiumIE | ||||
| from .vimeo import VimeoIE, VimeoChannelIE | ||||
| from .vine import VineIE | ||||
| from .vk import VKIE | ||||
| from .wat import WatIE | ||||
| from .websurg import WeBSurgIE | ||||
| from .weibo import WeiboIE | ||||
|   | ||||
| @@ -17,8 +17,8 @@ class AddAnimeIE(InfoExtractor): | ||||
|     IE_NAME = u'AddAnime' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', | ||||
|         u'file': u'24MR3YO5SAS9.flv', | ||||
|         u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1', | ||||
|         u'file': u'24MR3YO5SAS9.mp4', | ||||
|         u'md5': u'72954ea10bc979ab5e2eb288b21425a0', | ||||
|         u'info_dict': { | ||||
|             u"description": u"One Piece 606", | ||||
|             u"title": u"One Piece 606" | ||||
| @@ -31,7 +31,8 @@ class AddAnimeIE(InfoExtractor): | ||||
|             video_id = mobj.group('video_id') | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|         except ExtractorError as ee: | ||||
|             if not isinstance(ee.cause, compat_HTTPError): | ||||
|             if not isinstance(ee.cause, compat_HTTPError) or \ | ||||
|                ee.cause.code != 503: | ||||
|                 raise | ||||
|  | ||||
|             redir_webpage = ee.cause.read().decode('utf-8') | ||||
| @@ -60,16 +61,26 @@ class AddAnimeIE(InfoExtractor): | ||||
|                 note=u'Confirming after redirect') | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._search_regex(r"var normal_video_file = '(.*?)';", | ||||
|                                        webpage, u'video file URL') | ||||
|         formats = [] | ||||
|         for format_id in ('normal', 'hq'): | ||||
|             rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) | ||||
|             video_url = self._search_regex(rex, webpage, u'video file URLx', | ||||
|                                            fatal=False) | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'url': video_url, | ||||
|             }) | ||||
|         if not formats: | ||||
|             raise ExtractorError(u'Cannot find any video format!') | ||||
|         video_title = self._og_search_title(webpage) | ||||
|         video_description = self._og_search_description(webpage) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id':  video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'flv', | ||||
|             'formats': formats, | ||||
|             'title': video_title, | ||||
|             'description': video_description | ||||
|         } | ||||
|   | ||||
| @@ -158,7 +158,9 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|             'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), | ||||
|         } | ||||
|  | ||||
|         formats = player_info['VSR'].values() | ||||
|         all_formats = player_info['VSR'].values() | ||||
|         # Some formats use the m3u8 protocol | ||||
|         all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) | ||||
|         def _match_lang(f): | ||||
|             if f.get('versionCode') is None: | ||||
|                 return True | ||||
| @@ -170,11 +172,16 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] | ||||
|             return any(re.match(r, f['versionCode']) for r in regexes) | ||||
|         # Some formats may not be in the same language as the url | ||||
|         formats = filter(_match_lang, formats) | ||||
|         # Some formats use the m3u8 protocol | ||||
|         formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats) | ||||
|         # We order the formats by quality | ||||
|         formats = filter(_match_lang, all_formats) | ||||
|         formats = list(formats) # in python3 filter returns an iterator | ||||
|         if not formats: | ||||
|             # Some videos are only available in the 'Originalversion' | ||||
|             # they aren't tagged as being in French or German | ||||
|             if all(f['versionCode'] == 'VO' for f in all_formats): | ||||
|                 formats = all_formats | ||||
|             else: | ||||
|                 raise ExtractorError(u'The formats list is empty') | ||||
|         # We order the formats by quality | ||||
|         if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: | ||||
|             sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) | ||||
|         else: | ||||
|   | ||||
| @@ -55,30 +55,30 @@ class CinemassacreIE(InfoExtractor): | ||||
|             video_description = None | ||||
|  | ||||
|         playerdata = self._download_webpage(playerdata_url, video_id) | ||||
|         base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://.*?)/(?:vod|Cinemassacre)\'', | ||||
|             playerdata, u'base_url') | ||||
|         base_url += '/Cinemassacre/' | ||||
|         # Important: The file names in playerdata are not used by the player and even wrong for some videos | ||||
|         sd_file = 'Cinemassacre-%s_high.mp4' % video_id | ||||
|         hd_file = 'Cinemassacre-%s.mp4' % video_id | ||||
|         video_thumbnail = 'http://image.screenwavemedia.com/Cinemassacre/Cinemassacre-%s_thumb_640x360.jpg' % video_id | ||||
|         url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url') | ||||
|  | ||||
|         sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file') | ||||
|         hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file') | ||||
|         video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': base_url + sd_file, | ||||
|                 'url': url, | ||||
|                 'play_path': 'mp4:' + sd_file, | ||||
|                 'ext': 'flv', | ||||
|                 'format': 'sd', | ||||
|                 'format_id': 'sd', | ||||
|             }, | ||||
|             { | ||||
|                 'url': base_url + hd_file, | ||||
|                 'url': url, | ||||
|                 'play_path': 'mp4:' + hd_file, | ||||
|                 'ext': 'flv', | ||||
|                 'format': 'hd', | ||||
|                 'format_id': 'hd', | ||||
|             }, | ||||
|         ] | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'formats': formats, | ||||
| @@ -86,6 +86,3 @@ class CinemassacreIE(InfoExtractor): | ||||
|             'upload_date': video_date, | ||||
|             'thumbnail': video_thumbnail, | ||||
|         } | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|         return info | ||||
|   | ||||
| @@ -14,6 +14,8 @@ from ..utils import ( | ||||
|     clean_html, | ||||
|     compiled_regex_type, | ||||
|     ExtractorError, | ||||
|     RegexNotFoundError, | ||||
|     sanitize_filename, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
| @@ -61,7 +63,7 @@ class InfoExtractor(object): | ||||
|                     * ext       Will be calculated from url if missing | ||||
|                     * format    A human-readable description of the format | ||||
|                                 ("mp4 container with h264/opus"). | ||||
|                                 Calculated from the format_id, width, height  | ||||
|                                 Calculated from the format_id, width, height. | ||||
|                                 and format_note fields if missing. | ||||
|                     * format_id A short description of the format | ||||
|                                 ("mp4_h264_opus" or "19") | ||||
| @@ -181,6 +183,17 @@ class InfoExtractor(object): | ||||
|             self.to_screen(u'Dumping request to ' + url) | ||||
|             dump = base64.b64encode(webpage_bytes).decode('ascii') | ||||
|             self._downloader.to_screen(dump) | ||||
|         if self._downloader.params.get('write_pages', False): | ||||
|             try: | ||||
|                 url = url_or_request.get_full_url() | ||||
|             except AttributeError: | ||||
|                 url = url_or_request | ||||
|             raw_filename = ('%s_%s.dump' % (video_id, url)) | ||||
|             filename = sanitize_filename(raw_filename, restricted=True) | ||||
|             self.to_screen(u'Saving request to ' + filename) | ||||
|             with open(filename, 'wb') as outf: | ||||
|                 outf.write(webpage_bytes) | ||||
|  | ||||
|         content = webpage_bytes.decode(encoding, 'replace') | ||||
|         return (content, urlh) | ||||
|  | ||||
| @@ -231,7 +244,7 @@ class InfoExtractor(object): | ||||
|         Perform a regex search on the given string, using a single or a list of | ||||
|         patterns returning the first matching group. | ||||
|         In case of failure return a default value or raise a WARNING or a | ||||
|         ExtractorError, depending on fatal, specifying the field name. | ||||
|         RegexNotFoundError, depending on fatal, specifying the field name. | ||||
|         """ | ||||
|         if isinstance(pattern, (str, compat_str, compiled_regex_type)): | ||||
|             mobj = re.search(pattern, string, flags) | ||||
| @@ -251,7 +264,7 @@ class InfoExtractor(object): | ||||
|         elif default is not None: | ||||
|             return default | ||||
|         elif fatal: | ||||
|             raise ExtractorError(u'Unable to extract %s' % _name) | ||||
|             raise RegexNotFoundError(u'Unable to extract %s' % _name) | ||||
|         else: | ||||
|             self._downloader.report_warning(u'unable to extract %s; ' | ||||
|                 u'please report this issue on http://yt-dl.org/bug' % _name) | ||||
| @@ -317,10 +330,10 @@ class InfoExtractor(object): | ||||
|     def _og_search_title(self, html, **kargs): | ||||
|         return self._og_search_property('title', html, **kargs) | ||||
|  | ||||
|     def _og_search_video_url(self, html, name='video url', **kargs): | ||||
|         return self._html_search_regex([self._og_regex('video:secure_url'), | ||||
|                                         self._og_regex('video')], | ||||
|                                        html, name, **kargs) | ||||
|     def _og_search_video_url(self, html, name='video url', secure=True, **kargs): | ||||
|         regexes = [self._og_regex('video')] | ||||
|         if secure: regexes.insert(0, self._og_regex('video:secure_url')) | ||||
|         return self._html_search_regex(regexes, html, name, **kargs) | ||||
|  | ||||
|     def _rta_search(self, html): | ||||
|         # See http://www.rtalabel.org/index.php?content=howtofaq#single | ||||
|   | ||||
| @@ -21,6 +21,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): | ||||
|         """Build a request with the family filter disabled""" | ||||
|         request = compat_urllib_request.Request(url) | ||||
|         request.add_header('Cookie', 'family_filter=off') | ||||
|         request.add_header('Cookie', 'ff=off') | ||||
|         return request | ||||
|  | ||||
| class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
| @@ -28,6 +29,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' | ||||
|     IE_NAME = u'dailymotion' | ||||
|  | ||||
|     _FORMATS = [ | ||||
|         (u'stream_h264_ld_url', u'ld'), | ||||
|         (u'stream_h264_url', u'standard'), | ||||
|         (u'stream_h264_hq_url', u'hq'), | ||||
|         (u'stream_h264_hd_url', u'hd'), | ||||
|         (u'stream_h264_hd1080_url', u'hd180'), | ||||
|     ] | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', | ||||
| @@ -52,6 +62,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             }, | ||||
|             u'skip': u'VEVO is only available in some countries', | ||||
|         }, | ||||
|         # age-restricted video | ||||
|         { | ||||
|             u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', | ||||
|             u'file': u'xyh2zz.mp4', | ||||
|             u'md5': u'0d667a7b9cebecc3c89ee93099c4159d', | ||||
|             u'info_dict': { | ||||
|                 u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', | ||||
|                 u'uploader': 'HotWaves1012', | ||||
|                 u'age_limit': 18, | ||||
|             } | ||||
|  | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -60,7 +82,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         video_id = mobj.group(1).split('_')[0].split('?')[0] | ||||
|  | ||||
|         video_extension = 'mp4' | ||||
|         url = 'http://www.dailymotion.com/video/%s' % video_id | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
| @@ -82,7 +103,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', | ||||
|                                              # Looking for official user | ||||
|                                              r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'], | ||||
|                                             webpage, 'video uploader') | ||||
|                                             webpage, 'video uploader', fatal=False) | ||||
|         age_limit = self._rta_search(webpage) | ||||
|  | ||||
|         video_upload_date = None | ||||
|         mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage) | ||||
| @@ -99,18 +121,24 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title'] | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|         # TODO: support choosing qualities | ||||
|  | ||||
|         for key in ['stream_h264_hd1080_url','stream_h264_hd_url', | ||||
|                     'stream_h264_hq_url','stream_h264_url', | ||||
|                     'stream_h264_ld_url']: | ||||
|             if info.get(key):#key in info and info[key]: | ||||
|                 max_quality = key | ||||
|                 self.to_screen(u'Using %s' % key) | ||||
|                 break | ||||
|         else: | ||||
|         formats = [] | ||||
|         for (key, format_id) in self._FORMATS: | ||||
|             video_url = info.get(key) | ||||
|             if video_url is not None: | ||||
|                 m_size = re.search(r'H264-(\d+)x(\d+)', video_url) | ||||
|                 if m_size is not None: | ||||
|                     width, height = m_size.group(1), m_size.group(2) | ||||
|                 else: | ||||
|                     width, height = None, None | ||||
|                 formats.append({ | ||||
|                     'url': video_url, | ||||
|                     'ext': 'mp4', | ||||
|                     'format_id': format_id, | ||||
|                     'width': width, | ||||
|                     'height': height, | ||||
|                 }) | ||||
|         if not formats: | ||||
|             raise ExtractorError(u'Unable to extract video URL') | ||||
|         video_url = info[max_quality] | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id) | ||||
| @@ -118,16 +146,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             self._list_available_subtitles(video_id) | ||||
|             return | ||||
|  | ||||
|         return [{ | ||||
|         return { | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'formats': formats, | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date':  video_upload_date, | ||||
|             'title':    self._og_search_title(webpage), | ||||
|             'ext':      video_extension, | ||||
|             'subtitles':    video_subtitles, | ||||
|             'thumbnail': info['thumbnail_url'] | ||||
|         }] | ||||
|             'thumbnail': info['thumbnail_url'], | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id): | ||||
|         try: | ||||
|   | ||||
| @@ -101,7 +101,7 @@ class EightTracksIE(InfoExtractor): | ||||
|         first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id) | ||||
|         next_url = first_url | ||||
|         res = [] | ||||
|         for i in itertools.count(): | ||||
|         for i in range(track_count): | ||||
|             api_json = self._download_webpage(next_url, playlist_id, | ||||
|                 note=u'Downloading song information %s/%s' % (str(i+1), track_count), | ||||
|                 errnote=u'Failed to download song information') | ||||
| @@ -116,7 +116,5 @@ class EightTracksIE(InfoExtractor): | ||||
|                 'ext': 'm4a', | ||||
|             } | ||||
|             res.append(info) | ||||
|             if api_data['set']['at_last_track']: | ||||
|                 break | ||||
|             next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id']) | ||||
|         return res | ||||
|   | ||||
| @@ -11,14 +11,14 @@ class ExfmIE(InfoExtractor): | ||||
|     _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u'url': u'http://ex.fm/song/1bgtzg', | ||||
|             u'file': u'95223130.mp3', | ||||
|             u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf', | ||||
|             u'url': u'http://ex.fm/song/eh359', | ||||
|             u'file': u'44216187.mp3', | ||||
|             u'md5': u'e45513df5631e6d760970b14cc0c11e7', | ||||
|             u'info_dict': { | ||||
|                 u"title": u"We Can't Stop - Miley Cyrus", | ||||
|                 u"uploader": u"Miley Cyrus", | ||||
|                 u'upload_date': u'20130603', | ||||
|                 u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC', | ||||
|                 u"title": u"Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive", | ||||
|                 u"uploader": u"deadjournalist", | ||||
|                 u'upload_date': u'20120424', | ||||
|                 u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive', | ||||
|             }, | ||||
|             u'note': u'Soundcloud song', | ||||
|         }, | ||||
|   | ||||
| @@ -19,7 +19,8 @@ class FacebookIE(InfoExtractor): | ||||
|     """Information Extractor for Facebook""" | ||||
|  | ||||
|     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' | ||||
|     _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' | ||||
|     _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' | ||||
|     _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' | ||||
|     _NETRC_MACHINE = 'facebook' | ||||
|     IE_NAME = u'facebook' | ||||
|     _TEST = { | ||||
| @@ -36,50 +37,56 @@ class FacebookIE(InfoExtractor): | ||||
|         """Report attempt to log in.""" | ||||
|         self.to_screen(u'Logging in') | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         if self._downloader is None: | ||||
|             return | ||||
|  | ||||
|         useremail = None | ||||
|         password = None | ||||
|         downloader_params = self._downloader.params | ||||
|  | ||||
|         # Attempt to use provided username and password or .netrc data | ||||
|         if downloader_params.get('username', None) is not None: | ||||
|             useremail = downloader_params['username'] | ||||
|             password = downloader_params['password'] | ||||
|         elif downloader_params.get('usenetrc', False): | ||||
|             try: | ||||
|                 info = netrc.netrc().authenticators(self._NETRC_MACHINE) | ||||
|                 if info is not None: | ||||
|                     useremail = info[0] | ||||
|                     password = info[2] | ||||
|                 else: | ||||
|                     raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) | ||||
|             except (IOError, netrc.NetrcParseError) as err: | ||||
|                 self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err)) | ||||
|                 return | ||||
|  | ||||
|     def _login(self): | ||||
|         (useremail, password) = self._get_login_info() | ||||
|         if useremail is None: | ||||
|             return | ||||
|  | ||||
|         # Log in | ||||
|         login_page_req = compat_urllib_request.Request(self._LOGIN_URL) | ||||
|         login_page_req.add_header('Cookie', 'locale=en_US') | ||||
|         self.report_login() | ||||
|         login_page = self._download_webpage(login_page_req, None, note=False, | ||||
|             errnote=u'Unable to download login page') | ||||
|         lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd') | ||||
|         lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd') | ||||
|  | ||||
|         login_form = { | ||||
|             'email': useremail, | ||||
|             'pass': password, | ||||
|             'login': 'Log+In' | ||||
|             'lsd': lsd, | ||||
|             'lgnrnd': lgnrnd, | ||||
|             'next': 'http://facebook.com/home.php', | ||||
|             'default_persistent': '0', | ||||
|             'legacy_return': '1', | ||||
|             'timezone': '-60', | ||||
|             'trynum': '1', | ||||
|             } | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) | ||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         try: | ||||
|             self.report_login() | ||||
|             login_results = compat_urllib_request.urlopen(request).read() | ||||
|             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: | ||||
|                 self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') | ||||
|                 return | ||||
|  | ||||
|             check_form = { | ||||
|                 'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'), | ||||
|                 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'), | ||||
|                 'name_action_selected': 'dont_save', | ||||
|                 'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'), | ||||
|             } | ||||
|             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form)) | ||||
|             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|             check_response = compat_urllib_request.urlopen(check_req).read() | ||||
|             if re.search(r'id="checkpointSubmitButton"', check_response) is not None: | ||||
|                 self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) | ||||
|             return | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
| @@ -93,7 +100,13 @@ class FacebookIE(InfoExtractor): | ||||
|         AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' | ||||
|         m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage) | ||||
|         if not m: | ||||
|             raise ExtractorError(u'Cannot parse data') | ||||
|             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) | ||||
|             if m_msg is not None: | ||||
|                 raise ExtractorError( | ||||
|                     u'The video is not available, Facebook said: "%s"' % m_msg.group(1), | ||||
|                     expected=True) | ||||
|             else: | ||||
|                 raise ExtractorError(u'Cannot parse data') | ||||
|         data = dict(json.loads(m.group(1))) | ||||
|         params_raw = compat_urllib_parse.unquote(data['params']) | ||||
|         params = json.loads(params_raw) | ||||
|   | ||||
| @@ -5,8 +5,6 @@ import xml.etree.ElementTree | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     clean_html, | ||||
|     get_element_by_attribute, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -47,12 +45,12 @@ class FazIE(InfoExtractor): | ||||
|                 'format_id': code.lower(), | ||||
|             }) | ||||
|  | ||||
|         descr_html = get_element_by_attribute('class', 'Content Copy', webpage) | ||||
|         descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description') | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'formats': formats, | ||||
|             'description': clean_html(descr_html), | ||||
|             'description': descr, | ||||
|             'thumbnail': config.find('STILL/STILL_BIG').text, | ||||
|         } | ||||
|         # TODO: Remove when #980 has been merged | ||||
|   | ||||
| @@ -25,7 +25,7 @@ class GenericIE(InfoExtractor): | ||||
|         { | ||||
|             u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', | ||||
|             u'file': u'13601338388002.mp4', | ||||
|             u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', | ||||
|             u'md5': u'6e15c93721d7ec9e9ca3fdbf07982cfd', | ||||
|             u'info_dict': { | ||||
|                 u"uploader": u"www.hodiho.fr", | ||||
|                 u"title": u"R\u00e9gis plante sa Jeep" | ||||
| @@ -41,7 +41,17 @@ class GenericIE(InfoExtractor): | ||||
|                 u"uploader_id": u"skillsmatter", | ||||
|                 u"uploader": u"Skills Matter", | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|         # bandcamp page with custom domain | ||||
|         { | ||||
|             u'url': u'http://bronyrock.com/track/the-pony-mash', | ||||
|             u'file': u'3235767654.mp3', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'The Pony Mash', | ||||
|                 u'uploader': u'M_Pallante', | ||||
|             }, | ||||
|             u'skip': u'There is a limit of 200 free downloads / month for the test song', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_download_webpage(self, video_id): | ||||
| @@ -155,6 +165,12 @@ class GenericIE(InfoExtractor): | ||||
|             surl = unescapeHTML(mobj.group(1)) | ||||
|             return self.url_result(surl, 'Youtube') | ||||
|  | ||||
|         # Look for Bandcamp pages with custom domain | ||||
|         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) | ||||
|         if mobj is not None: | ||||
|             burl = unescapeHTML(mobj.group(1)) | ||||
|             return self.url_result(burl, 'Bandcamp') | ||||
|  | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|   | ||||
| @@ -26,7 +26,7 @@ class InstagramIE(InfoExtractor): | ||||
|  | ||||
|         return [{ | ||||
|             'id':        video_id, | ||||
|             'url':       self._og_search_video_url(webpage), | ||||
|             'url':       self._og_search_video_url(webpage, secure=False), | ||||
|             'ext':       'mp4', | ||||
|             'title':     u'Video by %s' % uploader_id, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|   | ||||
							
								
								
									
										61
									
								
								youtube_dl/extractor/keezmovies.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								youtube_dl/extractor/keezmovies.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,61 @@ | ||||
| import os | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
| from ..aes import ( | ||||
|     aes_decrypt_text | ||||
| ) | ||||
|  | ||||
| class KeezMoviesIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', | ||||
|         u'file': u'1214711.mp4', | ||||
|         u'md5': u'6e297b7e789329923fcf83abb67c9289', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Petite Asian Lady Mai Playing In Bathtub", | ||||
|             u"age_limit": 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         url = 'http://www.' + mobj.group('url') | ||||
|  | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         # embedded video | ||||
|         mobj = re.search(r'href="([^"]+)"></iframe>', webpage) | ||||
|         if mobj: | ||||
|             embedded_url = mobj.group(1) | ||||
|             return self.url_result(embedded_url) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title') | ||||
|         video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) | ||||
|         if webpage.find('encrypted=true')!=-1: | ||||
|             password = self._html_search_regex(r'video_title=(.+?)&', webpage, u'password') | ||||
|             video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') | ||||
|         path = compat_urllib_parse_urlparse( video_url ).path | ||||
|         extension = os.path.splitext( path )[1][1:] | ||||
|         format = path.split('/')[4].split('_')[:2] | ||||
|         format = "-".join( format ) | ||||
|  | ||||
|         age_limit = self._rta_search(webpage) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'url': video_url, | ||||
|             'ext': extension, | ||||
|             'format': format, | ||||
|             'format_id': format, | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
| @@ -40,13 +40,9 @@ class LivestreamIE(InfoExtractor): | ||||
|  | ||||
|         if video_id is None: | ||||
|             # This is an event page: | ||||
|             player = get_meta_content('twitter:player', webpage) | ||||
|             if player is None: | ||||
|                 raise ExtractorError('Couldn\'t extract event api url') | ||||
|             api_url = player.replace('/player', '') | ||||
|             api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url) | ||||
|             info = json.loads(self._download_webpage(api_url, event_name, | ||||
|                                                      u'Downloading event info')) | ||||
|             config_json = self._search_regex(r'window.config = ({.*?});', | ||||
|                 webpage, u'window config') | ||||
|             info = json.loads(config_json)['event'] | ||||
|             videos = [self._extract_video_info(video_data['data']) | ||||
|                 for video_data in info['feed']['data'] if video_data['type'] == u'video'] | ||||
|             return self.playlist_result(videos, info['id'], info['full_name']) | ||||
|   | ||||
| @@ -20,10 +20,12 @@ class MetacafeIE(InfoExtractor): | ||||
|     _DISCLAIMER = 'http://www.metacafe.com/family_filter/' | ||||
|     _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' | ||||
|     IE_NAME = u'metacafe' | ||||
|     _TESTS = [{ | ||||
|     _TESTS = [ | ||||
|     # Youtube video | ||||
|     { | ||||
|         u"add_ie": ["Youtube"], | ||||
|         u"url":  u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", | ||||
|         u"file":  u"_aUehQsCQtM.flv", | ||||
|         u"file":  u"_aUehQsCQtM.mp4", | ||||
|         u"info_dict": { | ||||
|             u"upload_date": u"20090102", | ||||
|             u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!", | ||||
| @@ -32,15 +34,42 @@ class MetacafeIE(InfoExtractor): | ||||
|             u"uploader_id": u"PBS" | ||||
|         } | ||||
|     }, | ||||
|     # Normal metacafe video | ||||
|     { | ||||
|         u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/', | ||||
|         u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad', | ||||
|         u'info_dict': { | ||||
|             u'id': u'11121940', | ||||
|             u'ext': u'mp4', | ||||
|             u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4', | ||||
|             u'uploader': u'ign', | ||||
|             u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.', | ||||
|         }, | ||||
|     }, | ||||
|     # AnyClip video | ||||
|     { | ||||
|         u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/", | ||||
|         u"file": u"an-dVVXnuY7Jh77J.mp4", | ||||
|         u"info_dict": { | ||||
|             u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3", | ||||
|             u"uploader": u"anyclip", | ||||
|             u"description": u"md5:38c711dd98f5bb87acf973d573442e67" | ||||
|         } | ||||
|     }] | ||||
|             u"description": u"md5:38c711dd98f5bb87acf973d573442e67", | ||||
|         }, | ||||
|     }, | ||||
|     # age-restricted video | ||||
|     { | ||||
|         u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/', | ||||
|         u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09', | ||||
|         u'info_dict': { | ||||
|             u'id': u'5186653', | ||||
|             u'ext': u'mp4', | ||||
|             u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.', | ||||
|             u'uploader': u'Dwayne Pipe', | ||||
|             u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b', | ||||
|             u'age_limit': 18, | ||||
|         }, | ||||
|     }, | ||||
|     ] | ||||
|  | ||||
|  | ||||
|     def report_disclaimer(self): | ||||
| @@ -62,6 +91,7 @@ class MetacafeIE(InfoExtractor): | ||||
|             'submit': "Continue - I'm over 18", | ||||
|             } | ||||
|         request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form)) | ||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         try: | ||||
|             self.report_age_confirmation() | ||||
|             compat_urllib_request.urlopen(request).read() | ||||
| @@ -83,7 +113,12 @@ class MetacafeIE(InfoExtractor): | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id) | ||||
|         req.headers['Cookie'] = 'flashVersion=0;' | ||||
|  | ||||
|         # AnyClip videos require the flashversion cookie so that we get the link | ||||
|         # to the mp4 file | ||||
|         mobj_an = re.match(r'^an-(.*?)$', video_id) | ||||
|         if mobj_an: | ||||
|             req.headers['Cookie'] = 'flashVersion=0;' | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         # Extract URL, uploader and title from webpage | ||||
| @@ -125,6 +160,11 @@ class MetacafeIE(InfoExtractor): | ||||
|                 r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);', | ||||
|                 webpage, u'uploader nickname', fatal=False) | ||||
|  | ||||
|         if re.search(r'"contentRating":"restricted"', webpage) is not None: | ||||
|             age_limit = 18 | ||||
|         else: | ||||
|             age_limit = 0 | ||||
|  | ||||
|         return { | ||||
|             '_type':    'video', | ||||
|             'id':       video_id, | ||||
| @@ -134,4 +174,5 @@ class MetacafeIE(InfoExtractor): | ||||
|             'upload_date':  None, | ||||
|             'title':    video_title, | ||||
|             'ext':      video_ext, | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
|   | ||||
| @@ -80,6 +80,8 @@ class MTVIE(InfoExtractor): | ||||
|         video_id = self._id_from_uri(uri) | ||||
|         self.report_extraction(video_id) | ||||
|         mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url'] | ||||
|         # Remove the templates, like &device={device} | ||||
|         mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url) | ||||
|         if 'acceptMethods' not in mediagen_url: | ||||
|             mediagen_url += '&acceptMethods=fms' | ||||
|         mediagen_page = self._download_webpage(mediagen_url, video_id, | ||||
|   | ||||
							
								
								
									
										48
									
								
								youtube_dl/extractor/myspace.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								youtube_dl/extractor/myspace.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MySpaceIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689', | ||||
|         u'info_dict': { | ||||
|             u'id': u'100008689', | ||||
|             u'ext': u'flv', | ||||
|             u'title': u'Viva La Vida', | ||||
|             u'description': u'The official Viva La Vida video, directed by Hype Williams', | ||||
|             u'uploader': u'Coldplay', | ||||
|             u'uploader_id': u'coldplay', | ||||
|         }, | ||||
|         u'params': { | ||||
|             # rtmp download | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         context = json.loads(self._search_regex(r'context = ({.*?});', webpage, | ||||
|             u'context')) | ||||
|         video = context['video'] | ||||
|         rtmp_url, play_path = video['streamUrl'].split(';', 1) | ||||
|  | ||||
|         return { | ||||
|             'id': compat_str(video['mediaId']), | ||||
|             'title': video['title'], | ||||
|             'url': rtmp_url, | ||||
|             'play_path': play_path, | ||||
|             'ext': 'flv', | ||||
|             'description': video['description'], | ||||
|             'thumbnail': video['imageUrl'], | ||||
|             'uploader': video['artistName'], | ||||
|             'uploader_id': video['artistUsername'], | ||||
|         } | ||||
| @@ -20,7 +20,10 @@ class NowVideoIE(InfoExtractor): | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         webpage_url = 'http://www.nowvideo.ch/video/' + video_id | ||||
|         embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|         embed_page = self._download_webpage(embed_url, video_id, | ||||
|             u'Downloading embed page') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
| @@ -28,7 +31,7 @@ class NowVideoIE(InfoExtractor): | ||||
|             webpage, u'video title') | ||||
|  | ||||
|         video_key = self._search_regex(r'var fkzd="(.*)";', | ||||
|             webpage, u'video key') | ||||
|             embed_page, u'video key') | ||||
|  | ||||
|         api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key) | ||||
|         api_response = self._download_webpage(api_call, video_id, | ||||
|   | ||||
							
								
								
									
										69
									
								
								youtube_dl/extractor/pornhub.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								youtube_dl/extractor/pornhub.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,69 @@ | ||||
| import os | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
|     unescapeHTML, | ||||
| ) | ||||
| from ..aes import ( | ||||
|     aes_decrypt_text | ||||
| ) | ||||
|  | ||||
| class PornHubIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015', | ||||
|         u'file': u'648719015.mp4', | ||||
|         u'md5': u'882f488fa1f0026f023f33576004a2ed', | ||||
|         u'info_dict': { | ||||
|             u"uploader": u"BABES-COM",  | ||||
|             u"title": u"Seductive Indian beauty strips down and fingers her pink pussy", | ||||
|             u"age_limit": 18 | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         url = 'http://www.' + mobj.group('url') | ||||
|  | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, u'title') | ||||
|         video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False) | ||||
|         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False) | ||||
|         if thumbnail: | ||||
|             thumbnail = compat_urllib_parse.unquote(thumbnail) | ||||
|  | ||||
|         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) | ||||
|         if webpage.find('"encrypted":true') != -1: | ||||
|             password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password').replace('+', ' ') | ||||
|             video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) | ||||
|  | ||||
|         formats = [] | ||||
|         for video_url in video_urls: | ||||
|             path = compat_urllib_parse_urlparse( video_url ).path | ||||
|             extension = os.path.splitext( path )[1][1:] | ||||
|             format = path.split('/')[5].split('_')[:2] | ||||
|             format = "-".join( format ) | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'ext': extension, | ||||
|                 'format': format, | ||||
|                 'format_id': format, | ||||
|             }) | ||||
|         formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-')))) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'uploader': video_uploader, | ||||
|             'title': video_title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'formats': formats, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
| @@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor): | ||||
|         u'md5': u'374dd6dcedd24234453b295209aa69b6', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20090708",  | ||||
|             u"title": u"Marilyn-Monroe-Bathing" | ||||
|             u"title": u"Marilyn-Monroe-Bathing", | ||||
|             u"age_limit": 18 | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -63,13 +63,12 @@ class RTLnowIE(InfoExtractor): | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1', | ||||
|         u'file': u'127367.flv', | ||||
|         u'url': u'http://www.rtlnitronow.de/recht-ordnung/stadtpolizei-frankfurt-gerichtsvollzieher-leipzig.php?film_id=129679&player=1&season=1', | ||||
|         u'file': u'129679.flv', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20130926',  | ||||
|             u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...', | ||||
|             u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin', | ||||
|             u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg', | ||||
|             u'upload_date': u'20131016',  | ||||
|             u'title': u'Recht & Ordnung - Stadtpolizei Frankfurt/ Gerichtsvollzieher...', | ||||
|             u'description': u'Stadtpolizei Frankfurt/ Gerichtsvollzieher Leipzig', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|   | ||||
							
								
								
									
										74
									
								
								youtube_dl/extractor/spankwire.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								youtube_dl/extractor/spankwire.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,74 @@ | ||||
| import os | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
|     unescapeHTML, | ||||
| ) | ||||
| from ..aes import ( | ||||
|     aes_decrypt_text | ||||
| ) | ||||
|  | ||||
| class SpankwireIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', | ||||
|         u'file': u'103545.mp4', | ||||
|         u'md5': u'1b3f55e345500552dbc252a3e9c1af43', | ||||
|         u'info_dict': { | ||||
|             u"uploader": u"oreusz",  | ||||
|             u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch", | ||||
|             u"description": u"Crazy Bitch X rated music video.", | ||||
|             u"age_limit": 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         url = 'http://www.' + mobj.group('url') | ||||
|  | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title') | ||||
|         video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False) | ||||
|         thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False) | ||||
|         description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False) | ||||
|         if len(description) == 0: | ||||
|             description = None | ||||
|  | ||||
|         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage))) | ||||
|         if webpage.find('flashvars\.encrypted = "true"') != -1: | ||||
|             password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ') | ||||
|             video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) | ||||
|  | ||||
|         formats = [] | ||||
|         for video_url in video_urls: | ||||
|             path = compat_urllib_parse_urlparse( video_url ).path | ||||
|             extension = os.path.splitext( path )[1][1:] | ||||
|             format = path.split('/')[4].split('_')[:2] | ||||
|             format = "-".join( format ) | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'ext': extension, | ||||
|                 'format': format, | ||||
|                 'format_id': format, | ||||
|             }) | ||||
|         formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-')))) | ||||
|  | ||||
|         age_limit = self._rta_search(webpage) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'uploader': video_uploader, | ||||
|             'title': video_title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': description, | ||||
|             'formats': formats, | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
							
								
								
									
										65
									
								
								youtube_dl/extractor/tube8.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								youtube_dl/extractor/tube8.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| import os | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
|     unescapeHTML, | ||||
| ) | ||||
| from ..aes import ( | ||||
|     aes_decrypt_text | ||||
| ) | ||||
|  | ||||
| class Tube8IE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/', | ||||
|         u'file': u'229795.mp4', | ||||
|         u'md5': u'e9e0b0c86734e5e3766e653509475db0', | ||||
|         u'info_dict': { | ||||
|             u"description": u"hot teen Kasia grinding",  | ||||
|             u"uploader": u"unknown",  | ||||
|             u"title": u"Kasia music video", | ||||
|             u"age_limit": 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         url = 'http://www.' + mobj.group('url') | ||||
|  | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'videotitle	="([^"]+)', webpage, u'title') | ||||
|         video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False) | ||||
|         video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False) | ||||
|         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False) | ||||
|         if thumbnail: | ||||
|             thumbnail = thumbnail.replace('\\/', '/') | ||||
|  | ||||
|         video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url') | ||||
|         if webpage.find('"encrypted":true')!=-1: | ||||
|             password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password') | ||||
|             video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') | ||||
|         path = compat_urllib_parse_urlparse( video_url ).path | ||||
|         extension = os.path.splitext( path )[1][1:] | ||||
|         format = path.split('/')[4].split('_')[:2] | ||||
|         format = "-".join( format ) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'uploader': video_uploader, | ||||
|             'title': video_title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': video_description, | ||||
|             'url': video_url, | ||||
|             'ext': extension, | ||||
|             'format': format, | ||||
|             'format_id': format, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
| @@ -5,7 +5,7 @@ import datetime | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     compat_HTTPError, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| @@ -16,26 +16,22 @@ class VevoIE(InfoExtractor): | ||||
|     (currently used by MTVIE) | ||||
|     """ | ||||
|     _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', | ||||
|         u'file': u'GB1101300280.mp4', | ||||
|         u"md5": u"06bea460acb744eab74a9d7dcb4bfd61", | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20130624", | ||||
|             u"uploader": u"Hurts", | ||||
|             u"title": u"Somebody to Die For", | ||||
|             u'duration': 230, | ||||
|             u"duration": 230, | ||||
|             u"width": 1920, | ||||
|             u"height": 1080, | ||||
|         } | ||||
|     } | ||||
|     }] | ||||
|     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id | ||||
|         info_json = self._download_webpage(json_url, video_id, u'Downloading json info') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         video_info = json.loads(info_json)['video'] | ||||
|     def _formats_from_json(self, video_info): | ||||
|         last_version = {'version': -1} | ||||
|         for version in video_info['videoVersions']: | ||||
|             # These are the HTTP downloads, other types are for different manifests | ||||
| @@ -50,17 +46,74 @@ class VevoIE(InfoExtractor): | ||||
|         # Already sorted from worst to best quality | ||||
|         for rend in renditions.findall('rendition'): | ||||
|             attr = rend.attrib | ||||
|             f_url = attr['url'] | ||||
|             format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr | ||||
|             formats.append({ | ||||
|                 'url': f_url, | ||||
|                 'ext': determine_ext(f_url), | ||||
|                 'url': attr['url'], | ||||
|                 'format_id': attr['name'], | ||||
|                 'format_note': format_note, | ||||
|                 'height': int(attr['frameheight']), | ||||
|                 'width': int(attr['frameWidth']), | ||||
|             }) | ||||
|         return formats | ||||
|  | ||||
|         date_epoch = int(self._search_regex( | ||||
|             r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000 | ||||
|         upload_date = datetime.datetime.fromtimestamp(date_epoch) | ||||
|     def _formats_from_smil(self, smil_xml): | ||||
|         """Build a list of format dicts from a SMIL document. | ||||
|  | ||||
|         ``smil_xml`` is the SMIL document as text; it is encoded to UTF-8 | ||||
|         and parsed with ElementTree. Every ``video`` element in the SMIL 2.0 | ||||
|         Language namespace contributes one format, provided its ``src`` | ||||
|         attribute matches the naming pattern below; other elements are | ||||
|         skipped. Each returned dict has the keys ``url``, ``format_id``, | ||||
|         ``format_note``, ``ext``, ``width`` and ``height``. | ||||
|         """ | ||||
|         formats = [] | ||||
|         smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8')) | ||||
|         els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') | ||||
|         for el in els: | ||||
|             src = el.attrib['src'] | ||||
|             # src is expected to look like | ||||
|             # <ext>:<dir/name>_<cbr>k_<width>x<height>_<vcodec>_<vbr>_<acodec>_<abr>.<suffix> | ||||
|             # (matched case-insensitively, in verbose mode). | ||||
|             m = re.match(r'''(?xi) | ||||
|                 (?P<ext>[a-z0-9]+): | ||||
|                 (?P<path> | ||||
|                     [/a-z0-9]+     # The directory and main part of the URL | ||||
|                     _(?P<cbr>[0-9]+)k | ||||
|                     _(?P<width>[0-9]+)x(?P<height>[0-9]+) | ||||
|                     _(?P<vcodec>[a-z0-9]+) | ||||
|                     _(?P<vbr>[0-9]+) | ||||
|                     _(?P<acodec>[a-z0-9]+) | ||||
|                     _(?P<abr>[0-9]+) | ||||
|                     \.[a-z0-9]+  # File extension | ||||
|                 )''', src) | ||||
|             if not m: | ||||
|                 # Not one of the recognised rendition names; ignore it. | ||||
|                 continue | ||||
|  | ||||
|             # The download URL is the class-level base URL followed by the | ||||
|             # matched path (everything after the "<ext>:" prefix). | ||||
|             format_url = self._SMIL_BASE_URL + m.group('path') | ||||
|             format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' % | ||||
|                            m.groupdict()) | ||||
|             formats.append({ | ||||
|                 'url': format_url, | ||||
|                 'format_id': u'SMIL_' + m.group('cbr'), | ||||
|                 'format_note': format_note, | ||||
|                 'ext': m.group('ext'), | ||||
|                 'width': int(m.group('width')), | ||||
|                 'height': int(m.group('height')), | ||||
|             }) | ||||
|         return formats | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id | ||||
|         info_json = self._download_webpage(json_url, video_id, u'Downloading json info') | ||||
|         video_info = json.loads(info_json)['video'] | ||||
|  | ||||
|         formats = self._formats_from_json(video_info) | ||||
|         try: | ||||
|             smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( | ||||
|                 self._SMIL_BASE_URL, video_id, video_id.lower()) | ||||
|             smil_xml = self._download_webpage(smil_url, video_id, | ||||
|                                               u'Downloading SMIL info') | ||||
|             formats.extend(self._formats_from_smil(smil_xml)) | ||||
|         except ExtractorError as ee: | ||||
|             if not isinstance(ee.cause, compat_HTTPError): | ||||
|                 raise | ||||
|             self._downloader.report_warning( | ||||
|                 u'Cannot download SMIL information, falling back to JSON ..') | ||||
|  | ||||
|         timestamp_ms = int(self._search_regex( | ||||
|             r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date')) | ||||
|         upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000) | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': video_info['title'], | ||||
| @@ -71,7 +124,4 @@ class VevoIE(InfoExtractor): | ||||
|             'duration': video_info['duration'], | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| # encoding: utf-8 | ||||
| import json | ||||
| import re | ||||
| import itertools | ||||
| @@ -10,6 +11,7 @@ from ..utils import ( | ||||
|     clean_html, | ||||
|     get_element_by_attribute, | ||||
|     ExtractorError, | ||||
|     RegexNotFoundError, | ||||
|     std_headers, | ||||
|     unsmuggle_url, | ||||
| ) | ||||
| @@ -18,12 +20,12 @@ class VimeoIE(InfoExtractor): | ||||
|     """Information extractor for vimeo.com.""" | ||||
|  | ||||
|     # _VALID_URL matches Vimeo URLs | ||||
|     _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?$' | ||||
|     _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$' | ||||
|     _NETRC_MACHINE = 'vimeo' | ||||
|     IE_NAME = u'vimeo' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u'url': u'http://vimeo.com/56015672', | ||||
|             u'url': u'http://vimeo.com/56015672#at=0', | ||||
|             u'file': u'56015672.mp4', | ||||
|             u'md5': u'8879b6cc097e987f02484baf890129e5', | ||||
|             u'info_dict': { | ||||
| @@ -54,7 +56,22 @@ class VimeoIE(InfoExtractor): | ||||
|                 u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software', | ||||
|                 u'uploader': u'The BLN & Business of Software', | ||||
|             }, | ||||
|         } | ||||
|         }, | ||||
|         { | ||||
|             u'url': u'http://vimeo.com/68375962', | ||||
|             u'file': u'68375962.mp4', | ||||
|             u'md5': u'aaf896bdb7ddd6476df50007a0ac0ae7', | ||||
|             u'note': u'Video protected with password', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'youtube-dl password protected test video', | ||||
|                 u'upload_date': u'20130614', | ||||
|                 u'uploader_id': u'user18948128', | ||||
|                 u'uploader': u'Jaime Marquínez Ferrándiz', | ||||
|             }, | ||||
|             u'params': { | ||||
|                 u'videopassword': u'youtube-dl', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _login(self): | ||||
| @@ -129,18 +146,26 @@ class VimeoIE(InfoExtractor): | ||||
|  | ||||
|         # Extract the config JSON | ||||
|         try: | ||||
|             config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], | ||||
|                 webpage, u'info section', flags=re.DOTALL) | ||||
|             config = json.loads(config) | ||||
|         except: | ||||
|             try: | ||||
|                 config_url = self._html_search_regex( | ||||
|                     r' data-config-url="(.+?)"', webpage, u'config URL') | ||||
|                 config_json = self._download_webpage(config_url, video_id) | ||||
|                 config = json.loads(config_json) | ||||
|             except RegexNotFoundError: | ||||
|                 # For pro videos or player.vimeo.com urls | ||||
|                 config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], | ||||
|                     webpage, u'info section', flags=re.DOTALL) | ||||
|                 config = json.loads(config) | ||||
|         except Exception as e: | ||||
|             if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): | ||||
|                 raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option') | ||||
|  | ||||
|             if re.search('If so please provide the correct password.', webpage): | ||||
|             if re.search('<form[^>]+?id="pw_form"', webpage) is not None: | ||||
|                 self._verify_video_password(url, video_id, webpage) | ||||
|                 return self._real_extract(url) | ||||
|             else: | ||||
|                 raise ExtractorError(u'Unable to extract info section') | ||||
|                 raise ExtractorError(u'Unable to extract info section', | ||||
|                                      cause=e) | ||||
|  | ||||
|         # Extract title | ||||
|         video_title = config["video"]["title"] | ||||
|   | ||||
							
								
								
									
										45
									
								
								youtube_dl/extractor/vk.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								youtube_dl/extractor/vk.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,45 @@ | ||||
| # encoding: utf-8 | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class VKIE(InfoExtractor): | ||||
|     IE_NAME = u'vk.com' | ||||
|     _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', | ||||
|         u'md5': u'0deae91935c54e00003c2a00646315f0', | ||||
|         u'info_dict': { | ||||
|             u'id': u'162222515', | ||||
|             u'ext': u'flv', | ||||
|             u'title': u'ProtivoGunz - Хуёвая песня', | ||||
|             u'uploader': u'Noize MC', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id | ||||
|         info_page = self._download_webpage(info_url, video_id) | ||||
|         m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page) | ||||
|         if m_yt is not None: | ||||
|             self.to_screen(u'Youtube video detected') | ||||
|             return self.url_result(m_yt.group(1), 'Youtube') | ||||
|         vars_json = self._search_regex(r'var vars = ({.*?});', info_page, u'vars') | ||||
|         vars = json.loads(vars_json) | ||||
|  | ||||
|         return { | ||||
|             'id': compat_str(vars['vid']), | ||||
|             'url': vars['url240'], | ||||
|             'title': unescapeHTML(vars['md_title']), | ||||
|             'thumbnail': vars['jpg'], | ||||
|             'uploader': vars['md_author'], | ||||
|         } | ||||
| @@ -36,21 +36,25 @@ class XHamsterIE(InfoExtractor): | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|         def extract_video_url(webpage): | ||||
|             mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage) | ||||
|             if mobj is None: | ||||
|                 raise ExtractorError(u'Unable to extract media URL') | ||||
|             if len(mobj.group('server')) == 0: | ||||
|                 return compat_urllib_parse.unquote(mobj.group('file')) | ||||
|             else: | ||||
|                 return mobj.group('server')+'/key='+mobj.group('file') | ||||
|  | ||||
|         def is_hd(webpage): | ||||
|             return webpage.find('<div class=\'icon iconHD\'>') != -1 | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         seo = mobj.group('seo') | ||||
|         mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo) | ||||
|         mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo) | ||||
|         webpage = self._download_webpage(mrss_url, video_id) | ||||
|  | ||||
|         mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract media URL') | ||||
|         if len(mobj.group('server')) == 0: | ||||
|             video_url = compat_urllib_parse.unquote(mobj.group('file')) | ||||
|         else: | ||||
|             video_url = mobj.group('server')+'/key='+mobj.group('file') | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', | ||||
|             webpage, u'title') | ||||
|  | ||||
| @@ -76,14 +80,32 @@ class XHamsterIE(InfoExtractor): | ||||
|  | ||||
|         age_limit = self._rta_search(webpage) | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'ext':      determine_ext(video_url), | ||||
|             'title':    video_title, | ||||
|         video_url = extract_video_url(webpage) | ||||
|         hd = is_hd(webpage) | ||||
|         formats = [{ | ||||
|             'url': video_url, | ||||
|             'ext': determine_ext(video_url), | ||||
|             'format': 'hd' if hd else 'sd', | ||||
|             'format_id': 'hd' if hd else 'sd', | ||||
|         }] | ||||
|         if not hd: | ||||
|             webpage = self._download_webpage(mrss_url+'?hd', video_id) | ||||
|             if is_hd(webpage): | ||||
|                 video_url = extract_video_url(webpage) | ||||
|                 formats.append({ | ||||
|                     'url': video_url, | ||||
|                     'ext': determine_ext(video_url), | ||||
|                     'format': 'hd', | ||||
|                     'format_id': 'hd', | ||||
|                 }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'formats': formats, | ||||
|             'description': video_description, | ||||
|             'upload_date': video_upload_date, | ||||
|             'uploader_id': video_uploader_id, | ||||
|             'thumbnail': video_thumbnail, | ||||
|             'age_limit': age_limit, | ||||
|         }] | ||||
|         } | ||||
|   | ||||
| @@ -13,7 +13,8 @@ class YouJizzIE(InfoExtractor): | ||||
|         u'file': u'2189178.flv', | ||||
|         u'md5': u'07e15fa469ba384c7693fd246905547c', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Zeichentrick 1" | ||||
|             u"title": u"Zeichentrick 1", | ||||
|             u"age_limit": 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -25,6 +26,8 @@ class YouJizzIE(InfoExtractor): | ||||
|         # Get webpage content | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         age_limit = self._rta_search(webpage) | ||||
|  | ||||
|         # Get the video title | ||||
|         video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>', | ||||
|             webpage, u'title').strip() | ||||
| @@ -60,6 +63,7 @@ class YouJizzIE(InfoExtractor): | ||||
|                 'title': video_title, | ||||
|                 'ext': 'flv', | ||||
|                 'format': 'flv', | ||||
|                 'player_url': embed_page_url} | ||||
|                 'player_url': embed_page_url, | ||||
|                 'age_limit': age_limit} | ||||
|  | ||||
|         return [info] | ||||
|   | ||||
| @@ -17,7 +17,7 @@ from ..aes import ( | ||||
| ) | ||||
|  | ||||
| class YouPornIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)' | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', | ||||
|         u'file': u'505835.mp4', | ||||
| @@ -31,23 +31,10 @@ class YouPornIE(InfoExtractor): | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _print_formats(self, formats): | ||||
|         """Print all available formats""" | ||||
|         print(u'Available formats:') | ||||
|         print(u'ext\t\tformat') | ||||
|         print(u'---------------------------------') | ||||
|         for format in formats: | ||||
|             print(u'%s\t\t%s'  % (format['ext'], format['format'])) | ||||
|  | ||||
|     def _specific(self, req_format, formats): | ||||
|         for x in formats: | ||||
|             if x["format"] == req_format: | ||||
|                 return x | ||||
|         return None | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         url = 'http://www.' + mobj.group('url') | ||||
|  | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
| @@ -71,27 +58,22 @@ class YouPornIE(InfoExtractor): | ||||
|         except KeyError: | ||||
|             raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1]) | ||||
|  | ||||
|         # Get all of the formats available | ||||
|         # Get all of the links from the page | ||||
|         DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>' | ||||
|         download_list_html = self._search_regex(DOWNLOAD_LIST_RE, | ||||
|             webpage, u'download list').strip() | ||||
|  | ||||
|         # Get all of the links from the page | ||||
|         LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">' | ||||
|         LINK_RE = r'<a href="([^"]+)">' | ||||
|         links = re.findall(LINK_RE, download_list_html) | ||||
|          | ||||
|         # Get link of hd video if available | ||||
|         mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage) | ||||
|         if mobj != None: | ||||
|             encrypted_video_url = mobj.group(u'encrypted_video_url') | ||||
|             video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8') | ||||
|             links = [video_url] + links | ||||
|  | ||||
|         # Get all encrypted links | ||||
|         encrypted_links = re.findall(r'var encryptedQuality[0-9]{3}URL = \'([a-zA-Z0-9+/]+={0,2})\';', webpage) | ||||
|         for encrypted_link in encrypted_links: | ||||
|             link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8') | ||||
|             links.append(link) | ||||
|          | ||||
|         if not links: | ||||
|             raise ExtractorError(u'ERROR: no known formats available for video') | ||||
|  | ||||
|         self.to_screen(u'Links found: %d' % len(links)) | ||||
|  | ||||
|         formats = [] | ||||
|         for link in links: | ||||
|  | ||||
| @@ -103,39 +85,32 @@ class YouPornIE(InfoExtractor): | ||||
|             path = compat_urllib_parse_urlparse( video_url ).path | ||||
|             extension = os.path.splitext( path )[1][1:] | ||||
|             format = path.split('/')[4].split('_')[:2] | ||||
|  | ||||
|             # size = format[0] | ||||
|             # bitrate = format[1] | ||||
|             format = "-".join( format ) | ||||
|             # title = u'%s-%s-%s' % (video_title, size, bitrate) | ||||
|  | ||||
|             formats.append({ | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'uploader': video_uploader, | ||||
|                 'upload_date': upload_date, | ||||
|                 'title': video_title, | ||||
|                 'ext': extension, | ||||
|                 'format': format, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'description': video_description, | ||||
|                 'age_limit': age_limit, | ||||
|                 'format_id': format, | ||||
|             }) | ||||
|  | ||||
|         if self._downloader.params.get('listformats', None): | ||||
|             self._print_formats(formats) | ||||
|             return | ||||
|  | ||||
|         req_format = self._downloader.params.get('format', 'best') | ||||
|         self.to_screen(u'Format: %s' % req_format) | ||||
|  | ||||
|         if req_format is None or req_format == 'best': | ||||
|             return [formats[0]] | ||||
|         elif req_format == 'worst': | ||||
|             return [formats[-1]] | ||||
|         elif req_format in ('-1', 'all'): | ||||
|             return formats | ||||
|         else: | ||||
|             format = self._specific( req_format, formats ) | ||||
|             if format is None: | ||||
|                 raise ExtractorError(u'Requested format not available') | ||||
|             return [format] | ||||
|         # Sort and remove doubles | ||||
|         formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-')))) | ||||
|         for i in range(len(formats)-1,0,-1): | ||||
|             if formats[i]['format_id'] == formats[i-1]['format_id']: | ||||
|                 del formats[i] | ||||
|          | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'title': video_title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': video_description, | ||||
|             'age_limit': age_limit, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -74,14 +74,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err)) | ||||
|             return False | ||||
|  | ||||
|         galx = None | ||||
|         dsh = None | ||||
|         match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page) | ||||
|         if match: | ||||
|           galx = match.group(1) | ||||
|         match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page) | ||||
|         if match: | ||||
|           dsh = match.group(1) | ||||
|         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"', | ||||
|                                   login_page, u'Login GALX parameter') | ||||
|  | ||||
|         # Log in | ||||
|         login_form_strs = { | ||||
| @@ -95,7 +89,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|                 u'checkConnection': u'', | ||||
|                 u'checkedDomains': u'youtube', | ||||
|                 u'dnConn': u'', | ||||
|                 u'dsh': dsh, | ||||
|                 u'pstMsg': u'0', | ||||
|                 u'rmShown': u'1', | ||||
|                 u'secTok': u'', | ||||
| @@ -236,11 +229,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         '136': 'mp4', | ||||
|         '137': 'mp4', | ||||
|         '138': 'mp4', | ||||
|         '139': 'mp4', | ||||
|         '140': 'mp4', | ||||
|         '141': 'mp4', | ||||
|         '160': 'mp4', | ||||
|  | ||||
|         # Dash mp4 audio | ||||
|         '139': 'm4a', | ||||
|         '140': 'm4a', | ||||
|         '141': 'm4a', | ||||
|  | ||||
|         # Dash webm | ||||
|         '171': 'webm', | ||||
|         '172': 'webm', | ||||
| @@ -346,7 +341,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         }, | ||||
|         { | ||||
|             u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U", | ||||
|             u"file":  u"1ltcDfZMA3U.flv", | ||||
|             u"file":  u"1ltcDfZMA3U.mp4", | ||||
|             u"note": u"Test VEVO video (#897)", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": u"20070518", | ||||
| @@ -1116,7 +1111,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 'lang': lang, | ||||
|                 'v': video_id, | ||||
|                 'fmt': self._downloader.params.get('subtitlesformat'), | ||||
|                 'name': l[0], | ||||
|                 'name': l[0].encode('utf-8'), | ||||
|             }) | ||||
|             url = u'http://www.youtube.com/api/timedtext?' + params | ||||
|             sub_lang_list[lang] = url | ||||
| @@ -1403,32 +1398,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             # this signatures are encrypted | ||||
|             if 'url_encoded_fmt_stream_map' not in args: | ||||
|                 raise ValueError(u'No stream_map present')  # caught below | ||||
|             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map']) | ||||
|             re_signature = re.compile(r'[&,]s=') | ||||
|             m_s = re_signature.search(args['url_encoded_fmt_stream_map']) | ||||
|             if m_s is not None: | ||||
|                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id) | ||||
|                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] | ||||
|             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u'')) | ||||
|             m_s = re_signature.search(args.get('adaptive_fmts', u'')) | ||||
|             if m_s is not None: | ||||
|                 if 'url_encoded_fmt_stream_map' in video_info: | ||||
|                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts'] | ||||
|                 if 'adaptive_fmts' in video_info: | ||||
|                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts'] | ||||
|                 else: | ||||
|                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']] | ||||
|             elif 'adaptive_fmts' in video_info: | ||||
|                 if 'url_encoded_fmt_stream_map' in video_info: | ||||
|                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0] | ||||
|                 else: | ||||
|                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts'] | ||||
|                     video_info['adaptive_fmts'] = [args['adaptive_fmts']] | ||||
|         except ValueError: | ||||
|             pass | ||||
|  | ||||
|         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): | ||||
|             self.report_rtmp_download() | ||||
|             video_url_list = [(None, video_info['conn'][0])] | ||||
|         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: | ||||
|             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]: | ||||
|         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: | ||||
|             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0] | ||||
|             if 'rtmpe%3Dyes' in encoded_url_map: | ||||
|                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) | ||||
|             url_map = {} | ||||
|             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','): | ||||
|             for url_data_str in encoded_url_map.split(','): | ||||
|                 url_data = compat_parse_qs(url_data_str) | ||||
|                 if 'itag' in url_data and 'url' in url_data: | ||||
|                     url = url_data['url'][0] | ||||
| @@ -1481,13 +1473,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') | ||||
|  | ||||
|         results = [] | ||||
|         for format_param, video_real_url in video_url_list: | ||||
|         for itag, video_real_url in video_url_list: | ||||
|             # Extension | ||||
|             video_extension = self._video_extensions.get(format_param, 'flv') | ||||
|             video_extension = self._video_extensions.get(itag, 'flv') | ||||
|  | ||||
|             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension, | ||||
|                                               self._video_dimensions.get(format_param, '???'), | ||||
|                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '') | ||||
|             video_format = '{0} - {1}{2}'.format(itag if itag else video_extension, | ||||
|                                               self._video_dimensions.get(itag, '???'), | ||||
|                                               ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '') | ||||
|  | ||||
|             results.append({ | ||||
|                 'id':       video_id, | ||||
| @@ -1498,6 +1490,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 'title':    video_title, | ||||
|                 'ext':      video_extension, | ||||
|                 'format':   video_format, | ||||
|                 'format_id': itag, | ||||
|                 'thumbnail':    video_thumbnail, | ||||
|                 'description':  video_description, | ||||
|                 'player_url':   player_url, | ||||
|   | ||||
| @@ -572,6 +572,11 @@ class ExtractorError(Exception): | ||||
|         return u''.join(traceback.format_tb(self.traceback)) | ||||
|  | ||||
|  | ||||
| class RegexNotFoundError(ExtractorError): | ||||
|     """Error when a regex didn't match""" | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class DownloadError(Exception): | ||||
|     """Download Error exception. | ||||
|  | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2013.10.23' | ||||
| __version__ = '2013.11.02' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user