Compare commits

139 commits: 2014.03.04 ... 2014.03.21
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 14e17e18cb | ||
|  | 1b124d1942 | ||
|  | 747373d4ae | ||
|  | 18d367c0a5 | ||
|  | a1a530b067 | ||
|  | cb9722cb3f | ||
|  | 773c0b4bb8 | ||
|  | 23c322a531 | ||
|  | 7e8c0af004 | ||
|  | d2983ccb25 | ||
|  | f24e9833dc | ||
|  | bc2bdf5709 | ||
|  | 627a209f74 | ||
|  | 1a4895453a | ||
|  | aab74fa106 | ||
|  | 2bd9efd4c2 | ||
|  | 39a743fb9b | ||
|  | 4966a0b22d | ||
|  | fc26023120 | ||
|  | 8d7c0cca13 | ||
|  | f66ede4328 | ||
|  | cc88b90ec8 | ||
|  | b6c5fa9a0b | ||
|  | dff10eaa77 | ||
|  | 4e6f9aeca1 | ||
|  | e68301af21 | ||
|  | 17286a96f2 | ||
|  | 0892363e6d | ||
|  | f102372b5f | ||
|  | ecbe1ad207 | ||
|  | 9d840c43b5 | ||
|  | 6f50f63382 | ||
|  | ff14fc4964 | ||
|  | e125c21531 | ||
|  | 93d020dd65 | ||
|  | a7515ec265 | ||
|  | b6c1ceccc2 | ||
|  | 4056ad8f36 | ||
|  | 6563837ee1 | ||
|  | fd5e6f7ef2 | ||
|  | 15fd51b37c | ||
|  | f1cef7a9ff | ||
|  | 8264223511 | ||
|  | bc6d597828 | ||
|  | aba77bbfc2 | ||
|  | 955c451456 | ||
|  | e5de3f6c89 | ||
|  | 2a1db721d4 | ||
|  | 1e0eb60f1a | ||
|  | 87a29e6f25 | ||
|  | c3d36f134f | ||
|  | 84769e708c | ||
|  | 9d2ecdbc71 | ||
|  | 9b69af5342 | ||
|  | c21215b421 | ||
|  | cddcfd90b4 | ||
|  | f36aacba0f | ||
|  | 355271fb61 | ||
|  | 2a5b502364 | ||
|  | 98ff9d82d4 | ||
|  | b1ff87224c | ||
|  | b461641fb9 | ||
|  | b047de6f6e | ||
|  | 34ca5d9ba0 | ||
|  | 60cc4dc4b4 | ||
|  | db95dc13a1 | ||
|  | 777ac90791 | ||
|  | 04f9bebbcb | ||
|  | 4ea3137e41 | ||
|  | a0792b738e | ||
|  | 19a41fc613 | ||
|  | 3ee52157fb | ||
|  | c4d197ee2d | ||
|  | a33932cfe3 | ||
|  | bcf89ce62c | ||
|  | e3899d0e00 | ||
|  | dcb00da49c | ||
|  | aa51d20d19 | ||
|  | ae7ed92057 | ||
|  | e45b31d9bd | ||
|  | 5a25f39653 | ||
|  | 963d7ec412 | ||
|  | e712d94adf | ||
|  | 6a72423955 | ||
|  | 4126826b10 | ||
|  | b773ead7fd | ||
|  | 855e2750bc | ||
|  | 805ef3c60b | ||
|  | fbc2dcb40b | ||
|  | 5375d7ad84 | ||
|  | 90f3476180 | ||
|  | ee95c09333 | ||
|  | 75d06db9fc | ||
|  | 439a1fffcb | ||
|  | 9d9d70c462 | ||
|  | b4a186b7be | ||
|  | bdebf51c8f | ||
|  | 264b86f9b4 | ||
|  | 9e55e37a2e | ||
|  | 1471956573 | ||
|  | 27865b2169 | ||
|  | 6d07ce0162 | ||
|  | edb7fc5435 | ||
|  | 31f77343f2 | ||
|  | 63ad031583 | ||
|  | 957688cee6 | ||
|  | 806d6c2e8c | ||
|  | 0ef68e04d9 | ||
|  | a496524db2 | ||
|  | 935c7360cc | ||
|  | 340b046876 | ||
|  | cc1db7f9b7 | ||
|  | a4ff6c4762 | ||
|  | 1060425cbb | ||
|  | e9c092f125 | ||
|  | 22ff5d2105 | ||
|  | 136db7881b | ||
|  | dae313e725 | ||
|  | b74fa8cd2c | ||
|  | 94eae04c94 | ||
|  | 16ff7ebc77 | ||
|  | c361c505b0 | ||
|  | d37c07c575 | ||
|  | 9d6105c9f0 | ||
|  | 8dec03ecba | ||
|  | 826547870b | ||
|  | 52d6a9a61d | ||
|  | ad242b5fbc | ||
|  | 3524175625 | ||
|  | 7b9965ea93 | ||
|  | 0a5bce566f | ||
|  | 8012bd2424 | ||
|  | f55a1f0a88 | ||
|  | bacac173a9 | ||
|  | ca1fee34f2 | ||
|  | 6dadaa9930 | ||
|  | 553f6e4633 | ||
|  | 652bee05f0 | ||
|  | 1b86cc41cf | ||
| @@ -36,6 +36,9 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      an empty string (--proxy "") for direct | ||||
|                                      connection | ||||
|     --no-check-certificate           Suppress HTTPS certificate validation. | ||||
|     --prefer-insecure                Use an unencrypted connection to retrieve | ||||
|                                      information about the video. (Currently | ||||
|                                      supported only for YouTube) | ||||
|     --cache-dir DIR                  Location in the filesystem where youtube-dl | ||||
|                                      can store some downloaded information | ||||
|                                      permanently. By default $XDG_CACHE_HOME | ||||
| @@ -191,9 +194,9 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      preference using slashes: "-f 22/17/18". | ||||
|                                      "-f mp4" and "-f flv" are also supported. | ||||
|                                      You can also use the special names "best", | ||||
|                                      "bestaudio", "worst", and "worstaudio". By | ||||
|                                      default, youtube-dl will pick the best | ||||
|                                      quality. | ||||
|                                      "bestvideo", "bestaudio", "worst", | ||||
|                                      "worstvideo" and "worstaudio". By default, | ||||
|                                      youtube-dl will pick the best quality. | ||||
|     --all-formats                    download all available video formats | ||||
|     --prefer-free-formats            prefer free video formats unless a specific | ||||
|                                      one is requested | ||||
|   | ||||
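
The README hunks above document the new `--prefer-insecure` flag and the extra `bestvideo`/`worstvideo` special format names. As a rough illustration (not part of this changeset), the same settings can be passed through the params dict that `__init__.py` wires up further down in this diff; this is a minimal sketch assuming `youtube_dl` is importable, and the URL is just the test video used elsewhere in this diff.

```python
import youtube_dl

ydl = youtube_dl.YoutubeDL({
    'format': 'bestvideo',     # new special name: best video-only format
    'prefer_insecure': True,   # plain-HTTP info retrieval (YouTube only)
})
ydl.add_default_info_extractors()
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```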
| @@ -70,7 +70,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" | ||||
| git checkout HEAD -- youtube-dl youtube-dl.exe | ||||
|  | ||||
| /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." | ||||
| for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done | ||||
| for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done | ||||
| scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ | ||||
| ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" | ||||
| ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" | ||||
| @@ -97,7 +97,7 @@ rm -rf build | ||||
|  | ||||
| make pypi-files | ||||
| echo "Uploading to PyPi ..." | ||||
| python setup.py sdist upload | ||||
| python setup.py sdist bdist_wheel upload | ||||
| make clean | ||||
|  | ||||
| /bin/echo -e "\n### DONE!" | ||||
|   | ||||
| @@ -71,7 +71,7 @@ class FakeYDL(YoutubeDL): | ||||
|             old_report_warning(message) | ||||
|         self.report_warning = types.MethodType(report_warning, self) | ||||
|  | ||||
| def get_testcases(): | ||||
| def gettestcases(): | ||||
|     for ie in youtube_dl.extractor.gen_extractors(): | ||||
|         t = getattr(ie, '_TEST', None) | ||||
|         if t: | ||||
|   | ||||
							
								
								
									
test/test_InfoExtractor.py (new file, 44 lines)
							| @@ -0,0 +1,44 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import FakeYDL | ||||
| from youtube_dl.extractor.common import InfoExtractor | ||||
| from youtube_dl.extractor import YoutubeIE, get_info_extractor | ||||
|  | ||||
|  | ||||
| class TestIE(InfoExtractor): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class TestInfoExtractor(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         self.ie = TestIE(FakeYDL()) | ||||
|  | ||||
|     def test_ie_key(self): | ||||
|         self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) | ||||
|  | ||||
|     def test_html_search_regex(self): | ||||
|         html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>' | ||||
|         search = lambda re, *args: self.ie._html_search_regex(re, html, *args) | ||||
|         self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video') | ||||
|  | ||||
|     def test_opengraph(self): | ||||
|         ie = self.ie | ||||
|         html = ''' | ||||
|             <meta name="og:title" content='Foo'/> | ||||
|             <meta content="Some video's description " name="og:description"/> | ||||
|             <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/> | ||||
|             ''' | ||||
|         self.assertEqual(ie._og_search_title(html), 'Foo') | ||||
|         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') | ||||
|         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -182,6 +182,24 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'vid-high') | ||||
|  | ||||
|     def test_format_selection_video(self): | ||||
|         formats = [ | ||||
|             {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'}, | ||||
|             {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'}, | ||||
|             {'format_id': 'vid', 'ext': 'mp4', 'preference': 3}, | ||||
|         ] | ||||
|         info_dict = {'formats': formats, 'extractor': 'test'} | ||||
|  | ||||
|         ydl = YDL({'format': 'bestvideo'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'dash-video-high') | ||||
|  | ||||
|         ydl = YDL({'format': 'worstvideo'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'dash-video-low') | ||||
|  | ||||
|     def test_youtube_format_selection(self): | ||||
|         order = [ | ||||
|             '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13', | ||||
|   | ||||
| @@ -9,7 +9,7 @@ import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
|  | ||||
| from test.helper import get_testcases | ||||
| from test.helper import gettestcases | ||||
|  | ||||
| from youtube_dl.extractor import ( | ||||
|     FacebookIE, | ||||
| @@ -105,7 +105,7 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|  | ||||
|     def test_no_duplicates(self): | ||||
|         ies = gen_extractors() | ||||
|         for tc in get_testcases(): | ||||
|         for tc in gettestcases(): | ||||
|             url = tc['url'] | ||||
|             for ie in ies: | ||||
|                 if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): | ||||
| @@ -124,6 +124,8 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|  | ||||
|     def test_vimeo_matching(self): | ||||
|         self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel']) | ||||
|         self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel']) | ||||
|         self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo']) | ||||
|         self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user']) | ||||
|         self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user']) | ||||
|         self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) | ||||
| @@ -139,6 +141,7 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|     def test_pbs(self): | ||||
|         # https://github.com/rg3/youtube-dl/issues/2350 | ||||
|         self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) | ||||
|         self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS']) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -8,7 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import ( | ||||
|     get_params, | ||||
|     get_testcases, | ||||
|     gettestcases, | ||||
|     try_rm, | ||||
|     md5, | ||||
|     report_warning | ||||
| @@ -51,7 +51,7 @@ def _file_md5(fn): | ||||
|     with open(fn, 'rb') as f: | ||||
|         return hashlib.md5(f.read()).hexdigest() | ||||
|  | ||||
| defs = get_testcases() | ||||
| defs = gettestcases() | ||||
|  | ||||
|  | ||||
| class TestDownload(unittest.TestCase): | ||||
| @@ -144,6 +144,10 @@ def generator(test_case): | ||||
|                         self.assertTrue( | ||||
|                             isinstance(got, compat_str) and match_rex.match(got), | ||||
|                             u'field %s (value: %r) should match %r' % (info_field, got, match_str)) | ||||
|                     elif isinstance(expected, type): | ||||
|                         got = info_dict.get(info_field) | ||||
|                         self.assertTrue(isinstance(got, expected), | ||||
|                             u'Expected type %r, but got value %r of type %r' % (expected, got, type(got))) | ||||
|                     else: | ||||
|                         if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||
|                             got = 'md5:' + md5(info_dict.get(info_field)) | ||||
| @@ -152,19 +156,19 @@ def generator(test_case): | ||||
|                         self.assertEqual(expected, got, | ||||
|                             u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) | ||||
|  | ||||
|                 # If checkable fields are missing from the test case, print the info_dict | ||||
|                 test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) | ||||
|                     for key, value in info_dict.items() | ||||
|                     if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location')) | ||||
|                 if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): | ||||
|                     sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') | ||||
|  | ||||
|                 # Check for the presence of mandatory fields | ||||
|                 for key in ('id', 'url', 'title', 'ext'): | ||||
|                     self.assertTrue(key in info_dict.keys() and info_dict[key]) | ||||
|                 # Check for mandatory fields that are automatically set by YoutubeDL | ||||
|                 for key in ['webpage_url', 'extractor', 'extractor_key']: | ||||
|                     self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) | ||||
|  | ||||
|                 # If checkable fields are missing from the test case, print the info_dict | ||||
|                 test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) | ||||
|                     for key, value in info_dict.items() | ||||
|                     if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) | ||||
|                 if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): | ||||
|                     sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') | ||||
|         finally: | ||||
|             try_rm_tcs_files() | ||||
|  | ||||
|   | ||||
| @@ -36,6 +36,7 @@ from youtube_dl.extractor import ( | ||||
|     RutubeChannelIE, | ||||
|     GoogleSearchIE, | ||||
|     GenericIE, | ||||
|     TEDIE, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -98,7 +99,7 @@ class TestPlaylists(unittest.TestCase): | ||||
|         result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], '5124905') | ||||
|         self.assertTrue(len(result['entries']) >= 11) | ||||
|         self.assertTrue(len(result['entries']) >= 6) | ||||
|  | ||||
|     def test_soundcloud_set(self): | ||||
|         dl = FakeYDL() | ||||
| @@ -248,16 +249,25 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'python language') | ||||
|         self.assertEqual(result['title'], 'python language') | ||||
|         self.assertTrue(len(result['entries']) == 15) | ||||
|         self.assertEqual(len(result['entries']), 15) | ||||
|  | ||||
|     def test_generic_rss_feed(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = GenericIE(dl) | ||||
|         result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml') | ||||
|         result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml') | ||||
|         self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml') | ||||
|         self.assertEqual(result['title'], 'Zero Punctuation') | ||||
|         self.assertTrue(len(result['entries']) > 10) | ||||
|  | ||||
|     def test_ted_playlist(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = TEDIE(dl) | ||||
|         result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], '10') | ||||
|         self.assertEqual(result['title'], 'Who are the hackers?') | ||||
|         self.assertTrue(len(result['entries']) >= 6) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -33,6 +33,7 @@ from youtube_dl.utils import ( | ||||
|     unified_strdate, | ||||
|     unsmuggle_url, | ||||
|     url_basename, | ||||
|     urlencode_postdata, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
|  | ||||
| @@ -261,5 +262,9 @@ class TestUtil(unittest.TestCase): | ||||
|             bam''') | ||||
|         self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam']) | ||||
|  | ||||
|     def test_urlencode_postdata(self): | ||||
|         data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) | ||||
|         self.assertTrue(isinstance(data, bytes)) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
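
The new `test_urlencode_postdata` case only asserts that the helper returns `bytes`. As a hedged sketch of how such a value is typically consumed (the login URL below is a placeholder, not something this diff touches):

```python
# Sketch only: urlencode_postdata() encodes a dict into bytes suitable as the
# body of a POST request; the URL here is an invented placeholder.
from youtube_dl.utils import urlencode_postdata, compat_urllib_request

data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
assert isinstance(data, bytes)
req = compat_urllib_request.Request('http://example.com/login', data)
```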
| @@ -1,4 +0,0 @@ | ||||
| # Legacy file for backwards compatibility, use youtube_dl.extractor instead! | ||||
|  | ||||
| from .extractor.common import InfoExtractor, SearchInfoExtractor | ||||
| from .extractor import gen_extractors, get_info_extractor | ||||
| @@ -4,6 +4,7 @@ | ||||
| from __future__ import absolute_import, unicode_literals | ||||
|  | ||||
| import collections | ||||
| import datetime | ||||
| import errno | ||||
| import io | ||||
| import json | ||||
| @@ -147,6 +148,8 @@ class YoutubeDL(object): | ||||
|                        again. | ||||
|     cookiefile:        File name where cookies should be read from and dumped to. | ||||
|     nocheckcertificate:Do not verify SSL certificates | ||||
|     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information. | ||||
|                        At the moment, this is only supported by YouTube. | ||||
|     proxy:             URL of the proxy server to use | ||||
|     socket_timeout:    Time to wait for unresponsive hosts, in seconds | ||||
|     bidi_workaround:   Work around buggy terminals without bidirectional text | ||||
| @@ -370,12 +373,15 @@ class YoutubeDL(object): | ||||
|         Print the message to stderr, it will be prefixed with 'WARNING:' | ||||
|         If stderr is a tty file the 'WARNING:' will be colored | ||||
|         ''' | ||||
|         if self._err_file.isatty() and os.name != 'nt': | ||||
|             _msg_header = '\033[0;33mWARNING:\033[0m' | ||||
|         if self.params.get('logger') is not None: | ||||
|             self.params['logger'].warning(message) | ||||
|         else: | ||||
|             _msg_header = 'WARNING:' | ||||
|         warning_message = '%s %s' % (_msg_header, message) | ||||
|         self.to_stderr(warning_message) | ||||
|             if self._err_file.isatty() and os.name != 'nt': | ||||
|                 _msg_header = '\033[0;33mWARNING:\033[0m' | ||||
|             else: | ||||
|                 _msg_header = 'WARNING:' | ||||
|             warning_message = '%s %s' % (_msg_header, message) | ||||
|             self.to_stderr(warning_message) | ||||
|  | ||||
|     def report_error(self, message, tb=None): | ||||
|         ''' | ||||
| @@ -413,9 +419,9 @@ class YoutubeDL(object): | ||||
|                 if template_dict.get('width') and template_dict.get('height'): | ||||
|                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) | ||||
|                 elif template_dict.get('height'): | ||||
|                     res = '%sp' % template_dict['height'] | ||||
|                     template_dict['resolution'] = '%sp' % template_dict['height'] | ||||
|                 elif template_dict.get('width'): | ||||
|                     res = '?x%d' % template_dict['width'] | ||||
|                     template_dict['resolution'] = '?x%d' % template_dict['width'] | ||||
|  | ||||
|             sanitize = lambda k, v: sanitize_filename( | ||||
|                 compat_str(v), | ||||
| @@ -529,7 +535,7 @@ class YoutubeDL(object): | ||||
|                 else: | ||||
|                     raise | ||||
|         else: | ||||
|             self.report_error('no suitable InfoExtractor: %s' % url) | ||||
|             self.report_error('no suitable InfoExtractor for URL %s' % url) | ||||
|  | ||||
|     def process_ie_result(self, ie_result, download=True, extra_info={}): | ||||
|         """ | ||||
| @@ -663,6 +669,18 @@ class YoutubeDL(object): | ||||
|                 if f.get('vcodec') == 'none'] | ||||
|             if audio_formats: | ||||
|                 return audio_formats[0] | ||||
|         elif format_spec == 'bestvideo': | ||||
|             video_formats = [ | ||||
|                 f for f in available_formats | ||||
|                 if f.get('acodec') == 'none'] | ||||
|             if video_formats: | ||||
|                 return video_formats[-1] | ||||
|         elif format_spec == 'worstvideo': | ||||
|             video_formats = [ | ||||
|                 f for f in available_formats | ||||
|                 if f.get('acodec') == 'none'] | ||||
|             if video_formats: | ||||
|                 return video_formats[0] | ||||
|         else: | ||||
|             extensions = ['mp4', 'flv', 'webm', '3gp'] | ||||
|             if format_spec in extensions: | ||||
| @@ -685,6 +703,11 @@ class YoutubeDL(object): | ||||
|         if 'display_id' not in info_dict and 'id' in info_dict: | ||||
|             info_dict['display_id'] = info_dict['id'] | ||||
|  | ||||
|         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None: | ||||
|             upload_date = datetime.datetime.utcfromtimestamp( | ||||
|                 info_dict['timestamp']) | ||||
|             info_dict['upload_date'] = upload_date.strftime('%Y%m%d') | ||||
|  | ||||
|         # This extractors handle format selection themselves | ||||
|         if info_dict['extractor'] in ['Youku']: | ||||
|             if download: | ||||
| @@ -698,8 +721,11 @@ class YoutubeDL(object): | ||||
|         else: | ||||
|             formats = info_dict['formats'] | ||||
|  | ||||
|         if not formats: | ||||
|             raise ExtractorError('No video formats found!') | ||||
|  | ||||
|         # We check that all the formats have the format and format_id fields | ||||
|         for (i, format) in enumerate(formats): | ||||
|         for i, format in enumerate(formats): | ||||
|             if format.get('format_id') is None: | ||||
|                 format['format_id'] = compat_str(i) | ||||
|             if format.get('format') is None: | ||||
| @@ -918,7 +944,7 @@ class YoutubeDL(object): | ||||
|                     self.to_screen('[%s] %s: Downloading thumbnail ...' % | ||||
|                                    (info_dict['extractor'], info_dict['id'])) | ||||
|                     try: | ||||
|                         uf = compat_urllib_request.urlopen(info_dict['thumbnail']) | ||||
|                         uf = self.urlopen(info_dict['thumbnail']) | ||||
|                         with open(thumb_filename, 'wb') as thumbf: | ||||
|                             shutil.copyfileobj(uf, thumbf) | ||||
|                         self.to_screen('[%s] %s: Writing thumbnail to: %s' % | ||||
| @@ -1164,7 +1190,7 @@ class YoutubeDL(object): | ||||
|  | ||||
|     def urlopen(self, req): | ||||
|         """ Start an HTTP download """ | ||||
|         return self._opener.open(req) | ||||
|         return self._opener.open(req, timeout=self._socket_timeout) | ||||
|  | ||||
|     def print_debug_header(self): | ||||
|         if not self.params.get('verbose'): | ||||
| @@ -1195,7 +1221,7 @@ class YoutubeDL(object): | ||||
|  | ||||
|     def _setup_opener(self): | ||||
|         timeout_val = self.params.get('socket_timeout') | ||||
|         timeout = 600 if timeout_val is None else float(timeout_val) | ||||
|         self._socket_timeout = 600 if timeout_val is None else float(timeout_val) | ||||
|  | ||||
|         opts_cookiefile = self.params.get('cookiefile') | ||||
|         opts_proxy = self.params.get('proxy') | ||||
| @@ -1233,7 +1259,3 @@ class YoutubeDL(object): | ||||
|         # (See https://github.com/rg3/youtube-dl/issues/1309 for details) | ||||
|         opener.addheaders = [] | ||||
|         self._opener = opener | ||||
|  | ||||
|         # TODO remove this global modification | ||||
|         compat_urllib_request.install_opener(opener) | ||||
|         socket.setdefaulttimeout(timeout) | ||||
|   | ||||
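
The `bestvideo`/`worstvideo` branches added to `YoutubeDL` above rely on `available_formats` already being sorted worst-to-best and on video-only formats carrying `acodec == 'none'`. A standalone sketch of that selection rule, using the same invented format ids as the new test case:

```python
# Standalone sketch of the selection rule added to format selection; the
# format dicts are examples, ordered worst-to-best as YoutubeDL expects
# after format sorting.
def pick_video_only(available_formats, spec):
    video_formats = [f for f in available_formats if f.get('acodec') == 'none']
    if not video_formats:
        return None
    return video_formats[-1] if spec == 'bestvideo' else video_formats[0]

formats = [
    {'format_id': 'dash-video-low', 'acodec': 'none'},
    {'format_id': 'dash-video-high', 'acodec': 'none'},
    {'format_id': 'vid'},  # muxed audio+video, excluded by the filter
]
assert pick_video_only(formats, 'bestvideo')['format_id'] == 'dash-video-high'
assert pick_video_only(formats, 'worstvideo')['format_id'] == 'dash-video-low'
```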
| @@ -50,12 +50,12 @@ __authors__  = ( | ||||
|     'Anthony Weems', | ||||
|     'David Wagner', | ||||
|     'Juan C. Olivares', | ||||
|     'Mattias Harrysson', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
|  | ||||
| import codecs | ||||
| import getpass | ||||
| import io | ||||
| import locale | ||||
| import optparse | ||||
| @@ -67,6 +67,7 @@ import sys | ||||
|  | ||||
|  | ||||
| from .utils import ( | ||||
|     compat_getpass, | ||||
|     compat_print, | ||||
|     DateRange, | ||||
|     decodeOption, | ||||
| @@ -236,6 +237,9 @@ def parseOpts(overrideArguments=None): | ||||
|         '--proxy', dest='proxy', default=None, metavar='URL', | ||||
|         help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') | ||||
|     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') | ||||
|     general.add_option( | ||||
|         '--prefer-insecure', action='store_true', dest='prefer_insecure', | ||||
|         help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') | ||||
|     general.add_option( | ||||
|         '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', | ||||
|         help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') | ||||
| @@ -256,7 +260,6 @@ def parseOpts(overrideArguments=None): | ||||
|         action='store_true', | ||||
|         help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') | ||||
|  | ||||
|  | ||||
|     selection.add_option( | ||||
|         '--playlist-start', | ||||
|         dest='playliststart', metavar='NUMBER', default=1, type=int, | ||||
| @@ -315,7 +318,7 @@ def parseOpts(overrideArguments=None): | ||||
|  | ||||
|     video_format.add_option('-f', '--format', | ||||
|             action='store', dest='format', metavar='FORMAT', default=None, | ||||
|             help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestaudio", "worst", and "worstaudio". By default, youtube-dl will pick the best quality.') | ||||
|             help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') | ||||
|     video_format.add_option('--all-formats', | ||||
|             action='store_const', dest='format', help='download all available video formats', const='all') | ||||
|     video_format.add_option('--prefer-free-formats', | ||||
| @@ -610,7 +613,7 @@ def _real_main(argv=None): | ||||
|     if opts.usetitle and opts.useid: | ||||
|         parser.error(u'using title conflicts with using video ID') | ||||
|     if opts.username is not None and opts.password is None: | ||||
|         opts.password = getpass.getpass(u'Type account password and press return:') | ||||
|         opts.password = compat_getpass(u'Type account password and press [Return]: ') | ||||
|     if opts.ratelimit is not None: | ||||
|         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) | ||||
|         if numeric_limit is None: | ||||
| @@ -755,6 +758,7 @@ def _real_main(argv=None): | ||||
|         'download_archive': download_archive_fn, | ||||
|         'cookiefile': opts.cookiefile, | ||||
|         'nocheckcertificate': opts.no_check_certificate, | ||||
|         'prefer_insecure': opts.prefer_insecure, | ||||
|         'proxy': opts.proxy, | ||||
|         'socket_timeout': opts.socket_timeout, | ||||
|         'bidi_workaround': opts.bidi_workaround, | ||||
|   | ||||
| @@ -49,7 +49,7 @@ class HttpFD(FileDownloader): | ||||
|         while count <= retries: | ||||
|             # Establish connection | ||||
|             try: | ||||
|                 data = compat_urllib_request.urlopen(request) | ||||
|                 data = self.ydl.urlopen(request) | ||||
|                 break | ||||
|             except (compat_urllib_error.HTTPError, ) as err: | ||||
|                 if (err.code < 500 or err.code >= 600) and err.code != 416: | ||||
| @@ -59,7 +59,7 @@ class HttpFD(FileDownloader): | ||||
|                     # Unable to resume (requested range not satisfiable) | ||||
|                     try: | ||||
|                         # Open the connection again without the range header | ||||
|                         data = compat_urllib_request.urlopen(basic_request) | ||||
|                         data = self.ydl.urlopen(basic_request) | ||||
|                         content_length = data.info()['Content-Length'] | ||||
|                     except (compat_urllib_error.HTTPError, ) as err: | ||||
|                         if err.code < 500 or err.code >= 600: | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| import subprocess | ||||
| @@ -22,7 +24,7 @@ class RtmpFD(FileDownloader): | ||||
|             proc_stderr_closed = False | ||||
|             while not proc_stderr_closed: | ||||
|                 # read line from stderr | ||||
|                 line = u'' | ||||
|                 line = '' | ||||
|                 while True: | ||||
|                     char = proc.stderr.read(1) | ||||
|                     if not char: | ||||
| @@ -46,7 +48,7 @@ class RtmpFD(FileDownloader): | ||||
|                     data_len = None | ||||
|                     if percent > 0: | ||||
|                         data_len = int(downloaded_data_len * 100 / percent) | ||||
|                     data_len_str = u'~' + format_bytes(data_len) | ||||
|                     data_len_str = '~' + format_bytes(data_len) | ||||
|                     self.report_progress(percent, data_len_str, speed, eta) | ||||
|                     cursor_in_new_line = False | ||||
|                     self._hook_progress({ | ||||
| @@ -76,12 +78,12 @@ class RtmpFD(FileDownloader): | ||||
|                         }) | ||||
|                     elif self.params.get('verbose', False): | ||||
|                         if not cursor_in_new_line: | ||||
|                             self.to_screen(u'') | ||||
|                             self.to_screen('') | ||||
|                         cursor_in_new_line = True | ||||
|                         self.to_screen(u'[rtmpdump] '+line) | ||||
|                         self.to_screen('[rtmpdump] '+line) | ||||
|             proc.wait() | ||||
|             if not cursor_in_new_line: | ||||
|                 self.to_screen(u'') | ||||
|                 self.to_screen('') | ||||
|             return proc.returncode | ||||
|  | ||||
|         url = info_dict['url'] | ||||
| @@ -102,7 +104,7 @@ class RtmpFD(FileDownloader): | ||||
|         try: | ||||
|             subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) | ||||
|         except (OSError, IOError): | ||||
|             self.report_error(u'RTMP download detected but "rtmpdump" could not be run') | ||||
|             self.report_error('RTMP download detected but "rtmpdump" could not be run') | ||||
|             return False | ||||
|  | ||||
|         # Download using rtmpdump. rtmpdump returns exit code 2 when | ||||
| @@ -127,7 +129,7 @@ class RtmpFD(FileDownloader): | ||||
|             basic_args += ['--live'] | ||||
|         if conn: | ||||
|             basic_args += ['--conn', conn] | ||||
|         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] | ||||
|         args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)] | ||||
|  | ||||
|         if sys.platform == 'win32' and sys.version_info < (3, 0): | ||||
|             # Windows subprocess module does not actually support Unicode | ||||
| @@ -150,26 +152,35 @@ class RtmpFD(FileDownloader): | ||||
|                 shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) | ||||
|             except ImportError: | ||||
|                 shell_quote = repr | ||||
|             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args)) | ||||
|             self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args)) | ||||
|  | ||||
|         RD_SUCCESS = 0 | ||||
|         RD_FAILED = 1 | ||||
|         RD_INCOMPLETE = 2 | ||||
|         RD_NO_CONNECT = 3 | ||||
|  | ||||
|         retval = run_rtmpdump(args) | ||||
|  | ||||
|         while (retval == 2 or retval == 1) and not test: | ||||
|         if retval == RD_NO_CONNECT: | ||||
|             self.report_error('[rtmpdump] Could not connect to RTMP server.') | ||||
|             return False | ||||
|  | ||||
|         while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live: | ||||
|             prevsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             self.to_screen(u'[rtmpdump] %s bytes' % prevsize) | ||||
|             self.to_screen('[rtmpdump] %s bytes' % prevsize) | ||||
|             time.sleep(5.0) # This seems to be needed | ||||
|             retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) | ||||
|             retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED]) | ||||
|             cursize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             if prevsize == cursize and retval == 1: | ||||
|             if prevsize == cursize and retval == RD_FAILED: | ||||
|                 break | ||||
|              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those | ||||
|             if prevsize == cursize and retval == 2 and cursize > 1024: | ||||
|                 self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.') | ||||
|                 retval = 0 | ||||
|             if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024: | ||||
|                 self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.') | ||||
|                 retval = RD_SUCCESS | ||||
|                 break | ||||
|         if retval == 0 or (test and retval == 2): | ||||
|         if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): | ||||
|             fsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             self.to_screen(u'[rtmpdump] %s bytes' % fsize) | ||||
|             self.to_screen('[rtmpdump] %s bytes' % fsize) | ||||
|             self.try_rename(tmpfilename, filename) | ||||
|             self._hook_progress({ | ||||
|                 'downloaded_bytes': fsize, | ||||
| @@ -179,6 +190,6 @@ class RtmpFD(FileDownloader): | ||||
|             }) | ||||
|             return True | ||||
|         else: | ||||
|             self.to_stderr(u"\n") | ||||
|             self.report_error(u'rtmpdump exited with code %d' % retval) | ||||
|             self.to_stderr('\n') | ||||
|             self.report_error('rtmpdump exited with code %d' % retval) | ||||
|             return False | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| from .academicearth import AcademicEarthCourseIE | ||||
| from .addanime import AddAnimeIE | ||||
| from .aftonbladet import AftonbladetIE | ||||
| from .anitube import AnitubeIE | ||||
| from .aparat import AparatIE | ||||
| from .appletrailers import AppleTrailersIE | ||||
| @@ -9,6 +10,7 @@ from .arte import ( | ||||
|     ArteTvIE, | ||||
|     ArteTVPlus7IE, | ||||
|     ArteTVCreativeIE, | ||||
|     ArteTVConcertIE, | ||||
|     ArteTVFutureIE, | ||||
|     ArteTVDDCIE, | ||||
| ) | ||||
| @@ -52,7 +54,6 @@ from .dailymotion import ( | ||||
|     DailymotionUserIE, | ||||
| ) | ||||
| from .daum import DaumIE | ||||
| from .depositfiles import DepositFilesIE | ||||
| from .dotsub import DotsubIE | ||||
| from .dreisat import DreiSatIE | ||||
| from .defense import DefenseGouvFrIE | ||||
| @@ -173,8 +174,10 @@ from .nowness import NownessIE | ||||
| from .nowvideo import NowVideoIE | ||||
| from .ooyala import OoyalaIE | ||||
| from .orf import ORFIE | ||||
| from .parliamentliveuk import ParliamentLiveUKIE | ||||
| from .pbs import PBSIE | ||||
| from .photobucket import PhotobucketIE | ||||
| from .playvid import PlayvidIE | ||||
| from .podomatic import PodomaticIE | ||||
| from .pornhd import PornHdIE | ||||
| from .pornhub import PornHubIE | ||||
| @@ -195,6 +198,7 @@ from .rutube import ( | ||||
|     RutubeMovieIE, | ||||
|     RutubePersonIE, | ||||
| ) | ||||
| from .rutv import RUTVIE | ||||
| from .savefrom import SaveFromIE | ||||
| from .servingsys import ServingSysIE | ||||
| from .sina import SinaIE | ||||
| @@ -241,6 +245,10 @@ from .tumblr import TumblrIE | ||||
| from .tutv import TutvIE | ||||
| from .tvigle import TvigleIE | ||||
| from .tvp import TvpIE | ||||
| from .udemy import ( | ||||
|     UdemyIE, | ||||
|     UdemyCourseIE | ||||
| ) | ||||
| from .unistra import UnistraIE | ||||
| from .ustream import UstreamIE, UstreamChannelIE | ||||
| from .vbox7 import Vbox7IE | ||||
| @@ -267,6 +275,7 @@ from .viki import VikiIE | ||||
| from .vk import VKIE | ||||
| from .vube import VubeIE | ||||
| from .wat import WatIE | ||||
| from .wdr import WDRIE | ||||
| from .weibo import WeiboIE | ||||
| from .wimp import WimpIE | ||||
| from .wistia import WistiaIE | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/aftonbladet.py (new file, 69 lines)
							| @@ -0,0 +1,69 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class AftonbladetIE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])' | ||||
|     _TEST = { | ||||
|         'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab', | ||||
|         'info_dict': { | ||||
|             'id': 'article36015', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna', | ||||
|             'description': 'Jupiters måne mest aktiv av alla himlakroppar', | ||||
|             'upload_date': '20140306', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.search(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('video_id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # find internal video meta data | ||||
|         META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' | ||||
|         internal_meta_id = self._html_search_regex( | ||||
|             r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id') | ||||
|         internal_meta_url = META_URL % internal_meta_id | ||||
|         internal_meta_json = self._download_json( | ||||
|             internal_meta_url, video_id, 'Downloading video meta data') | ||||
|  | ||||
|         # find internal video formats | ||||
|         FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s' | ||||
|         internal_video_id = internal_meta_json['videoId'] | ||||
|         internal_formats_url = FORMATS_URL % internal_video_id | ||||
|         internal_formats_json = self._download_json( | ||||
|             internal_formats_url, video_id, 'Downloading video formats') | ||||
|  | ||||
|         formats = [] | ||||
|         for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']: | ||||
|             p = fmt['paths'][0] | ||||
|             formats.append({ | ||||
|                 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']), | ||||
|                 'ext': 'mp4', | ||||
|                 'width': fmt['width'], | ||||
|                 'height': fmt['height'], | ||||
|                 'tbr': fmt['bitrate'], | ||||
|                 'protocol': 'http', | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished']) | ||||
|         upload_date = timestamp.strftime('%Y%m%d') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': internal_meta_json['title'], | ||||
|             'formats': formats, | ||||
|             'thumbnail': internal_meta_json['imageUrl'], | ||||
|             'description': internal_meta_json['shortPreamble'], | ||||
|             'upload_date': upload_date, | ||||
|             'duration': internal_meta_json['duration'], | ||||
|             'view_count': internal_meta_json['views'], | ||||
|         } | ||||
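
The new extractor above derives `upload_date` by formatting the epoch value in `timePublished`, and the `YoutubeDL.py` hunk earlier in this diff does the same from a generic `timestamp` field. The same conversion in isolation, with an arbitrary sample epoch (this snippet uses `utcfromtimestamp`, as the `YoutubeDL.py` hunk does):

```python
# Isolated sketch of an epoch -> YYYYMMDD conversion like the ones in this
# diff; 1394064000 is an arbitrary sample value (2014-03-06 UTC).
import datetime

timestamp = 1394064000
upload_date = datetime.datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d')
print(upload_date)  # '20140306'
```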
| @@ -72,18 +72,22 @@ class ArteTvIE(InfoExtractor): | ||||
|             return self._extract_liveweb(url, name, lang) | ||||
|  | ||||
|         if re.search(self._LIVE_URL, url) is not None: | ||||
|             raise ExtractorError(u'Arte live streams are not yet supported, sorry') | ||||
|             raise ExtractorError('Arte live streams are not yet supported, sorry') | ||||
|             # self.extractLiveStream(url) | ||||
|             # return | ||||
|  | ||||
|         raise ExtractorError('No video found') | ||||
|  | ||||
|     def _extract_video(self, url, video_id, lang): | ||||
|         """Extract from videos.arte.tv""" | ||||
|         ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') | ||||
|         ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') | ||||
|         ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata') | ||||
|         ref_xml_doc = self._download_xml( | ||||
|             ref_xml_url, video_id, note='Downloading metadata') | ||||
|         config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) | ||||
|         config_xml_url = config_node.attrib['ref'] | ||||
|         config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration') | ||||
|         config_xml = self._download_webpage( | ||||
|             config_xml_url, video_id, note='Downloading configuration') | ||||
|  | ||||
|         video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml)) | ||||
|         def _key(m): | ||||
| @@ -127,7 +131,7 @@ class ArteTvIE(InfoExtractor): | ||||
|  | ||||
| class ArteTVPlus7IE(InfoExtractor): | ||||
|     IE_NAME = 'arte.tv:+7' | ||||
|     _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' | ||||
|     _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' | ||||
|  | ||||
|     @classmethod | ||||
|     def _extract_url_info(cls, url): | ||||
| @@ -198,6 +202,8 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|                     re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None, | ||||
|                     # The version with sourds/mal subtitles has also lower relevance | ||||
|                     re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None, | ||||
|                     # Prefer http downloads over m3u8 | ||||
|                     0 if f['url'].endswith('m3u8') else 1, | ||||
|                 ) | ||||
|         formats = sorted(formats, key=sort_key) | ||||
|         def _format(format_info): | ||||
| @@ -238,8 +244,9 @@ class ArteTVCreativeIE(ArteTVPlus7IE): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', | ||||
|         'file': '050489-002.mp4', | ||||
|         'info_dict': { | ||||
|             'id': '050489-002', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design', | ||||
|         }, | ||||
|     } | ||||
| @@ -251,8 +258,9 @@ class ArteTVFutureIE(ArteTVPlus7IE): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', | ||||
|         'file': '050940-003.mp4', | ||||
|         'info_dict': { | ||||
|             'id': '050940-003', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Les champignons au secours de la planète', | ||||
|         }, | ||||
|     } | ||||
| @@ -266,7 +274,7 @@ class ArteTVFutureIE(ArteTVPlus7IE): | ||||
|  | ||||
| class ArteTVDDCIE(ArteTVPlus7IE): | ||||
|     IE_NAME = 'arte.tv:ddc' | ||||
|     _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)' | ||||
|     _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id, lang = self._extract_url_info(url) | ||||
| @@ -280,3 +288,19 @@ class ArteTVDDCIE(ArteTVPlus7IE): | ||||
|         javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator') | ||||
|         json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url') | ||||
|         return self._extract_from_json_url(json_url, video_id, lang) | ||||
|  | ||||
|  | ||||
| class ArteTVConcertIE(ArteTVPlus7IE): | ||||
|     IE_NAME = 'arte.tv:concert' | ||||
|     _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde', | ||||
|         'md5': '9ea035b7bd69696b67aa2ccaaa218161', | ||||
|         'info_dict': { | ||||
|             'id': '186', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"', | ||||
|             'upload_date': '20140128', | ||||
|         }, | ||||
|     } | ||||
|   | ||||
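
The extra term added to `sort_key` in the arte.tv hunk above folds an "HTTP before m3u8" preference into the existing tuple comparison: higher tuple values sort later, and later means preferred under the best-format-last ordering used elsewhere in this diff. A tiny self-contained illustration of that tuple-key idiom with invented format data:

```python
# Illustration (invented data) of the tuple sort key idiom: each trailing
# term breaks ties from the earlier ones, and a higher value sorts later,
# i.e. is preferred when the best format is taken from the end of the list.
formats = [
    {'format_id': 'hls',  'height': 720, 'url': 'http://example.com/video.m3u8'},
    {'format_id': 'http', 'height': 720, 'url': 'http://example.com/video.mp4'},
]

def sort_key(f):
    return (
        f.get('height', -1),
        0 if f['url'].endswith('m3u8') else 1,  # prefer plain HTTP over m3u8
    )

formats.sort(key=sort_key)
assert formats[-1]['format_id'] == 'http'
```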
| @@ -9,21 +9,35 @@ from ..utils import ExtractorError | ||||
|  | ||||
| class BRIE(InfoExtractor): | ||||
|     IE_DESC = "Bayerischer Rundfunk Mediathek" | ||||
|     _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$" | ||||
|     _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$" | ||||
|     _BASE_URL = "http://www.br.de" | ||||
|  | ||||
|     _TEST = { | ||||
|         "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", | ||||
|         "md5": "c4f83cf0f023ba5875aba0bf46860df2", | ||||
|         "info_dict": { | ||||
|             "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", | ||||
|             "ext": "mp4", | ||||
|             "title": "Feiern und Verzichten", | ||||
|             "description": "Anselm Grün: Feiern und Verzichten", | ||||
|             "uploader": "BR/Birgit Baier", | ||||
|             "upload_date": "20140301" | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", | ||||
|             "md5": "c4f83cf0f023ba5875aba0bf46860df2", | ||||
|             "info_dict": { | ||||
|                 "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", | ||||
|                 "ext": "mp4", | ||||
|                 "title": "Feiern und Verzichten", | ||||
|                 "description": "Anselm Grün: Feiern und Verzichten", | ||||
|                 "uploader": "BR/Birgit Baier", | ||||
|                 "upload_date": "20140301" | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             "url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html", | ||||
|             "md5": "ab451b09d861dbed7d7cc9ab0be19ebe", | ||||
|             "info_dict": { | ||||
|                 "id": "2c060e69-3a27-4e13-b0f0-668fac17d812", | ||||
|                 "ext": "mp4", | ||||
|                 "title": "Über den Pass", | ||||
|                 "description": "Die Eroberung der Alpen: Über den Pass", | ||||
|                 "uploader": None, | ||||
|                 "upload_date": None | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -33,16 +47,21 @@ class BRIE(InfoExtractor): | ||||
|             r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL") | ||||
|         xml = self._download_xml(self._BASE_URL + xml_url, None) | ||||
|  | ||||
|         videos = [{ | ||||
|             "id": xml_video.get("externalId"), | ||||
|             "title": xml_video.find("title").text, | ||||
|             "formats": self._extract_formats(xml_video.find("assets")), | ||||
|             "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")), | ||||
|             "description": " ".join(xml_video.find("shareTitle").text.splitlines()), | ||||
|             "uploader": xml_video.find("author").text, | ||||
|             "upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))), | ||||
|             "webpage_url": xml_video.find("permalink").text, | ||||
|         } for xml_video in xml.findall("video")] | ||||
|         videos = [] | ||||
|         for xml_video in xml.findall("video"): | ||||
|             video = { | ||||
|                 "id": xml_video.get("externalId"), | ||||
|                 "title": xml_video.find("title").text, | ||||
|                 "formats": self._extract_formats(xml_video.find("assets")), | ||||
|                 "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")), | ||||
|                 "description": " ".join(xml_video.find("shareTitle").text.splitlines()), | ||||
|                 "webpage_url": xml_video.find("permalink").text | ||||
|             } | ||||
|             if xml_video.find("author").text: | ||||
|                 video["uploader"] = xml_video.find("author").text | ||||
|             if xml_video.find("broadcastDate").text: | ||||
|                 video["upload_date"] =  "".join(reversed(xml_video.find("broadcastDate").text.split("."))) | ||||
|             videos.append(video) | ||||
|  | ||||
|         if len(videos) > 1: | ||||
|             self._downloader.report_warning( | ||||
|   | ||||
| @@ -17,8 +17,9 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             'id': '6902724', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Comic-Con Cosplay Catastrophe', | ||||
|             'description': 'Fans get creative this year', | ||||
|             'description': "Fans get creative this year at San Diego.  Too creative.  And yes, that's really Joss Whedon.", | ||||
|             'age_limit': 13, | ||||
|             'duration': 187, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
| @@ -28,22 +29,22 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             'id': '3505939', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Font Conference', | ||||
|             'description': 'This video wasn\'t long enough,', | ||||
|             'description': "This video wasn't long enough, so we made it double-spaced.", | ||||
|             'age_limit': 10, | ||||
|             'duration': 179, | ||||
|         }, | ||||
|     }, | ||||
|     # embedded youtube video | ||||
|     { | ||||
|         'url': 'http://www.collegehumor.com/embed/6950457', | ||||
|         'url': 'http://www.collegehumor.com/embed/6950306', | ||||
|         'info_dict': { | ||||
|             'id': 'W5gMp3ZjYg4', | ||||
|             'id': 'Z-bao9fg6Yc', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', | ||||
|             'uploader': 'FunnyPlox TV', | ||||
|             'uploader_id': 'funnyploxtv', | ||||
|             'description': 'md5:7ded37421526d54afdf005e25bc2b7a3', | ||||
|             'upload_date': '20140128', | ||||
|             'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!', | ||||
|             'uploader': 'Mark Dice', | ||||
|             'uploader_id': 'MarkDice', | ||||
|             'description': 'md5:62c3dab9351fac7bb44b53b69511d87f', | ||||
|             'upload_date': '20140127', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -87,6 +88,7 @@ class CollegeHumorIE(InfoExtractor): | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         duration = int_or_none(vdata.get('duration'), 1000) | ||||
|         like_count = int_or_none(vdata.get('likes')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -96,4 +98,5 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             'formats': formats, | ||||
|             'age_limit': age_limit, | ||||
|             'duration': duration, | ||||
|             'like_count': like_count, | ||||
|         } | ||||
|   | ||||
| @@ -14,7 +14,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class ComedyCentralIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/ | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/ | ||||
|         (video-clips|episodes|cc-studios|video-collections) | ||||
|         /(?P<title>.*)''' | ||||
|     _FEED_URL = 'http://comedycentral.com/feeds/mrss/' | ||||
|   | ||||
| @@ -97,7 +97,9 @@ class InfoExtractor(object): | ||||
|     thumbnail:      Full URL to a video thumbnail image. | ||||
|     description:    One-line video description. | ||||
|     uploader:       Full name of the video uploader. | ||||
|     timestamp:      UNIX timestamp of the moment the video became available. | ||||
|     upload_date:    Video upload date (YYYYMMDD). | ||||
|                     If not explicitly set, calculated from timestamp. | ||||
|     uploader_id:    Nickname or id of the video uploader. | ||||
|     location:       Physical location of the video. | ||||
|     subtitles:      The subtitle file contents as a dictionary in the format | ||||
| @@ -118,9 +120,6 @@ class InfoExtractor(object): | ||||
|     _real_extract() methods and define a _VALID_URL regexp. | ||||
|     Probably, they should also be added to the list of extractors. | ||||
|  | ||||
|     _real_extract() must return a *list* of information dictionaries as | ||||
|     described above. | ||||
|  | ||||
|     Finally, the _WORKING attribute should be set to False for broken IEs | ||||
|     in order to warn the users and skip the tests. | ||||
|     """ | ||||
|   | ||||
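The docstring change above introduces `timestamp` (UNIX time) and notes that `upload_date` (YYYYMMDD) is derived from it when an extractor does not set it explicitly. A hedged sketch of that documented relationship, using an invented info dict; the actual conversion lives elsewhere in youtube-dl:

    import datetime

    def upload_date_from_timestamp(timestamp):
        """Turn a UNIX timestamp into the YYYYMMDD string youtube-dl stores as upload_date."""
        if timestamp is None:
            return None
        return datetime.datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d')

    info = {'id': '42', 'title': 'example video', 'timestamp': 1395360000}  # invented info dict
    info.setdefault('upload_date', upload_date_from_timestamp(info['timestamp']))
    print(info['upload_date'])  # 1395360000 is 2014-03-21 00:00 UTC -> '20140321'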
| @@ -12,6 +12,7 @@ from ..utils import ( | ||||
|     get_element_by_id, | ||||
|     orderedSet, | ||||
|     str_to_int, | ||||
|     int_or_none, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
| @@ -124,7 +125,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             if video_url is not None: | ||||
|                 m_size = re.search(r'H264-(\d+)x(\d+)', video_url) | ||||
|                 if m_size is not None: | ||||
|                     width, height = m_size.group(1), m_size.group(2) | ||||
|                     width, height = map(int_or_none, (m_size.group(1), m_size.group(2))) | ||||
|                 else: | ||||
|                     width, height = None, None | ||||
|                 formats.append({ | ||||
|   | ||||
| @@ -1,60 +0,0 @@ | ||||
| import re | ||||
| import os | ||||
| import socket | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DepositFilesIE(InfoExtractor): | ||||
|     """Information extractor for depositfiles.com""" | ||||
|  | ||||
|     _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         file_id = url.split('/')[-1] | ||||
|         # Rebuild url in english locale | ||||
|         url = 'http://depositfiles.com/en/files/' + file_id | ||||
|  | ||||
|         # Retrieve file webpage with 'Free download' button pressed | ||||
|         free_download_indication = {'gateway_result' : '1'} | ||||
|         request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication)) | ||||
|         try: | ||||
|             self.report_download_webpage(file_id) | ||||
|             webpage = compat_urllib_request.urlopen(request).read() | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err)) | ||||
|  | ||||
|         # Search for the real file URL | ||||
|         mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage) | ||||
|         if (mobj is None) or (mobj.group(1) is None): | ||||
|             # Try to figure out reason of the error. | ||||
|             mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) | ||||
|             if (mobj is not None) and (mobj.group(1) is not None): | ||||
|                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() | ||||
|                 raise ExtractorError(u'%s' % restriction_message) | ||||
|             else: | ||||
|                 raise ExtractorError(u'Unable to extract download URL from: %s' % url) | ||||
|  | ||||
|         file_url = mobj.group(1) | ||||
|         file_extension = os.path.splitext(file_url)[1][1:] | ||||
|  | ||||
|         # Search for file title | ||||
|         file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title') | ||||
|  | ||||
|         return [{ | ||||
|             'id':       file_id.decode('utf-8'), | ||||
|             'url':      file_url.decode('utf-8'), | ||||
|             'uploader': None, | ||||
|             'upload_date':  None, | ||||
|             'title':    file_title, | ||||
|             'ext':      file_extension.decode('utf-8'), | ||||
|         }] | ||||
| @@ -11,16 +11,15 @@ from ..utils import ( | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     urlencode_postdata, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FacebookIE(InfoExtractor): | ||||
|     """Information Extractor for Facebook""" | ||||
|  | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://)?(?:\w+\.)?facebook\.com/ | ||||
|         https?://(?:\w+\.)?facebook\.com/ | ||||
|         (?:[^#?]*\#!/)? | ||||
|         (?:video/video\.php|photo\.php|video/embed)\?(?:.*?) | ||||
|         (?:v|video_id)=(?P<id>[0-9]+) | ||||
| @@ -36,14 +35,10 @@ class FacebookIE(InfoExtractor): | ||||
|             'id': '120708114770723', | ||||
|             'ext': 'mp4', | ||||
|             'duration': 279, | ||||
|             'title': 'PEOPLE ARE AWESOME 2013' | ||||
|             'title': 'PEOPLE ARE AWESOME 2013', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def report_login(self): | ||||
|         """Report attempt to log in.""" | ||||
|         self.to_screen('Logging in') | ||||
|  | ||||
|     def _login(self): | ||||
|         (useremail, password) = self._get_login_info() | ||||
|         if useremail is None: | ||||
| @@ -51,8 +46,8 @@ class FacebookIE(InfoExtractor): | ||||
|  | ||||
|         login_page_req = compat_urllib_request.Request(self._LOGIN_URL) | ||||
|         login_page_req.add_header('Cookie', 'locale=en_US') | ||||
|         self.report_login() | ||||
|         login_page = self._download_webpage(login_page_req, None, note=False, | ||||
|         login_page = self._download_webpage(login_page_req, None, | ||||
|             note='Downloading login page', | ||||
|             errnote='Unable to download login page') | ||||
|         lsd = self._search_regex( | ||||
|             r'<input type="hidden" name="lsd" value="([^"]*)"', | ||||
| @@ -70,23 +65,25 @@ class FacebookIE(InfoExtractor): | ||||
|             'timezone': '-60', | ||||
|             'trynum': '1', | ||||
|             } | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form)) | ||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         try: | ||||
|             login_results = compat_urllib_request.urlopen(request).read() | ||||
|             login_results = self._download_webpage(request, None, | ||||
|                 note='Logging in', errnote='unable to fetch login page') | ||||
|             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: | ||||
|                 self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.') | ||||
|                 return | ||||
|  | ||||
|             check_form = { | ||||
|                 'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'), | ||||
|                 'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'), | ||||
|                 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'), | ||||
|                 'name_action_selected': 'dont_save', | ||||
|                 'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'), | ||||
|                 'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'), | ||||
|             } | ||||
|             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form)) | ||||
|             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) | ||||
|             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|             check_response = compat_urllib_request.urlopen(check_req).read() | ||||
|             check_response = self._download_webpage(check_req, None, | ||||
|                 note='Confirming login') | ||||
|             if re.search(r'id="checkpointSubmitButton"', check_response) is not None: | ||||
|                 self._downloader.report_warning('Unable to confirm login, you have to log in in your browser and authorize the login.') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
| @@ -98,8 +95,6 @@ class FacebookIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id | ||||
| @@ -125,18 +120,14 @@ class FacebookIE(InfoExtractor): | ||||
|             video_url = video_data['sd_src'] | ||||
|         if not video_url: | ||||
|             raise ExtractorError('Cannot find video URL') | ||||
|         video_duration = int(video_data['video_duration']) | ||||
|         thumbnail = video_data['thumbnail_src'] | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title') | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'duration': video_duration, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': int(video_data['video_duration']), | ||||
|             'thumbnail': video_data['thumbnail_src'], | ||||
|         } | ||||
|         return [info] | ||||
|   | ||||
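The Facebook login changes above switch the POST bodies to `urlencode_postdata` and route the requests through `_download_webpage`. Presumably the helper exists because on Python 3 a request body must be bytes rather than the str that `urlencode` returns; below is a standalone sketch of that pattern using only the standard library (the endpoint and form fields are placeholders, not Facebook's real ones):

    import urllib.parse
    import urllib.request

    def urlencode_postdata(fields):
        """URL-encode a form dict and return bytes, which is what a POST body must be on Python 3."""
        return urllib.parse.urlencode(fields).encode('utf-8')

    # Placeholder login endpoint and form fields.
    login_form = {'email': 'user@example.com', 'pass': 'secret', 'login': 'Log In'}
    req = urllib.request.Request('https://example.com/login.php',
                                 data=urlencode_postdata(login_form))
    req.add_header('Content-Type', 'application/x-www-form-urlencoded')
    print(req.get_method())  # attaching a data payload makes urllib send a POST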
| @@ -1,12 +1,13 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class FunnyOrDieIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', | ||||
|         'file': '0732f586d7.mp4', | ||||
| @@ -30,10 +31,23 @@ class FunnyOrDieIE(InfoExtractor): | ||||
|             [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''], | ||||
|             webpage, 'video URL', flags=re.DOTALL) | ||||
|  | ||||
|         if mobj.group('type') == 'embed': | ||||
|             post_json = self._search_regex( | ||||
|                 r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details') | ||||
|             post = json.loads(post_json) | ||||
|             title = post['name'] | ||||
|             description = post.get('description') | ||||
|             thumbnail = post.get('picture') | ||||
|         else: | ||||
|             title = self._og_search_title(webpage) | ||||
|             description = self._og_search_description(webpage) | ||||
|             thumbnail = None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|   | ||||
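For embed pages the FunnyOrDie extractor now reads title, description and thumbnail out of an inline `fb_post` JavaScript object instead of the Open Graph tags. A small sketch of that regex-plus-`json.loads` step against an invented page fragment:

    import json
    import re

    # Invented embed-page fragment carrying the inline "fb_post" object the extractor looks for.
    webpage = ('<script>var fb_post = {"name": "Sample clip", '
               '"description": "A short description", '
               '"picture": "http://example.com/thumb.jpg"};</script>')

    post_json = re.search(r'fb_post\s*=\s*(\{.*?\});', webpage).group(1)
    post = json.loads(post_json)
    print(post['name'], post.get('picture'))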
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -6,13 +8,14 @@ from .common import InfoExtractor | ||||
| class GamekingsIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", | ||||
|         u'file': u'20130811.mp4', | ||||
|         'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', | ||||
|         # MD5 is flaky, seems to change regularly | ||||
|         #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3', | ||||
|         # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review", | ||||
|             u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.", | ||||
|             'id': '20130811', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', | ||||
|             'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,6 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| @@ -17,6 +16,7 @@ from ..utils import ( | ||||
|  | ||||
|     ExtractorError, | ||||
|     HEADRequest, | ||||
|     parse_xml, | ||||
|     smuggle_url, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
| @@ -24,6 +24,7 @@ from ..utils import ( | ||||
| ) | ||||
| from .brightcove import BrightcoveIE | ||||
| from .ooyala import OoyalaIE | ||||
| from .rutv import RUTVIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -116,6 +117,60 @@ class GenericIE(InfoExtractor): | ||||
|             'params': { | ||||
|                 'skip_download': False, | ||||
|             } | ||||
|         }, | ||||
|         # embed.ly video | ||||
|         { | ||||
|             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', | ||||
|             'info_dict': { | ||||
|                 'id': '9ODmcdjQcHQ', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second', | ||||
|                 'upload_date': '20140225', | ||||
|                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff', | ||||
|                 'uploader': 'Tested', | ||||
|                 'uploader_id': 'testedcom', | ||||
|             }, | ||||
|             # No need to test YoutubeIE here | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # funnyordie embed | ||||
|         { | ||||
|             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns', | ||||
|             'md5': '7cf780be104d40fea7bae52eed4a470e', | ||||
|             'info_dict': { | ||||
|                 'id': '18e820ec3f', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama', | ||||
|                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', | ||||
|             }, | ||||
|         }, | ||||
|         # RUTV embed | ||||
|         { | ||||
|             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html', | ||||
|             'info_dict': { | ||||
|                 'id': '776940', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Охотское море стало целиком российским', | ||||
|                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # Embedded TED video | ||||
|         { | ||||
|             'url': 'http://en.support.wordpress.com/videos/ted-talks/', | ||||
|             'md5': 'deeeabcc1085eb2ba205474e7235a3d5', | ||||
|             'info_dict': { | ||||
|                 'id': '981', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'My web playroom', | ||||
|                 'uploader': 'Ze Frank', | ||||
|                 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b', | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -142,9 +197,14 @@ class GenericIE(InfoExtractor): | ||||
|                     newurl = newurl.replace(' ', '%20') | ||||
|                     newheaders = dict((k,v) for k,v in req.headers.items() | ||||
|                                       if k.lower() not in ("content-length", "content-type")) | ||||
|                     try: | ||||
|                         # This function was deprecated in python 3.3 and removed in 3.4 | ||||
|                         origin_req_host = req.get_origin_req_host() | ||||
|                     except AttributeError: | ||||
|                         origin_req_host = req.origin_req_host | ||||
|                     return HEADRequest(newurl, | ||||
|                                        headers=newheaders, | ||||
|                                        origin_req_host=req.get_origin_req_host(), | ||||
|                                        origin_req_host=origin_req_host, | ||||
|                                        unverifiable=True) | ||||
|                 else: | ||||
|                     raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp) | ||||
| @@ -211,7 +271,7 @@ class GenericIE(InfoExtractor): | ||||
|             else: | ||||
|                 assert ':' in default_search | ||||
|                 return self.url_result(default_search + url) | ||||
|         video_id = os.path.splitext(url.split('/')[-1])[0] | ||||
|         video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0] | ||||
|  | ||||
|         self.to_screen('%s: Requesting header' % video_id) | ||||
|  | ||||
| @@ -257,7 +317,7 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Is it an RSS feed? | ||||
|         try: | ||||
|             doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8')) | ||||
|             doc = parse_xml(webpage) | ||||
|             if doc.tag == 'rss': | ||||
|                 return self._extract_rss(url, video_id, doc) | ||||
|         except compat_xml_parse_error: | ||||
| @@ -296,9 +356,9 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for embedded (iframe) Vimeo player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage) | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) | ||||
|         if mobj: | ||||
|             player_url = unescapeHTML(mobj.group(1)) | ||||
|             player_url = unescapeHTML(mobj.group('url')) | ||||
|             surl = smuggle_url(player_url, {'Referer': url}) | ||||
|             return self.url_result(surl, 'Vimeo') | ||||
|  | ||||
| @@ -407,6 +467,27 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'HuffPost') | ||||
|  | ||||
|         # Look for embed.ly | ||||
|         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url')) | ||||
|         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(compat_urllib_parse.unquote(mobj.group('url'))) | ||||
|  | ||||
|         # Look for funnyordie embed | ||||
|         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) | ||||
|         if matches: | ||||
|             urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie') | ||||
|                      for eurl in matches] | ||||
|             return self.playlist_result( | ||||
|                 urlrs, playlist_id=video_id, playlist_title=video_title) | ||||
|  | ||||
|         # Look for embedded RUTV player | ||||
|         rutv_url = RUTVIE._extract_url(webpage) | ||||
|         if rutv_url: | ||||
|             return self.url_result(rutv_url, 'RUTV') | ||||
|  | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
| @@ -418,6 +499,13 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is None: | ||||
|             # Broaden the search a little bit: JWPlayer JS loader | ||||
|             mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) | ||||
|  | ||||
|         # Look for embedded TED player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'TED') | ||||
|  | ||||
|         if mobj is None: | ||||
|             # Try to find twitter cards info | ||||
|             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) | ||||
|   | ||||
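Among the generic-extractor changes above, the Vimeo iframe regex now captures the opening quote and requires the same quote to close the attribute, so both single- and double-quoted `src` values match. A standalone illustration of that backreference trick on two hypothetical pages:

    import re

    # Hypothetical pages: one embeds the player URL in double quotes, the other in single quotes.
    pages = [
        '<iframe src="//player.vimeo.com/video/123456" width="500"></iframe>',
        "<iframe src='//player.vimeo.com/video/123456' width='500'></iframe>",
    ]

    # (["\']) captures whichever quote opened the attribute and \1 requires the same quote to close it,
    # so the non-greedy URL group cannot run past the end of the attribute value.
    pattern = r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1'

    for page in pages:
        mobj = re.search(pattern, page)
        print(mobj.group('url'))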
| @@ -46,6 +46,6 @@ class GoogleSearchIE(SearchInfoExtractor): | ||||
|                     'url': mobj.group(1) | ||||
|                 }) | ||||
|  | ||||
|             if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage): | ||||
|             if (len(entries) >= n) or not re.search(r'id="pnnext"', webpage): | ||||
|                 res['entries'] = entries[:n] | ||||
|                 return res | ||||
|   | ||||
| @@ -6,7 +6,10 @@ from random import random | ||||
| from math import floor | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_request | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class IPrimaIE(InfoExtractor): | ||||
| @@ -36,6 +39,7 @@ class IPrimaIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True,  # requires rtmpdump | ||||
|         }, | ||||
|         'skip': 'Do not have permission to access this page', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -44,6 +48,10 @@ class IPrimaIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage): | ||||
|             raise ExtractorError( | ||||
|                 '%s said: You do not have permission to access this page' % self.IE_NAME, expected=True) | ||||
|  | ||||
|         player_url = ( | ||||
|             'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % | ||||
|             (floor(random()*1073741824), floor(random()*1073741824)) | ||||
|   | ||||
| @@ -1,56 +1,61 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     RegexNotFoundError, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class JukeboxIE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html' | ||||
|     _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>' | ||||
|     _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"' | ||||
|     _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>' | ||||
|     _IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html', | ||||
|         'md5': '5dc6477e74b1e37042ac5acedd8413e5', | ||||
|         'info_dict': { | ||||
|             'id': 'r303r', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Kosheen-En Vivo Pride', | ||||
|             'uploader': 'Kosheen', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         html = self._download_webpage(url, video_id) | ||||
|  | ||||
|         mobj = re.search(self._IFRAME, html) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Cannot extract iframe url') | ||||
|         iframe_url = unescapeHTML(mobj.group('iframe')) | ||||
|         iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url')) | ||||
|  | ||||
|         iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe') | ||||
|         mobj = re.search(r'class="jkb_waiting"', iframe_html) | ||||
|         if mobj is not None: | ||||
|             raise ExtractorError(u'Video is not available(in your country?)!') | ||||
|         if re.search(r'class="jkb_waiting"', iframe_html) is not None: | ||||
|             raise ExtractorError('Video is not available (in your country?)!') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         mobj = re.search(self._VIDEO_URL, iframe_html) | ||||
|         if mobj is None: | ||||
|             mobj = re.search(self._IS_YOUTUBE, iframe_html) | ||||
|             if mobj is None: | ||||
|                 raise ExtractorError(u'Cannot extract video url') | ||||
|             youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/') | ||||
|             self.to_screen(u'Youtube video detected') | ||||
|             return self.url_result(youtube_url,ie='Youtube') | ||||
|         video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/') | ||||
|         video_ext = unescapeHTML(mobj.group('video_ext')) | ||||
|         try: | ||||
|             video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"', | ||||
|                 iframe_html, 'video url') | ||||
|             video_url = unescapeHTML(video_url).replace('\/', '/') | ||||
|         except RegexNotFoundError: | ||||
|             youtube_url = self._search_regex( | ||||
|                 r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"', | ||||
|                 iframe_html, 'youtube url') | ||||
|             youtube_url = unescapeHTML(youtube_url).replace('\/', '/') | ||||
|             self.to_screen('Youtube video detected') | ||||
|             return self.url_result(youtube_url, ie='Youtube') | ||||
|  | ||||
|         mobj = re.search(self._TITLE, html) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Cannot extract title') | ||||
|         title = unescapeHTML(mobj.group('title')) | ||||
|         artist = unescapeHTML(mobj.group('artist')) | ||||
|         title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>', | ||||
|             html, 'title') | ||||
|         artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>', | ||||
|             html, 'artist') | ||||
|  | ||||
|         return [{'id': video_id, | ||||
|                  'url': video_url, | ||||
|                  'title': artist + '-' + title, | ||||
|                  'ext': video_ext | ||||
|                  }] | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': artist + '-' + title, | ||||
|             'uploader': artist, | ||||
|         } | ||||
|   | ||||
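The rewritten Jukebox extractor first tries the direct media URL in the player config and only falls back to an embedded YouTube link when that regex fails, unescaping the JavaScript-style `\/` sequences in either case. A rough sketch of that fallback logic with plain `re` (the sample config strings are invented):

    import re

    def pick_video_url(iframe_html):
        """Prefer a direct media URL from the player config; otherwise fall back to an embedded YouTube link."""
        direct = re.search(r'"config":\{"file":"(http:[^"]+\?mdtk=[0-9]+)"', iframe_html)
        if direct:
            return 'direct', direct.group(1).replace('\\/', '/')
        youtube = re.search(r'"config":\{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"', iframe_html)
        if youtube:
            return 'youtube', youtube.group(1).replace('\\/', '/')
        raise ValueError('Cannot extract video url')

    # Invented player configs exercising both branches.
    print(pick_video_url('{"config":{"file":"http:\\/\\/cdn.example.com\\/clip.flv?mdtk=123"}}'))
    print(pick_video_url('{"config":{"file":"http:\\/\\/www.youtube.com\\/watch?v=abc123XYZ89"}}'))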
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class KontrTubeIE(InfoExtractor): | ||||
| @@ -32,27 +33,26 @@ class KontrTubeIE(InfoExtractor): | ||||
|  | ||||
|         video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL') | ||||
|         thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False) | ||||
|         title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, | ||||
|             'video title') | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title') | ||||
|         description = self._html_search_meta('description', webpage, 'video description') | ||||
|  | ||||
|         mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', | ||||
|             webpage) | ||||
|         mobj = re.search( | ||||
|             r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage) | ||||
|         duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None | ||||
|  | ||||
|         view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, | ||||
|             'view count', fatal=False) | ||||
|         view_count = int(view_count) if view_count is not None else None | ||||
|         view_count = self._html_search_regex( | ||||
|             r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False) | ||||
|  | ||||
|         comment_count = None | ||||
|         comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', | ||||
|             fatal=False) | ||||
|         comment_str = self._html_search_regex( | ||||
|             r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False) | ||||
|         if comment_str.startswith('комментариев нет'): | ||||
|             comment_count = 0 | ||||
|         else: | ||||
|             mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str) | ||||
|             if mobj: | ||||
|                 comment_count = int(mobj.group('total')) | ||||
|                 comment_count = mobj.group('total') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -61,6 +61,6 @@ class KontrTubeIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|             'view_count': int_or_none(view_count), | ||||
|             'comment_count': int_or_none(comment_count), | ||||
|         } | ||||
| @@ -8,7 +8,9 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     ExtractorError | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -19,16 +21,17 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|     _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' | ||||
|     _NETRC_MACHINE = 'lynda' | ||||
|  | ||||
|     _SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account' | ||||
|     _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true' | ||||
|     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' | ||||
|  | ||||
|     ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', | ||||
|         'file': '114408.mp4', | ||||
|         'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', | ||||
|         'info_dict': { | ||||
|             'id': '114408', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Using the exercise files', | ||||
|             'duration': 68 | ||||
|         } | ||||
| @@ -41,27 +44,44 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, | ||||
|                                       video_id, 'Downloading video JSON') | ||||
|         page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id, | ||||
|             'Downloading video JSON') | ||||
|         video_json = json.loads(page) | ||||
|  | ||||
|         if 'Status' in video_json: | ||||
|             raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True) | ||||
|  | ||||
|         if video_json['HasAccess'] is False: | ||||
|             raise ExtractorError('Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True) | ||||
|             raise ExtractorError( | ||||
|                 'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True) | ||||
|  | ||||
|         video_id = video_json['ID'] | ||||
|         video_id = compat_str(video_json['ID']) | ||||
|         duration = video_json['DurationInSeconds'] | ||||
|         title = video_json['Title'] | ||||
|  | ||||
|         formats = [{'url': fmt['Url'], | ||||
|         formats = [] | ||||
|  | ||||
|         fmts = video_json.get('Formats') | ||||
|         if fmts: | ||||
|             formats.extend([ | ||||
|                 { | ||||
|                     'url': fmt['Url'], | ||||
|                     'ext': fmt['Extension'], | ||||
|                     'width': fmt['Width'], | ||||
|                     'height': fmt['Height'], | ||||
|                     'filesize': fmt['FileSize'], | ||||
|                     'format_id': str(fmt['Resolution']) | ||||
|                     } for fmt in video_json['Formats']] | ||||
|                 } for fmt in fmts]) | ||||
|  | ||||
|         prioritized_streams = video_json.get('PrioritizedStreams') | ||||
|         if prioritized_streams: | ||||
|             formats.extend([ | ||||
|                 { | ||||
|                     'url': video_url, | ||||
|                     'width': int_or_none(format_id), | ||||
|                     'format_id': format_id, | ||||
|                 } for format_id, video_url in prioritized_streams['0'].items() | ||||
|             ]) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -91,7 +111,7 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|             'stayPut': 'false' | ||||
|         }         | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) | ||||
|         login_page = self._download_webpage(request, None, note='Logging in as %s' % username) | ||||
|         login_page = self._download_webpage(request, None, 'Logging in as %s' % username) | ||||
|  | ||||
|         # Not (yet) logged in | ||||
|         m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page) | ||||
| @@ -116,7 +136,7 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|                     'stayPut': 'false', | ||||
|                 } | ||||
|                 request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form)) | ||||
|                 login_page = self._download_webpage(request, None, note='Confirming log in and log out from another device') | ||||
|                 login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device') | ||||
|  | ||||
|         if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: | ||||
|             raise ExtractorError('Unable to log in') | ||||
| @@ -150,7 +170,7 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|         url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id | ||||
|         sub = self._download_webpage(url, None, note=False) | ||||
|         sub = self._download_webpage(url, None, False) | ||||
|         sub_json = json.loads(sub) | ||||
|         return {'en': url} if len(sub_json) > 0 else {} | ||||
|  | ||||
| @@ -179,6 +199,9 @@ class LyndaCourseIE(InfoExtractor): | ||||
|         videos = [] | ||||
|         (username, _) = self._get_login_info() | ||||
|  | ||||
|         # Might want to extract videos right here from video['Formats'], as it seems 'Formats' is no longer | ||||
|         # provided by the single-video API | ||||
|  | ||||
|         for chapter in course_json['Chapters']: | ||||
|             for video in chapter['Videos']: | ||||
|                 if username is None and video['HasAccess'] is False: | ||||
|   | ||||
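The Lynda change above builds the formats list from two optional sections of the player JSON, `Formats` and `PrioritizedStreams`, before handing it to `_sort_formats`. A sketch of that assembly step; the key names follow the diff but the payload values are invented, and a plain sort stands in for `_sort_formats`:

    # Hypothetical Lynda-style player JSON.
    video_json = {
        'ID': 114408,
        'Formats': [
            {'Url': 'http://example.com/540.mp4', 'Extension': 'mp4', 'Width': 960,
             'Height': 540, 'FileSize': 12345678, 'Resolution': 540},
        ],
        'PrioritizedStreams': {
            '0': {'360': 'http://example.com/360.mp4', '720': 'http://example.com/720.mp4'},
        },
    }

    formats = []
    for fmt in video_json.get('Formats') or []:
        formats.append({
            'url': fmt['Url'],
            'ext': fmt['Extension'],
            'width': fmt['Width'],
            'height': fmt['Height'],
            'filesize': fmt['FileSize'],
            'format_id': str(fmt['Resolution']),
        })
    for format_id, video_url in (video_json.get('PrioritizedStreams') or {}).get('0', {}).items():
        formats.append({
            'url': video_url,
            'width': int(format_id) if format_id.isdigit() else None,
            'format_id': format_id,
        })

    # youtube-dl would now hand this list to _sort_formats(); sorting by width is a rough stand-in.
    formats.sort(key=lambda f: f.get('width') or 0)
    print([f['format_id'] for f in formats])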
| @@ -5,9 +5,12 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     ExtractorError, | ||||
|     find_xpath_attr, | ||||
|     fix_xml_ampersands, | ||||
|     HEADRequest, | ||||
|     unescapeHTML, | ||||
|     url_basename, | ||||
|     RegexNotFoundError, | ||||
| ) | ||||
| @@ -18,6 +21,7 @@ def _media_xml_tag(tag): | ||||
|  | ||||
|  | ||||
| class MTVServicesInfoExtractor(InfoExtractor): | ||||
|     _MOBILE_TEMPLATE = None | ||||
|     @staticmethod | ||||
|     def _id_from_uri(uri): | ||||
|         return uri.split(':')[-1] | ||||
| @@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         else: | ||||
|             return thumb_node.attrib['url'] | ||||
|  | ||||
|     def _extract_video_formats(self, mdoc): | ||||
|         if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None: | ||||
|             raise ExtractorError('This video is not available from your country.', expected=True) | ||||
|     def _extract_mobile_video_formats(self, mtvn_id): | ||||
|         webpage_url = self._MOBILE_TEMPLATE % mtvn_id | ||||
|         req = compat_urllib_request.Request(webpage_url) | ||||
|         # Otherwise we get a webpage that would execute some javascript | ||||
|         req.add_header('Youtubedl-user-agent', 'curl/7') | ||||
|         webpage = self._download_webpage(req, mtvn_id, | ||||
|             'Downloading mobile page') | ||||
|         metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url')) | ||||
|         req = HEADRequest(metrics_url) | ||||
|         response = self._request_webpage(req, mtvn_id, 'Resolving url') | ||||
|         url = response.geturl() | ||||
|         # Transform the url to get the best quality: | ||||
|         url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1) | ||||
|         return [{'url': url, 'ext': 'mp4'}] | ||||
|  | ||||
|     def _extract_video_formats(self, mdoc, mtvn_id): | ||||
|         if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None: | ||||
|             if mtvn_id is not None and self._MOBILE_TEMPLATE is not None: | ||||
|                 self.to_screen('The normal version is not available from your ' | ||||
|                     'country, trying with the mobile version') | ||||
|                 return self._extract_mobile_video_formats(mtvn_id) | ||||
|             raise ExtractorError('This video is not available from your country.', | ||||
|                 expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         for rendition in mdoc.findall('.//rendition'): | ||||
| @@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|             raise ExtractorError('Could not find video title') | ||||
|         title = title.strip() | ||||
|  | ||||
|         # This is a short id that's used in the webpage urls | ||||
|         mtvn_id = None | ||||
|         mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category', | ||||
|                 'scheme', 'urn:mtvn:id') | ||||
|         if mtvn_id_node is not None: | ||||
|             mtvn_id = mtvn_id_node.text | ||||
|  | ||||
|         return { | ||||
|             'title': title, | ||||
|             'formats': self._extract_video_formats(mediagen_doc), | ||||
|             'formats': self._extract_video_formats(mediagen_doc, mtvn_id), | ||||
|             'id': video_id, | ||||
|             'thumbnail': self._get_thumbnail_url(uri, itemdoc), | ||||
|             'description': description, | ||||
|   | ||||
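The new MTV mobile fallback above resolves a metrics link with a HEAD request and takes the final redirected URL from the response. A standalone sketch of that redirect-resolution step (the `HEADRequest` class here is a local stand-in modelled on youtube-dl's helper of the same name, and the URL is a placeholder):

    import urllib.request

    class HEADRequest(urllib.request.Request):
        """Request subclass that issues HEAD instead of GET."""
        def get_method(self):
            return 'HEAD'

    def resolve_final_url(url):
        """Follow any redirects with a HEAD request and return the URL the server finally lands on."""
        with urllib.request.urlopen(HEADRequest(url)) as response:
            return response.geturl()

    # Placeholder URL; the extractor runs this against the metrics link scraped from the mobile page.
    print(resolve_final_url('http://example.com/'))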
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import binascii | ||||
| import base64 | ||||
| import hashlib | ||||
| @@ -14,18 +16,16 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
|  | ||||
| class MyVideoIE(InfoExtractor): | ||||
|     """Information Extractor for myvideo.de.""" | ||||
|  | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*' | ||||
|     IE_NAME = u'myvideo' | ||||
|     _VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*' | ||||
|     IE_NAME = 'myvideo' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', | ||||
|         u'file': u'8229274.flv', | ||||
|         u'md5': u'2d2753e8130479ba2cb7e0a37002053e', | ||||
|         u'info_dict': { | ||||
|             u"title": u"bowling-fail-or-win" | ||||
|         'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', | ||||
|         'md5': '2d2753e8130479ba2cb7e0a37002053e', | ||||
|         'info_dict': { | ||||
|             'id': '8229274', | ||||
|             'ext': 'flv', | ||||
|             'title': 'bowling-fail-or-win', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -53,10 +53,7 @@ class MyVideoIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'invalid URL: %s' % url) | ||||
|  | ||||
|         video_id = mobj.group(1) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         GK = ( | ||||
|           b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt' | ||||
| @@ -74,37 +71,33 @@ class MyVideoIE(InfoExtractor): | ||||
|             video_url = mobj.group(1) + '.flv' | ||||
|  | ||||
|             video_title = self._html_search_regex('<title>([^<]+)</title>', | ||||
|                 webpage, u'title') | ||||
|                 webpage, 'title') | ||||
|  | ||||
|             video_ext = self._search_regex('[.](.+?)$', video_url, u'extension') | ||||
|  | ||||
|             return [{ | ||||
|                 'id':       video_id, | ||||
|                 'url':      video_url, | ||||
|                 'uploader': None, | ||||
|                 'upload_date':  None, | ||||
|                 'title':    video_title, | ||||
|                 'ext':      video_ext, | ||||
|             }] | ||||
|             return { | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'title': video_title, | ||||
|             } | ||||
|  | ||||
|         mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage) | ||||
|         if mobj is not None: | ||||
|             request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '') | ||||
|             response = self._download_webpage(request, video_id, | ||||
|                                               u'Downloading video info') | ||||
|                                               'Downloading video info') | ||||
|             info = json.loads(base64.b64decode(response).decode('utf-8')) | ||||
|             return {'id': video_id, | ||||
|                     'title': info['title'], | ||||
|                     'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), | ||||
|                     'play_path': info['filename'], | ||||
|                     'ext': 'flv', | ||||
|                     'thumbnail': info['thumbnail'][0]['url'], | ||||
|                     } | ||||
|             return { | ||||
|                 'id': video_id, | ||||
|                 'title': info['title'], | ||||
|                 'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), | ||||
|                 'play_path': info['filename'], | ||||
|                 'ext': 'flv', | ||||
|                 'thumbnail': info['thumbnail'][0]['url'], | ||||
|             } | ||||
|  | ||||
|         # try encxml | ||||
|         mobj = re.search('var flashvars={(.+?)}', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract video') | ||||
|             raise ExtractorError('Unable to extract video') | ||||
|  | ||||
|         params = {} | ||||
|         encxml = '' | ||||
| @@ -118,7 +111,7 @@ class MyVideoIE(InfoExtractor): | ||||
|             params['domain'] = 'www.myvideo.de' | ||||
|         xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params)) | ||||
|         if 'flash_playertype=MTV' in xmldata_url: | ||||
|             self._downloader.report_warning(u'avoiding MTV player') | ||||
|             self._downloader.report_warning('avoiding MTV player') | ||||
|             xmldata_url = ( | ||||
|                 'http://www.myvideo.de/dynamic/get_player_video_xml.php' | ||||
|                 '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes' | ||||
| @@ -144,7 +137,7 @@ class MyVideoIE(InfoExtractor): | ||||
|             video_url = compat_urllib_parse.unquote(mobj.group(1)) | ||||
|             if 'myvideo2flash' in video_url: | ||||
|                 self.report_warning( | ||||
|                     u'Rewriting URL to use unencrypted rtmp:// ...', | ||||
|                     'Rewriting URL to use unencrypted rtmp:// ...', | ||||
|                     video_id) | ||||
|                 video_url = video_url.replace('rtmpe://', 'rtmp://') | ||||
|  | ||||
| @@ -152,39 +145,31 @@ class MyVideoIE(InfoExtractor): | ||||
|             # extract non rtmp videos | ||||
|             mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data) | ||||
|             if mobj is None: | ||||
|                 raise ExtractorError(u'unable to extract url') | ||||
|                 raise ExtractorError('unable to extract url') | ||||
|             video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2)) | ||||
|  | ||||
|         video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file') | ||||
|         video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file') | ||||
|         video_file = compat_urllib_parse.unquote(video_file) | ||||
|  | ||||
|         if not video_file.endswith('f4m'): | ||||
|             ppath, prefix = video_file.split('.') | ||||
|             video_playpath = '%s:%s' % (prefix, ppath) | ||||
|             video_hls_playlist = '' | ||||
|         else: | ||||
|             video_playpath = '' | ||||
|             video_hls_playlist = ( | ||||
|                 video_file | ||||
|             ).replace('.f4m', '.m3u8') | ||||
|  | ||||
|         video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj') | ||||
|         video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj') | ||||
|         video_swfobj = compat_urllib_parse.unquote(video_swfobj) | ||||
|  | ||||
|         video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>", | ||||
|             webpage, u'title') | ||||
|             webpage, 'title') | ||||
|  | ||||
|         return [{ | ||||
|             'id':                 video_id, | ||||
|             'url':                video_url, | ||||
|             'tc_url':             video_url, | ||||
|             'uploader':           None, | ||||
|             'upload_date':        None, | ||||
|             'title':              video_title, | ||||
|             'ext':                u'flv', | ||||
|             'play_path':          video_playpath, | ||||
|             'video_file':         video_file, | ||||
|             'video_hls_playlist': video_hls_playlist, | ||||
|             'player_url':         video_swfobj, | ||||
|         }] | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'tc_url': video_url, | ||||
|             'title': video_title, | ||||
|             'ext': 'flv', | ||||
|             'play_path': video_playpath, | ||||
|             'player_url': video_swfobj, | ||||
|         } | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -12,10 +11,13 @@ class NineGagIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         "url": "http://9gag.tv/v/1912", | ||||
|         "file": "1912.mp4", | ||||
|         "info_dict": { | ||||
|             "id": "1912", | ||||
|             "ext": "mp4", | ||||
|             "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)", | ||||
|             "title": "\"People Are Awesome 2013\" Is Absolutely Awesome" | ||||
|             "title": "\"People Are Awesome 2013\" Is Absolutely Awesome", | ||||
|             "view_count": int, | ||||
|             "thumbnail": "re:^https?://", | ||||
|         }, | ||||
|         'add_ie': ['Youtube'] | ||||
|     } | ||||
| @@ -25,21 +27,27 @@ class NineGagIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         data_json = self._html_search_regex(r'''(?x) | ||||
|             <div\s*id="tv-video"\s*data-video-source="youtube"\s* | ||||
|                 data-video-meta="([^"]+)"''', webpage, 'video metadata') | ||||
|  | ||||
|         data = json.loads(data_json) | ||||
|         youtube_id = self._html_search_regex( | ||||
|             r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"', | ||||
|             webpage, 'video ID') | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage, | ||||
|             'description', fatal=False) | ||||
|         view_count_str = self._html_search_regex( | ||||
|             r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count', | ||||
|             fatal=False) | ||||
|         view_count = ( | ||||
|             None if view_count_str is None | ||||
|             else int(view_count_str.replace(',', ''))) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': data['youtubeVideoId'], | ||||
|             'url': youtube_id, | ||||
|             'ie_key': 'Youtube', | ||||
|             'id': video_id, | ||||
|             'title': data['title'], | ||||
|             'description': data['description'], | ||||
|             'view_count': int(data['view_count']), | ||||
|             'like_count': int(data['statistic']['like']), | ||||
|             'dislike_count': int(data['statistic']['dislike']), | ||||
|             'thumbnail': data['thumbnail_url'], | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': description, | ||||
|             'view_count': view_count, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|         } | ||||
|   | ||||
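The 9gag extractor now scrapes the view counter from the page and strips the thousands separators before converting it, tolerating pages without a counter. A tiny sketch of that parsing step on invented fragments:

    import re

    def parse_view_count(webpage):
        """Pull a '<b>12,345</b> views' figure out of the page, returning None when it is absent."""
        mobj = re.search(r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage)
        if mobj is None:
            return None
        return int(mobj.group(1).replace(',', ''))

    print(parse_view_count('<p><b>1,912,345</b> views</p>'))   # -> 1912345
    print(parse_view_count('<p>no counter on this page</p>'))  # -> None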
| youtube_dl/extractor/parliamentliveuk.py (new file, 57 lines) | ||||
							| @@ -0,0 +1,57 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ParliamentLiveUKIE(InfoExtractor): | ||||
|     IE_NAME = 'parliamentlive.tv' | ||||
|     IE_DESC = 'UK parliament videos' | ||||
|     _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia', | ||||
|         'info_dict': { | ||||
|             'id': '15121', | ||||
|             'ext': 'asf', | ||||
|             'title': 'hoc home affairs committee, 18 mar 2014.pm', | ||||
|             'description': 'md5:033b3acdf83304cd43946b2d5e5798d1', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # Requires mplayer (mms) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         asx_url = self._html_search_regex( | ||||
|             r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage, | ||||
|             'metadata URL') | ||||
|         asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata') | ||||
|         video_url = asx.find('.//REF').attrib['HREF'] | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'''(?x)player\.setClipDetails\( | ||||
|                 (?:(?:[0-9]+|"[^"]+"),\s*){2} | ||||
|                 "([^"]+",\s*"[^"]+)" | ||||
|                 ''', | ||||
|             webpage, 'title').replace('", "', ', ') | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>', | ||||
|             webpage, 'description') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'ext': 'asf', | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|         } | ||||
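The new parliamentlive.tv extractor resolves the player's ASX playlist and reads the stream location from the `REF` element's `HREF` attribute. A minimal sketch of that step with `xml.etree.ElementTree`, using an invented ASX document of the expected shape:

    import xml.etree.ElementTree as ET

    # Minimal ASX document; the stream URL is invented.
    asx_text = '''<ASX version="3.0">
      <Entry>
        <REF HREF="mms://media.example.com/meeting15121.wmv" />
      </Entry>
    </ASX>'''

    asx = ET.fromstring(asx_text)
    video_url = asx.find('.//REF').attrib['HREF']
    print(video_url)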
| @@ -3,6 +3,9 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     US_RATINGS, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class PBSIE(InfoExtractor): | ||||
| @@ -13,7 +16,7 @@ class PBSIE(InfoExtractor): | ||||
|             # Article with embedded player | ||||
|            (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) | | ||||
|            # Player | ||||
|            video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/ | ||||
|            video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ | ||||
|         ) | ||||
|     ''' | ||||
|  | ||||
| @@ -57,6 +60,11 @@ class PBSIE(InfoExtractor): | ||||
|         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id | ||||
|         info = self._download_json(info_url, display_id) | ||||
|  | ||||
|         rating_str = info.get('rating') | ||||
|         if rating_str is not None: | ||||
|             rating_str = rating_str.rpartition('-')[2] | ||||
|         age_limit = US_RATINGS.get(rating_str) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info['title'], | ||||
| @@ -65,4 +73,5 @@ class PBSIE(InfoExtractor): | ||||
|             'description': info['program'].get('description'), | ||||
|             'thumbnail': info.get('image_url'), | ||||
|             'duration': info.get('duration'), | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
|   | ||||
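The PBS hunk above maps the trailing part of the API's rating string (e.g. "TV-PG" becomes "PG") onto the shared US_RATINGS table to fill in age_limit. A minimal standalone sketch of that lookup, assuming US_RATINGS carries the same values as the RATINGS table it replaces in the viki.py hunk further down:

US_RATINGS = {'G': 0, 'PG': 10, 'PG-13': 13, 'R': 16, 'NC': 18}  # assumed, matching the table shown in the viki.py hunk

def age_limit_from_rating(rating_str):
    # 'TV-PG' -> 'PG' -> 10; a rating without a dash passes through unchanged
    if rating_str is not None:
        rating_str = rating_str.rpartition('-')[2]
    return US_RATINGS.get(rating_str)

assert age_limit_from_rating('TV-PG') == 10
assert age_limit_from_rating(None) is None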
| @@ -1,76 +1,43 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| class PhotobucketIE(InfoExtractor): | ||||
|     """Information extractor for photobucket.com.""" | ||||
|  | ||||
|     # TODO: the original _VALID_URL was: | ||||
|     # r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' | ||||
|     # Check if it's necessary to keep the old extraction process | ||||
|     _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' | ||||
|     IE_NAME = u'photobucket' | ||||
|     _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' | ||||
|     _TEST = { | ||||
|         u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', | ||||
|         u'file': u'zpsc0c3b9fa.mp4', | ||||
|         u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20130504",  | ||||
|             u"uploader": u"rachaneronas",  | ||||
|             u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!" | ||||
|         'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', | ||||
|         'file': 'zpsc0c3b9fa.mp4', | ||||
|         'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', | ||||
|         'info_dict': { | ||||
|             'upload_date': '20130504', | ||||
|             'uploader': 'rachaneronas', | ||||
|             'title': 'Tired of Link Building? Try BacklinkMyDomain.com!', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract id from URL | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_extension = mobj.group('ext') | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # Extract URL, uploader, and title from webpage | ||||
|         self.report_extraction(video_id) | ||||
|         # We try first by looking the javascript code: | ||||
|         mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage) | ||||
|         if mobj is not None: | ||||
|             info = json.loads(mobj.group('json')) | ||||
|             return [{ | ||||
|                 'id':       video_id, | ||||
|                 'url':      info[u'downloadUrl'], | ||||
|                 'uploader': info[u'username'], | ||||
|                 'upload_date':  datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'), | ||||
|                 'title':    info[u'title'], | ||||
|                 'ext':      video_extension, | ||||
|                 'thumbnail': info[u'thumbUrl'], | ||||
|             }] | ||||
|  | ||||
|         # We try looking in other parts of the webpage | ||||
|         video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />', | ||||
|             webpage, u'video URL') | ||||
|  | ||||
|         mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract title') | ||||
|         video_title = mobj.group(1).decode('utf-8') | ||||
|         video_uploader = mobj.group(2).decode('utf-8') | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id.decode('utf-8'), | ||||
|             'url':      video_url.decode('utf-8'), | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date':  None, | ||||
|             'title':    video_title, | ||||
|             'ext':      video_extension.decode('utf-8'), | ||||
|         }] | ||||
|         info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);', | ||||
|             webpage, 'info json') | ||||
|         info = json.loads(info_json) | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': info['downloadUrl'], | ||||
|             'uploader': info['username'], | ||||
|             'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'), | ||||
|             'title': info['title'], | ||||
|             'ext': video_extension, | ||||
|             'thumbnail': info['thumbUrl'], | ||||
|         } | ||||
|   | ||||
							
								
								
									
80  youtube_dl/extractor/playvid.py  Normal file
							| @@ -0,0 +1,80 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class PlayvidIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.playvid.com/watch/agbDDi7WZTV', | ||||
|         'md5': '44930f8afa616efdf9482daf4fe53e1e', | ||||
|         'info_dict': { | ||||
|             'id': 'agbDDi7WZTV', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Michelle Lewin in Miami Beach', | ||||
|             'duration': 240, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_title = None | ||||
|         duration = None | ||||
|         video_thumbnail = None | ||||
|         formats = [] | ||||
|  | ||||
|         # most of the information is stored in the flashvars | ||||
|         flashvars = self._html_search_regex( | ||||
|             r'flashvars="(.+?)"', webpage, 'flashvars') | ||||
|  | ||||
|         infos = compat_urllib_parse.unquote(flashvars).split(r'&') | ||||
|         for info in infos: | ||||
|             videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info) | ||||
|             if videovars_match: | ||||
|                 key = videovars_match.group(1) | ||||
|                 val = videovars_match.group(2) | ||||
|  | ||||
|                 if key == 'title': | ||||
|                     video_title = compat_urllib_parse.unquote_plus(val) | ||||
|                 if key == 'duration': | ||||
|                     try: | ||||
|                         duration = int(val) | ||||
|                     except ValueError: | ||||
|                         pass | ||||
|                 if key == 'big_thumb': | ||||
|                     video_thumbnail = val | ||||
|  | ||||
|                 videourl_match = re.match( | ||||
|                     r'^video_urls\]\[(?P<resolution>[0-9]+)p', key) | ||||
|                 if videourl_match: | ||||
|                     height = int(videourl_match.group('resolution')) | ||||
|                     formats.append({ | ||||
|                         'height': height, | ||||
|                         'url': val, | ||||
|                     }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         # Extract title - should be in the flashvars; if not, look elsewhere | ||||
|         if video_title is None: | ||||
|             video_title = self._html_search_regex( | ||||
|                 r'<title>(.*?)</title', webpage, 'title') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'title': video_title, | ||||
|             'thumbnail': video_thumbnail, | ||||
|             'duration': duration, | ||||
|             'description': None, | ||||
|             'age_limit': 18 | ||||
|         } | ||||
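The new PlayvidIE pulls nearly everything out of the flashvars attribute: it URL-decodes the string, splits it on '&' and pattern-matches the video_vars[...] keys. A self-contained sketch of that loop, using Python 3's urllib.parse in place of the compat_urllib_parse wrapper and a made-up flashvars payload:

import re
from urllib.parse import unquote, unquote_plus

# Hypothetical flashvars payload in the shape the extractor expects.
flashvars = ('video_vars[title]=Sample%2BClip&video_vars[duration]=240'
             '&video_vars[video_urls][720p]=http://example.com/v720.mp4')

title = duration = None
formats = []
for info in unquote(flashvars).split('&'):
    m = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
    if not m:
        continue
    key, val = m.group(1), m.group(2)
    if key == 'title':
        title = unquote_plus(val)
    elif key == 'duration':
        try:
            duration = int(val)
        except ValueError:
            pass
    m_url = re.match(r'^video_urls\]\[(?P<resolution>[0-9]+)p', key)
    if m_url:
        formats.append({'height': int(m_url.group('resolution')), 'url': val})

print(title, duration, formats)
# -> Sample Clip 240 [{'height': 720, 'url': 'http://example.com/v720.mp4'}]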
| @@ -44,7 +44,7 @@ class PornHubIE(InfoExtractor): | ||||
|  | ||||
|         video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) | ||||
|         if webpage.find('"encrypted":true') != -1: | ||||
|             password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ') | ||||
|             password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password')) | ||||
|             video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) | ||||
|  | ||||
|         formats = [] | ||||
|   | ||||
| @@ -51,14 +51,14 @@ class ProSiebenSat1IE(InfoExtractor): | ||||
|             'skip': 'Seems to be broken', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.prosiebenmaxx.de/yep/one-piece/video/148-folge-48-gold-rogers-heimat-ganze-folge', | ||||
|             'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge', | ||||
|             'info_dict': { | ||||
|                 'id': '2437108', | ||||
|                 'id': '2429369', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Folge 48: Gold Rogers Heimat', | ||||
|                 'description': 'Ruffy erreicht die Insel, auf der der berühmte Gold Roger lebte und hingerichtet wurde.', | ||||
|                 'upload_date': '20140226', | ||||
|                 'duration': 1401.48, | ||||
|                 'title': 'Countdown für die Autowerkstatt', | ||||
|                 'description': 'md5:809fc051a457b5d8666013bc40698817', | ||||
|                 'upload_date': '20140223', | ||||
|                 'duration': 2595.04, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|   | ||||
							
								
								
									
183  youtube_dl/extractor/rutv.py  Normal file
							| @@ -0,0 +1,183 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RUTVIE(InfoExtractor): | ||||
|     IE_DESC = 'RUTV.RU' | ||||
|     _VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724', | ||||
|             'info_dict': { | ||||
|                 'id': '774471', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Монологи на все времена', | ||||
|                 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5', | ||||
|                 'duration': 2906, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638', | ||||
|             'info_dict': { | ||||
|                 'id': '774016', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Чужой в семье Сталина', | ||||
|                 'description': '', | ||||
|                 'duration': 2539, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000', | ||||
|             'info_dict': { | ||||
|                 'id': '766888', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', | ||||
|                 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', | ||||
|                 'duration': 279, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169', | ||||
|             'info_dict': { | ||||
|                 'id': '771852', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет', | ||||
|                 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8', | ||||
|                 'duration': 3096, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014', | ||||
|             'info_dict': { | ||||
|                 'id': '51499', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ', | ||||
|                 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Translation has finished', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     @classmethod | ||||
|     def _extract_url(cls, webpage): | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|         mobj = re.search( | ||||
|             r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)', | ||||
|             webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_type = mobj.group('type') | ||||
|  | ||||
|         if not video_type or video_type == 'swf': | ||||
|             video_type = 'video' | ||||
|  | ||||
|         json_data = self._download_json( | ||||
|             'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id), | ||||
|             video_id, 'Downloading JSON') | ||||
|  | ||||
|         if json_data['errors']: | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True) | ||||
|  | ||||
|         playlist = json_data['data']['playlist'] | ||||
|         medialist = playlist['medialist'] | ||||
|         media = medialist[0] | ||||
|  | ||||
|         if media['errors']: | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True) | ||||
|  | ||||
|         view_count = playlist.get('count_views') | ||||
|         priority_transport = playlist['priority_transport'] | ||||
|  | ||||
|         thumbnail = media['picture'] | ||||
|         width = int_or_none(media['width']) | ||||
|         height = int_or_none(media['height']) | ||||
|         description = media['anons'] | ||||
|         title = media['title'] | ||||
|         duration = int_or_none(media.get('duration')) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for transport, links in media['sources'].items(): | ||||
|             for quality, url in links.items(): | ||||
|                 if transport == 'rtmp': | ||||
|                     mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url) | ||||
|                     if not mobj: | ||||
|                         continue | ||||
|                     fmt = { | ||||
|                         'url': mobj.group('url'), | ||||
|                         'play_path': mobj.group('playpath'), | ||||
|                         'app': mobj.group('app'), | ||||
|                         'page_url': 'http://player.rutv.ru', | ||||
|                         'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22', | ||||
|                         'rtmp_live': True, | ||||
|                         'ext': 'flv', | ||||
|                         'vbr': int(quality), | ||||
|                     } | ||||
|                 elif transport == 'm3u8': | ||||
|                     fmt = { | ||||
|                         'url': url, | ||||
|                         'ext': 'mp4', | ||||
|                     } | ||||
|                 else: | ||||
|                     fmt = { | ||||
|                         'url': url | ||||
|                     } | ||||
|                 fmt.update({ | ||||
|                     'width': width, | ||||
|                     'height': height, | ||||
|                     'format_id': '%s-%s' % (transport, quality), | ||||
|                     'preference': -1 if priority_transport == transport else -2, | ||||
|                 }) | ||||
|                 formats.append(fmt) | ||||
|  | ||||
|         if not formats: | ||||
|             raise ExtractorError('No media links available for %s' % video_id) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'view_count': view_count, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
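For the RTMP transport, RUTVIE splits each stream URL into the app and playpath pieces the RTMP downloader needs. A quick sketch of that split on an assumed example URL:

import re

# Assumed example stream URL in the rtmp://host/app/playpath shape the extractor handles.
url = 'rtmp://video.example.ru/live-mobile/mp4:some/stream-720'

mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
fmt = {
    'url': mobj.group('url'),             # 'rtmp://video.example.ru/live-mobile/mp4:some'
    'app': mobj.group('app'),             # 'live-mobile/mp4:some'
    'play_path': mobj.group('playpath'),  # 'stream-720'
    'rtmp_live': True,
    'ext': 'flv',
}
print(fmt)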
| @@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|                 'id': '47127627', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Goldrushed', | ||||
|                 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', | ||||
|                 'uploader': 'The Royal Concept', | ||||
|                 'upload_date': '20120521', | ||||
|             }, | ||||
|   | ||||
| @@ -1,10 +1,15 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
|  | ||||
|  | ||||
| class SpikeIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+' | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|         (www\.spike\.com/(video-clips|episodes)/.+| | ||||
|          m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+)) | ||||
|         ''' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle', | ||||
|         'md5': '1a9265f32b0c375793d6c4ce45255256', | ||||
| @@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor): | ||||
|     } | ||||
|  | ||||
|     _FEED_URL = 'http://www.spike.com/feeds/mrss/' | ||||
|     _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.search(self._VALID_URL, url) | ||||
|         mobile_id = mobj.group('mobile_id') | ||||
|         if mobile_id is not None: | ||||
|             url = 'http://www.spike.com/video-clips/%s' % mobile_id | ||||
|         return super(SpikeIE, self)._real_extract(url) | ||||
|   | ||||
| @@ -6,115 +6,120 @@ import re | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     RegexNotFoundError, | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TEDIE(SubtitlesInfoExtractor): | ||||
|     _VALID_URL=r'''http://www\.ted\.com/ | ||||
|                    ( | ||||
|                         ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist | ||||
|                         | | ||||
|                         ((?P<type_talk>talks)) # We have a simple talk | ||||
|                    ) | ||||
|                    (/lang/(.*?))? # The url may contain the language | ||||
|                    /(?P<name>\w+) # Here goes the name and then ".html" | ||||
|                    ''' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?P<proto>https?://) | ||||
|         (?P<type>www|embed)(?P<urlmain>\.ted\.com/ | ||||
|         ( | ||||
|             (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist | ||||
|             | | ||||
|             ((?P<type_talk>talks)) # We have a simple talk | ||||
|         ) | ||||
|         (/lang/(.*?))? # The url may contain the language | ||||
|         /(?P<name>\w+) # Here goes the name and then ".html" | ||||
|         .*)$ | ||||
|         ''' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', | ||||
|         'file': '102.mp4', | ||||
|         'md5': '4ea1dada91e4174b53dac2bb8ace429d', | ||||
|         'info_dict': { | ||||
|             "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922", | ||||
|             "title": "Dan Dennett: The illusion of consciousness" | ||||
|             'id': '102', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The illusion of consciousness', | ||||
|             'description': ('Philosopher Dan Dennett makes a compelling ' | ||||
|                 'argument that not only don\'t we understand our own ' | ||||
|                 'consciousness, but that half the time our brains are ' | ||||
|                 'actively fooling us.'), | ||||
|             'uploader': 'Dan Dennett', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|     _FORMATS_PREFERENCE = { | ||||
|         'low': 1, | ||||
|         'medium': 2, | ||||
|         'high': 3, | ||||
|     } | ||||
|  | ||||
|     def _extract_info(self, webpage): | ||||
|         info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>', | ||||
|             webpage, 'info json') | ||||
|         return json.loads(info_json) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m=re.match(self._VALID_URL, url, re.VERBOSE) | ||||
|         m = re.match(self._VALID_URL, url, re.VERBOSE) | ||||
|         if m.group('type') == 'embed': | ||||
|             desktop_url = m.group('proto') + 'www' + m.group('urlmain') | ||||
|             return self.url_result(desktop_url, 'TED') | ||||
|         name = m.group('name') | ||||
|         if m.group('type_talk'): | ||||
|             return self._talk_info(url) | ||||
|         else : | ||||
|             playlist_id=m.group('playlist_id') | ||||
|             name=m.group('name') | ||||
|             self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name)) | ||||
|             return [self._playlist_videos_info(url,name,playlist_id)] | ||||
|             return self._talk_info(url, name) | ||||
|         else: | ||||
|             return self._playlist_videos_info(url, name) | ||||
|  | ||||
|  | ||||
|     def _playlist_videos_info(self, url, name, playlist_id): | ||||
|     def _playlist_videos_info(self, url, name): | ||||
|         '''Returns the videos of the playlist''' | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             url, playlist_id, 'Downloading playlist webpage') | ||||
|         matches = re.finditer( | ||||
|             r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>', | ||||
|             webpage) | ||||
|  | ||||
|         playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>', | ||||
|                                                  webpage, 'playlist title') | ||||
|         webpage = self._download_webpage(url, name, | ||||
|             'Downloading playlist webpage') | ||||
|         info = self._extract_info(webpage) | ||||
|         playlist_info = info['playlist'] | ||||
|  | ||||
|         playlist_entries = [ | ||||
|             self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED') | ||||
|             for m in matches | ||||
|             self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key()) | ||||
|             for talk in info['talks'] | ||||
|         ] | ||||
|         return self.playlist_result( | ||||
|             playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title) | ||||
|             playlist_entries, | ||||
|             playlist_id=compat_str(playlist_info['id']), | ||||
|             playlist_title=playlist_info['title']) | ||||
|  | ||||
|     def _talk_info(self, url, video_id=0): | ||||
|         """Return the video for the talk in the url""" | ||||
|         m = re.match(self._VALID_URL, url,re.VERBOSE) | ||||
|         video_name = m.group('name') | ||||
|         webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name) | ||||
|     def _talk_info(self, url, video_name): | ||||
|         webpage = self._download_webpage(url, video_name) | ||||
|         self.report_extraction(video_name) | ||||
|         # If the url includes the language we get the title translated | ||||
|         title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>', | ||||
|                                         webpage, 'title') | ||||
|         json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>', | ||||
|                                     webpage, 'json data') | ||||
|         info = json.loads(json_data) | ||||
|         desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>', | ||||
|                                        webpage, 'description', flags = re.DOTALL) | ||||
|          | ||||
|         thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"', | ||||
|                                        webpage, 'thumbnail') | ||||
|  | ||||
|         talk_info = self._extract_info(webpage)['talks'][0] | ||||
|  | ||||
|         formats = [{ | ||||
|             'ext': 'mp4', | ||||
|             'url': stream['file'], | ||||
|             'format': stream['id'] | ||||
|         } for stream in info['htmlStreams']] | ||||
|  | ||||
|         video_id = info['id'] | ||||
|             'url': format_url, | ||||
|             'format_id': format_id, | ||||
|             'format': format_id, | ||||
|             'preference': self._FORMATS_PREFERENCE.get(format_id, -1), | ||||
|         } for (format_id, format_url) in talk_info['nativeDownloads'].items()] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         video_id = compat_str(talk_info['id']) | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, webpage) | ||||
|         video_subtitles = self.extract_subtitles(video_id, talk_info) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, webpage) | ||||
|             self._list_available_subtitles(video_id, talk_info) | ||||
|             return | ||||
|  | ||||
|         thumbnail = talk_info['thumb'] | ||||
|         if not thumbnail.startswith('http'): | ||||
|             thumbnail = 'http://' + thumbnail | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'title': talk_info['title'], | ||||
|             'uploader': talk_info['speaker'], | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': desc, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'subtitles': video_subtitles, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|         try: | ||||
|             options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL) | ||||
|             languages = re.findall(r'(?:<option value=")(\S+)"', options) | ||||
|             if languages: | ||||
|                 sub_lang_list = {} | ||||
|                 for l in languages: | ||||
|                     url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) | ||||
|                     sub_lang_list[l] = url | ||||
|                 return sub_lang_list | ||||
|         except RegexNotFoundError: | ||||
|     def _get_available_subtitles(self, video_id, talk_info): | ||||
|         languages = [lang['languageCode'] for lang in talk_info.get('languages', [])] | ||||
|         if languages: | ||||
|             sub_lang_list = {} | ||||
|             for l in languages: | ||||
|                 url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) | ||||
|                 sub_lang_list[l] = url | ||||
|             return sub_lang_list | ||||
|         else: | ||||
|             self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||
|         return {} | ||||
|             return {} | ||||
|   | ||||
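The rewritten TED extractor now reads everything from the embedded JSON: formats come from the talk's nativeDownloads dict and are ranked with the small _FORMATS_PREFERENCE table. A minimal sketch of that step with assumed data (a plain sort stands in for _sort_formats):

FORMATS_PREFERENCE = {'low': 1, 'medium': 2, 'high': 3}

# Assumed shape of talk_info['nativeDownloads'] as the extractor reads it.
native_downloads = {
    'low': 'http://example.com/talk-low.mp4',
    'medium': 'http://example.com/talk-medium.mp4',
    'high': 'http://example.com/talk-high.mp4',
}

formats = [{
    'url': format_url,
    'format_id': format_id,
    'preference': FORMATS_PREFERENCE.get(format_id, -1),
} for format_id, format_url in native_downloads.items()]

formats.sort(key=lambda f: f['preference'])  # best format last, as youtube-dl expects
print([f['format_id'] for f in formats])  # ['low', 'medium', 'high']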
							
								
								
									
164  youtube_dl/extractor/udemy.py  Normal file
							| @@ -0,0 +1,164 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class UdemyIE(InfoExtractor): | ||||
|     IE_NAME = 'udemy' | ||||
|     _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)' | ||||
|     _LOGIN_URL = 'https://www.udemy.com/join/login-submit/' | ||||
|     _NETRC_MACHINE = 'udemy' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757', | ||||
|         'md5': '98eda5b657e752cf945d8445e261b5c5', | ||||
|         'info_dict': { | ||||
|             'id': '160614', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Introduction and Installation', | ||||
|             'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876', | ||||
|             'duration': 579.29, | ||||
|         }, | ||||
|         'skip': 'Requires udemy account credentials', | ||||
|     }] | ||||
|  | ||||
|     def _handle_error(self, response): | ||||
|         if not isinstance(response, dict): | ||||
|             return | ||||
|         error = response.get('error') | ||||
|         if error: | ||||
|             error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message')) | ||||
|             error_data = error.get('data') | ||||
|             if error_data: | ||||
|                 error_str += ' - %s' % error_data.get('formErrors') | ||||
|             raise ExtractorError(error_str, expected=True) | ||||
|  | ||||
|     def _download_json(self, url, video_id, note='Downloading JSON metadata'): | ||||
|         response = super(UdemyIE, self)._download_json(url, video_id, note) | ||||
|         self._handle_error(response) | ||||
|         return response | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             raise ExtractorError( | ||||
|                 'Udemy account is required, use --username and --password options to provide account credentials.', | ||||
|                 expected=True) | ||||
|  | ||||
|         login_popup = self._download_webpage( | ||||
|             'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None, | ||||
|             'Downloading login popup') | ||||
|  | ||||
|         if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>': | ||||
|             return | ||||
|  | ||||
|         csrf = self._html_search_regex(r'<input type="hidden" name="csrf" value="(.+?)"', login_popup, 'csrf token') | ||||
|  | ||||
|         login_form = { | ||||
|             'email': username, | ||||
|             'password': password, | ||||
|             'csrf': csrf, | ||||
|             'displayType': 'json', | ||||
|             'isSubmitted': '1', | ||||
|         } | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) | ||||
|         response = self._download_json(request, None, 'Logging in as %s' % username) | ||||
|  | ||||
|         if 'returnUrl' not in response: | ||||
|             raise ExtractorError('Unable to log in') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         lecture_id = mobj.group('id') | ||||
|  | ||||
|         lecture = self._download_json( | ||||
|             'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, lecture_id, 'Downloading lecture JSON') | ||||
|  | ||||
|         if lecture['assetType'] != 'Video': | ||||
|             raise ExtractorError('Lecture %s is not a video' % lecture_id, expected=True) | ||||
|  | ||||
|         asset = lecture['asset'] | ||||
|  | ||||
|         stream_url = asset['streamUrl'] | ||||
|         mobj = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url) | ||||
|         if mobj: | ||||
|             return self.url_result(mobj.group(1), 'Youtube') | ||||
|  | ||||
|         video_id = asset['id'] | ||||
|         thumbnail = asset['thumbnailUrl'] | ||||
|         duration = asset['data']['duration'] | ||||
|  | ||||
|         download_url = asset['downloadUrl'] | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': download_url['Video480p'][0], | ||||
|                 'format_id': '360p', | ||||
|             }, | ||||
|             { | ||||
|                 'url': download_url['Video'][0], | ||||
|                 'format_id': '720p', | ||||
|             }, | ||||
|         ] | ||||
|  | ||||
|         title = lecture['title'] | ||||
|         description = lecture['description'] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats | ||||
|         } | ||||
|  | ||||
|  | ||||
| class UdemyCourseIE(UdemyIE): | ||||
|     IE_NAME = 'udemy:course' | ||||
|     _VALID_URL = r'https?://www\.udemy\.com/(?P<coursepath>[\da-z-]+)' | ||||
|     _SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<' | ||||
|     _ALREADY_ENROLLED = '>You are already taking this course.<' | ||||
|     _TESTS = [] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return False if UdemyIE.suitable(url) else super(UdemyCourseIE, cls).suitable(url) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         course_path = mobj.group('coursepath') | ||||
|  | ||||
|         response = self._download_json( | ||||
|             'https://www.udemy.com/api-1.1/courses/%s' % course_path, course_path, 'Downloading course JSON') | ||||
|  | ||||
|         course_id = int(response['id']) | ||||
|         course_title = response['title'] | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id, course_id, 'Enrolling in the course') | ||||
|  | ||||
|         if self._SUCCESSFULLY_ENROLLED in webpage: | ||||
|             self.to_screen('%s: Successfully enrolled in the course' % course_id) | ||||
|         elif self._ALREADY_ENROLLED in webpage: | ||||
|             self.to_screen('%s: Already enrolled in the course' % course_id) | ||||
|  | ||||
|         response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id, | ||||
|             course_id, 'Downloading course curriculum') | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy') | ||||
|             for asset in response if asset.get('assetType') == 'Video' | ||||
|         ] | ||||
|  | ||||
|         return self.playlist_result(entries, course_id, course_title) | ||||
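UdemyIE routes every API response through _handle_error so that the site's JSON error objects surface as readable messages rather than failing later. A standalone sketch of that check, with a hypothetical error payload and a plain RuntimeError standing in for ExtractorError:

def handle_error(response):
    # Mirrors the shape checked above: bail out quietly unless an 'error' object is present.
    if not isinstance(response, dict):
        return
    error = response.get('error')
    if error:
        msg = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message'))
        data = error.get('data')
        if data:
            msg += ' - %s' % data.get('formErrors')
        raise RuntimeError(msg)

# Hypothetical API response, used only for illustration.
try:
    handle_error({'error': {'code': 1, 'message': 'Not authorized',
                            'data': {'formErrors': ['bad csrf']}}})
except RuntimeError as e:
    print(e)  # Udemy returned error #1: Not authorized - ['bad csrf']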
| @@ -4,14 +4,11 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none | ||||
| ) | ||||
| from ..utils import ExtractorError | ||||
| from .rutv import RUTVIE | ||||
|  | ||||
|  | ||||
| class VestiIE(InfoExtractor): | ||||
|     IE_NAME = 'vesti' | ||||
|     IE_DESC = 'Вести.Ru' | ||||
|     _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)' | ||||
|  | ||||
| @@ -30,6 +27,20 @@ class VestiIE(InfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.vesti.ru/doc.html?id=1349233', | ||||
|             'info_dict': { | ||||
|                 'id': '773865', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Участники митинга штурмуют Донецкую областную администрацию', | ||||
|                 'description': 'md5:1a160e98b3195379b4c849f2f4958009', | ||||
|                 'duration': 210, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.vesti.ru/only_video.html?vid=576180', | ||||
|             'info_dict': { | ||||
| @@ -44,6 +55,20 @@ class VestiIE(InfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://hitech.vesti.ru/news/view/id/4000', | ||||
|             'info_dict': { | ||||
|                 'id': '766888', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', | ||||
|                 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', | ||||
|                 'duration': 279, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403', | ||||
|             'info_dict': { | ||||
| @@ -57,7 +82,7 @@ class VestiIE(InfoExtractor): | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Blocked outside Russia' | ||||
|             'skip': 'Blocked outside Russia', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://sochi2014.vesti.ru/live/play/live_id/301', | ||||
| @@ -72,7 +97,7 @@ class VestiIE(InfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Translation has finished' | ||||
|         } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -81,90 +106,16 @@ class VestiIE(InfoExtractor): | ||||
|  | ||||
|         page = self._download_webpage(url, video_id, 'Downloading page') | ||||
|  | ||||
|         mobj = re.search(r'<meta property="og:video" content=".+?\.swf\?v?id=(?P<id>\d+).*?" />', page) | ||||
|         mobj = re.search( | ||||
|             r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)', | ||||
|             page) | ||||
|         if mobj: | ||||
|             video_type = 'video' | ||||
|             video_id = mobj.group('id') | ||||
|         else: | ||||
|             mobj = re.search( | ||||
|                 r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>', page) | ||||
|             page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id, | ||||
|                 'Downloading video page') | ||||
|  | ||||
|             if not mobj: | ||||
|                 raise ExtractorError('No media found') | ||||
|         rutv_url = RUTVIE._extract_url(page) | ||||
|         if rutv_url: | ||||
|             return self.url_result(rutv_url, 'RUTV') | ||||
|  | ||||
|             video_type = mobj.group('type') | ||||
|             video_id = mobj.group('id') | ||||
|  | ||||
|         json_data = self._download_json( | ||||
|             'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id), | ||||
|             video_id, 'Downloading JSON') | ||||
|  | ||||
|         if json_data['errors']: | ||||
|             raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True) | ||||
|  | ||||
|         playlist = json_data['data']['playlist'] | ||||
|         medialist = playlist['medialist'] | ||||
|         media = medialist[0] | ||||
|  | ||||
|         if media['errors']: | ||||
|             raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True) | ||||
|  | ||||
|         view_count = playlist.get('count_views') | ||||
|         priority_transport = playlist['priority_transport'] | ||||
|  | ||||
|         thumbnail = media['picture'] | ||||
|         width = int_or_none(media['width']) | ||||
|         height = int_or_none(media['height']) | ||||
|         description = media['anons'] | ||||
|         title = media['title'] | ||||
|         duration = int_or_none(media.get('duration')) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for transport, links in media['sources'].items(): | ||||
|             for quality, url in links.items(): | ||||
|                 if transport == 'rtmp': | ||||
|                     mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url) | ||||
|                     if not mobj: | ||||
|                         continue | ||||
|                     fmt = { | ||||
|                         'url': mobj.group('url'), | ||||
|                         'play_path': mobj.group('playpath'), | ||||
|                         'app': mobj.group('app'), | ||||
|                         'page_url': 'http://player.rutv.ru', | ||||
|                         'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22', | ||||
|                         'rtmp_live': True, | ||||
|                         'ext': 'flv', | ||||
|                         'vbr': int(quality), | ||||
|                     } | ||||
|                 elif transport == 'm3u8': | ||||
|                     fmt = { | ||||
|                         'url': url, | ||||
|                         'ext': 'mp4', | ||||
|                     } | ||||
|                 else: | ||||
|                     fmt = { | ||||
|                         'url': url | ||||
|                     } | ||||
|                 fmt.update({ | ||||
|                     'width': width, | ||||
|                     'height': height, | ||||
|                     'format_id': '%s-%s' % (transport, quality), | ||||
|                     'preference': -1 if priority_transport == transport else -2, | ||||
|                 }) | ||||
|                 formats.append(fmt) | ||||
|  | ||||
|         if not formats: | ||||
|             raise ExtractorError('No media links available for %s' % video_id) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'view_count': view_count, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|         raise ExtractorError('No video found', expected=True) | ||||
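After this change VestiIE no longer rebuilds formats itself: it only locates the embedded RUTV player URL and hands the work off with url_result. A sketch of the detection step alone, run against a hypothetical page snippet (the meta og:video branch of RUTVIE._extract_url is omitted):

import re

# Hypothetical page snippet containing an embedded player iframe.
page = '<div><iframe src="http://player.rutv.ru/iframe/video/id/771852/sid/russiatv/"></iframe></div>'

def extract_rutv_url(webpage):
    # Same iframe pattern RUTVIE._extract_url looks for.
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1',
        webpage)
    if mobj:
        return mobj.group('url')

print(extract_rutv_url(page))  # http://player.rutv.ru/iframe/video/id/771852/sid/russiatv/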
| @@ -2,7 +2,6 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
| import datetime | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| @@ -22,6 +21,7 @@ class VevoIE(InfoExtractor): | ||||
|            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| | ||||
|            vevo:) | ||||
|         (?P<id>[^&?#]+)''' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', | ||||
|         "md5": "06bea460acb744eab74a9d7dcb4bfd61", | ||||
| @@ -34,6 +34,8 @@ class VevoIE(InfoExtractor): | ||||
|             "duration": 230.12, | ||||
|             "width": 1920, | ||||
|             "height": 1080, | ||||
|             # timestamp and upload_date are often incorrect; seem to change randomly | ||||
|             'timestamp': int, | ||||
|         } | ||||
|     }, { | ||||
|         'note': 'v3 SMIL format', | ||||
| @@ -47,6 +49,7 @@ class VevoIE(InfoExtractor): | ||||
|             'title': 'I Wish I Could Break Your Heart', | ||||
|             'duration': 226.101, | ||||
|             'age_limit': 0, | ||||
|             'timestamp': int, | ||||
|         } | ||||
|     }, { | ||||
|         'note': 'Age-limited video', | ||||
| @@ -57,7 +60,8 @@ class VevoIE(InfoExtractor): | ||||
|             'age_limit': 18, | ||||
|             'title': 'Tunnel Vision (Explicit)', | ||||
|             'uploader': 'Justin Timberlake', | ||||
|             'upload_date': '20130704', | ||||
|             'upload_date': 're:2013070[34]', | ||||
|             'timestamp': int, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'true', | ||||
| @@ -169,13 +173,13 @@ class VevoIE(InfoExtractor): | ||||
|  | ||||
|         timestamp_ms = int(self._search_regex( | ||||
|             r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date')) | ||||
|         upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_info['title'], | ||||
|             'formats': formats, | ||||
|             'thumbnail': video_info['imageUrl'], | ||||
|             'upload_date': upload_date.strftime('%Y%m%d'), | ||||
|             'timestamp': timestamp_ms // 1000, | ||||
|             'uploader': video_info['mainArtists'][0]['artistName'], | ||||
|             'duration': video_info['duration'], | ||||
|             'age_limit': age_limit, | ||||
|   | ||||
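The Vevo hunk drops the locally computed (timezone-dependent) upload_date and reports the raw launch timestamp instead, as the new 'timestamp': int test entries reflect. A quick sketch of the /Date(...)/ parse with an assumed value:

import re

launch_date = '/Date(1372896000000)/'  # assumed example of the JSON value Vevo returns

timestamp_ms = int(re.search(r'/Date\((\d+)\)/', launch_date).group(1))
timestamp = timestamp_ms // 1000
print(timestamp)  # 1372896000, i.e. 2013-07-04T00:00:00Z; upload_date can be derived from this downstream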
| @@ -29,6 +29,7 @@ class VideoBamIE(InfoExtractor): | ||||
|             'info_dict': { | ||||
|                 'id': 'pqLvq', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '_', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
| @@ -61,7 +62,7 @@ class VideoBamIE(InfoExtractor): | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = self._og_search_title(page, default='VideoBam', fatal=False) | ||||
|         title = self._og_search_title(page, default='_', fatal=False) | ||||
|         description = self._og_search_description(page, default=None) | ||||
|         thumbnail = self._og_search_thumbnail(page) | ||||
|         uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None) | ||||
|   | ||||
| @@ -1,22 +1,23 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .internetvideoarchive import InternetVideoArchiveIE | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import compat_urlparse | ||||
|  | ||||
|  | ||||
| class VideoDetectiveIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487', | ||||
|         u'file': u'194487.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'KICK-ASS 2', | ||||
|             u'description': u'md5:65ba37ad619165afac7d432eaded6013', | ||||
|             u'duration': 135, | ||||
|         'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', | ||||
|         'info_dict': { | ||||
|             'id': '194487', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'KICK-ASS 2', | ||||
|             'description': 'md5:65ba37ad619165afac7d432eaded6013', | ||||
|             'duration': 135, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         og_video = self._og_search_video_url(webpage) | ||||
|         query = compat_urlparse.urlparse(og_video).query | ||||
|         return self.url_result(InternetVideoArchiveIE._build_url(query), | ||||
|             ie=InternetVideoArchiveIE.ie_key()) | ||||
|         return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key()) | ||||
|   | ||||
| @@ -1,29 +1,33 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     US_RATINGS, | ||||
| ) | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
|  | ||||
| class VikiIE(SubtitlesInfoExtractor): | ||||
|     IE_NAME = u'viki' | ||||
|     IE_NAME = 'viki' | ||||
|  | ||||
|     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14', | ||||
|         u'file': u'1023585v.mp4', | ||||
|         u'md5': u'a21454021c2646f5433514177e2caa5f', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Heirs Episode 14', | ||||
|             u'uploader': u'SBS', | ||||
|             u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e', | ||||
|             u'upload_date': u'20131121', | ||||
|             u'age_limit': 13, | ||||
|         'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', | ||||
|         'md5': 'a21454021c2646f5433514177e2caa5f', | ||||
|         'info_dict': { | ||||
|             'id': '1023585v', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Heirs Episode 14', | ||||
|             'uploader': 'SBS', | ||||
|             'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e', | ||||
|             'upload_date': '20131121', | ||||
|             'age_limit': 13, | ||||
|         }, | ||||
|         u'skip': u'Blocked in the US', | ||||
|         'skip': 'Blocked in the US', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -44,28 +48,21 @@ class VikiIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|         rating_str = self._html_search_regex( | ||||
|             r'<strong>Rating: </strong>\s*([^<]*)<', webpage, | ||||
|             u'rating information', default='').strip() | ||||
|         RATINGS = { | ||||
|             'G': 0, | ||||
|             'PG': 10, | ||||
|             'PG-13': 13, | ||||
|             'R': 16, | ||||
|             'NC': 18, | ||||
|         } | ||||
|         age_limit = RATINGS.get(rating_str) | ||||
|             'rating information', default='').strip() | ||||
|         age_limit = US_RATINGS.get(rating_str) | ||||
|  | ||||
|         info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id | ||||
|         info_webpage = self._download_webpage( | ||||
|             info_url, video_id, note=u'Downloading info page') | ||||
|             info_url, video_id, note='Downloading info page') | ||||
|         if re.match(r'\s*<div\s+class="video-error', info_webpage): | ||||
|             raise ExtractorError( | ||||
|                 u'Video %s is blocked from your location.' % video_id, | ||||
|                 'Video %s is blocked from your location.' % video_id, | ||||
|                 expected=True) | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL') | ||||
|             r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL') | ||||
|  | ||||
|         upload_date_str = self._html_search_regex( | ||||
|             r'"created_at":"([^"]+)"', info_webpage, u'upload date') | ||||
|             r'"created_at":"([^"]+)"', info_webpage, 'upload date') | ||||
|         upload_date = ( | ||||
|             unified_strdate(upload_date_str) | ||||
|             if upload_date_str is not None | ||||
|   | ||||
| @@ -8,6 +8,7 @@ import itertools | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_HTTPError, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     clean_html, | ||||
| @@ -101,6 +102,15 @@ class VimeoIE(SubtitlesInfoExtractor): | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         if VimeoChannelIE.suitable(url): | ||||
|             # Otherwise channel urls like http://vimeo.com/channels/31259 would | ||||
|             # match | ||||
|             return False | ||||
|         else: | ||||
|             return super(VimeoIE, cls).suitable(url) | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
| @@ -172,7 +182,18 @@ class VimeoIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         request = compat_urllib_request.Request(url, None, headers) | ||||
|         webpage = self._download_webpage(request, video_id) | ||||
|         try: | ||||
|             webpage = self._download_webpage(request, video_id) | ||||
|         except ExtractorError as ee: | ||||
|             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: | ||||
|                 errmsg = ee.cause.read() | ||||
|                 if b'Because of its privacy settings, this video cannot be played here' in errmsg: | ||||
|                     raise ExtractorError( | ||||
|                         'Cannot download embed-only video without embedding ' | ||||
|                         'URL. Please call youtube-dl with the URL of the page ' | ||||
|                         'that embeds this video.', | ||||
|                         expected=True) | ||||
|             raise | ||||
|  | ||||
|         # Now we begin extracting as much information as we can from what we | ||||
|         # retrieved. First we extract the information common to all extractors, | ||||
| @@ -320,7 +341,7 @@ class VimeoIE(SubtitlesInfoExtractor): | ||||
|  | ||||
| class VimeoChannelIE(InfoExtractor): | ||||
|     IE_NAME = 'vimeo:channel' | ||||
|     _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)' | ||||
|     _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$' | ||||
|     _MORE_PAGES_INDICATOR = r'<a.+?rel="next"' | ||||
|     _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"' | ||||
|  | ||||
|   | ||||
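
The new VimeoIE.suitable() override above exists because the single-video pattern would otherwise also claim channel URLs. A standalone sketch of the idea, using deliberately simplified stand-in regexes rather than the real _VALID_URL values:

    import re

    class ChannelIE(object):
        _VALID_URL = r'https?://vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$'

        @classmethod
        def suitable(cls, url):
            return re.match(cls._VALID_URL, url) is not None

    class VideoIE(object):
        _VALID_URL = r'https?://vimeo\.com/(?:.*?/)?(?P<id>\d+)'  # simplified

        @classmethod
        def suitable(cls, url):
            # Defer to the channel extractor so /channels/... URLs are not
            # claimed here just because they happen to contain digits.
            if ChannelIE.suitable(url):
                return False
            return re.match(cls._VALID_URL, url) is not None

    print(VideoIE.suitable('http://vimeo.com/channels/31259'))  # False
    print(VideoIE.suitable('http://vimeo.com/12345678'))        # True
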
| @@ -13,7 +13,7 @@ class VubeIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', | ||||
|         'md5': 'f81dcf6d0448e3291f54380181695821', | ||||
|         'md5': 'db7aba89d4603dadd627e9d1973946fe', | ||||
|         'info_dict': { | ||||
|             'id': 'YL2qNPkqon', | ||||
|             'ext': 'mp4', | ||||
| @@ -77,4 +77,4 @@ class VubeIE(InfoExtractor): | ||||
|             'like_count': like_count, | ||||
|             'dislike_count': dislike_count, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
|         } | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/wdr.py (new file, 114 lines, Normal file)
							| @@ -0,0 +1,114 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     compat_urlparse, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class WDRIE(InfoExtractor): | ||||
|     _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?' | ||||
|     _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-362427', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Servicezeit', | ||||
|                 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb', | ||||
|                 'upload_date': '20140310', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-363194', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Marga Spiegel ist tot', | ||||
|                 'description': 'md5:2309992a6716c347891c045be50992e4', | ||||
|                 'upload_date': '20140311', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html', | ||||
|             'md5': '83e9e8fefad36f357278759870805898', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-194332', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)', | ||||
|                 'description': 'md5:2309992a6716c347891c045be50992e4', | ||||
|                 'upload_date': '20091129', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html', | ||||
|             'md5': 'cfff440d4ee64114083ac44676df5d15', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-363068', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Grenzenlos lecker - Baklava', | ||||
|                 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', | ||||
|                 'upload_date': '20140311', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_url = mobj.group('url') | ||||
|         page_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|  | ||||
|         if mobj.group('player') is None: | ||||
|             entries = [ | ||||
|                 self.url_result(page_url + href, 'WDR') | ||||
|                 for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage) | ||||
|             ] | ||||
|             return self.playlist_result(entries, page_id) | ||||
|  | ||||
|         flashvars = compat_urlparse.parse_qs( | ||||
|             self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars')) | ||||
|  | ||||
|         page_id = flashvars['trackerClipId'][0] | ||||
|         video_url = flashvars['dslSrc'][0] | ||||
|         title = flashvars['trackerClipTitle'][0] | ||||
|         thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None | ||||
|  | ||||
|         if 'trackerClipAirTime' in flashvars: | ||||
|             upload_date = flashvars['trackerClipAirTime'][0] | ||||
|         else: | ||||
|             upload_date = self._html_search_meta('DC.Date', webpage, 'upload date') | ||||
|  | ||||
|         if upload_date: | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|  | ||||
|         if video_url.endswith('.f4m'): | ||||
|             video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' | ||||
|             ext = 'flv' | ||||
|         else: | ||||
|             ext = determine_ext(video_url) | ||||
|  | ||||
|         description = self._html_search_meta('Description', webpage, 'description') | ||||
|  | ||||
|         return { | ||||
|             'id': page_id, | ||||
|             'url': video_url, | ||||
|             'ext': ext, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
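
In the new WDR extractor, most of the metadata comes out of the Flash player's flashvars attribute via compat_urlparse.parse_qs. A rough sketch of that step with a made-up flashvars string (the real attribute is scraped from the page; the URL below is fictitious):

    try:
        from urllib.parse import parse_qs  # Python 3
    except ImportError:
        from urlparse import parse_qs      # Python 2

    flashvars = parse_qs(
        'trackerClipId=mdb-362427'
        '&dslSrc=http%3A%2F%2Fexample.invalid%2Fservicezeit.mp4'
        '&trackerClipTitle=Servicezeit'
        '&trackerClipAirTime=10.03.2014%2020.15'
    )

    page_id = flashvars['trackerClipId'][0]        # 'mdb-362427'
    video_url = flashvars['dslSrc'][0]             # percent-decoded http:// URL
    title = flashvars['trackerClipTitle'][0]       # 'Servicezeit'
    air_time = flashvars['trackerClipAirTime'][0]  # '10.03.2014 20.15', covered by the
                                                   # new '%d.%m.%Y %H.%M' strdate format
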
| @@ -1,55 +1,49 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class XNXXIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)' | ||||
|     VIDEO_URL_RE = r'flv_url=(.*?)&' | ||||
|     VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM' | ||||
|     VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&' | ||||
|     _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', | ||||
|         u'file': u'1135332.flv', | ||||
|         u'md5': u'0831677e2b4761795f68d417e0b7b445', | ||||
|         u'info_dict': { | ||||
|             u"title": u"lida \u00bb Naked Funny Actress  (5)", | ||||
|             u"age_limit": 18, | ||||
|         'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', | ||||
|         'md5': '0831677e2b4761795f68d417e0b7b445', | ||||
|         'info_dict': { | ||||
|             'id': '1135332', | ||||
|             'ext': 'flv', | ||||
|             'title': 'lida » Naked Funny Actress  (5)', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group(1) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         # Get webpage content | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._search_regex(self.VIDEO_URL_RE, | ||||
|             webpage, u'video URL') | ||||
|         video_url = self._search_regex(r'flv_url=(.*?)&', | ||||
|             webpage, 'video URL') | ||||
|         video_url = compat_urllib_parse.unquote(video_url) | ||||
|  | ||||
|         video_title = self._html_search_regex(self.VIDEO_TITLE_RE, | ||||
|             webpage, u'title') | ||||
|         video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM', | ||||
|             webpage, 'title') | ||||
|  | ||||
|         video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE, | ||||
|             webpage, u'thumbnail', fatal=False) | ||||
|         video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return [{ | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'uploader': None, | ||||
|             'upload_date': None, | ||||
|             'title': video_title, | ||||
|             'ext': 'flv', | ||||
|             'thumbnail': video_thumbnail, | ||||
|             'description': None, | ||||
|             'age_limit': 18, | ||||
|         }] | ||||
|         } | ||||
|   | ||||
| @@ -1,3 +1,6 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
|  | ||||
| import json | ||||
| import re | ||||
| import sys | ||||
| @@ -17,24 +20,25 @@ from ..aes import ( | ||||
|  | ||||
|  | ||||
| class YouPornIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))' | ||||
|     _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', | ||||
|         u'file': u'505835.mp4', | ||||
|         u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20101221", | ||||
|             u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", | ||||
|             u"uploader": u"Ask Dan And Jennifer", | ||||
|             u"title": u"Sex Ed: Is It Safe To Masturbate Daily?", | ||||
|             u"age_limit": 18, | ||||
|         'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', | ||||
|         'md5': '71ec5fcfddacf80f495efa8b6a8d9a89', | ||||
|         'info_dict': { | ||||
|             'id': '505835', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20101221', | ||||
|             'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?', | ||||
|             'uploader': 'Ask Dan And Jennifer', | ||||
|             'title': 'Sex Ed: Is It Safe To Masturbate Daily?', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         url = 'http://www.' + mobj.group('url') | ||||
|         url = mobj.group('proto') + 'www.' + mobj.group('url') | ||||
|  | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
| @@ -42,7 +46,7 @@ class YouPornIE(InfoExtractor): | ||||
|         age_limit = self._rta_search(webpage) | ||||
|  | ||||
|         # Get JSON parameters | ||||
|         json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters') | ||||
|         json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters') | ||||
|         try: | ||||
|             params = json.loads(json_params) | ||||
|         except: | ||||
| @@ -61,7 +65,7 @@ class YouPornIE(InfoExtractor): | ||||
|         # Get all of the links from the page | ||||
|         DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>' | ||||
|         download_list_html = self._search_regex(DOWNLOAD_LIST_RE, | ||||
|             webpage, u'download list').strip() | ||||
|             webpage, 'download list').strip() | ||||
|         LINK_RE = r'<a href="([^"]+)">' | ||||
|         links = re.findall(LINK_RE, download_list_html) | ||||
|  | ||||
| @@ -86,7 +90,7 @@ class YouPornIE(InfoExtractor): | ||||
|             resolution = format_parts[0] | ||||
|             height = int(resolution[:-len('p')]) | ||||
|             bitrate = int(format_parts[1][:-len('k')]) | ||||
|             format = u'-'.join(format_parts) + u'-' + dn | ||||
|             format = '-'.join(format_parts) + '-' + dn | ||||
|  | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|   | ||||
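
For context on the format-id hunk above: each download link's label is split into a resolution part and a bitrate part, then rejoined with the last path component of the URL. A toy rerun of that arithmetic with hypothetical values:

    format_parts = ['720p', '1500k']   # hypothetical label pieces
    dn = 'somefile'                    # stands in for the URL's last path component

    height = int(format_parts[0][:-len('p')])       # 720
    bitrate = int(format_parts[1][:-len('k')])      # 1500
    format_id = '-'.join(format_parts) + '-' + dn   # '720p-1500k-somefile'
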
| @@ -194,14 +194,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10}, | ||||
|  | ||||
|         # DASH mp4 video | ||||
|         '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|  | ||||
|         # Dash mp4 audio | ||||
|         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, | ||||
| @@ -209,12 +209,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50}, | ||||
|  | ||||
|         # Dash webm | ||||
|         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40}, | ||||
|         '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40}, | ||||
|         '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40}, | ||||
| @@ -1130,14 +1130,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         proto = ( | ||||
|             u'http' if self._downloader.params.get('prefer_insecure', False) | ||||
|             else u'https') | ||||
|  | ||||
|         # Extract original video URL from URL with redirection, like age verification, using next_url parameter | ||||
|         mobj = re.search(self._NEXT_URL_RE, url) | ||||
|         if mobj: | ||||
|             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/') | ||||
|             url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/') | ||||
|         video_id = self.extract_id(url) | ||||
|  | ||||
|         # Get video webpage | ||||
|         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id | ||||
|         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id | ||||
|         video_webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # Attempt to extract SWF player URL | ||||
| @@ -1162,7 +1166,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                                                   'asv': 3, | ||||
|                                                   'sts':'1588', | ||||
|                                                   }) | ||||
|             video_info_url = 'https://www.youtube.com/get_video_info?' + data | ||||
|             video_info_url = proto + '://www.youtube.com/get_video_info?' + data | ||||
|             video_info_webpage = self._download_webpage(video_info_url, video_id, | ||||
|                                     note=False, | ||||
|                                     errnote='unable to download video info webpage') | ||||
| @@ -1170,7 +1174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         else: | ||||
|             age_gate = False | ||||
|             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: | ||||
|                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' | ||||
|                 video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' | ||||
|                         % (video_id, el_type)) | ||||
|                 video_info_webpage = self._download_webpage(video_info_url, video_id, | ||||
|                                         note=False, | ||||
| @@ -1285,10 +1289,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         # Decide which formats to download | ||||
|         try: | ||||
|             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage) | ||||
|             mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage) | ||||
|             if not mobj: | ||||
|                 raise ValueError('Could not find vevo ID') | ||||
|             ytplayer_config = json.loads(mobj.group(1)) | ||||
|             json_code = uppercase_escape(mobj.group(1)) | ||||
|             ytplayer_config = json.loads(json_code) | ||||
|             args = ytplayer_config['args'] | ||||
|             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map | ||||
|             # these signatures are encrypted | ||||
| @@ -1444,7 +1449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             'duration':     video_duration, | ||||
|             'age_limit':    18 if age_gate else 0, | ||||
|             'annotations':  video_annotations, | ||||
|             'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id, | ||||
|             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id, | ||||
|             'view_count':   view_count, | ||||
|             'like_count': like_count, | ||||
|             'dislike_count': dislike_count, | ||||
|   | ||||
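
The point of tagging the DASH mp4 itags with acodec 'none' above is that downstream format handling can then tell video-only DASH streams apart from muxed ones. A hand-picked excerpt, trimmed to the relevant keys ('22' is an ordinary muxed itag added here only for contrast, not part of the hunk):

    FORMATS = {
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256},
        '22':  {'ext': 'mp4', 'height': 720},  # muxed audio+video
    }

    video_only = sorted(itag for itag, f in FORMATS.items() if f.get('acodec') == 'none')
    audio_only = sorted(itag for itag, f in FORMATS.items() if f.get('vcodec') == 'none')
    print(video_only, audio_only)  # ['137'] ['141']
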
| @@ -6,6 +6,7 @@ import ctypes | ||||
| import datetime | ||||
| import email.utils | ||||
| import errno | ||||
| import getpass | ||||
| import gzip | ||||
| import itertools | ||||
| import io | ||||
| @@ -22,6 +23,7 @@ import struct | ||||
| import subprocess | ||||
| import sys | ||||
| import traceback | ||||
| import xml.etree.ElementTree | ||||
| import zlib | ||||
|  | ||||
| try: | ||||
| @@ -777,6 +779,7 @@ def unified_strdate(date_str): | ||||
|         '%Y/%m/%d %H:%M:%S', | ||||
|         '%Y-%m-%d %H:%M:%S', | ||||
|         '%d.%m.%Y %H:%M', | ||||
|         '%d.%m.%Y %H.%M', | ||||
|         '%Y-%m-%dT%H:%M:%SZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%fZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%f0Z', | ||||
| @@ -1263,3 +1266,35 @@ def read_batch_urls(batch_fd): | ||||
|  | ||||
|     with contextlib.closing(batch_fd) as fd: | ||||
|         return [url for url in map(fixup, fd) if url] | ||||
|  | ||||
|  | ||||
| def urlencode_postdata(*args, **kargs): | ||||
|     return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii') | ||||
|  | ||||
|  | ||||
| def parse_xml(s): | ||||
|     class TreeBuilder(xml.etree.ElementTree.TreeBuilder): | ||||
|         def doctype(self, name, pubid, system): | ||||
|             pass  # Ignore doctypes | ||||
|  | ||||
|     parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder()) | ||||
|     kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {} | ||||
|     return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs) | ||||
|  | ||||
|  | ||||
| if sys.version_info < (3, 0) and sys.platform == 'win32': | ||||
|     def compat_getpass(prompt, *args, **kwargs): | ||||
|         if isinstance(prompt, compat_str): | ||||
|             prompt = prompt.encode(preferredencoding()) | ||||
|         return getpass.getpass(prompt, *args, **kwargs) | ||||
| else: | ||||
|     compat_getpass = getpass.getpass | ||||
|  | ||||
|  | ||||
| US_RATINGS = { | ||||
|     'G': 0, | ||||
|     'PG': 10, | ||||
|     'PG-13': 13, | ||||
|     'R': 16, | ||||
|     'NC': 18, | ||||
| } | ||||
|   | ||||
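
Of the utils.py additions above, parse_xml is the least obvious one: its custom TreeBuilder simply swallows any DOCTYPE declaration, so documents that carry one parse cleanly, and the parser keyword is only passed where the stdlib accepts it. Exercising the helper as defined in the hunk, on a small made-up document:

    import sys
    import xml.etree.ElementTree


    def parse_xml(s):
        class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
            def doctype(self, name, pubid, system):
                pass  # Ignore doctypes

        parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
        kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
        return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)


    doc = parse_xml('<!DOCTYPE html><playlist><entry id="abc123"/></playlist>')
    print(doc.find('entry').get('id'))  # abc123
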
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.03.04.2' | ||||
| __version__ = '2014.03.21.2' | ||||
|   | ||||