Compare commits: 2014.02.27 ... 2014.03.17 (162 commits)

	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 9d840c43b5 | ||
|  | 6f50f63382 | ||
|  | ff14fc4964 | ||
|  | e125c21531 | ||
|  | 93d020dd65 | ||
|  | a7515ec265 | ||
|  | b6c1ceccc2 | ||
|  | 4056ad8f36 | ||
|  | 6563837ee1 | ||
|  | fd5e6f7ef2 | ||
|  | 15fd51b37c | ||
|  | f1cef7a9ff | ||
|  | 8264223511 | ||
|  | bc6d597828 | ||
|  | aba77bbfc2 | ||
|  | 955c451456 | ||
|  | e5de3f6c89 | ||
|  | 2a1db721d4 | ||
|  | 1e0eb60f1a | ||
|  | 87a29e6f25 | ||
|  | c3d36f134f | ||
|  | 84769e708c | ||
|  | 9d2ecdbc71 | ||
|  | 9b69af5342 | ||
|  | c21215b421 | ||
|  | cddcfd90b4 | ||
|  | f36aacba0f | ||
|  | 355271fb61 | ||
|  | 2a5b502364 | ||
|  | 98ff9d82d4 | ||
|  | b1ff87224c | ||
|  | b461641fb9 | ||
|  | b047de6f6e | ||
|  | 34ca5d9ba0 | ||
|  | 60cc4dc4b4 | ||
|  | db95dc13a1 | ||
|  | 777ac90791 | ||
|  | 04f9bebbcb | ||
|  | 4ea3137e41 | ||
|  | a0792b738e | ||
|  | 19a41fc613 | ||
|  | 3ee52157fb | ||
|  | c4d197ee2d | ||
|  | a33932cfe3 | ||
|  | bcf89ce62c | ||
|  | e3899d0e00 | ||
|  | dcb00da49c | ||
|  | aa51d20d19 | ||
|  | ae7ed92057 | ||
|  | e45b31d9bd | ||
|  | 5a25f39653 | ||
|  | 963d7ec412 | ||
|  | e712d94adf | ||
|  | 6a72423955 | ||
|  | 4126826b10 | ||
|  | b773ead7fd | ||
|  | 855e2750bc | ||
|  | 805ef3c60b | ||
|  | fbc2dcb40b | ||
|  | 5375d7ad84 | ||
|  | 90f3476180 | ||
|  | ee95c09333 | ||
|  | 75d06db9fc | ||
|  | 439a1fffcb | ||
|  | 9d9d70c462 | ||
|  | b4a186b7be | ||
|  | bdebf51c8f | ||
|  | 264b86f9b4 | ||
|  | 9e55e37a2e | ||
|  | 1471956573 | ||
|  | 27865b2169 | ||
|  | 6d07ce0162 | ||
|  | edb7fc5435 | ||
|  | 31f77343f2 | ||
|  | 63ad031583 | ||
|  | 957688cee6 | ||
|  | 806d6c2e8c | ||
|  | 0ef68e04d9 | ||
|  | a496524db2 | ||
|  | 935c7360cc | ||
|  | 340b046876 | ||
|  | cc1db7f9b7 | ||
|  | a4ff6c4762 | ||
|  | 1060425cbb | ||
|  | e9c092f125 | ||
|  | 22ff5d2105 | ||
|  | 136db7881b | ||
|  | dae313e725 | ||
|  | b74fa8cd2c | ||
|  | 94eae04c94 | ||
|  | 16ff7ebc77 | ||
|  | c361c505b0 | ||
|  | d37c07c575 | ||
|  | 9d6105c9f0 | ||
|  | 8dec03ecba | ||
|  | 826547870b | ||
|  | 52d6a9a61d | ||
|  | ad242b5fbc | ||
|  | 3524175625 | ||
|  | 7b9965ea93 | ||
|  | 0a5bce566f | ||
|  | 8012bd2424 | ||
|  | f55a1f0a88 | ||
|  | bacac173a9 | ||
|  | ca1fee34f2 | ||
|  | 6dadaa9930 | ||
|  | 553f6e4633 | ||
|  | 652bee05f0 | ||
|  | d63516e9cd | ||
|  | e477dcf649 | ||
|  | 9d3f7781f3 | ||
|  | c7095dada3 | ||
|  | 607dbbad76 | ||
|  | 17b75c0de1 | ||
|  | ab24f4f3be | ||
|  | e1a52d9e10 | ||
|  | d0ff838433 | ||
|  | b37b94501c | ||
|  | cb3bb2cfef | ||
|  | e2cc7983e9 | ||
|  | c9ae7b9565 | ||
|  | 86fb4347f7 | ||
|  | 2fcec131f5 | ||
|  | 9f62eaf4ef | ||
|  | f92259c026 | ||
|  | 0afef30b23 | ||
|  | dcdfd1c711 | ||
|  | 2acc1f8f50 | ||
|  | 2c39b0c695 | ||
|  | e77c5b4f63 | ||
|  | 409a16cb72 | ||
|  | 94d5e90b4f | ||
|  | 2d73b45805 | ||
|  | 271a2dbfa2 | ||
|  | bf4adcac66 | ||
|  | fb8b8fdd62 | ||
|  | 5a0b26252e | ||
|  | 7d78f0cc48 | ||
|  | f00fc78674 | ||
|  | 392017874c | ||
|  | c3cb92d1ab | ||
|  | aa5590fa07 | ||
|  | 8cfb5bbf92 | ||
|  | 69bb54ebf9 | ||
|  | ca97a56e4b | ||
|  | fc26f3b4c2 | ||
|  | f604c93c64 | ||
|  | dc3727b65c | ||
|  | aba3231de1 | ||
|  | 9193bab91d | ||
|  | fbcf3e416d | ||
|  | c0e5d85631 | ||
|  | ca7fa3dcb3 | ||
|  | 4ccfba28d9 | ||
|  | abb82f1ddc | ||
|  | 546582ec3e | ||
|  | 4534485586 | ||
|  | 50a138d95c | ||
|  | 1b86cc41cf | ||
|  | 83cebb8b7a | ||
|  | 9e68f9fdf1 | ||
|  | 60daf7f0bb | ||

README.md (14 changes)

							| @@ -124,8 +124,12 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      video id, %(playlist)s for the playlist the | ||||
|                                      video is in, %(playlist_index)s for the | ||||
|                                      position in the playlist and %% for a | ||||
|                                      literal percent. Use - to output to stdout. | ||||
|                                      Can also be used to download to a different | ||||
|                                      literal percent. %(height)s and %(width)s | ||||
|                                      for the width and height of the video | ||||
|                                      format. %(resolution)s for a textual | ||||
|                                      description of the resolution of the video | ||||
|                                      format. Use - to output to stdout. Can also | ||||
|                                      be used to download to a different | ||||
|                                      directory, for example with -o '/my/downloa | ||||
|                                      ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' . | ||||
|     --autonumber-size NUMBER         Specifies the number of digits in | ||||
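The new `%(height)s`, `%(width)s` and `%(resolution)s` output-template fields documented above are also usable through the embedded Python API. A minimal sketch, using the standard test video that appears elsewhere in this comparison (the resulting filename depends on what the extractor reports; `%(resolution)s` falls back to `<height>p` or `?x<width>` when only one dimension is known, per the YoutubeDL.py hunk further down):

```python
import youtube_dl

# Sketch: name the downloaded file after title and resolution, using the
# %(resolution)s template field added in this release.
ydl = youtube_dl.YoutubeDL({'outtmpl': '%(title)s-%(resolution)s.%(ext)s'})
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```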
| @@ -187,9 +191,9 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      preference using slashes: "-f 22/17/18". | ||||
|                                      "-f mp4" and "-f flv" are also supported. | ||||
|                                      You can also use the special names "best", | ||||
|                                      "bestaudio", "worst", and "worstaudio". By | ||||
|                                      default, youtube-dl will pick the best | ||||
|                                      quality. | ||||
|                                      "bestvideo", "bestaudio", "worst", | ||||
|                                      "worstvideo" and "worstaudio". By default, | ||||
|                                      youtube-dl will pick the best quality. | ||||
|     --all-formats                    download all available video formats | ||||
|     --prefer-free-formats            prefer free video formats unless a specific | ||||
|                                      one is requested | ||||
|   | ||||
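Similarly, the new `bestvideo`/`worstvideo` selectors from the format-selection hunk above can be combined with the existing slash-separated preference syntax. A sketch (these selectors only match formats whose `acodec` is `'none'`, as the YoutubeDL.py hunk further down shows, so a plain `best` fallback is a sensible default):

```python
import youtube_dl

# Sketch: prefer the highest-quality video-only format, falling back to
# the combined 'best' format when no audio-less format is available.
ydl = youtube_dl.YoutubeDL({'format': 'bestvideo/best'})
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```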
| @@ -97,7 +97,7 @@ rm -rf build | ||||
|  | ||||
| make pypi-files | ||||
| echo "Uploading to PyPi ..." | ||||
| python setup.py sdist upload | ||||
| python setup.py sdist bdist_wheel upload | ||||
| make clean | ||||
|  | ||||
| /bin/echo -e "\n### DONE!" | ||||
|   | ||||
| @@ -71,7 +71,7 @@ class FakeYDL(YoutubeDL): | ||||
|             old_report_warning(message) | ||||
|         self.report_warning = types.MethodType(report_warning, self) | ||||
|  | ||||
| def get_testcases(): | ||||
| def gettestcases(): | ||||
|     for ie in youtube_dl.extractor.gen_extractors(): | ||||
|         t = getattr(ie, '_TEST', None) | ||||
|         if t: | ||||
|   | ||||
test/test_InfoExtractor.py (new file, 44 lines)

							| @@ -0,0 +1,44 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import FakeYDL | ||||
| from youtube_dl.extractor.common import InfoExtractor | ||||
| from youtube_dl.extractor import YoutubeIE, get_info_extractor | ||||
|  | ||||
|  | ||||
| class TestIE(InfoExtractor): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class TestInfoExtractor(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         self.ie = TestIE(FakeYDL()) | ||||
|  | ||||
|     def test_ie_key(self): | ||||
|         self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) | ||||
|  | ||||
|     def test_html_search_regex(self): | ||||
|         html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>' | ||||
|         search = lambda re, *args: self.ie._html_search_regex(re, html, *args) | ||||
|         self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video') | ||||
|  | ||||
|     def test_opengraph(self): | ||||
|         ie = self.ie | ||||
|         html = ''' | ||||
|             <meta name="og:title" content='Foo'/> | ||||
|             <meta content="Some video's description " name="og:description"/> | ||||
|             <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/> | ||||
|             ''' | ||||
|         self.assertEqual(ie._og_search_title(html), 'Foo') | ||||
|         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') | ||||
|         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -182,6 +182,24 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'vid-high') | ||||
|  | ||||
|     def test_format_selection_video(self): | ||||
|         formats = [ | ||||
|             {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'}, | ||||
|             {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'}, | ||||
|             {'format_id': 'vid', 'ext': 'mp4', 'preference': 3}, | ||||
|         ] | ||||
|         info_dict = {'formats': formats, 'extractor': 'test'} | ||||
|  | ||||
|         ydl = YDL({'format': 'bestvideo'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'dash-video-high') | ||||
|  | ||||
|         ydl = YDL({'format': 'worstvideo'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'dash-video-low') | ||||
|  | ||||
|     def test_youtube_format_selection(self): | ||||
|         order = [ | ||||
|             '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13', | ||||
|   | ||||
| @@ -9,7 +9,7 @@ import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
|  | ||||
| from test.helper import get_testcases | ||||
| from test.helper import gettestcases | ||||
|  | ||||
| from youtube_dl.extractor import ( | ||||
|     FacebookIE, | ||||
| @@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|     def test_youtube_truncated(self): | ||||
|         self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) | ||||
|  | ||||
|     def test_youtube_search_matching(self): | ||||
|         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) | ||||
|         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) | ||||
|  | ||||
|     def test_justin_tv_channelid_matching(self): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) | ||||
| @@ -101,7 +105,7 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|  | ||||
|     def test_no_duplicates(self): | ||||
|         ies = gen_extractors() | ||||
|         for tc in get_testcases(): | ||||
|         for tc in gettestcases(): | ||||
|             url = tc['url'] | ||||
|             for ie in ies: | ||||
|                 if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): | ||||
| @@ -120,6 +124,8 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|  | ||||
|     def test_vimeo_matching(self): | ||||
|         self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel']) | ||||
|         self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel']) | ||||
|         self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo']) | ||||
|         self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user']) | ||||
|         self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user']) | ||||
|         self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) | ||||
|   | ||||
| @@ -8,7 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import ( | ||||
|     get_params, | ||||
|     get_testcases, | ||||
|     gettestcases, | ||||
|     try_rm, | ||||
|     md5, | ||||
|     report_warning | ||||
| @@ -51,7 +51,7 @@ def _file_md5(fn): | ||||
|     with open(fn, 'rb') as f: | ||||
|         return hashlib.md5(f.read()).hexdigest() | ||||
|  | ||||
| defs = get_testcases() | ||||
| defs = gettestcases() | ||||
|  | ||||
|  | ||||
| class TestDownload(unittest.TestCase): | ||||
| @@ -144,6 +144,10 @@ def generator(test_case): | ||||
|                         self.assertTrue( | ||||
|                             isinstance(got, compat_str) and match_rex.match(got), | ||||
|                             u'field %s (value: %r) should match %r' % (info_field, got, match_str)) | ||||
|                     elif isinstance(expected, type): | ||||
|                         got = info_dict.get(info_field) | ||||
|                         self.assertTrue(isinstance(got, expected), | ||||
|                             u'Expected type %r, but got value %r of type %r' % (expected, got, type(got))) | ||||
|                     else: | ||||
|                         if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||
|                             got = 'md5:' + md5(info_dict.get(info_field)) | ||||
| @@ -152,19 +156,19 @@ def generator(test_case): | ||||
|                         self.assertEqual(expected, got, | ||||
|                             u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) | ||||
|  | ||||
|                 # If checkable fields are missing from the test case, print the info_dict | ||||
|                 test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) | ||||
|                     for key, value in info_dict.items() | ||||
|                     if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location')) | ||||
|                 if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): | ||||
|                     sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') | ||||
|  | ||||
|                 # Check for the presence of mandatory fields | ||||
|                 for key in ('id', 'url', 'title', 'ext'): | ||||
|                     self.assertTrue(key in info_dict.keys() and info_dict[key]) | ||||
|                 # Check for mandatory fields that are automatically set by YoutubeDL | ||||
|                 for key in ['webpage_url', 'extractor', 'extractor_key']: | ||||
|                     self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) | ||||
|  | ||||
|                 # If checkable fields are missing from the test case, print the info_dict | ||||
|                 test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) | ||||
|                     for key, value in info_dict.items() | ||||
|                     if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) | ||||
|                 if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): | ||||
|                     sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') | ||||
|         finally: | ||||
|             try_rm_tcs_files() | ||||
|  | ||||
|   | ||||
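The new `isinstance(expected, type)` branch in test_download.py above lets a test case assert only the type of an info_dict field rather than an exact value. A hypothetical `_TEST` fragment illustrating the idea (the URL and field values are placeholders, not taken from any real extractor):

```python
_TEST = {
    'url': 'http://example.com/some-video',  # placeholder URL
    'info_dict': {
        'id': '12345',
        'ext': 'mp4',
        'title': 'Some title',
        # A bare type now only asserts isinstance(got, expected):
        'duration': float,
        'view_count': int,
    },
}
```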
| @@ -36,6 +36,7 @@ from youtube_dl.extractor import ( | ||||
|     RutubeChannelIE, | ||||
|     GoogleSearchIE, | ||||
|     GenericIE, | ||||
|     TEDIE, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -98,7 +99,7 @@ class TestPlaylists(unittest.TestCase): | ||||
|         result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], '5124905') | ||||
|         self.assertTrue(len(result['entries']) >= 11) | ||||
|         self.assertTrue(len(result['entries']) >= 6) | ||||
|  | ||||
|     def test_soundcloud_set(self): | ||||
|         dl = FakeYDL() | ||||
| @@ -248,16 +249,25 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'python language') | ||||
|         self.assertEqual(result['title'], 'python language') | ||||
|         self.assertTrue(len(result['entries']) == 15) | ||||
|         self.assertEqual(len(result['entries']), 15) | ||||
|  | ||||
|     def test_generic_rss_feed(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = GenericIE(dl) | ||||
|         result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml') | ||||
|         result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml') | ||||
|         self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml') | ||||
|         self.assertEqual(result['title'], 'Zero Punctuation') | ||||
|         self.assertTrue(len(result['entries']) > 10) | ||||
|  | ||||
|     def test_ted_playlist(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = TEDIE(dl) | ||||
|         result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], '10') | ||||
|         self.assertEqual(result['title'], 'Who are the hackers?') | ||||
|         self.assertTrue(len(result['entries']) >= 6) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -33,6 +33,7 @@ from youtube_dl.utils import ( | ||||
|     unified_strdate, | ||||
|     unsmuggle_url, | ||||
|     url_basename, | ||||
|     urlencode_postdata, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
|  | ||||
| @@ -261,5 +262,9 @@ class TestUtil(unittest.TestCase): | ||||
|             bam''') | ||||
|         self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam']) | ||||
|  | ||||
|     def test_urlencode_postdata(self): | ||||
|         data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) | ||||
|         self.assertTrue(isinstance(data, bytes)) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
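`urlencode_postdata`, now exercised by test_utils.py, returns `bytes`, which is what the urllib request machinery expects for POST bodies on both Python 2 and 3. A small sketch of how an extractor might use it (the login URL is purely illustrative):

```python
from youtube_dl.utils import compat_urllib_request, urlencode_postdata

# Illustrative POST request; urlencode_postdata() yields bytes, so the same
# code works unchanged on Python 2 and Python 3.
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
req = compat_urllib_request.Request('http://example.com/login', data)
```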
| @@ -16,6 +16,7 @@ from youtube_dl.extractor import ( | ||||
|     YoutubeChannelIE, | ||||
|     YoutubeShowIE, | ||||
|     YoutubeTopListIE, | ||||
|     YoutubeSearchURLIE, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -133,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         entries = result['entries'] | ||||
|         self.assertTrue(len(entries) >= 5) | ||||
|  | ||||
|     def test_youtube_search_url(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeSearchURLIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video') | ||||
|         entries = result['entries'] | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], 'youtube-dl test video') | ||||
|         self.assertTrue(len(entries) >= 5) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -4,6 +4,7 @@ | ||||
| from __future__ import absolute_import, unicode_literals | ||||
|  | ||||
| import collections | ||||
| import datetime | ||||
| import errno | ||||
| import io | ||||
| import json | ||||
| @@ -370,12 +371,15 @@ class YoutubeDL(object): | ||||
|         Print the message to stderr, it will be prefixed with 'WARNING:' | ||||
|         If stderr is a tty file the 'WARNING:' will be colored | ||||
|         ''' | ||||
|         if self._err_file.isatty() and os.name != 'nt': | ||||
|             _msg_header = '\033[0;33mWARNING:\033[0m' | ||||
|         if self.params.get('logger') is not None: | ||||
|             self.params['logger'].warning(message) | ||||
|         else: | ||||
|             _msg_header = 'WARNING:' | ||||
|         warning_message = '%s %s' % (_msg_header, message) | ||||
|         self.to_stderr(warning_message) | ||||
|             if self._err_file.isatty() and os.name != 'nt': | ||||
|                 _msg_header = '\033[0;33mWARNING:\033[0m' | ||||
|             else: | ||||
|                 _msg_header = 'WARNING:' | ||||
|             warning_message = '%s %s' % (_msg_header, message) | ||||
|             self.to_stderr(warning_message) | ||||
|  | ||||
|     def report_error(self, message, tb=None): | ||||
|         ''' | ||||
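With the `report_warning` change above, warnings are handed to a caller-supplied `logger` object when one is present in the parameters, instead of being written to stderr. A sketch wiring it to the standard `logging` module (the logger name is arbitrary):

```python
import logging

import youtube_dl

# Sketch: route youtube-dl warnings through the stdlib logging framework
# via the 'logger' parameter checked in report_warning() above.
logging.basicConfig(level=logging.WARNING)
ydl = youtube_dl.YoutubeDL({'logger': logging.getLogger('youtube_dl')})
ydl.report_warning('this message goes to the logger, not to stderr')
```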
| @@ -409,6 +413,13 @@ class YoutubeDL(object): | ||||
|             template_dict['autonumber'] = autonumber_templ % self._num_downloads | ||||
|             if template_dict.get('playlist_index') is not None: | ||||
|                 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index'] | ||||
|             if template_dict.get('resolution') is None: | ||||
|                 if template_dict.get('width') and template_dict.get('height'): | ||||
|                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) | ||||
|                 elif template_dict.get('height'): | ||||
|                     template_dict['resolution'] = '%sp' % template_dict['height'] | ||||
|                 elif template_dict.get('width'): | ||||
|                     template_dict['resolution'] = '?x%d' % template_dict['width'] | ||||
|  | ||||
|             sanitize = lambda k, v: sanitize_filename( | ||||
|                 compat_str(v), | ||||
| @@ -656,6 +667,18 @@ class YoutubeDL(object): | ||||
|                 if f.get('vcodec') == 'none'] | ||||
|             if audio_formats: | ||||
|                 return audio_formats[0] | ||||
|         elif format_spec == 'bestvideo': | ||||
|             video_formats = [ | ||||
|                 f for f in available_formats | ||||
|                 if f.get('acodec') == 'none'] | ||||
|             if video_formats: | ||||
|                 return video_formats[-1] | ||||
|         elif format_spec == 'worstvideo': | ||||
|             video_formats = [ | ||||
|                 f for f in available_formats | ||||
|                 if f.get('acodec') == 'none'] | ||||
|             if video_formats: | ||||
|                 return video_formats[0] | ||||
|         else: | ||||
|             extensions = ['mp4', 'flv', 'webm', '3gp'] | ||||
|             if format_spec in extensions: | ||||
| @@ -675,6 +698,14 @@ class YoutubeDL(object): | ||||
|             info_dict['playlist'] = None | ||||
|             info_dict['playlist_index'] = None | ||||
|  | ||||
|         if 'display_id' not in info_dict and 'id' in info_dict: | ||||
|             info_dict['display_id'] = info_dict['id'] | ||||
|  | ||||
|         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None: | ||||
|             upload_date = datetime.datetime.utcfromtimestamp( | ||||
|                 info_dict['timestamp']) | ||||
|             info_dict['upload_date'] = upload_date.strftime('%Y%m%d') | ||||
|  | ||||
|         # This extractors handle format selection themselves | ||||
|         if info_dict['extractor'] in ['Youku']: | ||||
|             if download: | ||||
| @@ -688,8 +719,11 @@ class YoutubeDL(object): | ||||
|         else: | ||||
|             formats = info_dict['formats'] | ||||
|  | ||||
|         if not formats: | ||||
|             raise ExtractorError('No video formats found!') | ||||
|  | ||||
|         # We check that all the formats have the format and format_id fields | ||||
|         for (i, format) in enumerate(formats): | ||||
|         for i, format in enumerate(formats): | ||||
|             if format.get('format_id') is None: | ||||
|                 format['format_id'] = compat_str(i) | ||||
|             if format.get('format') is None: | ||||
| @@ -908,7 +942,7 @@ class YoutubeDL(object): | ||||
|                     self.to_screen('[%s] %s: Downloading thumbnail ...' % | ||||
|                                    (info_dict['extractor'], info_dict['id'])) | ||||
|                     try: | ||||
|                         uf = compat_urllib_request.urlopen(info_dict['thumbnail']) | ||||
|                         uf = self.urlopen(info_dict['thumbnail']) | ||||
|                         with open(thumb_filename, 'wb') as thumbf: | ||||
|                             shutil.copyfileobj(uf, thumbf) | ||||
|                         self.to_screen('[%s] %s: Writing thumbnail to: %s' % | ||||
| @@ -1154,7 +1188,7 @@ class YoutubeDL(object): | ||||
|  | ||||
|     def urlopen(self, req): | ||||
|         """ Start an HTTP download """ | ||||
|         return self._opener.open(req) | ||||
|         return self._opener.open(req, timeout=self._socket_timeout) | ||||
|  | ||||
|     def print_debug_header(self): | ||||
|         if not self.params.get('verbose'): | ||||
| @@ -1185,7 +1219,7 @@ class YoutubeDL(object): | ||||
|  | ||||
|     def _setup_opener(self): | ||||
|         timeout_val = self.params.get('socket_timeout') | ||||
|         timeout = 600 if timeout_val is None else float(timeout_val) | ||||
|         self._socket_timeout = 600 if timeout_val is None else float(timeout_val) | ||||
|  | ||||
|         opts_cookiefile = self.params.get('cookiefile') | ||||
|         opts_proxy = self.params.get('proxy') | ||||
| @@ -1223,7 +1257,3 @@ class YoutubeDL(object): | ||||
|         # (See https://github.com/rg3/youtube-dl/issues/1309 for details) | ||||
|         opener.addheaders = [] | ||||
|         self._opener = opener | ||||
|  | ||||
|         # TODO remove this global modification | ||||
|         compat_urllib_request.install_opener(opener) | ||||
|         socket.setdefaulttimeout(timeout) | ||||
|   | ||||
| @@ -48,6 +48,9 @@ __authors__  = ( | ||||
|     'Niklas Laxström', | ||||
|     'David Triendl', | ||||
|     'Anthony Weems', | ||||
|     'David Wagner', | ||||
|     'Juan C. Olivares', | ||||
|     'Mattias Harrysson', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
| @@ -313,7 +316,7 @@ def parseOpts(overrideArguments=None): | ||||
|  | ||||
|     video_format.add_option('-f', '--format', | ||||
|             action='store', dest='format', metavar='FORMAT', default=None, | ||||
|             help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestaudio", "worst", and "worstaudio". By default, youtube-dl will pick the best quality.') | ||||
|             help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') | ||||
|     video_format.add_option('--all-formats', | ||||
|             action='store_const', dest='format', help='download all available video formats', const='all') | ||||
|     video_format.add_option('--prefer-free-formats', | ||||
| @@ -428,6 +431,8 @@ def parseOpts(overrideArguments=None): | ||||
|                   '%(extractor)s for the provider (youtube, metacafe, etc), ' | ||||
|                   '%(id)s for the video id, %(playlist)s for the playlist the video is in, ' | ||||
|                   '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' | ||||
|                   '%(height)s and %(width)s for the width and height of the video format. ' | ||||
|                   '%(resolution)s for a textual description of the resolution of the video format. ' | ||||
|                   'Use - to output to stdout. Can also be used to download to a different directory, ' | ||||
|                   'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) | ||||
|     filesystem.add_option('--autonumber-size', | ||||
|   | ||||
| @@ -49,7 +49,7 @@ class HttpFD(FileDownloader): | ||||
|         while count <= retries: | ||||
|             # Establish connection | ||||
|             try: | ||||
|                 data = compat_urllib_request.urlopen(request) | ||||
|                 data = self.ydl.urlopen(request) | ||||
|                 break | ||||
|             except (compat_urllib_error.HTTPError, ) as err: | ||||
|                 if (err.code < 500 or err.code >= 600) and err.code != 416: | ||||
| @@ -59,7 +59,7 @@ class HttpFD(FileDownloader): | ||||
|                     # Unable to resume (requested range not satisfiable) | ||||
|                     try: | ||||
|                         # Open the connection again without the range header | ||||
|                         data = compat_urllib_request.urlopen(basic_request) | ||||
|                         data = self.ydl.urlopen(basic_request) | ||||
|                         content_length = data.info()['Content-Length'] | ||||
|                     except (compat_urllib_error.HTTPError, ) as err: | ||||
|                         if err.code < 500 or err.code >= 600: | ||||
| @@ -85,6 +85,7 @@ class HttpFD(FileDownloader): | ||||
|                         else: | ||||
|                             # The length does not match, we start the download over | ||||
|                             self.report_unable_to_resume() | ||||
|                             resume_len = 0 | ||||
|                             open_mode = 'wb' | ||||
|                             break | ||||
|             # Retry | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| import subprocess | ||||
| @@ -22,7 +24,7 @@ class RtmpFD(FileDownloader): | ||||
|             proc_stderr_closed = False | ||||
|             while not proc_stderr_closed: | ||||
|                 # read line from stderr | ||||
|                 line = u'' | ||||
|                 line = '' | ||||
|                 while True: | ||||
|                     char = proc.stderr.read(1) | ||||
|                     if not char: | ||||
| @@ -46,7 +48,7 @@ class RtmpFD(FileDownloader): | ||||
|                     data_len = None | ||||
|                     if percent > 0: | ||||
|                         data_len = int(downloaded_data_len * 100 / percent) | ||||
|                     data_len_str = u'~' + format_bytes(data_len) | ||||
|                     data_len_str = '~' + format_bytes(data_len) | ||||
|                     self.report_progress(percent, data_len_str, speed, eta) | ||||
|                     cursor_in_new_line = False | ||||
|                     self._hook_progress({ | ||||
| @@ -76,12 +78,12 @@ class RtmpFD(FileDownloader): | ||||
|                         }) | ||||
|                     elif self.params.get('verbose', False): | ||||
|                         if not cursor_in_new_line: | ||||
|                             self.to_screen(u'') | ||||
|                             self.to_screen('') | ||||
|                         cursor_in_new_line = True | ||||
|                         self.to_screen(u'[rtmpdump] '+line) | ||||
|                         self.to_screen('[rtmpdump] '+line) | ||||
|             proc.wait() | ||||
|             if not cursor_in_new_line: | ||||
|                 self.to_screen(u'') | ||||
|                 self.to_screen('') | ||||
|             return proc.returncode | ||||
|  | ||||
|         url = info_dict['url'] | ||||
| @@ -102,7 +104,7 @@ class RtmpFD(FileDownloader): | ||||
|         try: | ||||
|             subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) | ||||
|         except (OSError, IOError): | ||||
|             self.report_error(u'RTMP download detected but "rtmpdump" could not be run') | ||||
|             self.report_error('RTMP download detected but "rtmpdump" could not be run') | ||||
|             return False | ||||
|  | ||||
|         # Download using rtmpdump. rtmpdump returns exit code 2 when | ||||
| @@ -127,7 +129,7 @@ class RtmpFD(FileDownloader): | ||||
|             basic_args += ['--live'] | ||||
|         if conn: | ||||
|             basic_args += ['--conn', conn] | ||||
|         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] | ||||
|         args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)] | ||||
|  | ||||
|         if sys.platform == 'win32' and sys.version_info < (3, 0): | ||||
|             # Windows subprocess module does not actually support Unicode | ||||
| @@ -150,26 +152,35 @@ class RtmpFD(FileDownloader): | ||||
|                 shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) | ||||
|             except ImportError: | ||||
|                 shell_quote = repr | ||||
|             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args)) | ||||
|             self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args)) | ||||
|  | ||||
|         RD_SUCCESS = 0 | ||||
|         RD_FAILED = 1 | ||||
|         RD_INCOMPLETE = 2 | ||||
|         RD_NO_CONNECT = 3 | ||||
|  | ||||
|         retval = run_rtmpdump(args) | ||||
|  | ||||
|         while (retval == 2 or retval == 1) and not test: | ||||
|         if retval == RD_NO_CONNECT: | ||||
|             self.report_error('[rtmpdump] Could not connect to RTMP server.') | ||||
|             return False | ||||
|  | ||||
|         while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live: | ||||
|             prevsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             self.to_screen(u'[rtmpdump] %s bytes' % prevsize) | ||||
|             self.to_screen('[rtmpdump] %s bytes' % prevsize) | ||||
|             time.sleep(5.0) # This seems to be needed | ||||
|             retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) | ||||
|             retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED]) | ||||
|             cursize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             if prevsize == cursize and retval == 1: | ||||
|             if prevsize == cursize and retval == RD_FAILED: | ||||
|                 break | ||||
|              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those | ||||
|             if prevsize == cursize and retval == 2 and cursize > 1024: | ||||
|                 self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.') | ||||
|                 retval = 0 | ||||
|             if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024: | ||||
|                 self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.') | ||||
|                 retval = RD_SUCCESS | ||||
|                 break | ||||
|         if retval == 0 or (test and retval == 2): | ||||
|         if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): | ||||
|             fsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             self.to_screen(u'[rtmpdump] %s bytes' % fsize) | ||||
|             self.to_screen('[rtmpdump] %s bytes' % fsize) | ||||
|             self.try_rename(tmpfilename, filename) | ||||
|             self._hook_progress({ | ||||
|                 'downloaded_bytes': fsize, | ||||
| @@ -179,6 +190,6 @@ class RtmpFD(FileDownloader): | ||||
|             }) | ||||
|             return True | ||||
|         else: | ||||
|             self.to_stderr(u"\n") | ||||
|             self.report_error(u'rtmpdump exited with code %d' % retval) | ||||
|             self.to_stderr('\n') | ||||
|             self.report_error('rtmpdump exited with code %d' % retval) | ||||
|             return False | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| from .academicearth import AcademicEarthCourseIE | ||||
| from .addanime import AddAnimeIE | ||||
| from .aftonbladet import AftonbladetIE | ||||
| from .anitube import AnitubeIE | ||||
| from .aparat import AparatIE | ||||
| from .appletrailers import AppleTrailersIE | ||||
| @@ -23,9 +24,11 @@ from .br import BRIE | ||||
| from .breakcom import BreakIE | ||||
| from .brightcove import BrightcoveIE | ||||
| from .c56 import C56IE | ||||
| from .canal13cl import Canal13clIE | ||||
| from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .cbs import CBSIE | ||||
| from .ceskatelevize import CeskaTelevizeIE | ||||
| from .channel9 import Channel9IE | ||||
| from .chilloutzone import ChilloutzoneIE | ||||
| from .cinemassacre import CinemassacreIE | ||||
| @@ -50,7 +53,6 @@ from .dailymotion import ( | ||||
|     DailymotionUserIE, | ||||
| ) | ||||
| from .daum import DaumIE | ||||
| from .depositfiles import DepositFilesIE | ||||
| from .dotsub import DotsubIE | ||||
| from .dreisat import DreiSatIE | ||||
| from .defense import DefenseGouvFrIE | ||||
| @@ -89,6 +91,7 @@ from .funnyordie import FunnyOrDieIE | ||||
| from .gamekings import GamekingsIE | ||||
| from .gamespot import GameSpotIE | ||||
| from .gametrailers import GametrailersIE | ||||
| from .gdcvault import GDCVaultIE | ||||
| from .generic import GenericIE | ||||
| from .googleplus import GooglePlusIE | ||||
| from .googlesearch import GoogleSearchIE | ||||
| @@ -133,6 +136,7 @@ from .lynda import ( | ||||
| ) | ||||
| from .m6 import M6IE | ||||
| from .macgamestore import MacGameStoreIE | ||||
| from .mailru import MailRuIE | ||||
| from .malemotion import MalemotionIE | ||||
| from .mdr import MDRIE | ||||
| from .metacafe import MetacafeIE | ||||
| @@ -171,6 +175,7 @@ from .ooyala import OoyalaIE | ||||
| from .orf import ORFIE | ||||
| from .pbs import PBSIE | ||||
| from .photobucket import PhotobucketIE | ||||
| from .playvid import PlayvidIE | ||||
| from .podomatic import PodomaticIE | ||||
| from .pornhd import PornHdIE | ||||
| from .pornhub import PornHubIE | ||||
| @@ -191,6 +196,7 @@ from .rutube import ( | ||||
|     RutubeMovieIE, | ||||
|     RutubePersonIE, | ||||
| ) | ||||
| from .rutv import RUTVIE | ||||
| from .savefrom import SaveFromIE | ||||
| from .servingsys import ServingSysIE | ||||
| from .sina import SinaIE | ||||
| @@ -235,7 +241,12 @@ from .tube8 import Tube8IE | ||||
| from .tudou import TudouIE | ||||
| from .tumblr import TumblrIE | ||||
| from .tutv import TutvIE | ||||
| from .tvigle import TvigleIE | ||||
| from .tvp import TvpIE | ||||
| from .udemy import ( | ||||
|     UdemyIE, | ||||
|     UdemyCourseIE | ||||
| ) | ||||
| from .unistra import UnistraIE | ||||
| from .ustream import UstreamIE, UstreamChannelIE | ||||
| from .vbox7 import Vbox7IE | ||||
| @@ -262,6 +273,7 @@ from .viki import VikiIE | ||||
| from .vk import VKIE | ||||
| from .vube import VubeIE | ||||
| from .wat import WatIE | ||||
| from .wdr import WDRIE | ||||
| from .weibo import WeiboIE | ||||
| from .wimp import WimpIE | ||||
| from .wistia import WistiaIE | ||||
| @@ -280,19 +292,20 @@ from .youku import YoukuIE | ||||
| from .youporn import YouPornIE | ||||
| from .youtube import ( | ||||
|     YoutubeIE, | ||||
|     YoutubePlaylistIE, | ||||
|     YoutubeSearchIE, | ||||
|     YoutubeSearchDateIE, | ||||
|     YoutubeUserIE, | ||||
|     YoutubeChannelIE, | ||||
|     YoutubeShowIE, | ||||
|     YoutubeSubscriptionsIE, | ||||
|     YoutubeRecommendedIE, | ||||
|     YoutubeTruncatedURLIE, | ||||
|     YoutubeWatchLaterIE, | ||||
|     YoutubeFavouritesIE, | ||||
|     YoutubeHistoryIE, | ||||
|     YoutubePlaylistIE, | ||||
|     YoutubeRecommendedIE, | ||||
|     YoutubeSearchDateIE, | ||||
|     YoutubeSearchIE, | ||||
|     YoutubeSearchURLIE, | ||||
|     YoutubeShowIE, | ||||
|     YoutubeSubscriptionsIE, | ||||
|     YoutubeTopListIE, | ||||
|     YoutubeTruncatedURLIE, | ||||
|     YoutubeUserIE, | ||||
|     YoutubeWatchLaterIE, | ||||
| ) | ||||
| from .zdf import ZDFIE | ||||
|  | ||||
|   | ||||
youtube_dl/extractor/aftonbladet.py (new file, 69 lines)

							| @@ -0,0 +1,69 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class AftonbladetIE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])' | ||||
|     _TEST = { | ||||
|         'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab', | ||||
|         'info_dict': { | ||||
|             'id': 'article36015', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna', | ||||
|             'description': 'Jupiters måne mest aktiv av alla himlakroppar', | ||||
|             'upload_date': '20140306', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.search(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('video_id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # find internal video meta data | ||||
|         META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' | ||||
|         internal_meta_id = self._html_search_regex( | ||||
|             r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id') | ||||
|         internal_meta_url = META_URL % internal_meta_id | ||||
|         internal_meta_json = self._download_json( | ||||
|             internal_meta_url, video_id, 'Downloading video meta data') | ||||
|  | ||||
|         # find internal video formats | ||||
|         FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s' | ||||
|         internal_video_id = internal_meta_json['videoId'] | ||||
|         internal_formats_url = FORMATS_URL % internal_video_id | ||||
|         internal_formats_json = self._download_json( | ||||
|             internal_formats_url, video_id, 'Downloading video formats') | ||||
|  | ||||
|         formats = [] | ||||
|         for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']: | ||||
|             p = fmt['paths'][0] | ||||
|             formats.append({ | ||||
|                 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']), | ||||
|                 'ext': 'mp4', | ||||
|                 'width': fmt['width'], | ||||
|                 'height': fmt['height'], | ||||
|                 'tbr': fmt['bitrate'], | ||||
|                 'protocol': 'http', | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished']) | ||||
|         upload_date = timestamp.strftime('%Y%m%d') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': internal_meta_json['title'], | ||||
|             'formats': formats, | ||||
|             'thumbnail': internal_meta_json['imageUrl'], | ||||
|             'description': internal_meta_json['shortPreamble'], | ||||
|             'upload_date': upload_date, | ||||
|             'duration': internal_meta_json['duration'], | ||||
|             'view_count': internal_meta_json['views'], | ||||
|         } | ||||
| @@ -72,18 +72,22 @@ class ArteTvIE(InfoExtractor): | ||||
|             return self._extract_liveweb(url, name, lang) | ||||
|  | ||||
|         if re.search(self._LIVE_URL, url) is not None: | ||||
|             raise ExtractorError(u'Arte live streams are not yet supported, sorry') | ||||
|             raise ExtractorError('Arte live streams are not yet supported, sorry') | ||||
|             # self.extractLiveStream(url) | ||||
|             # return | ||||
|  | ||||
|         raise ExtractorError('No video found') | ||||
|  | ||||
|     def _extract_video(self, url, video_id, lang): | ||||
|         """Extract from videos.arte.tv""" | ||||
|         ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') | ||||
|         ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') | ||||
|         ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata') | ||||
|         ref_xml_doc = self._download_xml( | ||||
|             ref_xml_url, video_id, note='Downloading metadata') | ||||
|         config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) | ||||
|         config_xml_url = config_node.attrib['ref'] | ||||
|         config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration') | ||||
|         config_xml = self._download_webpage( | ||||
|             config_xml_url, video_id, note='Downloading configuration') | ||||
|  | ||||
|         video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml)) | ||||
|         def _key(m): | ||||
|   | ||||
| @@ -9,21 +9,35 @@ from ..utils import ExtractorError | ||||
|  | ||||
| class BRIE(InfoExtractor): | ||||
|     IE_DESC = "Bayerischer Rundfunk Mediathek" | ||||
|     _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$" | ||||
|     _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$" | ||||
|     _BASE_URL = "http://www.br.de" | ||||
|  | ||||
|     _TEST = { | ||||
|         "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", | ||||
|         "md5": "c4f83cf0f023ba5875aba0bf46860df2", | ||||
|         "info_dict": { | ||||
|             "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", | ||||
|             "ext": "mp4", | ||||
|             "title": "Feiern und Verzichten", | ||||
|             "description": "Anselm Grün: Feiern und Verzichten", | ||||
|             "uploader": "BR/Birgit Baier", | ||||
|             "upload_date": "20140301" | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", | ||||
|             "md5": "c4f83cf0f023ba5875aba0bf46860df2", | ||||
|             "info_dict": { | ||||
|                 "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", | ||||
|                 "ext": "mp4", | ||||
|                 "title": "Feiern und Verzichten", | ||||
|                 "description": "Anselm Grün: Feiern und Verzichten", | ||||
|                 "uploader": "BR/Birgit Baier", | ||||
|                 "upload_date": "20140301" | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             "url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html", | ||||
|             "md5": "ab451b09d861dbed7d7cc9ab0be19ebe", | ||||
|             "info_dict": { | ||||
|                 "id": "2c060e69-3a27-4e13-b0f0-668fac17d812", | ||||
|                 "ext": "mp4", | ||||
|                 "title": "Über den Pass", | ||||
|                 "description": "Die Eroberung der Alpen: Über den Pass", | ||||
|                 "uploader": None, | ||||
|                 "upload_date": None | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -33,16 +47,21 @@ class BRIE(InfoExtractor): | ||||
|             r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL") | ||||
|         xml = self._download_xml(self._BASE_URL + xml_url, None) | ||||
|  | ||||
|         videos = [{ | ||||
|             "id": xml_video.get("externalId"), | ||||
|             "title": xml_video.find("title").text, | ||||
|             "formats": self._extract_formats(xml_video.find("assets")), | ||||
|             "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")), | ||||
|             "description": " ".join(xml_video.find("shareTitle").text.splitlines()), | ||||
|             "uploader": xml_video.find("author").text, | ||||
|             "upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))), | ||||
|             "webpage_url": xml_video.find("permalink").text, | ||||
|         } for xml_video in xml.findall("video")] | ||||
|         videos = [] | ||||
|         for xml_video in xml.findall("video"): | ||||
|             video = { | ||||
|                 "id": xml_video.get("externalId"), | ||||
|                 "title": xml_video.find("title").text, | ||||
|                 "formats": self._extract_formats(xml_video.find("assets")), | ||||
|                 "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")), | ||||
|                 "description": " ".join(xml_video.find("shareTitle").text.splitlines()), | ||||
|                 "webpage_url": xml_video.find("permalink").text | ||||
|             } | ||||
|             if xml_video.find("author").text: | ||||
|                 video["uploader"] = xml_video.find("author").text | ||||
|             if xml_video.find("broadcastDate").text: | ||||
|                 video["upload_date"] =  "".join(reversed(xml_video.find("broadcastDate").text.split("."))) | ||||
|             videos.append(video) | ||||
|  | ||||
|         if len(videos) > 1: | ||||
|             self._downloader.report_warning( | ||||
|   | ||||
youtube_dl/extractor/canal13cl.py (new file, 48 lines)

							| @@ -0,0 +1,48 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class Canal13clIE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', | ||||
|         'md5': '4cb1fa38adcad8fea88487a078831755', | ||||
|         'info_dict': { | ||||
|             'id': '1403022125', | ||||
|             'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda', | ||||
|             'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         title = self._html_search_meta( | ||||
|             'twitter:title', webpage, 'title', fatal=True) | ||||
|         description = self._html_search_meta( | ||||
|             'twitter:description', webpage, 'description') | ||||
|         url = self._html_search_regex( | ||||
|             r'articuloVideo = \"(.*?)\"', webpage, 'url') | ||||
|         real_id = self._search_regex( | ||||
|             r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id) | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail') | ||||
|  | ||||
|         return { | ||||
|             'id': real_id, | ||||
|             'display_id': display_id, | ||||
|             'url': url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'ext': 'mp4', | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
youtube_dl/extractor/ceskatelevize.py (new file, 126 lines)

							| @@ -0,0 +1,126 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CeskaTelevizeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka', | ||||
|             'info_dict': { | ||||
|                 'id': '213512120230004', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'První republika: Španělská chřipka', | ||||
|                 'duration': 3107.4, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # requires rtmpdump | ||||
|             }, | ||||
|             'skip': 'Works only from Czech Republic.', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt', | ||||
|             'info_dict': { | ||||
|                 'id': '20138143440', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Tsatsiki, maminka a policajt', | ||||
|                 'duration': 6754.1, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # requires rtmpdump | ||||
|             }, | ||||
|             'skip': 'Works only from Czech Republic.', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', | ||||
|             'info_dict': { | ||||
|                 'id': '14716', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'První republika: Zpěvačka z Dupárny Bobina', | ||||
|                 'duration': 90, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # requires rtmpdump | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url = url.replace('/porady/', '/ivysilani/').replace('/video/', '') | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' | ||||
|         if '%s</p>' % NOT_AVAILABLE_STRING in webpage: | ||||
|             raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) | ||||
|  | ||||
|         typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type') | ||||
|         episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id') | ||||
|  | ||||
|         data = { | ||||
|             'playlist[0][type]': typ, | ||||
|             'playlist[0][id]': episode_id, | ||||
|             'requestUrl': compat_urllib_parse_urlparse(url).path, | ||||
|             'requestSource': 'iVysilani', | ||||
|         } | ||||
|  | ||||
|         req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url', | ||||
|                                             data=compat_urllib_parse.urlencode(data)) | ||||
|  | ||||
|         req.add_header('Content-type', 'application/x-www-form-urlencoded') | ||||
|         req.add_header('x-addr', '127.0.0.1') | ||||
|         req.add_header('X-Requested-With', 'XMLHttpRequest') | ||||
|         req.add_header('Referer', url) | ||||
|  | ||||
|         playlistpage = self._download_json(req, video_id) | ||||
|  | ||||
|         req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url'])) | ||||
|         req.add_header('Referer', url) | ||||
|  | ||||
|         playlist = self._download_xml(req, video_id) | ||||
|          | ||||
|         formats = [] | ||||
|         for i in playlist.find('smilRoot/body'): | ||||
|             if 'AD' not in i.attrib['id']: | ||||
|                 base_url = i.attrib['base'] | ||||
|                 parsedurl = compat_urllib_parse_urlparse(base_url) | ||||
|                 duration = i.attrib['duration'] | ||||
|  | ||||
|                 for video in i.findall('video'): | ||||
|                     if video.attrib['label'] != 'AD': | ||||
|                         format_id = video.attrib['label'] | ||||
|                         play_path = video.attrib['src'] | ||||
|                         vbr = int(video.attrib['system-bitrate']) | ||||
|  | ||||
|                         formats.append({ | ||||
|                             'format_id': format_id, | ||||
|                             'url': base_url, | ||||
|                             'vbr': vbr, | ||||
|                             'play_path': play_path, | ||||
|                             'app': parsedurl.path[1:] + '?' + parsedurl.query, | ||||
|                             'rtmp_live': True, | ||||
|                             'ext': 'flv', | ||||
|                         }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': episode_id, | ||||
|             'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'), | ||||
|             'duration': float(duration), | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -17,8 +17,9 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             'id': '6902724', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Comic-Con Cosplay Catastrophe', | ||||
|             'description': 'Fans get creative this year', | ||||
|             'description': "Fans get creative this year at San Diego.  Too creative.  And yes, that's really Joss Whedon.", | ||||
|             'age_limit': 13, | ||||
|             'duration': 187, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
| @@ -28,22 +29,22 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             'id': '3505939', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Font Conference', | ||||
|             'description': 'This video wasn\'t long enough,', | ||||
|             'description': "This video wasn't long enough, so we made it double-spaced.", | ||||
|             'age_limit': 10, | ||||
|             'duration': 179, | ||||
|         }, | ||||
|     }, | ||||
|     # embedded youtube video | ||||
|     { | ||||
|         'url': 'http://www.collegehumor.com/embed/6950457', | ||||
|         'url': 'http://www.collegehumor.com/embed/6950306', | ||||
|         'info_dict': { | ||||
|             'id': 'W5gMp3ZjYg4', | ||||
|             'id': 'Z-bao9fg6Yc', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', | ||||
|             'uploader': 'Funnyplox TV', | ||||
|             'uploader_id': 'funnyploxtv', | ||||
|             'description': 'md5:7ded37421526d54afdf005e25bc2b7a3', | ||||
|             'upload_date': '20140128', | ||||
|             'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!', | ||||
|             'uploader': 'Mark Dice', | ||||
|             'uploader_id': 'MarkDice', | ||||
|             'description': 'md5:62c3dab9351fac7bb44b53b69511d87f', | ||||
|             'upload_date': '20140127', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -87,6 +88,7 @@ class CollegeHumorIE(InfoExtractor): | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         duration = int_or_none(vdata.get('duration'), 1000) | ||||
|         like_count = int_or_none(vdata.get('likes')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -96,4 +98,5 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             'formats': formats, | ||||
|             'age_limit': age_limit, | ||||
|             'duration': duration, | ||||
|             'like_count': like_count, | ||||
|         } | ||||
|   | ||||
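The duration is now read through int_or_none with a scale of 1000, which suggests the CollegeHumor API reports it in milliseconds. A minimal sketch of that pattern, assuming the helper behaves like the simplified version below (the real youtube_dl.utils.int_or_none may differ in details):

    def int_or_none(v, scale=1):
        # Tolerate missing values: return None instead of raising,
        # otherwise coerce to int and divide by the scale factor.
        return None if v is None else int(v) // scale

    # Hypothetical API payload: duration in milliseconds, likes as a string.
    vdata = {'duration': '187000', 'likes': '42'}
    duration = int_or_none(vdata.get('duration'), 1000)   # -> 187 (seconds)
    like_count = int_or_none(vdata.get('likes'))           # -> 42
    missing = int_or_none(vdata.get('dislikes'))            # -> None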
| @@ -88,12 +88,18 @@ class InfoExtractor(object): | ||||
|  | ||||
|     The following fields are optional: | ||||
|  | ||||
|     display_id      An alternative identifier for the video, not necessarily | ||||
|                     unique, but available before title. Typically, id is | ||||
|                     something like "4234987", title "Dancing naked mole rats", | ||||
|                     and display_id "dancing-naked-mole-rats" | ||||
|     thumbnails:     A list of dictionaries (with the entries "resolution" and | ||||
|                     "url") for the varying thumbnails | ||||
|     thumbnail:      Full URL to a video thumbnail image. | ||||
|     description:    One-line video description. | ||||
|     uploader:       Full name of the video uploader. | ||||
|     timestamp:      UNIX timestamp of the moment the video became available. | ||||
|     upload_date:    Video upload date (YYYYMMDD). | ||||
|                     If not explicitly set, calculated from timestamp. | ||||
|     uploader_id:    Nickname or id of the video uploader. | ||||
|     location:       Physical location of the video. | ||||
|     subtitles:      The subtitle file contents as a dictionary in the format | ||||
| @@ -114,9 +120,6 @@ class InfoExtractor(object): | ||||
|     _real_extract() methods and define a _VALID_URL regexp. | ||||
|     Probably, they should also be added to the list of extractors. | ||||
|  | ||||
|     _real_extract() must return a *list* of information dictionaries as | ||||
|     described above. | ||||
|  | ||||
|     Finally, the _WORKING attribute should be set to False for broken IEs | ||||
|     in order to warn the users and skip the tests. | ||||
|     """ | ||||
| @@ -432,14 +435,14 @@ class InfoExtractor(object): | ||||
|         if secure: regexes = self._og_regexes('video:secure_url') + regexes | ||||
|         return self._html_search_regex(regexes, html, name, **kargs) | ||||
|  | ||||
|     def _html_search_meta(self, name, html, display_name=None): | ||||
|     def _html_search_meta(self, name, html, display_name=None, fatal=False): | ||||
|         if display_name is None: | ||||
|             display_name = name | ||||
|         return self._html_search_regex( | ||||
|             r'''(?ix)<meta | ||||
|                     (?=[^>]+(?:itemprop|name|property)=["\']%s["\']) | ||||
|                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), | ||||
|             html, display_name, fatal=False) | ||||
|             html, display_name, fatal=fatal) | ||||
|  | ||||
|     def _dc_search_uploader(self, html): | ||||
|         return self._html_search_meta('dc.creator', html, 'uploader') | ||||
|   | ||||
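Two things change here: the info-dict documentation gains display_id, thumbnails and timestamp, and _real_extract is no longer described as returning a list, while _html_search_meta grows a fatal argument that defaults to the old non-fatal behaviour. A sketch of an info dict that uses the newly documented optional fields; all concrete values below are invented for illustration:

    def _example_info_dict():
        # Illustrative only: a single dict (not a list) using the optional
        # fields documented above. URLs, ids and timestamps are made up.
        return {
            'id': '4234987',                          # unique id
            'display_id': 'dancing-naked-mole-rats',  # readable, may be non-unique
            'title': 'Dancing naked mole rats',
            'url': 'http://videos.example.com/4234987.mp4',
            'ext': 'mp4',
            'timestamp': 1394841600,   # upload_date (YYYYMMDD) is derived from this
            'thumbnails': [
                {'resolution': '120x90', 'url': 'http://example.com/thumb-small.jpg'},
                {'resolution': '640x360', 'url': 'http://example.com/thumb-large.jpg'},
            ],
        }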
| @@ -12,6 +12,7 @@ from ..utils import ( | ||||
|     get_element_by_id, | ||||
|     orderedSet, | ||||
|     str_to_int, | ||||
|     int_or_none, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
| @@ -124,7 +125,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             if video_url is not None: | ||||
|                 m_size = re.search(r'H264-(\d+)x(\d+)', video_url) | ||||
|                 if m_size is not None: | ||||
|                     width, height = m_size.group(1), m_size.group(2) | ||||
|                     width, height = map(int_or_none, (m_size.group(1), m_size.group(2))) | ||||
|                 else: | ||||
|                     width, height = None, None | ||||
|                 formats.append({ | ||||
|   | ||||
| @@ -1,60 +0,0 @@ | ||||
| import re | ||||
| import os | ||||
| import socket | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DepositFilesIE(InfoExtractor): | ||||
|     """Information extractor for depositfiles.com""" | ||||
|  | ||||
|     _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         file_id = url.split('/')[-1] | ||||
|         # Rebuild url in english locale | ||||
|         url = 'http://depositfiles.com/en/files/' + file_id | ||||
|  | ||||
|         # Retrieve file webpage with 'Free download' button pressed | ||||
|         free_download_indication = {'gateway_result' : '1'} | ||||
|         request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication)) | ||||
|         try: | ||||
|             self.report_download_webpage(file_id) | ||||
|             webpage = compat_urllib_request.urlopen(request).read() | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err)) | ||||
|  | ||||
|         # Search for the real file URL | ||||
|         mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage) | ||||
|         if (mobj is None) or (mobj.group(1) is None): | ||||
|             # Try to figure out reason of the error. | ||||
|             mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) | ||||
|             if (mobj is not None) and (mobj.group(1) is not None): | ||||
|                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() | ||||
|                 raise ExtractorError(u'%s' % restriction_message) | ||||
|             else: | ||||
|                 raise ExtractorError(u'Unable to extract download URL from: %s' % url) | ||||
|  | ||||
|         file_url = mobj.group(1) | ||||
|         file_extension = os.path.splitext(file_url)[1][1:] | ||||
|  | ||||
|         # Search for file title | ||||
|         file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title') | ||||
|  | ||||
|         return [{ | ||||
|             'id':       file_id.decode('utf-8'), | ||||
|             'url':      file_url.decode('utf-8'), | ||||
|             'uploader': None, | ||||
|             'upload_date':  None, | ||||
|             'title':    file_title, | ||||
|             'ext':      file_extension.decode('utf-8'), | ||||
|         }] | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
| import socket | ||||
| @@ -9,16 +11,15 @@ from ..utils import ( | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     urlencode_postdata, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FacebookIE(InfoExtractor): | ||||
|     """Information Extractor for Facebook""" | ||||
|  | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://)?(?:\w+\.)?facebook\.com/ | ||||
|         https?://(?:\w+\.)?facebook\.com/ | ||||
|         (?:[^#?]*\#!/)? | ||||
|         (?:video/video\.php|photo\.php|video/embed)\?(?:.*?) | ||||
|         (?:v|video_id)=(?P<id>[0-9]+) | ||||
| @@ -26,21 +27,18 @@ class FacebookIE(InfoExtractor): | ||||
|     _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' | ||||
|     _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' | ||||
|     _NETRC_MACHINE = 'facebook' | ||||
|     IE_NAME = u'facebook' | ||||
|     IE_NAME = 'facebook' | ||||
|     _TEST = { | ||||
|         u'url': u'https://www.facebook.com/photo.php?v=120708114770723', | ||||
|         u'file': u'120708114770723.mp4', | ||||
|         u'md5': u'48975a41ccc4b7a581abd68651c1a5a8', | ||||
|         u'info_dict': { | ||||
|             u"duration": 279, | ||||
|             u"title": u"PEOPLE ARE AWESOME 2013" | ||||
|         'url': 'https://www.facebook.com/photo.php?v=120708114770723', | ||||
|         'md5': '48975a41ccc4b7a581abd68651c1a5a8', | ||||
|         'info_dict': { | ||||
|             'id': '120708114770723', | ||||
|             'ext': 'mp4', | ||||
|             'duration': 279, | ||||
|             'title': 'PEOPLE ARE AWESOME 2013', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def report_login(self): | ||||
|         """Report attempt to log in.""" | ||||
|         self.to_screen(u'Logging in') | ||||
|  | ||||
|     def _login(self): | ||||
|         (useremail, password) = self._get_login_info() | ||||
|         if useremail is None: | ||||
| @@ -48,11 +46,13 @@ class FacebookIE(InfoExtractor): | ||||
|  | ||||
|         login_page_req = compat_urllib_request.Request(self._LOGIN_URL) | ||||
|         login_page_req.add_header('Cookie', 'locale=en_US') | ||||
|         self.report_login() | ||||
|         login_page = self._download_webpage(login_page_req, None, note=False, | ||||
|             errnote=u'Unable to download login page') | ||||
|         lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd') | ||||
|         lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd') | ||||
|         login_page = self._download_webpage(login_page_req, None, | ||||
|             note='Downloading login page', | ||||
|             errnote='Unable to download login page') | ||||
|         lsd = self._search_regex( | ||||
|             r'<input type="hidden" name="lsd" value="([^"]*)"', | ||||
|             login_page, 'lsd') | ||||
|         lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd') | ||||
|  | ||||
|         login_form = { | ||||
|             'email': useremail, | ||||
| @@ -65,27 +65,29 @@ class FacebookIE(InfoExtractor): | ||||
|             'timezone': '-60', | ||||
|             'trynum': '1', | ||||
|             } | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form)) | ||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         try: | ||||
|             login_results = compat_urllib_request.urlopen(request).read() | ||||
|             login_results = self._download_webpage(request, None, | ||||
|                 note='Logging in', errnote='unable to fetch login page') | ||||
|             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: | ||||
|                 self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') | ||||
|                 self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.') | ||||
|                 return | ||||
|  | ||||
|             check_form = { | ||||
|                 'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'), | ||||
|                 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'), | ||||
|                 'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'), | ||||
|                 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'), | ||||
|                 'name_action_selected': 'dont_save', | ||||
|                 'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'), | ||||
|                 'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'), | ||||
|             } | ||||
|             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form)) | ||||
|             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) | ||||
|             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|             check_response = compat_urllib_request.urlopen(check_req).read() | ||||
|             check_response = self._download_webpage(check_req, None, | ||||
|                 note='Confirming login') | ||||
|             if re.search(r'id="checkpointSubmitButton"', check_response) is not None: | ||||
|                 self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.') | ||||
|                 self._downloader.report_warning('Unable to confirm login, you have to log in in your browser and authorize the login.') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) | ||||
|             self._downloader.report_warning('unable to log in: %s' % compat_str(err)) | ||||
|             return | ||||
|  | ||||
|     def _real_initialize(self): | ||||
| @@ -93,8 +95,6 @@ class FacebookIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id | ||||
| @@ -107,10 +107,10 @@ class FacebookIE(InfoExtractor): | ||||
|             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) | ||||
|             if m_msg is not None: | ||||
|                 raise ExtractorError( | ||||
|                     u'The video is not available, Facebook said: "%s"' % m_msg.group(1), | ||||
|                     'The video is not available, Facebook said: "%s"' % m_msg.group(1), | ||||
|                     expected=True) | ||||
|             else: | ||||
|                 raise ExtractorError(u'Cannot parse data') | ||||
|                 raise ExtractorError('Cannot parse data') | ||||
|         data = dict(json.loads(m.group(1))) | ||||
|         params_raw = compat_urllib_parse.unquote(data['params']) | ||||
|         params = json.loads(params_raw) | ||||
| @@ -119,19 +119,15 @@ class FacebookIE(InfoExtractor): | ||||
|         if not video_url: | ||||
|             video_url = video_data['sd_src'] | ||||
|         if not video_url: | ||||
|             raise ExtractorError(u'Cannot find video URL') | ||||
|         video_duration = int(video_data['video_duration']) | ||||
|         thumbnail = video_data['thumbnail_src'] | ||||
|             raise ExtractorError('Cannot find video URL') | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title') | ||||
|             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title') | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'duration': video_duration, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': int(video_data['video_duration']), | ||||
|             'thumbnail': video_data['thumbnail_src'], | ||||
|         } | ||||
|         return [info] | ||||
|   | ||||
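The login and checkpoint POSTs now go through urlencode_postdata rather than compat_urllib_parse.urlencode. A rough sketch of why that matters on Python 3, assuming the helper simply urlencodes the form and encodes it to bytes (the actual youtube_dl.utils implementation may differ):

    try:
        from urllib.parse import urlencode   # Python 3
    except ImportError:
        from urllib import urlencode         # Python 2

    def urlencode_postdata(fields):
        # urllib.request on Python 3 rejects str POST bodies, so the encoded
        # form is converted to bytes before being handed to Request().
        return urlencode(fields).encode('ascii')

    # Illustrative form only; the real Facebook login fields are shown in the diff.
    data = urlencode_postdata({'email': 'user@example.com', 'pass': 'secret'})
    print(type(data))   # <class 'bytes'> on Python 3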
| @@ -8,8 +8,8 @@ from ..utils import ( | ||||
|     unified_strdate, | ||||
|     str_to_int, | ||||
|     parse_duration, | ||||
|     clean_html, | ||||
| ) | ||||
| from youtube_dl.utils import clean_html | ||||
|  | ||||
|  | ||||
| class FourTubeIE(InfoExtractor): | ||||
|   | ||||
| @@ -1,12 +1,13 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class FunnyOrDieIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', | ||||
|         'file': '0732f586d7.mp4', | ||||
| @@ -30,10 +31,23 @@ class FunnyOrDieIE(InfoExtractor): | ||||
|             [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''], | ||||
|             webpage, 'video URL', flags=re.DOTALL) | ||||
|  | ||||
|         if mobj.group('type') == 'embed': | ||||
|             post_json = self._search_regex( | ||||
|                 r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details') | ||||
|             post = json.loads(post_json) | ||||
|             title = post['name'] | ||||
|             description = post.get('description') | ||||
|             thumbnail = post.get('picture') | ||||
|         else: | ||||
|             title = self._og_search_title(webpage) | ||||
|             description = self._og_search_description(webpage) | ||||
|             thumbnail = None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|   | ||||
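Embed URLs are handled separately because the embed page carries its metadata as a JSON blob assigned to fb_post rather than as OpenGraph tags. A toy example of that extraction path, using an invented page fragment; the real markup may differ:

    import json
    import re

    # Hypothetical embed-page snippet containing the fb_post assignment.
    webpage = ('var fb_post = {"name": "Between Two Ferns", '
               '"description": "Episode 18", '
               '"picture": "http://example.com/thumb.jpg"};')

    post_json = re.search(r'fb_post\s*=\s*(\{.*?\});', webpage).group(1)
    post = json.loads(post_json)
    print(post['name'], post.get('picture'))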
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -6,13 +8,14 @@ from .common import InfoExtractor | ||||
| class GamekingsIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", | ||||
|         u'file': u'20130811.mp4', | ||||
|         'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', | ||||
|         # MD5 is flaky, seems to change regularly | ||||
|         #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3', | ||||
|         # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review", | ||||
|             u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.", | ||||
|             'id': '20130811', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', | ||||
|             'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/gdcvault.py (new file, 134 lines)
							| @@ -0,0 +1,134 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
| class GDCVaultIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', | ||||
|             'md5': '7ce8388f544c88b7ac11c7ab1b593704', | ||||
|             'info_dict': { | ||||
|                 'id': '1019721', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', | ||||
|             'info_dict': { | ||||
|                 'id': '1015683', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # Requires rtmpdump | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _parse_mp4(self, xml_description): | ||||
|         video_formats = [] | ||||
|         mp4_video = xml_description.find('./metadata/mp4video') | ||||
|         if mp4_video is None: | ||||
|             return None | ||||
|  | ||||
|         mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text) | ||||
|         video_root = mobj.group('root') | ||||
|         formats = xml_description.findall('./metadata/MBRVideos/MBRVideo') | ||||
|         for format in formats: | ||||
|             mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text) | ||||
|             url = video_root + mobj.group('path') | ||||
|             vbr = format.find('bitrate').text | ||||
|             video_formats.append({ | ||||
|                 'url': url, | ||||
|                 'vbr': int(vbr), | ||||
|             }) | ||||
|         return video_formats | ||||
|  | ||||
|     def _parse_flv(self, xml_description): | ||||
|         video_formats = [] | ||||
|         akami_url = xml_description.find('./metadata/akamaiHost').text | ||||
|         slide_video_path = xml_description.find('./metadata/slideVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + slide_video_path, | ||||
|             'format_note': 'slide deck video', | ||||
|             'quality': -2, | ||||
|             'preference': -2, | ||||
|             'format_id': 'slides', | ||||
|         }) | ||||
|         speaker_video_path = xml_description.find('./metadata/speakerVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + speaker_video_path, | ||||
|             'format_note': 'speaker video', | ||||
|             'quality': -1, | ||||
|             'preference': -1, | ||||
|             'format_id': 'speaker', | ||||
|         }) | ||||
|         return video_formats | ||||
|  | ||||
|     def _login(self, webpage_url, video_id): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None or password is None: | ||||
|             self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.') | ||||
|             return None | ||||
|  | ||||
|         mobj = re.match(r'(?P<root_url>https?://.*?/).*', webpage_url) | ||||
|         login_url = mobj.group('root_url') + 'api/login.php' | ||||
|         logout_url = mobj.group('root_url') + 'logout' | ||||
|  | ||||
|         login_form = { | ||||
|             'email': username, | ||||
|             'password': password, | ||||
|         } | ||||
|  | ||||
|         request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form)) | ||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         self._download_webpage(request, video_id, 'Logging in') | ||||
|         start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page') | ||||
|         self._download_webpage(logout_url, video_id, 'Logging out') | ||||
|  | ||||
|         return start_page | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         webpage_url = 'http://www.gdcvault.com/play/' + video_id | ||||
|         start_page = self._download_webpage(webpage_url, video_id) | ||||
|  | ||||
|         xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False) | ||||
|  | ||||
|         if xml_root is None: | ||||
|             # Probably need to authenticate | ||||
|             start_page = self._login(webpage_url, video_id) | ||||
|             if start_page is None: | ||||
|                 self.report_warning('Could not login.') | ||||
|             else: | ||||
|                 # Grab the url from the authenticated page | ||||
|                 xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root') | ||||
|  | ||||
|         xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False) | ||||
|         if xml_name is None: | ||||
|             # Fallback to the older format | ||||
|             xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename') | ||||
|  | ||||
|         xml_description_url = xml_root + 'xml/' + xml_name | ||||
|         xml_description = self._download_xml(xml_description_url, video_id) | ||||
|  | ||||
|         video_title = xml_description.find('./metadata/title').text | ||||
|         video_formats = self._parse_mp4(xml_description) | ||||
|         if video_formats is None: | ||||
|             video_formats = self._parse_flv(xml_description) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'formats': video_formats, | ||||
|         } | ||||
| @@ -4,7 +4,6 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| @@ -17,6 +16,7 @@ from ..utils import ( | ||||
|  | ||||
|     ExtractorError, | ||||
|     HEADRequest, | ||||
|     parse_xml, | ||||
|     smuggle_url, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
| @@ -24,6 +24,7 @@ from ..utils import ( | ||||
| ) | ||||
| from .brightcove import BrightcoveIE | ||||
| from .ooyala import OoyalaIE | ||||
| from .rutv import RUTVIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -116,6 +117,48 @@ class GenericIE(InfoExtractor): | ||||
|             'params': { | ||||
|                 'skip_download': False, | ||||
|             } | ||||
|         }, | ||||
|         # embed.ly video | ||||
|         { | ||||
|             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', | ||||
|             'info_dict': { | ||||
|                 'id': '9ODmcdjQcHQ', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second', | ||||
|                 'upload_date': '20140225', | ||||
|                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff', | ||||
|                 'uploader': 'Tested', | ||||
|                 'uploader_id': 'testedcom', | ||||
|             }, | ||||
|             # No need to test YoutubeIE here | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # funnyordie embed | ||||
|         { | ||||
|             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns', | ||||
|             'md5': '7cf780be104d40fea7bae52eed4a470e', | ||||
|             'info_dict': { | ||||
|                 'id': '18e820ec3f', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama', | ||||
|                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', | ||||
|             }, | ||||
|         }, | ||||
|         # RUTV embed | ||||
|         { | ||||
|             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html', | ||||
|             'info_dict': { | ||||
|                 'id': '776940', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Охотское море стало целиком российским', | ||||
|                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -211,7 +254,7 @@ class GenericIE(InfoExtractor): | ||||
|             else: | ||||
|                 assert ':' in default_search | ||||
|                 return self.url_result(default_search + url) | ||||
|         video_id = os.path.splitext(url.split('/')[-1])[0] | ||||
|         video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0] | ||||
|  | ||||
|         self.to_screen('%s: Requesting header' % video_id) | ||||
|  | ||||
| @@ -257,7 +300,7 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Is it an RSS feed? | ||||
|         try: | ||||
|             doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8')) | ||||
|             doc = parse_xml(webpage) | ||||
|             if doc.tag == 'rss': | ||||
|                 return self._extract_rss(url, video_id, doc) | ||||
|         except compat_xml_parse_error: | ||||
| @@ -296,9 +339,9 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for embedded (iframe) Vimeo player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage) | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) | ||||
|         if mobj: | ||||
|             player_url = unescapeHTML(mobj.group(1)) | ||||
|             player_url = unescapeHTML(mobj.group('url')) | ||||
|             surl = smuggle_url(player_url, {'Referer': url}) | ||||
|             return self.url_result(surl, 'Vimeo') | ||||
|  | ||||
| @@ -396,12 +439,38 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Facebook') | ||||
|  | ||||
|         # Look for embedded VK player | ||||
|         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'VK') | ||||
|  | ||||
|         # Look for embedded Huffington Post player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'HuffPost') | ||||
|  | ||||
|         # Look for embed.ly | ||||
|         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url')) | ||||
|         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(compat_urllib_parse.unquote(mobj.group('url'))) | ||||
|  | ||||
|         # Look for funnyordie embed | ||||
|         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) | ||||
|         if matches: | ||||
|             urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie') | ||||
|                      for eurl in matches] | ||||
|             return self.playlist_result( | ||||
|                 urlrs, playlist_id=video_id, playlist_title=video_title) | ||||
|  | ||||
|         # Look for embedded RUTV player | ||||
|         rutv_url = RUTVIE._extract_url(webpage) | ||||
|         if rutv_url: | ||||
|             return self.url_result(rutv_url, 'RUTV') | ||||
|  | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|   | ||||
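The new embed.ly detection covers two markup variants: a plain card anchor with an href, and an iframe embed whose src carries the target as a percent-encoded url= query parameter. A small demonstration of both regexes against invented snippets:

    import re
    try:
        from urllib.parse import unquote   # Python 3
    except ImportError:
        from urllib import unquote         # Python 2

    # Invented markup; the attribute layout follows what the regexes expect.
    card = '<a class="embedly-card" href="https://www.youtube.com/watch?v=9ODmcdjQcHQ">Tested</a>'
    embed = ('<iframe class="embedly-embed" '
             'src="//cdn.embedly.com/widgets/media.html?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3D9ODmcdjQcHQ&key=x">'
             '</iframe>')

    m = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', card)
    print(m.group('url'))            # the href is used directly

    m = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', embed)
    print(unquote(m.group('url')))   # the url= parameter is unquoted first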
| @@ -46,6 +46,6 @@ class GoogleSearchIE(SearchInfoExtractor): | ||||
|                     'url': mobj.group(1) | ||||
|                 }) | ||||
|  | ||||
|             if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage): | ||||
|             if (len(entries) >= n) or not re.search(r'id="pnnext"', webpage): | ||||
|                 res['entries'] = entries[:n] | ||||
|                 return res | ||||
|   | ||||
| @@ -6,7 +6,10 @@ from random import random | ||||
| from math import floor | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_request | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class IPrimaIE(InfoExtractor): | ||||
| @@ -36,6 +39,7 @@ class IPrimaIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True,  # requires rtmpdump | ||||
|         }, | ||||
|         'skip': 'Do not have permission to access this page', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -44,6 +48,10 @@ class IPrimaIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         if re.search(r'Nemáte oprávnění přistupovat na tuto stránku.\s*</div>', webpage): | ||||
|             raise ExtractorError( | ||||
|                 '%s said: You do not have permission to access this page' % self.IE_NAME, expected=True) | ||||
|  | ||||
|         player_url = ( | ||||
|             'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % | ||||
|             (floor(random()*1073741824), floor(random()*1073741824)) | ||||
|   | ||||
| @@ -1,56 +1,61 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     RegexNotFoundError, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class JukeboxIE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html' | ||||
|     _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>' | ||||
|     _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"' | ||||
|     _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>' | ||||
|     _IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html', | ||||
|         'md5': '5dc6477e74b1e37042ac5acedd8413e5', | ||||
|         'info_dict': { | ||||
|             'id': 'r303r', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Kosheen-En Vivo Pride', | ||||
|             'uploader': 'Kosheen', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         html = self._download_webpage(url, video_id) | ||||
|  | ||||
|         mobj = re.search(self._IFRAME, html) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Cannot extract iframe url') | ||||
|         iframe_url = unescapeHTML(mobj.group('iframe')) | ||||
|         iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url')) | ||||
|  | ||||
|         iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe') | ||||
|         mobj = re.search(r'class="jkb_waiting"', iframe_html) | ||||
|         if mobj is not None: | ||||
|             raise ExtractorError(u'Video is not available(in your country?)!') | ||||
|         if re.search(r'class="jkb_waiting"', iframe_html) is not None: | ||||
|             raise ExtractorError('Video is not available(in your country?)!') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         mobj = re.search(self._VIDEO_URL, iframe_html) | ||||
|         if mobj is None: | ||||
|             mobj = re.search(self._IS_YOUTUBE, iframe_html) | ||||
|             if mobj is None: | ||||
|                 raise ExtractorError(u'Cannot extract video url') | ||||
|             youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/') | ||||
|             self.to_screen(u'Youtube video detected') | ||||
|             return self.url_result(youtube_url,ie='Youtube') | ||||
|         video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/') | ||||
|         video_ext = unescapeHTML(mobj.group('video_ext')) | ||||
|         try: | ||||
|             video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"', | ||||
|                 iframe_html, 'video url') | ||||
|             video_url = unescapeHTML(video_url).replace('\/', '/') | ||||
|         except RegexNotFoundError: | ||||
|             youtube_url = self._search_regex( | ||||
|                 r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"', | ||||
|                 iframe_html, 'youtube url') | ||||
|             youtube_url = unescapeHTML(youtube_url).replace('\/', '/') | ||||
|             self.to_screen('Youtube video detected') | ||||
|             return self.url_result(youtube_url, ie='Youtube') | ||||
|  | ||||
|         mobj = re.search(self._TITLE, html) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Cannot extract title') | ||||
|         title = unescapeHTML(mobj.group('title')) | ||||
|         artist = unescapeHTML(mobj.group('artist')) | ||||
|         title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>', | ||||
|             html, 'title') | ||||
|         artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>', | ||||
|             html, 'artist') | ||||
|  | ||||
|         return [{'id': video_id, | ||||
|                  'url': video_url, | ||||
|                  'title': artist + '-' + title, | ||||
|                  'ext': video_ext | ||||
|                  }] | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': artist + '-' + title, | ||||
|             'uploader': artist, | ||||
|         } | ||||
|   | ||||
| @@ -6,7 +6,8 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     unified_strdate | ||||
|     unified_strdate, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -34,11 +35,9 @@ class LifeNewsIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page') | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL') | ||||
|  | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail') | ||||
|         videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage) | ||||
|         if not videos: | ||||
|             raise ExtractorError('No media links available for %s' % video_id) | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS' | ||||
| @@ -57,13 +56,19 @@ class LifeNewsIE(InfoExtractor): | ||||
|         if upload_date is not None: | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'thumbnail': thumbnail, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'view_count': int_or_none(view_count), | ||||
|             'comment_count': int_or_none(comment_count), | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
|         def make_entry(video_id, media, video_number=None): | ||||
|             return { | ||||
|                 'id': video_id, | ||||
|                 'url': media[1], | ||||
|                 'thumbnail': media[0], | ||||
|                 'title': title if video_number is None else '%s-video%s' % (title, video_number), | ||||
|                 'description': description, | ||||
|                 'view_count': int_or_none(view_count), | ||||
|                 'comment_count': int_or_none(comment_count), | ||||
|                 'upload_date': upload_date, | ||||
|             } | ||||
|  | ||||
|         if len(videos) == 1: | ||||
|             return make_entry(video_id, videos[0]) | ||||
|         else: | ||||
|             return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)] | ||||
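LifeNews pages can carry several <video> tags, so the extractor now collects (poster, src) pairs and returns a list of entries when there is more than one. A quick check of that regex against an invented two-player fragment:

    import re

    # Invented fragment; the attribute order (poster before src) matches the regex.
    webpage = (
        '<video poster="http://example.com/1.jpg" src="http://example.com/1.mp4"></video>'
        '<video poster="http://example.com/2.jpg" src="http://example.com/2.mp4"></video>'
    )

    videos = re.findall(
        r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>',
        webpage)
    print(videos)
    # [('http://example.com/1.jpg', 'http://example.com/1.mp4'),
    #  ('http://example.com/2.jpg', 'http://example.com/2.mp4')]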
| @@ -8,7 +8,9 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     ExtractorError | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -19,16 +21,17 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|     _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' | ||||
|     _NETRC_MACHINE = 'lynda' | ||||
|  | ||||
|     _SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account' | ||||
|     _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true' | ||||
|     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' | ||||
|  | ||||
|     ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', | ||||
|         'file': '114408.mp4', | ||||
|         'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', | ||||
|         'info_dict': { | ||||
|             'id': '114408', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Using the exercise files', | ||||
|             'duration': 68 | ||||
|         } | ||||
| @@ -41,27 +44,44 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, | ||||
|                                       video_id, 'Downloading video JSON') | ||||
|         page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id, | ||||
|             'Downloading video JSON') | ||||
|         video_json = json.loads(page) | ||||
|  | ||||
|         if 'Status' in video_json: | ||||
|             raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True) | ||||
|  | ||||
|         if video_json['HasAccess'] is False: | ||||
|             raise ExtractorError('Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True) | ||||
|             raise ExtractorError( | ||||
|                 'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True) | ||||
|  | ||||
|         video_id = video_json['ID'] | ||||
|         video_id = compat_str(video_json['ID']) | ||||
|         duration = video_json['DurationInSeconds'] | ||||
|         title = video_json['Title'] | ||||
|  | ||||
|         formats = [{'url': fmt['Url'], | ||||
|         formats = [] | ||||
|  | ||||
|         fmts = video_json.get('Formats') | ||||
|         if fmts: | ||||
|             formats.extend([ | ||||
|                 { | ||||
|                     'url': fmt['Url'], | ||||
|                     'ext': fmt['Extension'], | ||||
|                     'width': fmt['Width'], | ||||
|                     'height': fmt['Height'], | ||||
|                     'filesize': fmt['FileSize'], | ||||
|                     'format_id': str(fmt['Resolution']) | ||||
|                     } for fmt in video_json['Formats']] | ||||
|                 } for fmt in fmts]) | ||||
|  | ||||
|         prioritized_streams = video_json.get('PrioritizedStreams') | ||||
|         if prioritized_streams: | ||||
|             formats.extend([ | ||||
|                 { | ||||
|                     'url': video_url, | ||||
|                     'width': int_or_none(format_id), | ||||
|                     'format_id': format_id, | ||||
|                 } for format_id, video_url in prioritized_streams['0'].items() | ||||
|             ]) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -91,7 +111,7 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|             'stayPut': 'false' | ||||
|         }         | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) | ||||
|         login_page = self._download_webpage(request, None, note='Logging in as %s' % username) | ||||
|         login_page = self._download_webpage(request, None, 'Logging in as %s' % username) | ||||
|  | ||||
|         # Not (yet) logged in | ||||
|         m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page) | ||||
| @@ -116,7 +136,7 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|                     'stayPut': 'false', | ||||
|                 } | ||||
|                 request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form)) | ||||
|                 login_page = self._download_webpage(request, None, note='Confirming log in and log out from another device') | ||||
|                 login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device') | ||||
|  | ||||
|         if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: | ||||
|             raise ExtractorError('Unable to log in') | ||||
| @@ -150,7 +170,7 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|         url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id | ||||
|         sub = self._download_webpage(url, None, note=False) | ||||
|         sub = self._download_webpage(url, None, False) | ||||
|         sub_json = json.loads(sub) | ||||
|         return {'en': url} if len(sub_json) > 0 else {} | ||||
|  | ||||
| @@ -179,6 +199,9 @@ class LyndaCourseIE(InfoExtractor): | ||||
|         videos = [] | ||||
|         (username, _) = self._get_login_info() | ||||
|  | ||||
|         # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided | ||||
|         # by single video API anymore | ||||
|  | ||||
|         for chapter in course_json['Chapters']: | ||||
|             for video in chapter['Videos']: | ||||
|                 if username is None and video['HasAccess'] is False: | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/mailru.py (new file, 66 lines)
							| @@ -0,0 +1,66 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import datetime | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class MailRuIE(InfoExtractor): | ||||
|     IE_NAME = 'mailru' | ||||
|     IE_DESC = 'Видео@Mail.Ru' | ||||
|     _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', | ||||
|         'md5': 'dea205f03120046894db4ebb6159879a', | ||||
|         'info_dict': { | ||||
|             'id': '46301138', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', | ||||
|             'upload_date': '20140224', | ||||
|             'uploader': 'sonypicturesrus', | ||||
|             'uploader_id': 'sonypicturesrus@mail.ru', | ||||
|             'duration': 184, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_data = self._download_json( | ||||
|             'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON') | ||||
|  | ||||
|         author = video_data['author'] | ||||
|         uploader = author['name'] | ||||
|         uploader_id = author['id'] | ||||
|  | ||||
|         movie = video_data['movie'] | ||||
|         content_id = str(movie['contentId']) | ||||
|         title = movie['title'] | ||||
|         thumbnail = movie['poster'] | ||||
|         duration = movie['duration'] | ||||
|  | ||||
|         upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d') | ||||
|         view_count = video_data['views_count'] | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': video['url'], | ||||
|                 'format_id': video['name'], | ||||
|             } for video in video_data['videos'] | ||||
|         ] | ||||
|  | ||||
|         return { | ||||
|             'id': content_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
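The upload_date above is derived from a Unix timestamp in the API response. A quick illustration of that conversion; the timestamp value below is made up, and note that fromtimestamp uses the local timezone:

    import datetime

    timestamp = 1393236000  # arbitrary example value, not from the real API
    upload_date = datetime.datetime.fromtimestamp(timestamp).strftime('%Y%m%d')
    print(upload_date)  # typically '20140224'; the exact date depends on the local timezone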
| @@ -5,6 +5,7 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     compat_urllib_parse, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| @@ -15,8 +16,9 @@ class MixcloudIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', | ||||
|         'file': 'dholbach-cryptkeeper.mp3', | ||||
|         'info_dict': { | ||||
|             'id': 'dholbach-cryptkeeper', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Cryptkeeper', | ||||
|             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', | ||||
|             'uploader': 'Daniel Holbach', | ||||
| @@ -45,7 +47,7 @@ class MixcloudIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         uploader = mobj.group(1) | ||||
|         cloudcast_name = mobj.group(2) | ||||
|         track_id = '-'.join((uploader, cloudcast_name)) | ||||
|         track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name))) | ||||
|  | ||||
|         webpage = self._download_webpage(url, track_id) | ||||
|  | ||||
|   | ||||
| @@ -5,9 +5,12 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     ExtractorError, | ||||
|     find_xpath_attr, | ||||
|     fix_xml_ampersands, | ||||
|     HEADRequest, | ||||
|     unescapeHTML, | ||||
|     url_basename, | ||||
|     RegexNotFoundError, | ||||
| ) | ||||
| @@ -18,6 +21,7 @@ def _media_xml_tag(tag): | ||||
|  | ||||
|  | ||||
| class MTVServicesInfoExtractor(InfoExtractor): | ||||
|     _MOBILE_TEMPLATE = None | ||||
|     @staticmethod | ||||
|     def _id_from_uri(uri): | ||||
|         return uri.split(':')[-1] | ||||
| @@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         else: | ||||
|             return thumb_node.attrib['url'] | ||||
|  | ||||
|     def _extract_video_formats(self, mdoc): | ||||
|         if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None: | ||||
|             raise ExtractorError('This video is not available from your country.', expected=True) | ||||
|     def _extract_mobile_video_formats(self, mtvn_id): | ||||
|         webpage_url = self._MOBILE_TEMPLATE % mtvn_id | ||||
|         req = compat_urllib_request.Request(webpage_url) | ||||
|         # Otherwise we get a webpage that would execute some javascript | ||||
|         req.add_header('Youtubedl-user-agent', 'curl/7') | ||||
|         webpage = self._download_webpage(req, mtvn_id, | ||||
|             'Downloading mobile page') | ||||
|         metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url')) | ||||
|         req = HEADRequest(metrics_url) | ||||
|         response = self._request_webpage(req, mtvn_id, 'Resolving url') | ||||
|         url = response.geturl() | ||||
|         # Transform the url to get the best quality: | ||||
|         url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1) | ||||
|         return [{'url': url, 'ext': 'mp4'}] | ||||
|  | ||||
|     def _extract_video_formats(self, mdoc, mtvn_id): | ||||
|         if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None: | ||||
|             if mtvn_id is not None and self._MOBILE_TEMPLATE is not None: | ||||
|                 self.to_screen('The normal version is not available from your ' | ||||
|                     'country, trying with the mobile version') | ||||
|                 return self._extract_mobile_video_formats(mtvn_id) | ||||
|             raise ExtractorError('This video is not available from your country.', | ||||
|                 expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         for rendition in mdoc.findall('.//rendition'): | ||||
| @@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|             raise ExtractorError('Could not find video title') | ||||
|         title = title.strip() | ||||
|  | ||||
|         # This is a short id that's used in the webpage urls | ||||
|         mtvn_id = None | ||||
|         mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category', | ||||
|                 'scheme', 'urn:mtvn:id') | ||||
|         if mtvn_id_node is not None: | ||||
|             mtvn_id = mtvn_id_node.text | ||||
|  | ||||
|         return { | ||||
|             'title': title, | ||||
|             'formats': self._extract_video_formats(mediagen_doc), | ||||
|             'formats': self._extract_video_formats(mediagen_doc, mtvn_id), | ||||
|             'id': video_id, | ||||
|             'thumbnail': self._get_thumbnail_url(uri, itemdoc), | ||||
|             'description': description, | ||||
|   | ||||
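The re.sub call in _extract_mobile_video_formats above swaps everything up to the 'pxE=mp4' marker for a fixed high-quality CDN prefix. A worked example on a made-up resolved URL:

    import re

    # Hypothetical resolved metrics URL; only the path after 'pxE=mp4' is kept.
    url = 'http://example.llnwd.net/mobile/_pxn=1+_pxK=10000+_pxE=mp4/video/clip_12345.mp4'
    best = re.sub(
        r'.+pxE=mp4',
        'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4',
        url, 1)
    print(best)
    # http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4/video/clip_12345.mp4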
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import binascii | ||||
| import base64 | ||||
| import hashlib | ||||
| @@ -14,18 +16,16 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
|  | ||||
| class MyVideoIE(InfoExtractor): | ||||
|     """Information Extractor for myvideo.de.""" | ||||
|  | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*' | ||||
|     IE_NAME = u'myvideo' | ||||
|     _VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*' | ||||
|     IE_NAME = 'myvideo' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', | ||||
|         u'file': u'8229274.flv', | ||||
|         u'md5': u'2d2753e8130479ba2cb7e0a37002053e', | ||||
|         u'info_dict': { | ||||
|             u"title": u"bowling-fail-or-win" | ||||
|         'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', | ||||
|         'md5': '2d2753e8130479ba2cb7e0a37002053e', | ||||
|         'info_dict': { | ||||
|             'id': '8229274', | ||||
|             'ext': 'flv', | ||||
|             'title': 'bowling-fail-or-win', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -53,10 +53,7 @@ class MyVideoIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'invalid URL: %s' % url) | ||||
|  | ||||
|         video_id = mobj.group(1) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         GK = ( | ||||
|           b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt' | ||||
| @@ -74,37 +71,33 @@ class MyVideoIE(InfoExtractor): | ||||
|             video_url = mobj.group(1) + '.flv' | ||||
|  | ||||
|             video_title = self._html_search_regex('<title>([^<]+)</title>', | ||||
|                 webpage, u'title') | ||||
|                 webpage, 'title') | ||||
|  | ||||
|             video_ext = self._search_regex('[.](.+?)$', video_url, u'extension') | ||||
|  | ||||
|             return [{ | ||||
|                 'id':       video_id, | ||||
|                 'url':      video_url, | ||||
|                 'uploader': None, | ||||
|                 'upload_date':  None, | ||||
|                 'title':    video_title, | ||||
|                 'ext':      video_ext, | ||||
|             }] | ||||
|             return { | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'title': video_title, | ||||
|             } | ||||
|  | ||||
|         mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage) | ||||
|         if mobj is not None: | ||||
|             request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '') | ||||
|             response = self._download_webpage(request, video_id, | ||||
|                                               u'Downloading video info') | ||||
|                                               'Downloading video info') | ||||
|             info = json.loads(base64.b64decode(response).decode('utf-8')) | ||||
|             return {'id': video_id, | ||||
|                     'title': info['title'], | ||||
|                     'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), | ||||
|                     'play_path': info['filename'], | ||||
|                     'ext': 'flv', | ||||
|                     'thumbnail': info['thumbnail'][0]['url'], | ||||
|                     } | ||||
|             return { | ||||
|                 'id': video_id, | ||||
|                 'title': info['title'], | ||||
|                 'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), | ||||
|                 'play_path': info['filename'], | ||||
|                 'ext': 'flv', | ||||
|                 'thumbnail': info['thumbnail'][0]['url'], | ||||
|             } | ||||
|  | ||||
|         # try encxml | ||||
|         mobj = re.search('var flashvars={(.+?)}', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract video') | ||||
|             raise ExtractorError('Unable to extract video') | ||||
|  | ||||
|         params = {} | ||||
|         encxml = '' | ||||
| @@ -118,7 +111,7 @@ class MyVideoIE(InfoExtractor): | ||||
|             params['domain'] = 'www.myvideo.de' | ||||
|         xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params)) | ||||
|         if 'flash_playertype=MTV' in xmldata_url: | ||||
|             self._downloader.report_warning(u'avoiding MTV player') | ||||
|             self._downloader.report_warning('avoiding MTV player') | ||||
|             xmldata_url = ( | ||||
|                 'http://www.myvideo.de/dynamic/get_player_video_xml.php' | ||||
|                 '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes' | ||||
| @@ -144,7 +137,7 @@ class MyVideoIE(InfoExtractor): | ||||
|             video_url = compat_urllib_parse.unquote(mobj.group(1)) | ||||
|             if 'myvideo2flash' in video_url: | ||||
|                 self.report_warning( | ||||
|                     u'Rewriting URL to use unencrypted rtmp:// ...', | ||||
|                     'Rewriting URL to use unencrypted rtmp:// ...', | ||||
|                     video_id) | ||||
|                 video_url = video_url.replace('rtmpe://', 'rtmp://') | ||||
|  | ||||
| @@ -152,39 +145,31 @@ class MyVideoIE(InfoExtractor): | ||||
|             # extract non rtmp videos | ||||
|             mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data) | ||||
|             if mobj is None: | ||||
|                 raise ExtractorError(u'unable to extract url') | ||||
|                 raise ExtractorError('unable to extract url') | ||||
|             video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2)) | ||||
|  | ||||
|         video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file') | ||||
|         video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file') | ||||
|         video_file = compat_urllib_parse.unquote(video_file) | ||||
|  | ||||
|         if not video_file.endswith('f4m'): | ||||
|             ppath, prefix = video_file.split('.') | ||||
|             video_playpath = '%s:%s' % (prefix, ppath) | ||||
|             video_hls_playlist = '' | ||||
|         else: | ||||
|             video_playpath = '' | ||||
|             video_hls_playlist = ( | ||||
|                 video_file | ||||
|             ).replace('.f4m', '.m3u8') | ||||
|  | ||||
|         video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj') | ||||
|         video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj') | ||||
|         video_swfobj = compat_urllib_parse.unquote(video_swfobj) | ||||
|  | ||||
|         video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>", | ||||
|             webpage, u'title') | ||||
|             webpage, 'title') | ||||
|  | ||||
|         return [{ | ||||
|             'id':                 video_id, | ||||
|             'url':                video_url, | ||||
|             'tc_url':             video_url, | ||||
|             'uploader':           None, | ||||
|             'upload_date':        None, | ||||
|             'title':              video_title, | ||||
|             'ext':                u'flv', | ||||
|             'play_path':          video_playpath, | ||||
|             'video_file':         video_file, | ||||
|             'video_hls_playlist': video_hls_playlist, | ||||
|             'player_url':         video_swfobj, | ||||
|         }] | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'tc_url': video_url, | ||||
|             'title': video_title, | ||||
|             'ext': 'flv', | ||||
|             'play_path': video_playpath, | ||||
|             'player_url': video_swfobj, | ||||
|         } | ||||
|  | ||||
|   | ||||
| @@ -15,7 +15,9 @@ class NineGagIE(InfoExtractor): | ||||
|         "file": "1912.mp4", | ||||
|         "info_dict": { | ||||
|             "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)", | ||||
|             "title": "\"People Are Awesome 2013\" Is Absolutely Awesome" | ||||
|             "title": "\"People Are Awesome 2013\" Is Absolutely Awesome", | ||||
|             "view_count": int, | ||||
|             "thumbnail": "re:^https?://", | ||||
|         }, | ||||
|         'add_ie': ['Youtube'] | ||||
|     } | ||||
| @@ -25,21 +27,27 @@ class NineGagIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         data_json = self._html_search_regex(r'''(?x) | ||||
|             <div\s*id="tv-video"\s*data-video-source="youtube"\s* | ||||
|                 data-video-meta="([^"]+)"''', webpage, 'video metadata') | ||||
|  | ||||
|         data = json.loads(data_json) | ||||
|         youtube_id = self._html_search_regex( | ||||
|             r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"', | ||||
|             webpage, 'video ID') | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage, | ||||
|             'description', fatal=False) | ||||
|         view_count_str = self._html_search_regex( | ||||
|             r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count', | ||||
|             fatal=False) | ||||
|         view_count = ( | ||||
|             None if view_count_str is None | ||||
|             else int(view_count_str.replace(',', ''))) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': data['youtubeVideoId'], | ||||
|             'url': youtube_id, | ||||
|             'ie_key': 'Youtube', | ||||
|             'id': video_id, | ||||
|             'title': data['title'], | ||||
|             'description': data['description'], | ||||
|             'view_count': int(data['view_count']), | ||||
|             'like_count': int(data['statistic']['like']), | ||||
|             'dislike_count': int(data['statistic']['dislike']), | ||||
|             'thumbnail': data['thumbnail_url'], | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': description, | ||||
|             'view_count': view_count, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|         } | ||||
|   | ||||
| @@ -8,6 +8,7 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     HEADRequest, | ||||
|     unified_strdate, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -35,7 +36,15 @@ class ORFIE(InfoExtractor): | ||||
|         data_json = self._search_regex( | ||||
|             r'initializeAdworx\((.+?)\);\n', webpage, 'video info') | ||||
|         all_data = json.loads(data_json) | ||||
|         sdata = all_data[0]['values']['segments'] | ||||
|  | ||||
|         def get_segments(all_data): | ||||
|             for data in all_data: | ||||
|                 if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM': | ||||
|                     return data['values']['segments'] | ||||
|  | ||||
|         sdata = get_segments(all_data) | ||||
|         if not sdata: | ||||
|             raise ExtractorError('Unable to extract segments') | ||||
|  | ||||
|         def quality_to_int(s): | ||||
|             m = re.search('([0-9]+)', s) | ||||
|   | ||||
| @@ -1,76 +1,43 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| class PhotobucketIE(InfoExtractor): | ||||
|     """Information extractor for photobucket.com.""" | ||||
|  | ||||
|     # TODO: the original _VALID_URL was: | ||||
|     # r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' | ||||
|     # Check if it's necessary to keep the old extraction process | ||||
|     _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' | ||||
|     IE_NAME = u'photobucket' | ||||
|     _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' | ||||
|     _TEST = { | ||||
|         u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', | ||||
|         u'file': u'zpsc0c3b9fa.mp4', | ||||
|         u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20130504",  | ||||
|             u"uploader": u"rachaneronas",  | ||||
|             u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!" | ||||
|         'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', | ||||
|         'file': 'zpsc0c3b9fa.mp4', | ||||
|         'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', | ||||
|         'info_dict': { | ||||
|             'upload_date': '20130504', | ||||
|             'uploader': 'rachaneronas', | ||||
|             'title': 'Tired of Link Building? Try BacklinkMyDomain.com!', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract id from URL | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_extension = mobj.group('ext') | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # Extract URL, uploader, and title from webpage | ||||
|         self.report_extraction(video_id) | ||||
|         # We try first by looking the javascript code: | ||||
|         mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage) | ||||
|         if mobj is not None: | ||||
|             info = json.loads(mobj.group('json')) | ||||
|             return [{ | ||||
|                 'id':       video_id, | ||||
|                 'url':      info[u'downloadUrl'], | ||||
|                 'uploader': info[u'username'], | ||||
|                 'upload_date':  datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'), | ||||
|                 'title':    info[u'title'], | ||||
|                 'ext':      video_extension, | ||||
|                 'thumbnail': info[u'thumbUrl'], | ||||
|             }] | ||||
|  | ||||
|         # We try looking in other parts of the webpage | ||||
|         video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />', | ||||
|             webpage, u'video URL') | ||||
|  | ||||
|         mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract title') | ||||
|         video_title = mobj.group(1).decode('utf-8') | ||||
|         video_uploader = mobj.group(2).decode('utf-8') | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id.decode('utf-8'), | ||||
|             'url':      video_url.decode('utf-8'), | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date':  None, | ||||
|             'title':    video_title, | ||||
|             'ext':      video_extension.decode('utf-8'), | ||||
|         }] | ||||
|         info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);', | ||||
|             webpage, 'info json') | ||||
|         info = json.loads(info_json) | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': info['downloadUrl'], | ||||
|             'uploader': info['username'], | ||||
|             'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'), | ||||
|             'title': info['title'], | ||||
|             'ext': video_extension, | ||||
|             'thumbnail': info['thumbUrl'], | ||||
|         } | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/playvid.py (new file, 80 lines)
							| @@ -0,0 +1,80 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class PlayvidIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.playvid.com/watch/agbDDi7WZTV', | ||||
|         'md5': '44930f8afa616efdf9482daf4fe53e1e', | ||||
|         'info_dict': { | ||||
|             'id': 'agbDDi7WZTV', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Michelle Lewin in Miami Beach', | ||||
|             'duration': 240, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_title = None | ||||
|         duration = None | ||||
|         video_thumbnail = None | ||||
|         formats = [] | ||||
|  | ||||
|         # most of the information is stored in the flashvars | ||||
|         flashvars = self._html_search_regex( | ||||
|             r'flashvars="(.+?)"', webpage, 'flashvars') | ||||
|  | ||||
|         infos = compat_urllib_parse.unquote(flashvars).split(r'&') | ||||
|         for info in infos: | ||||
|             videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info) | ||||
|             if videovars_match: | ||||
|                 key = videovars_match.group(1) | ||||
|                 val = videovars_match.group(2) | ||||
|  | ||||
|                 if key == 'title': | ||||
|                     video_title = compat_urllib_parse.unquote_plus(val) | ||||
|                 if key == 'duration': | ||||
|                     try: | ||||
|                         duration = int(val) | ||||
|                     except ValueError: | ||||
|                         pass | ||||
|                 if key == 'big_thumb': | ||||
|                     video_thumbnail = val | ||||
|  | ||||
|                 videourl_match = re.match( | ||||
|                     r'^video_urls\]\[(?P<resolution>[0-9]+)p', key) | ||||
|                 if videourl_match: | ||||
|                     height = int(videourl_match.group('resolution')) | ||||
|                     formats.append({ | ||||
|                         'height': height, | ||||
|                         'url': val, | ||||
|                     }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         # Extract title - should be in the flashvars; if not, look elsewhere | ||||
|         if video_title is None: | ||||
|             video_title = self._html_search_regex( | ||||
|                 r'<title>(.*?)</title', webpage, 'title') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'title': video_title, | ||||
|             'thumbnail': video_thumbnail, | ||||
|             'duration': duration, | ||||
|             'description': None, | ||||
|             'age_limit': 18 | ||||
|         } | ||||
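For reference, this is how the flashvars parsing above behaves on a made-up value (using the standard library here instead of compat_urllib_parse). Note that the second, odd-looking key is exactly what the video_urls regex inside the loop relies on:

    import re
    try:
        from urllib.parse import unquote  # Python 3
    except ImportError:
        from urllib import unquote        # Python 2

    # Made-up flashvars string in the shape the extractor expects.
    flashvars = ('video_vars%5Btitle%5D=Some+Clip'
                 '&video_vars%5Bvideo_urls%5D%5B480p%5D=http%3A%2F%2Fcdn.example.com%2F480.mp4')
    for part in unquote(flashvars).split('&'):
        m = re.match(r'^video_vars\[(.+?)\]=(.+?)$', part)
        if m:
            print(m.group(1), '=>', m.group(2))
    # title => Some+Clip
    # video_urls][480p => http://cdn.example.com/480.mp4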
| @@ -44,7 +44,7 @@ class PornHubIE(InfoExtractor): | ||||
|  | ||||
|         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) | ||||
|         if webpage.find('"encrypted":true') != -1: | ||||
|             password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ') | ||||
|             password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password')) | ||||
|             video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) | ||||
|  | ||||
|         formats = [] | ||||
|   | ||||
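The switch from a plain '+' replacement to unquote_plus above matters whenever the decryption password (the video title) also contains percent-escapes. A small comparison on a made-up value:

    try:
        from urllib.parse import unquote_plus  # Python 3
    except ImportError:
        from urllib import unquote_plus        # Python 2

    raw_title = 'Some+title+with+%27quotes%27'   # made-up example value
    print(raw_title.replace('+', ' '))           # Some title with %27quotes%27
    print(unquote_plus(raw_title))               # Some title with 'quotes'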
| @@ -51,14 +51,14 @@ class ProSiebenSat1IE(InfoExtractor): | ||||
|             'skip': 'Seems to be broken', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.prosiebenmaxx.de/yep/one-piece/video/148-folge-48-gold-rogers-heimat-ganze-folge', | ||||
|             'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge', | ||||
|             'info_dict': { | ||||
|                 'id': '2437108', | ||||
|                 'id': '2429369', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Folge 48: Gold Rogers Heimat', | ||||
|                 'description': 'Ruffy erreicht die Insel, auf der der berühmte Gold Roger lebte und hingerichtet wurde.', | ||||
|                 'upload_date': '20140226', | ||||
|                 'duration': 1401.48, | ||||
|                 'title': 'Countdown für die Autowerkstatt', | ||||
|                 'description': 'md5:809fc051a457b5d8666013bc40698817', | ||||
|                 'upload_date': '20140223', | ||||
|                 'duration': 2595.04, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/rutv.py (new file, 183 lines)
							| @@ -0,0 +1,183 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RUTVIE(InfoExtractor): | ||||
|     IE_DESC = 'RUTV.RU' | ||||
|     _VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724', | ||||
|             'info_dict': { | ||||
|                 'id': '774471', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Монологи на все времена', | ||||
|                 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5', | ||||
|                 'duration': 2906, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638', | ||||
|             'info_dict': { | ||||
|                 'id': '774016', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Чужой в семье Сталина', | ||||
|                 'description': '', | ||||
|                 'duration': 2539, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000', | ||||
|             'info_dict': { | ||||
|                 'id': '766888', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', | ||||
|                 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', | ||||
|                 'duration': 279, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169', | ||||
|             'info_dict': { | ||||
|                 'id': '771852', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет', | ||||
|                 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8', | ||||
|                 'duration': 3096, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014', | ||||
|             'info_dict': { | ||||
|                 'id': '51499', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ', | ||||
|                 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Translation has finished', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     @classmethod | ||||
|     def _extract_url(cls, webpage): | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|         mobj = re.search( | ||||
|             r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)', | ||||
|             webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_type = mobj.group('type') | ||||
|  | ||||
|         if not video_type or video_type == 'swf': | ||||
|             video_type = 'video' | ||||
|  | ||||
|         json_data = self._download_json( | ||||
|             'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id), | ||||
|             video_id, 'Downloading JSON') | ||||
|  | ||||
|         if json_data['errors']: | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True) | ||||
|  | ||||
|         playlist = json_data['data']['playlist'] | ||||
|         medialist = playlist['medialist'] | ||||
|         media = medialist[0] | ||||
|  | ||||
|         if media['errors']: | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True) | ||||
|  | ||||
|         view_count = playlist.get('count_views') | ||||
|         priority_transport = playlist['priority_transport'] | ||||
|  | ||||
|         thumbnail = media['picture'] | ||||
|         width = int_or_none(media['width']) | ||||
|         height = int_or_none(media['height']) | ||||
|         description = media['anons'] | ||||
|         title = media['title'] | ||||
|         duration = int_or_none(media.get('duration')) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for transport, links in media['sources'].items(): | ||||
|             for quality, url in links.items(): | ||||
|                 if transport == 'rtmp': | ||||
|                     mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url) | ||||
|                     if not mobj: | ||||
|                         continue | ||||
|                     fmt = { | ||||
|                         'url': mobj.group('url'), | ||||
|                         'play_path': mobj.group('playpath'), | ||||
|                         'app': mobj.group('app'), | ||||
|                         'page_url': 'http://player.rutv.ru', | ||||
|                         'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22', | ||||
|                         'rtmp_live': True, | ||||
|                         'ext': 'flv', | ||||
|                         'vbr': int(quality), | ||||
|                     } | ||||
|                 elif transport == 'm3u8': | ||||
|                     fmt = { | ||||
|                         'url': url, | ||||
|                         'ext': 'mp4', | ||||
|                     } | ||||
|                 else: | ||||
|                     fmt = { | ||||
|                         'url': url | ||||
|                     } | ||||
|                 fmt.update({ | ||||
|                     'width': width, | ||||
|                     'height': height, | ||||
|                     'format_id': '%s-%s' % (transport, quality), | ||||
|                     'preference': -1 if priority_transport == transport else -2, | ||||
|                 }) | ||||
|                 formats.append(fmt) | ||||
|  | ||||
|         if not formats: | ||||
|             raise ExtractorError('No media links available for %s' % video_id) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'view_count': view_count, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
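The rtmp branch above splits each link into connection URL, app and playpath with a single regex. On a hypothetical link it behaves like this:

    import re

    # Hypothetical rtmp link in the shape the extractor expects.
    rtmp = 'rtmp://video.example.ru/live-rec/mp4:some/clip_720.mp4'
    m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', rtmp)
    print(m.group('url'))       # rtmp://video.example.ru/live-rec/mp4:some
    print(m.group('app'))       # live-rec/mp4:some
    print(m.group('playpath'))  # clip_720.mp4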
| @@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|                 'id': '47127627', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Goldrushed', | ||||
|                 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', | ||||
|                 'uploader': 'The Royal Concept', | ||||
|                 'upload_date': '20120521', | ||||
|             }, | ||||
| @@ -217,7 +218,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|         return self._extract_info_dict(info, full_title, secret_token=token) | ||||
|  | ||||
| class SoundcloudSetIE(SoundcloudIE): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' | ||||
|     IE_NAME = 'soundcloud:set' | ||||
|     # it's in tests/test_playlists.py | ||||
|     _TESTS = [] | ||||
|   | ||||
| @@ -1,10 +1,15 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
|  | ||||
|  | ||||
| class SpikeIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+' | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|         (www\.spike\.com/(video-clips|episodes)/.+| | ||||
|          m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+)) | ||||
|         ''' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle', | ||||
|         'md5': '1a9265f32b0c375793d6c4ce45255256', | ||||
| @@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor): | ||||
|     } | ||||
|  | ||||
|     _FEED_URL = 'http://www.spike.com/feeds/mrss/' | ||||
|     _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.search(self._VALID_URL, url) | ||||
|         mobile_id = mobj.group('mobile_id') | ||||
|         if mobile_id is not None: | ||||
|             url = 'http://www.spike.com/video-clips/%s' % mobile_id | ||||
|         return super(SpikeIE, self)._real_extract(url) | ||||
|   | ||||
| @@ -6,115 +6,114 @@ import re | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     RegexNotFoundError, | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TEDIE(SubtitlesInfoExtractor): | ||||
|     _VALID_URL=r'''http://www\.ted\.com/ | ||||
|                    ( | ||||
|                         ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist | ||||
|                         | | ||||
|                         ((?P<type_talk>talks)) # We have a simple talk | ||||
|                    ) | ||||
|                    (/lang/(.*?))? # The url may contain the language | ||||
|                    /(?P<name>\w+) # Here goes the name and then ".html" | ||||
|                    ''' | ||||
|     _VALID_URL = r'''(?x)http://www\.ted\.com/ | ||||
|         ( | ||||
|             (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist | ||||
|             | | ||||
|             ((?P<type_talk>talks)) # We have a simple talk | ||||
|         ) | ||||
|         (/lang/(.*?))? # The url may contain the language | ||||
|         /(?P<name>\w+) # Here goes the name and then ".html" | ||||
|         ''' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', | ||||
|         'file': '102.mp4', | ||||
|         'md5': '4ea1dada91e4174b53dac2bb8ace429d', | ||||
|         'info_dict': { | ||||
|             "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922", | ||||
|             "title": "Dan Dennett: The illusion of consciousness" | ||||
|             'id': '102', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The illusion of consciousness', | ||||
|             'description': ('Philosopher Dan Dennett makes a compelling ' | ||||
|                 'argument that not only don\'t we understand our own ' | ||||
|                 'consciousness, but that half the time our brains are ' | ||||
|                 'actively fooling us.'), | ||||
|             'uploader': 'Dan Dennett', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|     _FORMATS_PREFERENCE = { | ||||
|         'low': 1, | ||||
|         'medium': 2, | ||||
|         'high': 3, | ||||
|     } | ||||
|  | ||||
|     def _extract_info(self, webpage): | ||||
|         info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>', | ||||
|             webpage, 'info json') | ||||
|         return json.loads(info_json) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m=re.match(self._VALID_URL, url, re.VERBOSE) | ||||
|         m = re.match(self._VALID_URL, url, re.VERBOSE) | ||||
|         name = m.group('name') | ||||
|         if m.group('type_talk'): | ||||
|             return self._talk_info(url) | ||||
|         else : | ||||
|             playlist_id=m.group('playlist_id') | ||||
|             name=m.group('name') | ||||
|             self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name)) | ||||
|             return [self._playlist_videos_info(url,name,playlist_id)] | ||||
|             return self._talk_info(url, name) | ||||
|         else: | ||||
|             return self._playlist_videos_info(url, name) | ||||
|  | ||||
|  | ||||
|     def _playlist_videos_info(self, url, name, playlist_id): | ||||
|     def _playlist_videos_info(self, url, name): | ||||
|         '''Returns the videos of the playlist''' | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             url, playlist_id, 'Downloading playlist webpage') | ||||
|         matches = re.finditer( | ||||
|             r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>', | ||||
|             webpage) | ||||
|  | ||||
|         playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>', | ||||
|                                                  webpage, 'playlist title') | ||||
|         webpage = self._download_webpage(url, name, | ||||
|             'Downloading playlist webpage') | ||||
|         info = self._extract_info(webpage) | ||||
|         playlist_info = info['playlist'] | ||||
|  | ||||
|         playlist_entries = [ | ||||
|             self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED') | ||||
|             for m in matches | ||||
|             self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key()) | ||||
|             for talk in info['talks'] | ||||
|         ] | ||||
|         return self.playlist_result( | ||||
|             playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title) | ||||
|             playlist_entries, | ||||
|             playlist_id=compat_str(playlist_info['id']), | ||||
|             playlist_title=playlist_info['title']) | ||||
|  | ||||
|     def _talk_info(self, url, video_id=0): | ||||
|         """Return the video for the talk in the url""" | ||||
|         m = re.match(self._VALID_URL, url,re.VERBOSE) | ||||
|         video_name = m.group('name') | ||||
|         webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name) | ||||
|     def _talk_info(self, url, video_name): | ||||
|         webpage = self._download_webpage(url, video_name) | ||||
|         self.report_extraction(video_name) | ||||
|         # If the url includes the language we get the title translated | ||||
|         title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>', | ||||
|                                         webpage, 'title') | ||||
|         json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>', | ||||
|                                     webpage, 'json data') | ||||
|         info = json.loads(json_data) | ||||
|         desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>', | ||||
|                                        webpage, 'description', flags = re.DOTALL) | ||||
|          | ||||
|         thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"', | ||||
|                                        webpage, 'thumbnail') | ||||
|  | ||||
|         talk_info = self._extract_info(webpage)['talks'][0] | ||||
|  | ||||
|         formats = [{ | ||||
|             'ext': 'mp4', | ||||
|             'url': stream['file'], | ||||
|             'format': stream['id'] | ||||
|         } for stream in info['htmlStreams']] | ||||
|  | ||||
|         video_id = info['id'] | ||||
|             'url': format_url, | ||||
|             'format_id': format_id, | ||||
|             'format': format_id, | ||||
|             'preference': self._FORMATS_PREFERENCE.get(format_id, -1), | ||||
|         } for (format_id, format_url) in talk_info['nativeDownloads'].items()] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         video_id = compat_str(talk_info['id']) | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, webpage) | ||||
|         video_subtitles = self.extract_subtitles(video_id, talk_info) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, webpage) | ||||
|             self._list_available_subtitles(video_id, talk_info) | ||||
|             return | ||||
|  | ||||
|         thumbnail = talk_info['thumb'] | ||||
|         if not thumbnail.startswith('http'): | ||||
|             thumbnail = 'http://' + thumbnail | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'title': talk_info['title'], | ||||
|             'uploader': talk_info['speaker'], | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': desc, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'subtitles': video_subtitles, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|         try: | ||||
|             options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL) | ||||
|             languages = re.findall(r'(?:<option value=")(\S+)"', options) | ||||
|             if languages: | ||||
|                 sub_lang_list = {} | ||||
|                 for l in languages: | ||||
|                     url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) | ||||
|                     sub_lang_list[l] = url | ||||
|                 return sub_lang_list | ||||
|         except RegexNotFoundError: | ||||
|     def _get_available_subtitles(self, video_id, talk_info): | ||||
|         languages = [lang['languageCode'] for lang in talk_info.get('languages', [])] | ||||
|         if languages: | ||||
|             sub_lang_list = {} | ||||
|             for l in languages: | ||||
|                 url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) | ||||
|                 sub_lang_list[l] = url | ||||
|             return sub_lang_list | ||||
|         else: | ||||
|             self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||
|         return {} | ||||
|             return {} | ||||
|   | ||||
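The rewritten _get_available_subtitles builds one SRT URL per language code found in the talk metadata. A small self-contained illustration; the talk_info value below is a minimal stand-in, not real API output:

    talk_info = {'languages': [{'languageCode': 'en'}, {'languageCode': 'de'}]}  # stand-in
    video_id = '102'
    sub_lang_list = {}
    for lang in talk_info.get('languages', []):
        code = lang['languageCode']
        sub_lang_list[code] = (
            'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, code))
    print(sub_lang_list['de'])
    # http://www.ted.com/talks/subtitles/id/102/lang/de/format/srt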
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from youtube_dl.utils import ExtractorError | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class TinyPicIE(InfoExtractor): | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/tvigle.py (new file, 84 lines)
							| @@ -0,0 +1,84 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     clean_html, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TvigleIE(InfoExtractor): | ||||
|     IE_NAME = 'tvigle' | ||||
|     IE_DESC = 'Интернет-телевидение Tvigle.ru' | ||||
|     _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081', | ||||
|             'md5': '09afba4616666249f087efc6dcf83cb3', | ||||
|             'info_dict': { | ||||
|                 'id': '503081', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Брат 2 ', | ||||
|                 'description': 'md5:f5a42970f50648cee3d7ad740f3ae769', | ||||
|                 'upload_date': '20110919', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433', | ||||
|             'md5': 'e7efe5350dd5011d0de6550b53c3ba7b', | ||||
|             'info_dict': { | ||||
|                 'id': '676433', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком', | ||||
|                 'description': 'md5:027f7dc872948f14c96d19b4178428a4', | ||||
|                 'upload_date': '20121218', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_data = self._download_xml( | ||||
|             'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id, video_id, 'Downloading video XML') | ||||
|  | ||||
|         video = video_data.find('./video') | ||||
|  | ||||
|         title = video.get('name') | ||||
|         description = video.get('anons') | ||||
|         if description: | ||||
|             description = clean_html(description) | ||||
|         thumbnail = video_data.get('img') | ||||
|         upload_date = unified_strdate(video.get('date')) | ||||
|         like_count = int_or_none(video.get('vtp')) | ||||
|  | ||||
|         formats = [] | ||||
|         for num, (format_id, format_note) in enumerate([['low_file', 'SQ'], ['file', 'HQ'], ['hd', 'HD 720']]): | ||||
|             video_url = video.get(format_id) | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'format_id': format_id, | ||||
|                 'format_note': format_note, | ||||
|                 'quality': num, | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|             'like_count': like_count, | ||||
|             'age_limit': 18, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
youtube_dl/extractor/udemy.py (new file, 164 lines)
							| @@ -0,0 +1,164 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class UdemyIE(InfoExtractor): | ||||
|     IE_NAME = 'udemy' | ||||
|     _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)' | ||||
|     _LOGIN_URL = 'https://www.udemy.com/join/login-submit/' | ||||
|     _NETRC_MACHINE = 'udemy' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757', | ||||
|         'md5': '98eda5b657e752cf945d8445e261b5c5', | ||||
|         'info_dict': { | ||||
|             'id': '160614', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Introduction and Installation', | ||||
|             'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876', | ||||
|             'duration': 579.29, | ||||
|         }, | ||||
|         'skip': 'Requires udemy account credentials', | ||||
|     }] | ||||
|  | ||||
|     def _handle_error(self, response): | ||||
|         if not isinstance(response, dict): | ||||
|             return | ||||
|         error = response.get('error') | ||||
|         if error: | ||||
|             error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message')) | ||||
|             error_data = error.get('data') | ||||
|             if error_data: | ||||
|                 error_str += ' - %s' % error_data.get('formErrors') | ||||
|             raise ExtractorError(error_str, expected=True) | ||||
|  | ||||
|     def _download_json(self, url, video_id, note='Downloading JSON metadata'): | ||||
|         response = super(UdemyIE, self)._download_json(url, video_id, note) | ||||
|         self._handle_error(response) | ||||
|         return response | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             raise ExtractorError( | ||||
|                 'Udemy account is required, use --username and --password options to provide account credentials.', | ||||
|                 expected=True) | ||||
|  | ||||
|         login_popup = self._download_webpage( | ||||
|             'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None, | ||||
|             'Downloading login popup') | ||||
|  | ||||
|         if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>': | ||||
|             return | ||||
|  | ||||
|         csrf = self._html_search_regex(r'<input type="hidden" name="csrf" value="(.+?)"', login_popup, 'csrf token') | ||||
|  | ||||
|         login_form = { | ||||
|             'email': username, | ||||
|             'password': password, | ||||
|             'csrf': csrf, | ||||
|             'displayType': 'json', | ||||
|             'isSubmitted': '1', | ||||
|         } | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) | ||||
|         response = self._download_json(request, None, 'Logging in as %s' % username) | ||||
|  | ||||
|         if 'returnUrl' not in response: | ||||
|             raise ExtractorError('Unable to log in') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         lecture_id = mobj.group('id') | ||||
|  | ||||
|         lecture = self._download_json( | ||||
|             'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, lecture_id, 'Downloading lecture JSON') | ||||
|  | ||||
|         if lecture['assetType'] != 'Video': | ||||
|             raise ExtractorError('Lecture %s is not a video' % lecture_id, expected=True) | ||||
|  | ||||
|         asset = lecture['asset'] | ||||
|  | ||||
|         stream_url = asset['streamUrl'] | ||||
|         mobj = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url) | ||||
|         if mobj: | ||||
|             return self.url_result(mobj.group(1), 'Youtube') | ||||
|  | ||||
|         video_id = asset['id'] | ||||
|         thumbnail = asset['thumbnailUrl'] | ||||
|         duration = asset['data']['duration'] | ||||
|  | ||||
|         download_url = asset['downloadUrl'] | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': download_url['Video480p'][0], | ||||
|                 'format_id': '360p', | ||||
|             }, | ||||
|             { | ||||
|                 'url': download_url['Video'][0], | ||||
|                 'format_id': '720p', | ||||
|             }, | ||||
|         ] | ||||
|  | ||||
|         title = lecture['title'] | ||||
|         description = lecture['description'] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats | ||||
|         } | ||||
|  | ||||
|  | ||||
| class UdemyCourseIE(UdemyIE): | ||||
|     IE_NAME = 'udemy:course' | ||||
|     _VALID_URL = r'https?://www\.udemy\.com/(?P<coursepath>[\da-z-]+)' | ||||
|     _SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<' | ||||
|     _ALREADY_ENROLLED = '>You are already taking this course.<' | ||||
|     _TESTS = [] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return False if UdemyIE.suitable(url) else super(UdemyCourseIE, cls).suitable(url) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         course_path = mobj.group('coursepath') | ||||
|  | ||||
|         response = self._download_json( | ||||
|             'https://www.udemy.com/api-1.1/courses/%s' % course_path, course_path, 'Downloading course JSON') | ||||
|  | ||||
|         course_id = int(response['id']) | ||||
|         course_title = response['title'] | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id, course_id, 'Enrolling in the course') | ||||
|  | ||||
|         if self._SUCCESSFULLY_ENROLLED in webpage: | ||||
|             self.to_screen('%s: Successfully enrolled in the course' % course_id) | ||||
|         elif self._ALREADY_ENROLLED in webpage: | ||||
|             self.to_screen('%s: Already enrolled in the course' % course_id) | ||||
|  | ||||
|         response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id, | ||||
|             course_id, 'Downloading course curriculum') | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy') | ||||
|             for asset in response if asset.get('assetType') == 'Video' | ||||
|         ] | ||||
|  | ||||
|         return self.playlist_result(entries, course_id, course_title) | ||||
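A note on the suitable() override above: UdemyCourseIE's URL pattern matches almost any udemy.com path, so it explicitly defers to the more specific lecture extractor. A minimal standalone sketch of that delegation (the two regexes are copied from the classes above; the test URLs come from the lecture test case):

import re

LECTURE_RE = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
COURSE_RE = r'https?://www\.udemy\.com/(?P<coursepath>[\da-z-]+)'

def course_suitable(url):
    # Mirror UdemyCourseIE.suitable(): step aside when the lecture pattern matches.
    if re.match(LECTURE_RE, url):
        return False
    return re.match(COURSE_RE, url) is not None

assert course_suitable('https://www.udemy.com/java-tutorial/')
assert not course_suitable('https://www.udemy.com/java-tutorial/#/lecture/172757')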
| @@ -4,14 +4,11 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none | ||||
| ) | ||||
| from ..utils import ExtractorError | ||||
| from .rutv import RUTVIE | ||||
|  | ||||
|  | ||||
| class VestiIE(InfoExtractor): | ||||
|     IE_NAME = 'vesti' | ||||
|     IE_DESC = 'Вести.Ru' | ||||
|     _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)' | ||||
|  | ||||
| @@ -30,6 +27,20 @@ class VestiIE(InfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.vesti.ru/doc.html?id=1349233', | ||||
|             'info_dict': { | ||||
|                 'id': '773865', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Участники митинга штурмуют Донецкую областную администрацию', | ||||
|                 'description': 'md5:1a160e98b3195379b4c849f2f4958009', | ||||
|                 'duration': 210, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.vesti.ru/only_video.html?vid=576180', | ||||
|             'info_dict': { | ||||
| @@ -44,6 +55,20 @@ class VestiIE(InfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://hitech.vesti.ru/news/view/id/4000', | ||||
|             'info_dict': { | ||||
|                 'id': '766888', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', | ||||
|                 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', | ||||
|                 'duration': 279, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403', | ||||
|             'info_dict': { | ||||
| @@ -57,7 +82,7 @@ class VestiIE(InfoExtractor): | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Blocked outside Russia' | ||||
|             'skip': 'Blocked outside Russia', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://sochi2014.vesti.ru/live/play/live_id/301', | ||||
| @@ -72,7 +97,7 @@ class VestiIE(InfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Translation has finished' | ||||
|         } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -81,90 +106,16 @@ class VestiIE(InfoExtractor): | ||||
|  | ||||
|         page = self._download_webpage(url, video_id, 'Downloading page') | ||||
|  | ||||
|         mobj = re.search(r'<meta property="og:video" content=".+?\.swf\?v?id=(?P<id>\d+).*?" />', page) | ||||
|         mobj = re.search( | ||||
|             r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)', | ||||
|             page) | ||||
|         if mobj: | ||||
|             video_type = 'video' | ||||
|             video_id = mobj.group('id') | ||||
|         else: | ||||
|             mobj = re.search( | ||||
|                 r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>', page) | ||||
|             page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id, | ||||
|                 'Downloading video page') | ||||
|  | ||||
|             if not mobj: | ||||
|                 raise ExtractorError('No media found') | ||||
|         rutv_url = RUTVIE._extract_url(page) | ||||
|         if rutv_url: | ||||
|             return self.url_result(rutv_url, 'RUTV') | ||||
|  | ||||
|             video_type = mobj.group('type') | ||||
|             video_id = mobj.group('id') | ||||
|  | ||||
|         json_data = self._download_json( | ||||
|             'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id), | ||||
|             video_id, 'Downloading JSON') | ||||
|  | ||||
|         if json_data['errors']: | ||||
|             raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True) | ||||
|  | ||||
|         playlist = json_data['data']['playlist'] | ||||
|         medialist = playlist['medialist'] | ||||
|         media = medialist[0] | ||||
|  | ||||
|         if media['errors']: | ||||
|             raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True) | ||||
|  | ||||
|         view_count = playlist.get('count_views') | ||||
|         priority_transport = playlist['priority_transport'] | ||||
|  | ||||
|         thumbnail = media['picture'] | ||||
|         width = media['width'] | ||||
|         height = media['height'] | ||||
|         description = media['anons'] | ||||
|         title = media['title'] | ||||
|         duration = int_or_none(media.get('duration')) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for transport, links in media['sources'].items(): | ||||
|             for quality, url in links.items(): | ||||
|                 if transport == 'rtmp': | ||||
|                     mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url) | ||||
|                     if not mobj: | ||||
|                         continue | ||||
|                     fmt = { | ||||
|                         'url': mobj.group('url'), | ||||
|                         'play_path': mobj.group('playpath'), | ||||
|                         'app': mobj.group('app'), | ||||
|                         'page_url': 'http://player.rutv.ru', | ||||
|                         'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22', | ||||
|                         'rtmp_live': True, | ||||
|                         'ext': 'flv', | ||||
|                         'vbr': int(quality), | ||||
|                     } | ||||
|                 elif transport == 'm3u8': | ||||
|                     fmt = { | ||||
|                         'url': url, | ||||
|                         'ext': 'mp4', | ||||
|                     } | ||||
|                 else: | ||||
|                     fmt = { | ||||
|                         'url': url | ||||
|                     } | ||||
|                 fmt.update({ | ||||
|                     'width': width, | ||||
|                     'height': height, | ||||
|                     'format_id': '%s-%s' % (transport, quality), | ||||
|                     'preference': -1 if priority_transport == transport else -2, | ||||
|                 }) | ||||
|                 formats.append(fmt) | ||||
|  | ||||
|         if not formats: | ||||
|             raise ExtractorError('No media links available for %s' % video_id) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'view_count': view_count, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|         raise ExtractorError('No video found', expected=True) | ||||
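The tightened og:video regex above is the core of the Vesti rewrite: it only accepts the vesti.ru flvplayer host, and everything else is delegated to RUTVIE._extract_url(). A small self-contained check (the sample markup is assumed, modeled on the test with id 773865):

import re

META_RE = (r'<meta[^>]+?property="og:video"[^>]+?content='
           r'"http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)')

sample = ('<meta property="og:video" '
          'content="http://www.vesti.ru/i/flvplayer_videoHost.swf?vid=773865" />')
assert re.search(META_RE, sample).group('id') == '773865'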
| @@ -2,7 +2,6 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
| import datetime | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| @@ -22,6 +21,7 @@ class VevoIE(InfoExtractor): | ||||
|            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| | ||||
|            vevo:) | ||||
|         (?P<id>[^&?#]+)''' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', | ||||
|         "md5": "06bea460acb744eab74a9d7dcb4bfd61", | ||||
| @@ -34,6 +34,8 @@ class VevoIE(InfoExtractor): | ||||
|             "duration": 230.12, | ||||
|             "width": 1920, | ||||
|             "height": 1080, | ||||
|             # timestamp and upload_date are often incorrect; seem to change randomly | ||||
|             'timestamp': int, | ||||
|         } | ||||
|     }, { | ||||
|         'note': 'v3 SMIL format', | ||||
| @@ -47,6 +49,7 @@ class VevoIE(InfoExtractor): | ||||
|             'title': 'I Wish I Could Break Your Heart', | ||||
|             'duration': 226.101, | ||||
|             'age_limit': 0, | ||||
|             'timestamp': int, | ||||
|         } | ||||
|     }, { | ||||
|         'note': 'Age-limited video', | ||||
| @@ -57,7 +60,8 @@ class VevoIE(InfoExtractor): | ||||
|             'age_limit': 18, | ||||
|             'title': 'Tunnel Vision (Explicit)', | ||||
|             'uploader': 'Justin Timberlake', | ||||
|             'upload_date': '20130704', | ||||
|             'upload_date': 're:2013070[34]', | ||||
|             'timestamp': int, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'true', | ||||
| @@ -169,13 +173,13 @@ class VevoIE(InfoExtractor): | ||||
|  | ||||
|         timestamp_ms = int(self._search_regex( | ||||
|             r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date')) | ||||
|         upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_info['title'], | ||||
|             'formats': formats, | ||||
|             'thumbnail': video_info['imageUrl'], | ||||
|             'upload_date': upload_date.strftime('%Y%m%d'), | ||||
|             'timestamp': timestamp_ms // 1000, | ||||
|             'uploader': video_info['mainArtists'][0]['artistName'], | ||||
|             'duration': video_info['duration'], | ||||
|             'age_limit': age_limit, | ||||
|   | ||||
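The Vevo change above drops the datetime-based upload_date in favour of a raw Unix timestamp, because launchDate arrives in the ASP.NET JSON form "/Date(<milliseconds>)/". A hedged sketch of that conversion (the sample value is invented):

import re

def launch_date_to_timestamp(launch_date):
    # "/Date(1372982400000)/" -> 1372982400 (seconds since the epoch)
    timestamp_ms = int(re.search(r'/Date\((\d+)\)/', launch_date).group(1))
    return timestamp_ms // 1000

assert launch_date_to_timestamp('/Date(1372982400000)/') == 1372982400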
| @@ -29,6 +29,7 @@ class VideoBamIE(InfoExtractor): | ||||
|             'info_dict': { | ||||
|                 'id': 'pqLvq', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '_', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
| @@ -61,7 +62,7 @@ class VideoBamIE(InfoExtractor): | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = self._og_search_title(page, default='VideoBam', fatal=False) | ||||
|         title = self._og_search_title(page, default='_', fatal=False) | ||||
|         description = self._og_search_description(page, default=None) | ||||
|         thumbnail = self._og_search_thumbnail(page) | ||||
|         uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None) | ||||
|   | ||||
| @@ -1,22 +1,23 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .internetvideoarchive import InternetVideoArchiveIE | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import compat_urlparse | ||||
|  | ||||
|  | ||||
| class VideoDetectiveIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487', | ||||
|         u'file': u'194487.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'KICK-ASS 2', | ||||
|             u'description': u'md5:65ba37ad619165afac7d432eaded6013', | ||||
|             u'duration': 135, | ||||
|         'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', | ||||
|         'info_dict': { | ||||
|             'id': '194487', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'KICK-ASS 2', | ||||
|             'description': 'md5:65ba37ad619165afac7d432eaded6013', | ||||
|             'duration': 135, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         og_video = self._og_search_video_url(webpage) | ||||
|         query = compat_urlparse.urlparse(og_video).query | ||||
|         return self.url_result(InternetVideoArchiveIE._build_url(query), | ||||
|             ie=InternetVideoArchiveIE.ie_key()) | ||||
|         return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key()) | ||||
|   | ||||
| @@ -8,6 +8,7 @@ import itertools | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_HTTPError, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     clean_html, | ||||
| @@ -101,6 +102,15 @@ class VimeoIE(SubtitlesInfoExtractor): | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         if VimeoChannelIE.suitable(url): | ||||
|             # Otherwise channel urls like http://vimeo.com/channels/31259 would | ||||
|             # match | ||||
|             return False | ||||
|         else: | ||||
|             return super(VimeoIE, cls).suitable(url) | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
| @@ -172,7 +182,18 @@ class VimeoIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         request = compat_urllib_request.Request(url, None, headers) | ||||
|         webpage = self._download_webpage(request, video_id) | ||||
|         try: | ||||
|             webpage = self._download_webpage(request, video_id) | ||||
|         except ExtractorError as ee: | ||||
|             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: | ||||
|                 errmsg = ee.cause.read() | ||||
|                 if b'Because of its privacy settings, this video cannot be played here' in errmsg: | ||||
|                     raise ExtractorError( | ||||
|                         'Cannot download embed-only video without embedding ' | ||||
|                         'URL. Please call youtube-dl with the URL of the page ' | ||||
|                         'that embeds this video.', | ||||
|                         expected=True) | ||||
|             raise | ||||
|  | ||||
|         # Now we begin extracting as much information as we can from what we | ||||
|         # retrieved. First we extract the information common to all extractors, | ||||
| @@ -221,7 +242,9 @@ class VimeoIE(SubtitlesInfoExtractor): | ||||
|         # Extract video thumbnail | ||||
|         video_thumbnail = config["video"].get("thumbnail") | ||||
|         if video_thumbnail is None: | ||||
|             _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1] | ||||
|             video_thumbs = config["video"].get("thumbs") | ||||
|             if video_thumbs and isinstance(video_thumbs, dict): | ||||
|                 _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1] | ||||
|  | ||||
|         # Extract video description | ||||
|         video_description = None | ||||
| @@ -318,7 +341,7 @@ class VimeoIE(SubtitlesInfoExtractor): | ||||
|  | ||||
| class VimeoChannelIE(InfoExtractor): | ||||
|     IE_NAME = 'vimeo:channel' | ||||
|     _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)' | ||||
|     _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$' | ||||
|     _MORE_PAGES_INDICATOR = r'<a.+?rel="next"' | ||||
|     _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"' | ||||
|  | ||||
|   | ||||
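The anchored channel regex above is what makes the new VimeoIE.suitable() deferral work: a URL pointing at a single video inside a channel no longer matches the channel extractor. A toy check (the second URL is a hypothetical video-in-channel path):

import re

CHANNEL_RE = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$'

assert re.match(CHANNEL_RE, 'http://vimeo.com/channels/31259') is not None
assert re.match(CHANNEL_RE, 'http://vimeo.com/channels/31259/53576664') is None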
| @@ -16,7 +16,7 @@ from ..utils import ( | ||||
|  | ||||
| class VKIE(InfoExtractor): | ||||
|     IE_NAME = 'vk.com' | ||||
|     _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)' | ||||
|     _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))' | ||||
|     _NETRC_MACHINE = 'vk' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -42,6 +42,18 @@ class VKIE(InfoExtractor): | ||||
|                 'duration': 558, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'note': 'Embedded video', | ||||
|             'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1', | ||||
|             'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a', | ||||
|             'info_dict': { | ||||
|                 'id': '162925554', | ||||
|                 'ext': 'mp4', | ||||
|                 'uploader': 'Vladimir Gavrin', | ||||
|                 'title': 'Lin Dan', | ||||
|                 'duration': 101, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vk.com/video-8871596_164049491', | ||||
|             'md5': 'a590bcaf3d543576c9bd162812387666', | ||||
| @@ -54,7 +66,7 @@ class VKIE(InfoExtractor): | ||||
|                 'duration': 8352, | ||||
|             }, | ||||
|             'skip': 'Requires vk account credentials', | ||||
|         } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _login(self): | ||||
| @@ -82,7 +94,10 @@ class VKIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = mobj.group('videoid') | ||||
|  | ||||
|         if not video_id: | ||||
|             video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) | ||||
|  | ||||
|         info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id | ||||
|         info_page = self._download_webpage(info_url, video_id) | ||||
|   | ||||
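The widened VK regex above adds support for video_ext.php embed URLs; when that branch matches, the canonical "<oid>_<id>" video id is rebuilt from the two capture groups. A standalone sketch using the regex and the embed test URL from the hunk above:

import re

VALID_URL = (r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)'
             r'|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))')

def vk_video_id(url):
    mobj = re.match(VALID_URL, url)
    video_id = mobj.group('videoid')
    if not video_id:
        # Embed URLs carry owner id and video id separately.
        video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
    return video_id

assert vk_video_id('http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1') == '32194266_162925554'
assert vk_video_id('http://vk.com/video-8871596_164049491') == '-8871596_164049491'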
| @@ -13,7 +13,7 @@ class VubeIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', | ||||
|         'md5': 'f81dcf6d0448e3291f54380181695821', | ||||
|         'md5': 'db7aba89d4603dadd627e9d1973946fe', | ||||
|         'info_dict': { | ||||
|             'id': 'YL2qNPkqon', | ||||
|             'ext': 'mp4', | ||||
| @@ -77,4 +77,4 @@ class VubeIE(InfoExtractor): | ||||
|             'like_count': like_count, | ||||
|             'dislike_count': dislike_count, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
|         } | ||||
|   | ||||
youtube_dl/extractor/wdr.py (new file, 114 lines added)
							| @@ -0,0 +1,114 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     compat_urlparse, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class WDRIE(InfoExtractor): | ||||
|     _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?' | ||||
|     _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-362427', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Servicezeit', | ||||
|                 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb', | ||||
|                 'upload_date': '20140310', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-363194', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Marga Spiegel ist tot', | ||||
|                 'description': 'md5:2309992a6716c347891c045be50992e4', | ||||
|                 'upload_date': '20140311', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html', | ||||
|             'md5': '83e9e8fefad36f357278759870805898', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-194332', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)', | ||||
|                 'description': 'md5:2309992a6716c347891c045be50992e4', | ||||
|                 'upload_date': '20091129', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html', | ||||
|             'md5': 'cfff440d4ee64114083ac44676df5d15', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-363068', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Grenzenlos lecker - Baklava', | ||||
|                 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', | ||||
|                 'upload_date': '20140311', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_url = mobj.group('url') | ||||
|         page_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|  | ||||
|         if mobj.group('player') is None: | ||||
|             entries = [ | ||||
|                 self.url_result(page_url + href, 'WDR') | ||||
|                 for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage) | ||||
|             ] | ||||
|             return self.playlist_result(entries, page_id) | ||||
|  | ||||
|         flashvars = compat_urlparse.parse_qs( | ||||
|             self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars')) | ||||
|  | ||||
|         page_id = flashvars['trackerClipId'][0] | ||||
|         video_url = flashvars['dslSrc'][0] | ||||
|         title = flashvars['trackerClipTitle'][0] | ||||
|         thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None | ||||
|  | ||||
|         if 'trackerClipAirTime' in flashvars: | ||||
|             upload_date = flashvars['trackerClipAirTime'][0] | ||||
|         else: | ||||
|             upload_date = self._html_search_meta('DC.Date', webpage, 'upload date') | ||||
|  | ||||
|         if upload_date: | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|  | ||||
|         if video_url.endswith('.f4m'): | ||||
|             video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' | ||||
|             ext = 'flv' | ||||
|         else: | ||||
|             ext = determine_ext(video_url) | ||||
|  | ||||
|         description = self._html_search_meta('Description', webpage, 'description') | ||||
|  | ||||
|         return { | ||||
|             'id': page_id, | ||||
|             'url': video_url, | ||||
|             'ext': ext, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
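For reference, the flashvars block the new WDR extractor reads is a URL-encoded query string embedded in a <param> tag; compat_urlparse.parse_qs turns it into a dict of lists, which is why every lookup above takes [0]. A standalone sketch with an assumed query string (values modeled on the first test case):

try:
    from urllib.parse import parse_qs  # Python 3
except ImportError:
    from urlparse import parse_qs      # Python 2

flashvars = parse_qs(
    'trackerClipId=mdb-362427&dslSrc=http%3A%2F%2Fexample.invalid%2Fvideo.f4m'
    '&trackerClipTitle=Servicezeit&trackerClipAirTime=10.03.2014%2020%3A15')

assert flashvars['trackerClipId'][0] == 'mdb-362427'
assert flashvars['trackerClipTitle'][0] == 'Servicezeit'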
| @@ -1,55 +1,49 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class XNXXIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)' | ||||
|     VIDEO_URL_RE = r'flv_url=(.*?)&' | ||||
|     VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM' | ||||
|     VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&' | ||||
|     _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', | ||||
|         u'file': u'1135332.flv', | ||||
|         u'md5': u'0831677e2b4761795f68d417e0b7b445', | ||||
|         u'info_dict': { | ||||
|             u"title": u"lida \u00bb Naked Funny Actress  (5)", | ||||
|             u"age_limit": 18, | ||||
|         'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', | ||||
|         'md5': '0831677e2b4761795f68d417e0b7b445', | ||||
|         'info_dict': { | ||||
|             'id': '1135332', | ||||
|             'ext': 'flv', | ||||
|             'title': 'lida » Naked Funny Actress  (5)', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group(1) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         # Get webpage content | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._search_regex(self.VIDEO_URL_RE, | ||||
|             webpage, u'video URL') | ||||
|         video_url = self._search_regex(r'flv_url=(.*?)&', | ||||
|             webpage, 'video URL') | ||||
|         video_url = compat_urllib_parse.unquote(video_url) | ||||
|  | ||||
|         video_title = self._html_search_regex(self.VIDEO_TITLE_RE, | ||||
|             webpage, u'title') | ||||
|         video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM', | ||||
|             webpage, 'title') | ||||
|  | ||||
|         video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE, | ||||
|             webpage, u'thumbnail', fatal=False) | ||||
|         video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return [{ | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'uploader': None, | ||||
|             'upload_date': None, | ||||
|             'title': video_title, | ||||
|             'ext': 'flv', | ||||
|             'thumbnail': video_thumbnail, | ||||
|             'description': None, | ||||
|             'age_limit': 18, | ||||
|         }] | ||||
|         } | ||||
|   | ||||
| @@ -7,19 +7,24 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     parse_duration, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class XTubeIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_', | ||||
|         'file': 'kVTUy_G222_.mp4', | ||||
|         'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab', | ||||
|         'info_dict': { | ||||
|             "title": "strange erotica", | ||||
|             "description": "surreal gay themed erotica...almost an ET kind of thing", | ||||
|             "uploader": "greenshowers", | ||||
|             "age_limit": 18, | ||||
|             'id': 'kVTUy_G222_', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'strange erotica', | ||||
|             'description': 'surreal gay themed erotica...almost an ET kind of thing', | ||||
|             'uploader': 'greenshowers', | ||||
|             'duration': 450, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -32,10 +37,23 @@ class XTubeIE(InfoExtractor): | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, 'title') | ||||
|         video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False) | ||||
|         video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, 'description', fatal=False) | ||||
|         video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/') | ||||
|         video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title') | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False) | ||||
|         video_description = self._html_search_regex( | ||||
|             r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False) | ||||
|         video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/') | ||||
|         duration = parse_duration(self._html_search_regex( | ||||
|             r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False)) | ||||
|         view_count = self._html_search_regex( | ||||
|             r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False) | ||||
|         if view_count: | ||||
|             view_count = str_to_int(view_count) | ||||
|         comment_count = self._html_search_regex( | ||||
|             r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False) | ||||
|         if comment_count: | ||||
|             comment_count = str_to_int(comment_count) | ||||
|  | ||||
|         path = compat_urllib_parse_urlparse(video_url).path | ||||
|         extension = os.path.splitext(path)[1][1:] | ||||
|         format = path.split('/')[5].split('_')[:2] | ||||
| @@ -48,6 +66,9 @@ class XTubeIE(InfoExtractor): | ||||
|             'title': video_title, | ||||
|             'uploader': video_uploader, | ||||
|             'description': video_description, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|             'url': video_url, | ||||
|             'ext': extension, | ||||
|             'format': format, | ||||
|   | ||||
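The XTube changes pull three new fields out of the watch page with plain regexes and then normalize them with parse_duration and str_to_int. A standalone approximation (regexes copied from the hunk above, sample HTML assumed) of what those helpers end up computing:

import re

SAMPLE = ('<span class="bold">Runtime:</span> 7:30</p>'
          '<span class="bold">Views:</span> 1,234</p>'
          '<div id="commentBar">56 Comments</div>')

runtime = re.search(r'<span class="bold">Runtime:</span> ([^<]+)</p>', SAMPLE).group(1)
views = re.search(r'<span class="bold">Views:</span> ([\d,\.]+)</p>', SAMPLE).group(1)
comments = re.search(r'<div id="commentBar">([\d,\.]+) Comments</div>', SAMPLE).group(1)

# parse_duration('7:30') -> 450 seconds; str_to_int('1,234') -> 1234
minutes, seconds = runtime.split(':')
assert int(minutes) * 60 + int(seconds) == 450
assert int(views.replace(',', '').replace('.', '')) == 1234
assert int(comments) == 56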
| @@ -194,14 +194,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10}, | ||||
|  | ||||
|         # DASH mp4 video | ||||
|         '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|  | ||||
|         # Dash mp4 audio | ||||
|         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, | ||||
| @@ -209,12 +209,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50}, | ||||
|  | ||||
|         # Dash webm | ||||
|         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||
|         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, | ||||
|         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, | ||||
|         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, | ||||
|         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, | ||||
|         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, | ||||
|         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, | ||||
|         '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40}, | ||||
|         '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40}, | ||||
|         '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40}, | ||||
| @@ -1285,10 +1285,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         # Decide which formats to download | ||||
|         try: | ||||
|             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage) | ||||
|             mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage) | ||||
|             if not mobj: | ||||
|                 raise ValueError('Could not find vevo ID') | ||||
|             ytplayer_config = json.loads(mobj.group(1)) | ||||
|             json_code = uppercase_escape(mobj.group(1)) | ||||
|             ytplayer_config = json.loads(json_code) | ||||
|             args = ytplayer_config['args'] | ||||
|             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map | ||||
|             # this signatures are encrypted | ||||
| @@ -1645,7 +1646,7 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|  | ||||
| class YoutubeUserIE(InfoExtractor): | ||||
|     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' | ||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' | ||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' | ||||
|     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s' | ||||
|     _GDATA_PAGE_SIZE = 50 | ||||
|     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' | ||||
| @@ -1744,12 +1745,50 @@ class YoutubeSearchIE(SearchInfoExtractor): | ||||
|                   for video_id in video_ids] | ||||
|         return self.playlist_result(videos, query) | ||||
|  | ||||
|  | ||||
| class YoutubeSearchDateIE(YoutubeSearchIE): | ||||
|     IE_NAME = YoutubeSearchIE.IE_NAME + ':date' | ||||
|     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' | ||||
|     _SEARCH_KEY = 'ytsearchdate' | ||||
|     IE_DESC = u'YouTube.com searches, newest videos first' | ||||
|  | ||||
|  | ||||
| class YoutubeSearchURLIE(InfoExtractor): | ||||
|     IE_DESC = u'YouTube.com search URLs' | ||||
|     IE_NAME = u'youtube:search_url' | ||||
|     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         query = compat_urllib_parse.unquote_plus(mobj.group('query')) | ||||
|  | ||||
|         webpage = self._download_webpage(url, query) | ||||
|         result_code = self._search_regex( | ||||
|             r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML') | ||||
|  | ||||
|         part_codes = re.findall( | ||||
|             r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code) | ||||
|         entries = [] | ||||
|         for part_code in part_codes: | ||||
|             part_title = self._html_search_regex( | ||||
|                 r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False) | ||||
|             part_url_snippet = self._html_search_regex( | ||||
|                 r'(?s)href="([^"]+)"', part_code, 'item URL') | ||||
|             part_url = compat_urlparse.urljoin( | ||||
|                 'https://www.youtube.com/', part_url_snippet) | ||||
|             entries.append({ | ||||
|                 '_type': 'url', | ||||
|                 'url': part_url, | ||||
|                 'title': part_title, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'entries': entries, | ||||
|             'title': query, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class YoutubeShowIE(InfoExtractor): | ||||
|     IE_DESC = u'YouTube.com (multi-season) shows' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' | ||||
|   | ||||
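Two small but important YouTube fixes sit in the hunks above: the DASH mp4 formats are now marked video-only ('acodec': 'none'), and the ytplayer.config regex tolerates whitespace around the '=' before the JSON (which is then run through uppercase_escape before json.loads). A quick standalone check of the relaxed regex (the page snippets are assumed):

import re

CONFIG_RE = r';ytplayer\.config\s*=\s*({.*?});'

for snippet in (';ytplayer.config = {"args": {}};',
                ';ytplayer.config={"args": {}};'):
    assert re.search(CONFIG_RE, snippet).group(1) == '{"args": {}}'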
| @@ -22,6 +22,7 @@ import struct | ||||
| import subprocess | ||||
| import sys | ||||
| import traceback | ||||
| import xml.etree.ElementTree | ||||
| import zlib | ||||
|  | ||||
| try: | ||||
| @@ -777,6 +778,7 @@ def unified_strdate(date_str): | ||||
|         '%Y/%m/%d %H:%M:%S', | ||||
|         '%Y-%m-%d %H:%M:%S', | ||||
|         '%d.%m.%Y %H:%M', | ||||
|         '%d.%m.%Y %H.%M', | ||||
|         '%Y-%m-%dT%H:%M:%SZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%fZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%f0Z', | ||||
| @@ -1263,3 +1265,17 @@ def read_batch_urls(batch_fd): | ||||
|  | ||||
|     with contextlib.closing(batch_fd) as fd: | ||||
|         return [url for url in map(fixup, fd) if url] | ||||
|  | ||||
|  | ||||
| def urlencode_postdata(*args, **kargs): | ||||
|     return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii') | ||||
|  | ||||
|  | ||||
| def parse_xml(s): | ||||
|     class TreeBuilder(xml.etree.ElementTree.TreeBuilder): | ||||
|         def doctype(self, name, pubid, system): | ||||
|             pass  # Ignore doctypes | ||||
|  | ||||
|     parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder()) | ||||
|     kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {} | ||||
|     return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs) | ||||
|   | ||||
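A quick usage sketch for the two helpers added to utils above (inputs are invented; assumes a checkout that already contains this change): urlencode_postdata() yields the bytes urllib expects as a POST body, and parse_xml() swaps in a TreeBuilder whose doctype() hook simply ignores doctype declarations.

from youtube_dl.utils import parse_xml, urlencode_postdata

body = urlencode_postdata({'email': 'user@example.com', 'password': 'hunter2'})
assert isinstance(body, bytes)

doc = parse_xml('<!DOCTYPE html><root><item>ok</item></root>')
assert doc.find('item').text == 'ok'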
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.02.27.1' | ||||
| __version__ = '2014.03.17' | ||||
|   | ||||