Compare commits
	
		
			51 Commits
		
	
	
		
			2013.09.06
			...
			2013.09.12
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 07ac9e2cc2 | ||
|  | 6bc520c207 | ||
|  | e3dc22ca3a | ||
|  | d665f8d3cb | ||
|  | 055e6f3657 | ||
|  | ac4f319ba1 | ||
|  | 542cca0e8c | ||
|  | 6a2449df3b | ||
|  | 7fad1c6328 | ||
|  | d82134c339 | ||
|  | 54d39d8b2f | ||
|  | de7f3446e0 | ||
|  | f8e52269c1 | ||
|  | cf1dd0c59e | ||
|  | 22c8b52545 | ||
|  | 1f7dc42cd0 | ||
|  | aa8f2641da | ||
|  | 648d25d43d | ||
|  | df3e61003a | ||
|  | 6b361ad5ee | ||
|  | 5d8afe69f7 | ||
|  | a1ab553858 | ||
|  | 07463ea162 | ||
|  | 6d2d21f713 | ||
|  | 061b2889a9 | ||
|  | 8963d9c266 | ||
|  | 890f62e868 | ||
|  | 8f362589a5 | ||
|  | a27a2470cd | ||
|  | 72836fcee4 | ||
|  | a7130543fa | ||
|  | a490fda746 | ||
|  | 7e77275293 | ||
|  | d6e203b3dc | ||
|  | e3ea479087 | ||
|  | faab1d3836 | ||
|  | 8851a574a3 | ||
|  | 06a401c845 | ||
|  | bd2dee6c67 | ||
|  | 18b4e04f1c | ||
|  | d80a064eff | ||
|  | d55de6eec2 | ||
|  | 69df680b97 | ||
|  | 447591e1ae | ||
|  | 33eb0ce4c4 | ||
|  | 505c28aac9 | ||
|  | 8377574c9c | ||
|  | 372297e713 | ||
|  | 953e32b2c1 | ||
|  | 5898e28272 | ||
|  | 67dfbc0cb9 | 
							
								
								
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -17,4 +17,10 @@ youtube-dl.tar.gz | ||||
| .coverage | ||||
| cover/ | ||||
| updates_key.pem | ||||
| *.egg-info | ||||
| *.egg-info | ||||
| *.srt | ||||
| *.sbv | ||||
| *.vtt | ||||
| *.flv | ||||
| *.mp4 | ||||
| *.part | ||||
|   | ||||
| @@ -123,10 +123,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                only) | ||||
|  | ||||
| ## Subtitle Options: | ||||
|     --write-sub                write subtitle file (currently youtube only) | ||||
|     --write-auto-sub           write automatic subtitle file (currently youtube | ||||
|                                only) | ||||
|     --only-sub                 [deprecated] alias of --skip-download | ||||
|     --write-sub                write subtitle file | ||||
|     --write-auto-sub           write automatic subtitle file (youtube only) | ||||
|     --all-subs                 downloads all the available subtitles of the | ||||
|                                video | ||||
|     --list-subs                lists all available subtitles for the video | ||||
|   | ||||
| @@ -3,7 +3,8 @@ | ||||
| import json | ||||
| import sys | ||||
| import hashlib | ||||
| import urllib.request | ||||
| import os.path | ||||
|  | ||||
|  | ||||
| if len(sys.argv) <= 1: | ||||
|     print('Specify the version number as parameter') | ||||
| @@ -25,6 +26,7 @@ filenames = { | ||||
|     'tar': 'youtube-dl-%s.tar.gz' % version} | ||||
| build_dir = os.path.join('..', '..', 'build', version) | ||||
| for key, filename in filenames.items(): | ||||
|     url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename) | ||||
|     fn = os.path.join(build_dir, filename) | ||||
|     with open(fn, 'rb') as f: | ||||
|         data = f.read() | ||||
|   | ||||
| @@ -14,7 +14,7 @@ def main(): | ||||
|         template = tmplf.read() | ||||
|  | ||||
|     ie_htmls = [] | ||||
|     for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME): | ||||
|     for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()): | ||||
|         ie_html = '<b>{}</b>'.format(ie.IE_NAME) | ||||
|         try: | ||||
|             ie_html += ': {}'.format(ie.IE_DESC) | ||||
|   | ||||
| @@ -23,9 +23,9 @@ tests = [ | ||||
|     # 86 - vfluy6kdb 2013/09/06 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", | ||||
|      "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"), | ||||
|     # 85 | ||||
|     # 85 - vflkuzxcs 2013/09/11 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", | ||||
|      ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), | ||||
|      "T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"), | ||||
|     # 84 - vflg0g8PQ 2013/08/29 (sporadic) | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", | ||||
|      ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"), | ||||
|   | ||||
| @@ -38,7 +38,6 @@ | ||||
|     "writedescription": false,  | ||||
|     "writeinfojson": true,  | ||||
|     "writesubtitles": false, | ||||
|     "onlysubtitles": false, | ||||
|     "allsubtitles": false, | ||||
|     "listssubtitles": false | ||||
| } | ||||
|   | ||||
| @@ -21,14 +21,15 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         self.assertEqual(self.matching_ies(url), ie_list) | ||||
|  | ||||
|     def test_youtube_playlist_matching(self): | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585 | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 | ||||
|         self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M')) | ||||
|         assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) | ||||
|         assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | ||||
|         assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585 | ||||
|         assertPlaylist(u'PL63F0C78739B09958') | ||||
|         assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') | ||||
|         assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | ||||
|         assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') | ||||
|         assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668 | ||||
|         self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M')) | ||||
|  | ||||
|     def test_youtube_matching(self): | ||||
|         self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M')) | ||||
| @@ -37,13 +38,23 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) | ||||
|  | ||||
|     def test_youtube_channel_matching(self): | ||||
|         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM')) | ||||
|         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')) | ||||
|         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')) | ||||
|         assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) | ||||
|         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') | ||||
|         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') | ||||
|         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') | ||||
|  | ||||
|     def test_youtube_user_matching(self): | ||||
|         self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) | ||||
|  | ||||
|     def test_youtube_feeds(self): | ||||
|         self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later']) | ||||
|         self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) | ||||
|         self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) | ||||
|         self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) | ||||
|  | ||||
|     def test_youtube_show_matching(self): | ||||
|         self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) | ||||
|  | ||||
|     def test_justin_tv_channelid_matching(self): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) | ||||
| @@ -61,10 +72,13 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) | ||||
|  | ||||
|     def test_youtube_extract(self): | ||||
|         self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id) | ||||
|         assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') | ||||
|         assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') | ||||
|         assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') | ||||
|         assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') | ||||
|         assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') | ||||
|         assertExtractId('BaW_jenozKc', 'BaW_jenozKc') | ||||
|  | ||||
|     def test_no_duplicates(self): | ||||
|         ies = gen_extractors() | ||||
|   | ||||
							
								
								
									
										69
									
								
								test/test_dailymotion_subtitles.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								test/test_dailymotion_subtitles.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,69 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| import sys | ||||
| import unittest | ||||
| import json | ||||
| import io | ||||
| import hashlib | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.extractor import DailymotionIE | ||||
| from youtube_dl.utils import * | ||||
| from helper import FakeYDL | ||||
|  | ||||
| md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() | ||||
|  | ||||
| class TestDailymotionSubtitles(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         self.DL = FakeYDL() | ||||
|         self.url = 'http://www.dailymotion.com/video/xczg00' | ||||
|     def getInfoDict(self): | ||||
|         IE = DailymotionIE(self.DL) | ||||
|         info_dict = IE.extract(self.url) | ||||
|         return info_dict | ||||
|     def getSubtitles(self): | ||||
|         info_dict = self.getInfoDict() | ||||
|         return info_dict[0]['subtitles'] | ||||
|     def test_no_writesubtitles(self): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['fr'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles.keys()), 5) | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|     def test_automatic_captions(self): | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
|         self.DL.params['subtitleslang'] = ['en'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) == 0) | ||||
|     def test_nosubtitles(self): | ||||
|         self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|     def test_multiple_langs(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['es', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -18,85 +18,63 @@ md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() | ||||
|  | ||||
| class TestYoutubeSubtitles(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['allsubtitles'] = False | ||||
|         DL.params['writesubtitles'] = False | ||||
|         DL.params['subtitlesformat'] = 'srt' | ||||
|         DL.params['listsubtitles'] = False | ||||
|     def test_youtube_no_subtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = False | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         subtitles = info_dict[0]['subtitles'] | ||||
|         self.DL = FakeYDL() | ||||
|         self.url = 'QRS8MkLhQmM' | ||||
|     def getInfoDict(self): | ||||
|         IE = YoutubeIE(self.DL) | ||||
|         info_dict = IE.extract(self.url) | ||||
|         return info_dict | ||||
|     def getSubtitles(self): | ||||
|         info_dict = self.getInfoDict() | ||||
|         return info_dict[0]['subtitles']         | ||||
|     def test_youtube_no_writesubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = False | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|     def test_youtube_subtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles']['en'] | ||||
|         self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260') | ||||
|     def test_youtube_subtitles_it(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitleslangs'] = ['it'] | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles']['it'] | ||||
|         self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d') | ||||
|     def test_youtube_onlysubtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['onlysubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles']['en'] | ||||
|         self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260') | ||||
|     def test_youtube_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['it'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') | ||||
|     def test_youtube_allsubtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['allsubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         subtitles = info_dict[0]['subtitles'] | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles.keys()), 13) | ||||
|     def test_youtube_subtitles_sbv_format(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitlesformat'] = 'sbv' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles']['en'] | ||||
|         self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitlesformat'] = 'sbv' | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b') | ||||
|     def test_youtube_subtitles_vtt_format(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitlesformat'] = 'vtt' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles']['en'] | ||||
|         self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitlesformat'] = 'vtt' | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7') | ||||
|     def test_youtube_list_subtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['listsubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|     def test_youtube_automatic_captions(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writeautomaticsub'] = True | ||||
|         DL.params['subtitleslangs'] = ['it'] | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('8YoUxe5ncPo') | ||||
|         sub = info_dict[0]['subtitles']['it'] | ||||
|         self.assertTrue(sub is not None) | ||||
|         self.url = '8YoUxe5ncPo' | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['it'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(subtitles['it'] is not None) | ||||
|     def test_youtube_nosubtitles(self): | ||||
|         self.url = 'sAjKT8FhjI8' | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|     def test_youtube_multiple_langs(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         self.url = 'QRS8MkLhQmM' | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['it', 'fr', 'de'] | ||||
|         DL.params['subtitleslangs'] = langs | ||||
|         IE = YoutubeIE(DL) | ||||
|         subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
|   | ||||
| @@ -66,7 +66,7 @@ class FileDownloader(object): | ||||
|     @staticmethod | ||||
|     def format_seconds(seconds): | ||||
|         (mins, secs) = divmod(seconds, 60) | ||||
|         (hours, eta_mins) = divmod(mins, 60) | ||||
|         (hours, mins) = divmod(mins, 60) | ||||
|         if hours > 99: | ||||
|             return '--:--:--' | ||||
|         if hours == 0: | ||||
|   | ||||
| @@ -29,6 +29,7 @@ __authors__  = ( | ||||
|     'Albert Kim', | ||||
|     'Pierre Rudloff', | ||||
|     'Huarong Huo', | ||||
|     'Ismael Mejía', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
| @@ -205,13 +206,10 @@ def parseOpts(overrideArguments=None): | ||||
|  | ||||
|     subtitles.add_option('--write-sub', '--write-srt', | ||||
|             action='store_true', dest='writesubtitles', | ||||
|             help='write subtitle file (currently youtube only)', default=False) | ||||
|             help='write subtitle file', default=False) | ||||
|     subtitles.add_option('--write-auto-sub', '--write-automatic-sub', | ||||
|             action='store_true', dest='writeautomaticsub', | ||||
|             help='write automatic subtitle file (currently youtube only)', default=False) | ||||
|     subtitles.add_option('--only-sub', | ||||
|             action='store_true', dest='skip_download', | ||||
|             help='[deprecated] alias of --skip-download', default=False) | ||||
|             help='write automatic subtitle file (youtube only)', default=False) | ||||
|     subtitles.add_option('--all-subs', | ||||
|             action='store_true', dest='allsubtitles', | ||||
|             help='downloads all the available subtitles of the video', default=False) | ||||
| @@ -222,7 +220,7 @@ def parseOpts(overrideArguments=None): | ||||
|             action='store', dest='subtitlesformat', metavar='FORMAT', | ||||
|             help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt') | ||||
|     subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang', | ||||
|             action='callback', dest='subtitleslang', metavar='LANGS', type='str', | ||||
|             action='callback', dest='subtitleslangs', metavar='LANGS', type='str', | ||||
|             default=[], callback=_comma_separated_values_options_callback, | ||||
|             help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') | ||||
|  | ||||
| @@ -593,7 +591,7 @@ def _real_main(argv=None): | ||||
|         'allsubtitles': opts.allsubtitles, | ||||
|         'listsubtitles': opts.listsubtitles, | ||||
|         'subtitlesformat': opts.subtitlesformat, | ||||
|         'subtitleslangs': opts.subtitleslang, | ||||
|         'subtitleslangs': opts.subtitleslangs, | ||||
|         'matchtitle': decodeOption(opts.matchtitle), | ||||
|         'rejecttitle': decodeOption(opts.rejecttitle), | ||||
|         'max_downloads': opts.max_downloads, | ||||
|   | ||||
| @@ -29,6 +29,10 @@ from .escapist import EscapistIE | ||||
| from .exfm import ExfmIE | ||||
| from .facebook import FacebookIE | ||||
| from .flickr import FlickrIE | ||||
| from .francetv import ( | ||||
|     PluzzIE, | ||||
|     FranceTvInfoIE, | ||||
| ) | ||||
| from .freesound import FreesoundIE | ||||
| from .funnyordie import FunnyOrDieIE | ||||
| from .gamespot import GameSpotIE | ||||
| @@ -52,6 +56,7 @@ from .keek import KeekIE | ||||
| from .liveleak import LiveLeakIE | ||||
| from .livestream import LivestreamIE | ||||
| from .metacafe import MetacafeIE | ||||
| from .metacritic import MetacriticIE | ||||
| from .mit import TechTVMITIE, MITIE | ||||
| from .mixcloud import MixcloudIE | ||||
| from .mtv import MTVIE | ||||
| @@ -74,6 +79,7 @@ from .roxwel import RoxwelIE | ||||
| from .rtlnow import RTLnowIE | ||||
| from .sina import SinaIE | ||||
| from .slashdot import SlashdotIE | ||||
| from .slideshare import SlideshareIE | ||||
| from .sohu import SohuIE | ||||
| from .soundcloud import SoundcloudIE, SoundcloudSetIE | ||||
| from .spiegel import SpiegelIE | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class Canalc2IE(InfoExtractor): | ||||
|     _IE_NAME = 'canalc2.tv' | ||||
|     IE_NAME = 'canalc2.tv' | ||||
|     _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui' | ||||
|  | ||||
|     _TEST = { | ||||
|   | ||||
| @@ -3,18 +3,22 @@ import json | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     compat_str, | ||||
|     get_element_by_attribute, | ||||
|     get_element_by_id, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| class DailymotionIE(InfoExtractor): | ||||
|  | ||||
| class DailymotionIE(SubtitlesInfoExtractor): | ||||
|     """Information Extractor for Dailymotion""" | ||||
|  | ||||
|     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' | ||||
|     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' | ||||
|     IE_NAME = u'dailymotion' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', | ||||
| @@ -33,6 +37,7 @@ class DailymotionIE(InfoExtractor): | ||||
|         video_id = mobj.group(1).split('_')[0].split('?')[0] | ||||
|  | ||||
|         video_extension = 'mp4' | ||||
|         url = 'http://www.dailymotion.com/video/%s' % video_id | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         request = compat_urllib_request.Request(url) | ||||
| @@ -72,6 +77,12 @@ class DailymotionIE(InfoExtractor): | ||||
|             raise ExtractorError(u'Unable to extract video URL') | ||||
|         video_url = info[max_quality] | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id) | ||||
|             return | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
| @@ -79,9 +90,25 @@ class DailymotionIE(InfoExtractor): | ||||
|             'upload_date':  video_upload_date, | ||||
|             'title':    self._og_search_title(webpage), | ||||
|             'ext':      video_extension, | ||||
|             'subtitles':    video_subtitles, | ||||
|             'thumbnail': info['thumbnail_url'] | ||||
|         }] | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id): | ||||
|         try: | ||||
|             sub_list = self._download_webpage( | ||||
|                 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, | ||||
|                 video_id, note=False) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) | ||||
|             return {} | ||||
|         info = json.loads(sub_list) | ||||
|         if (info['total'] > 0): | ||||
|             sub_lang_list = dict((l['language'], l['url']) for l in info['list']) | ||||
|             return sub_lang_list | ||||
|         self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||
|         return {} | ||||
|  | ||||
|  | ||||
| class DailymotionPlaylistIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' | ||||
|   | ||||
							
								
								
									
										77
									
								
								youtube_dl/extractor/francetv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								youtube_dl/extractor/francetv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,77 @@ | ||||
| # encoding: utf-8 | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FranceTVBaseInfoExtractor(InfoExtractor): | ||||
|     def _extract_video(self, video_id): | ||||
|         xml_desc = self._download_webpage( | ||||
|             'http://www.francetvinfo.fr/appftv/webservices/video/' | ||||
|             'getInfosOeuvre.php?id-diffusion=' | ||||
|             + video_id, video_id, 'Downloading XML config') | ||||
|         info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8')) | ||||
|  | ||||
|         manifest_url = info.find('videos/video/url').text | ||||
|         video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8') | ||||
|         video_url = video_url.replace('/z/', '/i/') | ||||
|         thumbnail_path = info.find('image').text | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'ext': 'mp4', | ||||
|                 'url': video_url, | ||||
|                 'title': info.find('titre').text, | ||||
|                 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path), | ||||
|                 'description': info.find('synopsis').text, | ||||
|                 } | ||||
|  | ||||
|  | ||||
| class PluzzIE(FranceTVBaseInfoExtractor): | ||||
|     IE_NAME = u'pluzz.francetv.fr' | ||||
|     _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html', | ||||
|         u'file': u'88439064.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Allô Rufo', | ||||
|             u'description': u'md5:d909f1ebdf963814b65772aea250400e', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         title = re.match(self._VALID_URL, url).group(1) | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         video_id = self._search_regex( | ||||
|             r'data-diffusion="(\d+)"', webpage, 'ID') | ||||
|         return self._extract_video(video_id) | ||||
|  | ||||
|  | ||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
    """Extractor for replay videos on francetvinfo.fr."""
    IE_NAME = u'francetvinfo.fr'
    _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'

    _TEST = {
        u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
        u'file': u'84981923.mp4',
        u'info_dict': {
            u'title': u'Soir 3',
        },
        u'params': {
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        # The title slug from the URL serves as the display id for logging.
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)
        # Pull the numeric video id out of the embedded player markup and
        # delegate to the shared base-class extraction helper.
        video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
        return self._extract_video(video_id)
| @@ -109,6 +109,11 @@ class GenericIE(InfoExtractor): | ||||
|         return new_url | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         parsed_url = compat_urlparse.urlparse(url) | ||||
|         if not parsed_url.scheme: | ||||
|             self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') | ||||
|             return self.url_result('http://' + url) | ||||
|  | ||||
|         try: | ||||
|             new_url = self._test_redirect(url) | ||||
|             if new_url: | ||||
|   | ||||
| @@ -19,8 +19,7 @@ class HowcastIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         webpage_url = 'http://www.howcast.com/videos/' + video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|   | ||||
							
								
								
									
										55
									
								
								youtube_dl/extractor/metacritic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								youtube_dl/extractor/metacritic.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
| import operator | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class MetacriticIE(InfoExtractor):
    """Extractor for game/movie trailer videos hosted on metacritic.com."""
    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        u'file': u'3698222.mp4',
        u'info_dict': {
            u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            u'duration': 221,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # The xml is not well formatted, there are raw '&': escape them so
        # ElementTree can parse the document.  (The original had a no-op
        # replace('&', '&') which left the XML unparseable.)
        info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id,
            video_id, u'Downloading info xml').replace('&', '&amp;')
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        # The playlist may contain several clips; pick the one matching the id.
        clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
        formats = []
        for videoFile in clip.findall('httpURI/videoFile'):
            rate_str = videoFile.find('rate').text
            video_url = videoFile.find('filePath').text
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': rate_str,
                'rate': int(rate_str),
            })
        # Ascending by bitrate, so formats[-1] is the best available quality.
        formats.sort(key=operator.itemgetter('rate'))

        description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
            webpage, u'description', flags=re.DOTALL)

        info = {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
							
								
								
									
										47
									
								
								youtube_dl/extractor/slideshare.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								youtube_dl/extractor/slideshare.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
class SlideshareIE(InfoExtractor):
    """Extractor for video uploads (not slide decks) on slideshare.net."""
    _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'

    _TEST = {
        u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
        u'file': u'25665706.mp4',
        u'info_dict': {
            u'title': u'Managing Scale and Complexity',
            u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
        },
    }

    def _real_extract(self, url):
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)
        # The page embeds a JSON object with all the player metadata.
        raw_json = self._search_regex(
            r'var slideshare_object =  ({.*?}); var user_info =',
            webpage, u'slideshare object')
        info = json.loads(raw_json)
        slideshow = info['slideshow']
        # Only video slideshows carry a downloadable stream.
        if slideshow['type'] != u'video':
            raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % slideshow['type'], expected=True)

        # The SD video lives at <bucket>/<doc>-SD.<ext>.
        video_url = compat_urlparse.urljoin(
            info['jsplayer']['video_bucket'],
            info['doc'] + '-SD.' + info['jsplayer']['video_extension'])

        return {
            '_type': 'video',
            'id': slideshow['id'],
            'title': slideshow['title'],
            'ext': info['jsplayer']['video_extension'],
            'url': video_url,
            'thumbnail': slideshow['pin_image_url'],
            'description': self._og_search_description(webpage),
        }
| @@ -8,7 +8,7 @@ from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class SohuIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?' | ||||
|     _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super', | ||||
| @@ -21,8 +21,11 @@ class SohuIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|  | ||||
|         def _fetch_data(vid_id): | ||||
|             base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid=' | ||||
|         def _fetch_data(vid_id, mytv=False): | ||||
|             if mytv: | ||||
|                 base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid=' | ||||
|             else: | ||||
|                 base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid=' | ||||
|             data_url = base_data_url + str(vid_id) | ||||
|             data_json = self._download_webpage( | ||||
|                 data_url, video_id, | ||||
| @@ -31,15 +34,16 @@ class SohuIE(InfoExtractor): | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         mytv = mobj.group('mytv') is not None | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>', | ||||
|                                             webpage, u'video title') | ||||
|         title = raw_title.partition('-')[0].strip() | ||||
|  | ||||
|         vid = self._html_search_regex(r'var vid="(\d+)"', webpage, | ||||
|         vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage, | ||||
|                                       u'video path') | ||||
|         data = _fetch_data(vid) | ||||
|         data = _fetch_data(vid, mytv) | ||||
|  | ||||
|         QUALITIES = ('ori', 'super', 'high', 'nor') | ||||
|         vid_ids = [data['data'][q + 'Vid'] | ||||
| @@ -51,7 +55,7 @@ class SohuIE(InfoExtractor): | ||||
|         # For now, we just pick the highest available quality | ||||
|         vid_id = vid_ids[-1] | ||||
|  | ||||
|         format_data = data if vid == vid_id else _fetch_data(vid_id) | ||||
|         format_data = data if vid == vid_id else _fetch_data(vid_id, mytv) | ||||
|         part_count = format_data['data']['totalBlocks'] | ||||
|         allot = format_data['allot'] | ||||
|         prot = format_data['prot'] | ||||
|   | ||||
							
								
								
									
										92
									
								
								youtube_dl/extractor/subtitles.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								youtube_dl/extractor/subtitles.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,92 @@ | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
class SubtitlesInfoExtractor(InfoExtractor):
    """Mixin base for extractors that support subtitle/caption download.

    Subclasses must redefine _get_available_subtitles (and optionally
    _get_available_automatic_caption) to map language codes to subtitle URLs;
    the shared logic here handles language selection and fetching.
    """

    @property
    def _have_to_download_any_subtitles(self):
        # True if any of the subtitle-related options was requested.
        return any([self._downloader.params.get('writesubtitles', False),
                    self._downloader.params.get('writeautomaticsub', False),
                    self._downloader.params.get('allsubtitles', False)])

    def _list_available_subtitles(self, video_id, webpage=None):
        """ outputs the available subtitles for the video """
        sub_lang_list = self._get_available_subtitles(video_id)
        auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
        sub_lang = ",".join(list(sub_lang_list.keys()))
        self.to_screen(u'%s: Available subtitles for video: %s' %
                       (video_id, sub_lang))
        auto_lang = ",".join(auto_captions_list.keys())
        self.to_screen(u'%s: Available automatic captions for video: %s' %
                       (video_id, auto_lang))

    def extract_subtitles(self, video_id, video_webpage=None):
        """
        returns {sub_lang: sub} ,{} if subtitles not found or None if the
        subtitles aren't requested.
        """
        if not self._have_to_download_any_subtitles:
            return None
        available_subs_list = {}
        if self._downloader.params.get('writeautomaticsub', False):
            available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
            available_subs_list.update(self._get_available_subtitles(video_id))

        if not available_subs_list:  # error, it didn't get the available subtitles
            return {}
        if self._downloader.params.get('allsubtitles', False):
            sub_lang_list = available_subs_list
        else:
            # Pick the requested languages, falling back to English, then to
            # whatever language happens to be available first.
            if self._downloader.params.get('subtitleslangs', False):
                requested_langs = self._downloader.params.get('subtitleslangs')
            elif 'en' in available_subs_list:
                requested_langs = ['en']
            else:
                requested_langs = [list(available_subs_list.keys())[0]]

            sub_lang_list = {}
            for sub_lang in requested_langs:
                if sub_lang not in available_subs_list:
                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
                    continue
                sub_lang_list[sub_lang] = available_subs_list[sub_lang]

        subtitles = {}
        for sub_lang, url in sub_lang_list.items():
            subtitle = self._request_subtitle_url(sub_lang, url)
            if subtitle:
                subtitles[sub_lang] = subtitle
        return subtitles

    def _request_subtitle_url(self, sub_lang, url):
        """ makes the http request for the subtitle """
        try:
            sub = self._download_webpage(url, None, note=False)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return
        if not sub:
            self._downloader.report_warning(u'Did not fetch video subtitles')
            return
        return sub

    def _get_available_subtitles(self, video_id):
        """
        returns {sub_lang: url} or {} if not available
        Must be redefined by the subclasses
        """
        # Return an empty dict (not None) so callers that .update() the
        # result don't crash when a subclass forgets to override this.
        return {}

    def _get_available_automatic_caption(self, video_id, webpage):
        """
        returns {sub_lang: url} or {} if not available
        Must be redefined by the subclasses that support automatic captions,
        otherwise it will return {}
        """
        self._downloader.report_warning(u'Automatic Captions not supported by this server')
        return {}
| @@ -5,8 +5,10 @@ import netrc | ||||
| import re | ||||
| import socket | ||||
| import itertools | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor, SearchInfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_parse_qs, | ||||
| @@ -130,7 +132,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|             return | ||||
|         self._confirm_age() | ||||
|  | ||||
| class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|  | ||||
| class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     IE_DESC = u'YouTube.com' | ||||
|     _VALID_URL = r"""^ | ||||
|                      ( | ||||
| @@ -150,7 +153,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                          |youtu\.be/                                          # just youtu.be/xxxx | ||||
|                          ) | ||||
|                      )?                                                       # all until now is optional -> you can pass the naked ID | ||||
|                      ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID | ||||
|                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID | ||||
|                      (?(1).+)?                                                # if we found the ID, everything can follow | ||||
|                      $""" | ||||
|     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' | ||||
| @@ -386,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False | ||||
|         if YoutubePlaylistIE.suitable(url): return False | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|  | ||||
|     def report_video_webpage_download(self, video_id): | ||||
| @@ -397,19 +400,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         """Report attempt to download video info webpage.""" | ||||
|         self.to_screen(u'%s: Downloading video info webpage' % video_id) | ||||
|  | ||||
|     def report_video_subtitles_download(self, video_id): | ||||
|         """Report attempt to download video info webpage.""" | ||||
|         self.to_screen(u'%s: Checking available subtitles' % video_id) | ||||
|  | ||||
|     def report_video_subtitles_request(self, video_id, sub_lang, format): | ||||
|         """Report attempt to download video info webpage.""" | ||||
|         self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format)) | ||||
|  | ||||
|     def report_video_subtitles_available(self, video_id, sub_lang_list): | ||||
|         """Report available subtitles.""" | ||||
|         sub_lang = ",".join(list(sub_lang_list.keys())) | ||||
|         self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang)) | ||||
|  | ||||
|     def report_information_extraction(self, video_id): | ||||
|         """Report attempt to extract video information.""" | ||||
|         self.to_screen(u'%s: Extracting video information' % video_id) | ||||
| @@ -438,7 +428,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         elif len(s) == 86: | ||||
|             return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53] | ||||
|         elif len(s) == 85: | ||||
|             return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] | ||||
|             return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1] | ||||
|         elif len(s) == 84: | ||||
|             return s[81:36:-1] + s[0] + s[35:2:-1] | ||||
|         elif len(s) == 83: | ||||
| @@ -464,56 +454,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             # Fallback to the other algortihms | ||||
|             return self._decrypt_signature(s) | ||||
|  | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id): | ||||
|         self.report_video_subtitles_download(video_id) | ||||
|         request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) | ||||
|         try: | ||||
|             sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             sub_list = self._download_webpage( | ||||
|                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, | ||||
|                 video_id, note=False) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) | ||||
|             return {} | ||||
|         sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) | ||||
|         sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) | ||||
|         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) | ||||
|  | ||||
|         sub_lang_list = {} | ||||
|         for l in lang_list: | ||||
|             lang = l[1] | ||||
|             params = compat_urllib_parse.urlencode({ | ||||
|                 'lang': lang, | ||||
|                 'v': video_id, | ||||
|                 'fmt': self._downloader.params.get('subtitlesformat'), | ||||
|             }) | ||||
|             url = u'http://www.youtube.com/api/timedtext?' + params | ||||
|             sub_lang_list[lang] = url | ||||
|         if not sub_lang_list: | ||||
|             self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||
|             return {} | ||||
|         return sub_lang_list | ||||
|  | ||||
|     def _list_available_subtitles(self, video_id): | ||||
|         sub_lang_list = self._get_available_subtitles(video_id) | ||||
|         self.report_video_subtitles_available(video_id, sub_lang_list) | ||||
|  | ||||
|     def _request_subtitle(self, sub_lang, sub_name, video_id, format): | ||||
|         """ | ||||
|         Return the subtitle as a string or None if they are not found | ||||
|         """ | ||||
|         self.report_video_subtitles_request(video_id, sub_lang, format) | ||||
|         params = compat_urllib_parse.urlencode({ | ||||
|             'lang': sub_lang, | ||||
|             'name': sub_name, | ||||
|             'v': video_id, | ||||
|             'fmt': format, | ||||
|         }) | ||||
|         url = 'http://www.youtube.com/api/timedtext?' + params | ||||
|         try: | ||||
|             sub = compat_urllib_request.urlopen(url).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) | ||||
|             return | ||||
|         if not sub: | ||||
|             self._downloader.report_warning(u'Did not fetch video subtitles') | ||||
|             return | ||||
|         return sub | ||||
|  | ||||
|     def _request_automatic_caption(self, video_id, webpage): | ||||
|     def _get_available_automatic_caption(self, video_id, webpage): | ||||
|         """We need the webpage for getting the captions url, pass it as an | ||||
|            argument to speed up the process.""" | ||||
|         sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0] | ||||
|         sub_format = self._downloader.params.get('subtitlesformat') | ||||
|         self.to_screen(u'%s: Looking for automatic captions' % video_id) | ||||
|         mobj = re.search(r';ytplayer.config = ({.*?});', webpage) | ||||
|         err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang | ||||
|         err_msg = u'Couldn\'t find automatic captions for %s' % video_id | ||||
|         if mobj is None: | ||||
|             self._downloader.report_warning(err_msg) | ||||
|             return {} | ||||
| @@ -522,53 +494,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             args = player_config[u'args'] | ||||
|             caption_url = args[u'ttsurl'] | ||||
|             timestamp = args[u'timestamp'] | ||||
|             params = compat_urllib_parse.urlencode({ | ||||
|                 'lang': 'en', | ||||
|                 'tlang': sub_lang, | ||||
|                 'fmt': sub_format, | ||||
|                 'ts': timestamp, | ||||
|                 'kind': 'asr', | ||||
|             # We get the available subtitles | ||||
|             list_params = compat_urllib_parse.urlencode({ | ||||
|                 'type': 'list', | ||||
|                 'tlangs': 1, | ||||
|                 'asrs': 1, | ||||
|             }) | ||||
|             subtitles_url = caption_url + '&' + params | ||||
|             sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') | ||||
|             return {sub_lang: sub} | ||||
|             list_url = caption_url + '&' + list_params | ||||
|             list_page = self._download_webpage(list_url, video_id) | ||||
|             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8')) | ||||
|             original_lang_node = caption_list.find('track') | ||||
|             if original_lang_node.attrib.get('kind') != 'asr' : | ||||
|                 self._downloader.report_warning(u'Video doesn\'t have automatic captions') | ||||
|                 return {} | ||||
|             original_lang = original_lang_node.attrib['lang_code'] | ||||
|  | ||||
|             sub_lang_list = {} | ||||
|             for lang_node in caption_list.findall('target'): | ||||
|                 sub_lang = lang_node.attrib['lang_code'] | ||||
|                 params = compat_urllib_parse.urlencode({ | ||||
|                     'lang': original_lang, | ||||
|                     'tlang': sub_lang, | ||||
|                     'fmt': sub_format, | ||||
|                     'ts': timestamp, | ||||
|                     'kind': 'asr', | ||||
|                 }) | ||||
|                 sub_lang_list[sub_lang] = caption_url + '&' + params | ||||
|             return sub_lang_list | ||||
|         # An extractor error can be raise by the download process if there are | ||||
|         # no automatic captions but there are subtitles | ||||
|         except (KeyError, ExtractorError): | ||||
|             self._downloader.report_warning(err_msg) | ||||
|             return {} | ||||
|      | ||||
|     def _extract_subtitles(self, video_id): | ||||
|         """ | ||||
|         Return a dictionary: {language: subtitles} or {} if the subtitles | ||||
|         couldn't be found | ||||
|         """ | ||||
|         available_subs_list = self._get_available_subtitles(video_id) | ||||
|         sub_format = self._downloader.params.get('subtitlesformat') | ||||
|         if  not available_subs_list: #There was some error, it didn't get the available subtitles | ||||
|             return {} | ||||
|         if self._downloader.params.get('allsubtitles', False): | ||||
|             sub_lang_list = available_subs_list | ||||
|         else: | ||||
|             if self._downloader.params.get('subtitleslangs', False): | ||||
|                 reqested_langs = self._downloader.params.get('subtitleslangs') | ||||
|             elif 'en' in available_subs_list: | ||||
|                 reqested_langs = ['en'] | ||||
|             else: | ||||
|                 reqested_langs = [list(available_subs_list.keys())[0]] | ||||
|  | ||||
|             sub_lang_list = {} | ||||
|             for sub_lang in reqested_langs: | ||||
|                 if not sub_lang in available_subs_list: | ||||
|                     self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) | ||||
|                     continue | ||||
|                 sub_lang_list[sub_lang] = available_subs_list[sub_lang] | ||||
|         subtitles = {} | ||||
|         for sub_lang in sub_lang_list: | ||||
|             subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) | ||||
|             if subtitle: | ||||
|                 subtitles[sub_lang] = subtitle | ||||
|         return subtitles | ||||
|  | ||||
|     def _print_formats(self, formats): | ||||
|         print('Available formats:') | ||||
| @@ -643,7 +600,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest') | ||||
|         formats_urls = _get_urls(manifest) | ||||
|         for format_url in formats_urls: | ||||
|             itag = self._search_regex(r'itag%3D(\d+?)/', format_url, 'itag') | ||||
|             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag') | ||||
|             url_map[itag] = format_url | ||||
|         return url_map | ||||
|  | ||||
| @@ -768,15 +725,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 video_description = u'' | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = None | ||||
|  | ||||
|         if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False): | ||||
|             video_subtitles = self._extract_subtitles(video_id) | ||||
|         elif self._downloader.params.get('writeautomaticsub', False): | ||||
|             video_subtitles = self._request_automatic_caption(video_id, video_webpage) | ||||
|         video_subtitles = self.extract_subtitles(video_id, video_webpage) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id) | ||||
|             self._list_available_subtitles(video_id, video_webpage) | ||||
|             return | ||||
|  | ||||
|         if 'length_seconds' not in video_info: | ||||
| @@ -1015,14 +967,18 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|  | ||||
| class YoutubeUserIE(InfoExtractor): | ||||
|     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' | ||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)([A-Za-z0-9_-]+)' | ||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' | ||||
|     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' | ||||
|     _GDATA_PAGE_SIZE = 50 | ||||
|     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' | ||||
|     IE_NAME = u'youtube:user' | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         if YoutubeIE.suitable(url): return False | ||||
|         # Don't return True if the url can be extracted with other youtube | ||||
|         # extractor, the regex would is too permissive and it would match. | ||||
|         other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls) | ||||
|         if any(ie.suitable(url) for ie in other_ies): return False | ||||
|         else: return super(YoutubeUserIE, cls).suitable(url) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2013.09.06.1' | ||||
| __version__ = '2013.09.12' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user