Compare commits
	
		
			88 Commits
		
	
	
		
			2014.09.15
			...
			2014.09.24
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 0b97f3a936 | ||
|  | eb73f2649f | ||
|  | f0b5d6af74 | ||
|  | 2f771f6c99 | ||
|  | 3b2f933b01 | ||
|  | cc746841e7 | ||
|  | ac7553d031 | ||
|  | cdc628a498 | ||
|  | 69ea8ca42c | ||
|  | 4bc3a23ec5 | ||
|  | bd5650ac64 | ||
|  | 86916dae4b | ||
|  | f7d159cf95 | ||
|  | 632e5684ce | ||
|  | 094d42fe44 | ||
|  | 63cddb6477 | ||
|  | 273dea4248 | ||
|  | f90d95edeb | ||
|  | 45c85d7ba1 | ||
|  | d0df92928b | ||
|  | df8f53f752 | ||
|  | e35cb78c40 | ||
|  | 3ef7d11acd | ||
|  | 224ce0d872 | ||
|  | dd41e8c82b | ||
|  | b509a4b176 | ||
|  | b28c8403b2 | ||
|  | 7bd4b4229a | ||
|  | 72e450c555 | ||
|  | 522c55b7f2 | ||
|  | 58e7071a2c | ||
|  | 516812df41 | ||
|  | 752297631f | ||
|  | 34e14a9beb | ||
|  | ffb5b05db1 | ||
|  | 3e8fcd9fa1 | ||
|  | 532f5bff70 | ||
|  | f566d9f1d5 | ||
|  | 7267bd536f | ||
|  | 589d3d7c7a | ||
|  | 46f74bcf5c | ||
|  | 37bfe8ace4 | ||
|  | 0529eef5a4 | ||
|  | fd78a4d3e6 | ||
|  | 1de33fafd9 | ||
|  | e2e5dae64d | ||
|  | 09b23c902b | ||
|  | 109a540e7a | ||
|  | 2914e5f00f | ||
|  | 2f834e9381 | ||
|  | 9296738f20 | ||
|  | 0e59b9fffb | ||
|  | 67abbe9527 | ||
|  | 944a3de278 | ||
|  | 5a13fe9ed2 | ||
|  | 6b6096d0b7 | ||
|  | d0246d07f1 | ||
|  | 727a98c3ee | ||
|  | 997987d568 | ||
|  | c001f939e4 | ||
|  | e825c38082 | ||
|  | a04aa7a9e6 | ||
|  | 7cdd5339b3 | ||
|  | 38349518f1 | ||
|  | 64892c0b79 | ||
|  | dc9f356846 | ||
|  | ed86ee3b4a | ||
|  | 7bb5df1cda | ||
|  | 37a81dff04 | ||
|  | fc96eb4e21 | ||
|  | ae369738b0 | ||
|  | e2037b3f7d | ||
|  | 5419033935 | ||
|  | 2eebf060af | ||
|  | acd9db5902 | ||
|  | d0e8b3d59b | ||
|  | c15dd15388 | ||
|  | 0003a5c416 | ||
|  | 6d1f2431bd | ||
|  | fdea3abdf8 | ||
|  | 98703c7fbf | ||
|  | 2bca84e345 | ||
|  | 984e8e14ea | ||
|  | d05cfe0600 | ||
|  | 37419b4f99 | ||
|  | a8aa99442f | ||
|  | 94b539d155 | ||
|  | b8874d4d4e | 
							
								
								
									
										15
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								README.md
									
									
									
									
									
								
							| @@ -227,12 +227,15 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|  | ||||
| ## Video Format Options: | ||||
|     -f, --format FORMAT              video format code, specify the order of | ||||
|                                      preference using slashes: "-f 22/17/18". | ||||
|                                      "-f mp4" and "-f flv" are also supported. | ||||
|                                      You can also use the special names "best", | ||||
|                                      "bestvideo", "bestaudio", "worst", | ||||
|                                      "worstvideo" and "worstaudio". By default, | ||||
|                                      youtube-dl will pick the best quality. | ||||
|                                      preference using slashes: -f 22/17/18 .  -f | ||||
|                                      mp4 , -f m4a and  -f flv  are also | ||||
|                                      supported. You can also use the special | ||||
|                                      names "best", "bestvideo", "bestaudio", | ||||
|                                      "worst", "worstvideo" and "worstaudio". By | ||||
|                                      default, youtube-dl will pick the best | ||||
|                                      quality. Use commas to download multiple | ||||
|                                      audio formats, such as  -f | ||||
|                                      136/137/mp4/bestvideo,140/m4a/bestaudio | ||||
|     --all-formats                    download all available video formats | ||||
|     --prefer-free-formats            prefer free video formats unless a specific | ||||
|                                      one is requested | ||||
|   | ||||
| @@ -41,6 +41,8 @@ from youtube_dl.utils import ( | ||||
|     strip_jsonp, | ||||
|     uppercase_escape, | ||||
|     limit_length, | ||||
|     escape_rfc3986, | ||||
|     escape_url, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -294,5 +296,34 @@ class TestUtil(unittest.TestCase): | ||||
|             limit_length('foo bar baz asd', 12).startswith('foo bar')) | ||||
|         self.assertTrue('...' in limit_length('foo bar baz asd', 12)) | ||||
|  | ||||
|     def test_escape_rfc3986(self): | ||||
|         reserved = "!*'();:@&=+$,/?#[]" | ||||
|         unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~' | ||||
|         self.assertEqual(escape_rfc3986(reserved), reserved) | ||||
|         self.assertEqual(escape_rfc3986(unreserved), unreserved) | ||||
|         self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82') | ||||
|         self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82') | ||||
|         self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar') | ||||
|         self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar') | ||||
|  | ||||
|     def test_escape_url(self): | ||||
|         self.assertEqual( | ||||
|             escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'), | ||||
|             'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4' | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'), | ||||
|             'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290' | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             escape_url('http://тест.рф/фрагмент'), | ||||
|             'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             escape_url('http://тест.рф/абв?абв=абв#абв'), | ||||
|             'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' | ||||
|         ) | ||||
|         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -10,7 +10,6 @@ from test.helper import FakeYDL | ||||
|  | ||||
|  | ||||
| from youtube_dl.extractor import ( | ||||
|     YoutubeUserIE, | ||||
|     YoutubePlaylistIE, | ||||
|     YoutubeIE, | ||||
|     YoutubeChannelIE, | ||||
| @@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         self.assertEqual(len(entries), 25) | ||||
|         self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') | ||||
|  | ||||
|     def test_youtube_channel(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeChannelIE(dl) | ||||
|         #test paginated channel | ||||
|         result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w') | ||||
|         self.assertTrue(len(result['entries']) > 90) | ||||
|         #test autogenerated channel | ||||
|         result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') | ||||
|         self.assertTrue(len(result['entries']) >= 18) | ||||
|  | ||||
|     def test_youtube_user(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeUserIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') | ||||
|         self.assertTrue(len(result['entries']) >= 320) | ||||
|  | ||||
|     def test_youtube_show(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeShowIE(dl) | ||||
|         result = ie.extract('http://www.youtube.com/show/airdisasters') | ||||
|         self.assertTrue(len(result) >= 3) | ||||
|  | ||||
|     def test_youtube_mix(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
| @@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         entries = result['entries'] | ||||
|         self.assertEqual(len(entries), 100) | ||||
|  | ||||
|     def test_youtube_toplist(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeTopListIE(dl) | ||||
|         result = ie.extract('yttoplist:music:Trending') | ||||
|         entries = result['entries'] | ||||
|         self.assertTrue(len(entries) >= 5) | ||||
|  | ||||
|     def test_youtube_search_url(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeSearchURLIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video') | ||||
|         entries = result['entries'] | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], 'youtube-dl test video') | ||||
|         self.assertTrue(len(entries) >= 5) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -28,6 +28,7 @@ from .utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
|     escape_url, | ||||
|     ContentTooShortError, | ||||
|     date_from_str, | ||||
|     DateRange, | ||||
| @@ -707,7 +708,7 @@ class YoutubeDL(object): | ||||
|             if video_formats: | ||||
|                 return video_formats[0] | ||||
|         else: | ||||
|             extensions = ['mp4', 'flv', 'webm', '3gp'] | ||||
|             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a'] | ||||
|             if format_spec in extensions: | ||||
|                 filter_f = lambda f: f['ext'] == format_spec | ||||
|             else: | ||||
| @@ -808,28 +809,29 @@ class YoutubeDL(object): | ||||
|         if req_format in ('-1', 'all'): | ||||
|             formats_to_download = formats | ||||
|         else: | ||||
|             # We can accept formats requested in the format: 34/5/best, we pick | ||||
|             # the first that is available, starting from left | ||||
|             req_formats = req_format.split('/') | ||||
|             for rf in req_formats: | ||||
|                 if re.match(r'.+?\+.+?', rf) is not None: | ||||
|                     # Two formats have been requested like '137+139' | ||||
|                     format_1, format_2 = rf.split('+') | ||||
|                     formats_info = (self.select_format(format_1, formats), | ||||
|                         self.select_format(format_2, formats)) | ||||
|                     if all(formats_info): | ||||
|                         selected_format = { | ||||
|                             'requested_formats': formats_info, | ||||
|                             'format': rf, | ||||
|                             'ext': formats_info[0]['ext'], | ||||
|                         } | ||||
|             for rfstr in req_format.split(','): | ||||
|                 # We can accept formats requested in the format: 34/5/best, we pick | ||||
|                 # the first that is available, starting from left | ||||
|                 req_formats = rfstr.split('/') | ||||
|                 for rf in req_formats: | ||||
|                     if re.match(r'.+?\+.+?', rf) is not None: | ||||
|                         # Two formats have been requested like '137+139' | ||||
|                         format_1, format_2 = rf.split('+') | ||||
|                         formats_info = (self.select_format(format_1, formats), | ||||
|                             self.select_format(format_2, formats)) | ||||
|                         if all(formats_info): | ||||
|                             selected_format = { | ||||
|                                 'requested_formats': formats_info, | ||||
|                                 'format': rf, | ||||
|                                 'ext': formats_info[0]['ext'], | ||||
|                             } | ||||
|                         else: | ||||
|                             selected_format = None | ||||
|                     else: | ||||
|                         selected_format = None | ||||
|                 else: | ||||
|                     selected_format = self.select_format(rf, formats) | ||||
|                 if selected_format is not None: | ||||
|                     formats_to_download = [selected_format] | ||||
|                     break | ||||
|                         selected_format = self.select_format(rf, formats) | ||||
|                     if selected_format is not None: | ||||
|                         formats_to_download.append(selected_format) | ||||
|                         break | ||||
|         if not formats_to_download: | ||||
|             raise ExtractorError('requested format not available', | ||||
|                                  expected=True) | ||||
| @@ -1241,6 +1243,25 @@ class YoutubeDL(object): | ||||
|  | ||||
|     def urlopen(self, req): | ||||
|         """ Start an HTTP download """ | ||||
|  | ||||
|         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not | ||||
|         # always respected by websites, some tend to give out URLs with non percent-encoded | ||||
|         # non-ASCII characters (see telemb.py, ard.py [#3412]) | ||||
|         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) | ||||
|         # To work around aforementioned issue we will replace request's original URL with | ||||
|         # percent-encoded one | ||||
|         url = req if isinstance(req, compat_str) else req.get_full_url() | ||||
|         url_escaped = escape_url(url) | ||||
|  | ||||
|         # Substitute URL if any change after escaping | ||||
|         if url != url_escaped: | ||||
|             if isinstance(req, compat_str): | ||||
|                 req = url_escaped | ||||
|             else: | ||||
|                 req = compat_urllib_request.Request( | ||||
|                     url_escaped, data=req.data, headers=req.headers, | ||||
|                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) | ||||
|  | ||||
|         return self._opener.open(req, timeout=self._socket_timeout) | ||||
|  | ||||
|     def print_debug_header(self): | ||||
|   | ||||
| @@ -75,6 +75,9 @@ __authors__  = ( | ||||
|     'Ole Ernst', | ||||
|     'Aaron McDaniel (mcd1992)', | ||||
|     'Magnus Kolstad', | ||||
|     'Hari Padmanaban', | ||||
|     'Carlos Ramos', | ||||
|     '5moufl', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
|   | ||||
| @@ -2,6 +2,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from .hls import HlsFD | ||||
| from .hls import NativeHlsFD | ||||
| from .http import HttpFD | ||||
| from .mplayer import MplayerFD | ||||
| from .rtmp import RtmpFD | ||||
| @@ -19,6 +20,8 @@ def get_suitable_downloader(info_dict): | ||||
|  | ||||
|     if url.startswith('rtmp'): | ||||
|         return RtmpFD | ||||
|     if protocol == 'm3u8_native': | ||||
|         return NativeHlsFD | ||||
|     if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'): | ||||
|         return HlsFD | ||||
|     if url.startswith('mms') or url.startswith('rtsp'): | ||||
|   | ||||
| @@ -16,6 +16,7 @@ from ..utils import ( | ||||
|     format_bytes, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -251,6 +252,8 @@ class F4mFD(FileDownloader): | ||||
|             # We only download the first fragment | ||||
|             fragments_list = fragments_list[:1] | ||||
|         total_frags = len(fragments_list) | ||||
|         # For some akamai manifests we'll need to add a query to the fragment url | ||||
|         akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) | ||||
|  | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') | ||||
| @@ -290,6 +293,8 @@ class F4mFD(FileDownloader): | ||||
|         for (seg_i, frag_i) in fragments_list: | ||||
|             name = 'Seg%d-Frag%d' % (seg_i, frag_i) | ||||
|             url = base_url + name | ||||
|             if akamai_pv: | ||||
|                 url += '?' + akamai_pv.strip(';') | ||||
|             frag_filename = '%s-%s' % (tmpfilename, name) | ||||
|             success = http_dl.download(frag_filename, {'url': url}) | ||||
|             if not success: | ||||
|   | ||||
| @@ -1,8 +1,12 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| import subprocess | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     check_executable, | ||||
|     encodeFilename, | ||||
| ) | ||||
| @@ -43,3 +47,46 @@ class HlsFD(FileDownloader): | ||||
|             self.to_stderr(u"\n") | ||||
|             self.report_error(u'%s exited with code %d' % (program, retval)) | ||||
|             return False | ||||
|  | ||||
|  | ||||
| class NativeHlsFD(FileDownloader): | ||||
|     """ A more limited implementation that does not require ffmpeg """ | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         url = info_dict['url'] | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|  | ||||
|         self.to_screen( | ||||
|             '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id']) | ||||
|         data = self.ydl.urlopen(url).read() | ||||
|         s = data.decode('utf-8', 'ignore') | ||||
|         segment_urls = [] | ||||
|         for line in s.splitlines(): | ||||
|             line = line.strip() | ||||
|             if line and not line.startswith('#'): | ||||
|                 segment_url = ( | ||||
|                     line | ||||
|                     if re.match(r'^https?://', line) | ||||
|                     else compat_urlparse.urljoin(url, line)) | ||||
|                 segment_urls.append(segment_url) | ||||
|  | ||||
|         byte_counter = 0 | ||||
|         with open(tmpfilename, 'wb') as outf: | ||||
|             for i, segurl in enumerate(segment_urls): | ||||
|                 segment = self.ydl.urlopen(segurl).read() | ||||
|                 outf.write(segment) | ||||
|                 byte_counter += len(segment) | ||||
|                 self.to_screen( | ||||
|                     '[hlsnative] %s: Downloading segment %d / %d' % | ||||
|                     (info_dict['id'], i + 1, len(segment_urls))) | ||||
|  | ||||
|         self._hook_progress({ | ||||
|             'downloaded_bytes': byte_counter, | ||||
|             'total_bytes': byte_counter, | ||||
|             'filename': filename, | ||||
|             'status': 'finished', | ||||
|         }) | ||||
|         self.try_rename(tmpfilename, filename) | ||||
|         return True | ||||
|  | ||||
|   | ||||
| @@ -25,6 +25,7 @@ from .bambuser import BambuserIE, BambuserChannelIE | ||||
| from .bandcamp import BandcampIE, BandcampAlbumIE | ||||
| from .bbccouk import BBCCoUkIE | ||||
| from .beeg import BeegIE | ||||
| from .behindkink import BehindKinkIE | ||||
| from .bilibili import BiliBiliIE | ||||
| from .blinkx import BlinkxIE | ||||
| from .bliptv import BlipTVIE, BlipTVUserIE | ||||
| @@ -83,6 +84,7 @@ from .dropbox import DropboxIE | ||||
| from .ebaumsworld import EbaumsWorldIE | ||||
| from .ehow import EHowIE | ||||
| from .eighttracks import EightTracksIE | ||||
| from .einthusan import EinthusanIE | ||||
| from .eitb import EitbIE | ||||
| from .ellentv import ( | ||||
|     EllenTVIE, | ||||
| @@ -197,6 +199,7 @@ from .malemotion import MalemotionIE | ||||
| from .mdr import MDRIE | ||||
| from .metacafe import MetacafeIE | ||||
| from .metacritic import MetacriticIE | ||||
| from .mgoon import MgoonIE | ||||
| from .ministrygrid import MinistryGridIE | ||||
| from .mit import TechTVMITIE, MITIE, OCWMITIE | ||||
| from .mitele import MiTeleIE | ||||
| @@ -206,6 +209,7 @@ from .mpora import MporaIE | ||||
| from .moevideo import MoeVideoIE | ||||
| from .mofosex import MofosexIE | ||||
| from .mojvideo import MojvideoIE | ||||
| from .moniker import MonikerIE | ||||
| from .mooshare import MooshareIE | ||||
| from .morningstar import MorningstarIE | ||||
| from .motherless import MotherlessIE | ||||
| @@ -218,6 +222,7 @@ from .mtv import ( | ||||
|     MTVServicesEmbeddedIE, | ||||
|     MTVIggyIE, | ||||
| ) | ||||
| from .muenchentv import MuenchenTVIE | ||||
| from .musicplayon import MusicPlayOnIE | ||||
| from .musicvault import MusicVaultIE | ||||
| from .muzu import MuzuTVIE | ||||
| @@ -235,6 +240,7 @@ from .ndtv import NDTVIE | ||||
| from .newgrounds import NewgroundsIE | ||||
| from .newstube import NewstubeIE | ||||
| from .nfb import NFBIE | ||||
| from .nfl import NFLIE | ||||
| from .nhl import NHLIE, NHLVideocenterIE | ||||
| from .niconico import NiconicoIE | ||||
| from .ninegag import NineGagIE | ||||
| @@ -244,7 +250,10 @@ from .nosvideo import NosVideoIE | ||||
| from .novamov import NovaMovIE | ||||
| from .nowness import NownessIE | ||||
| from .nowvideo import NowVideoIE | ||||
| from .npo import NPOIE | ||||
| from .npo import ( | ||||
|     NPOIE, | ||||
|     TegenlichtVproIE, | ||||
| ) | ||||
| from .nrk import ( | ||||
|     NRKIE, | ||||
|     NRKTVIE, | ||||
| @@ -357,6 +366,7 @@ from .thisav import ThisAVIE | ||||
| from .tinypic import TinyPicIE | ||||
| from .tlc import TlcIE, TlcDeIE | ||||
| from .tnaflix import TNAFlixIE | ||||
| from .thvideo import THVideoIE | ||||
| from .toutv import TouTvIE | ||||
| from .toypics import ToypicsUserIE, ToypicsIE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| @@ -365,6 +375,7 @@ from .trutube import TruTubeIE | ||||
| from .tube8 import Tube8IE | ||||
| from .tudou import TudouIE | ||||
| from .tumblr import TumblrIE | ||||
| from .turbo import TurboIE | ||||
| from .tutv import TutvIE | ||||
| from .tvigle import TvigleIE | ||||
| from .tvp import TvpIE | ||||
| @@ -389,6 +400,7 @@ from .videobam import VideoBamIE | ||||
| from .videodetective import VideoDetectiveIE | ||||
| from .videolecturesnet import VideoLecturesNetIE | ||||
| from .videofyme import VideofyMeIE | ||||
| from .videomega import VideoMegaIE | ||||
| from .videopremium import VideoPremiumIE | ||||
| from .videott import VideoTtIE | ||||
| from .videoweed import VideoWeedIE | ||||
| @@ -441,6 +453,7 @@ from .yahoo import ( | ||||
| from .youjizz import YouJizzIE | ||||
| from .youku import YoukuIE | ||||
| from .youporn import YouPornIE | ||||
| from .yourupload import YourUploadIE | ||||
| from .youtube import ( | ||||
|     YoutubeIE, | ||||
|     YoutubeChannelIE, | ||||
|   | ||||
| @@ -51,9 +51,6 @@ class ARDMediathekIE(InfoExtractor): | ||||
|         else: | ||||
|             video_id = m.group('video_id') | ||||
|  | ||||
|         urlp = compat_urllib_parse_urlparse(url) | ||||
|         url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl() | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage: | ||||
|   | ||||
							
								
								
									
										53
									
								
								youtube_dl/extractor/behindkink.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								youtube_dl/extractor/behindkink.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import url_basename | ||||
|  | ||||
|  | ||||
| class BehindKinkIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/', | ||||
|         'md5': '41ad01222b8442089a55528fec43ec01', | ||||
|         'info_dict': { | ||||
|             'id': '36370', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!', | ||||
|             'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...', | ||||
|             'upload_date': '20140814', | ||||
|             'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('id') | ||||
|         year = mobj.group('year') | ||||
|         month = mobj.group('month') | ||||
|         day = mobj.group('day') | ||||
|         upload_date = year + month + day | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r"'file':\s*'([^']+)'", | ||||
|             webpage, 'URL base') | ||||
|  | ||||
|         video_id = url_basename(video_url) | ||||
|         video_id = video_id.split('_')[0] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'display_id': display_id, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'upload_date': upload_date, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
| @@ -15,6 +15,7 @@ from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
|     compat_str, | ||||
|  | ||||
|     clean_html, | ||||
| @@ -130,6 +131,8 @@ class InfoExtractor(object): | ||||
|                     by YoutubeDL if it's missing) | ||||
|     categories:     A list of categories that the video falls in, for example | ||||
|                     ["Sports", "Berlin"] | ||||
|     is_live:        True, False, or None (=unknown). Whether this video is a | ||||
|                     live stream that goes on instead of a fixed-length video. | ||||
|  | ||||
|     Unless mentioned otherwise, the fields should be Unicode strings. | ||||
|  | ||||
| @@ -638,7 +641,9 @@ class InfoExtractor(object): | ||||
|  | ||||
|         return formats | ||||
|  | ||||
|     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None): | ||||
|     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, | ||||
|                               entry_protocol='m3u8', preference=None): | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'm3u8-meta', | ||||
|             'url': m3u8_url, | ||||
| @@ -649,6 +654,11 @@ class InfoExtractor(object): | ||||
|             'format_note': 'Quality selection URL', | ||||
|         }] | ||||
|  | ||||
|         format_url = lambda u: ( | ||||
|             u | ||||
|             if re.match(r'^https?://', u) | ||||
|             else compat_urlparse.urljoin(m3u8_url, u)) | ||||
|  | ||||
|         m3u8_doc = self._download_webpage(m3u8_url, video_id) | ||||
|         last_info = None | ||||
|         kv_rex = re.compile( | ||||
| @@ -665,15 +675,17 @@ class InfoExtractor(object): | ||||
|                 continue | ||||
|             else: | ||||
|                 if last_info is None: | ||||
|                     formats.append({'url': line}) | ||||
|                     formats.append({'url': format_url(line)}) | ||||
|                     continue | ||||
|                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) | ||||
|  | ||||
|                 f = { | ||||
|                     'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), | ||||
|                     'url': line.strip(), | ||||
|                     'url': format_url(line.strip()), | ||||
|                     'tbr': tbr, | ||||
|                     'ext': ext, | ||||
|                     'protocol': entry_protocol, | ||||
|                     'preference': preference, | ||||
|                 } | ||||
|                 codecs = last_info.get('CODECS') | ||||
|                 if codecs: | ||||
|   | ||||
| @@ -7,7 +7,7 @@ class DivxStageIE(NovaMovIE): | ||||
|     IE_NAME = 'divxstage' | ||||
|     IE_DESC = 'DivxStage' | ||||
|  | ||||
|     _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'} | ||||
|     _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'} | ||||
|  | ||||
|     _HOST = 'www.divxstage.eu' | ||||
|  | ||||
| @@ -24,4 +24,4 @@ class DivxStageIE(NovaMovIE): | ||||
|             'title': 'youtubedl test video', | ||||
|             'description': 'This is a test video for youtubedl.', | ||||
|         } | ||||
|     } | ||||
|     } | ||||
|   | ||||
| @@ -5,24 +5,29 @@ import os.path | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_parse_unquote | ||||
| from ..utils import compat_urllib_parse_unquote, url_basename | ||||
|  | ||||
|  | ||||
| class DropboxIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0', | ||||
|         'info_dict': { | ||||
|             'id': 'nelirfsxnmcfbfh', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'youtube-dl test video \'ä"BaW_jenozKc' | ||||
|         } | ||||
|     } | ||||
|     }, | ||||
|     { | ||||
|         'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v', | ||||
|         'only_matching': True, | ||||
|     }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         fn = compat_urllib_parse_unquote(mobj.group('title')) | ||||
|         fn = compat_urllib_parse_unquote(url_basename(url)) | ||||
|         title = os.path.splitext(fn)[0] | ||||
|         video_url = ( | ||||
|             re.sub(r'[?&]dl=0', '', url) + | ||||
|   | ||||
| @@ -19,7 +19,7 @@ class DrTuberIE(InfoExtractor): | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|             'comment_count': int, | ||||
|             'categories': list,  # NSFW | ||||
|             'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
| @@ -52,9 +52,9 @@ class DrTuberIE(InfoExtractor): | ||||
|             r'<span class="comments_count">([\d,\.]+)</span>', | ||||
|             webpage, 'comment count', fatal=False)) | ||||
|  | ||||
|         cats_str = self._html_search_regex( | ||||
|             r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False) | ||||
|         categories = None if cats_str is None else cats_str.split(' ') | ||||
|         cats_str = self._search_regex( | ||||
|             r'<span>Categories:</span><div>(.+?)</div>', webpage, 'categories', fatal=False) | ||||
|         categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
							
								
								
									
										61
									
								
								youtube_dl/extractor/einthusan.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								youtube_dl/extractor/einthusan.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,61 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class EinthusanIE(InfoExtractor):
    """Extract a single movie stream from an einthusan.com watch page."""

    _VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
        'md5': 'af244f4458cd667205e513d75da5b8b1',
        'info_dict': {
            'id': '2447',
            'ext': 'mp4',
            'title': 'Ek Villain',
            'thumbnail': 're:^https?://.*\.jpg$',
            'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
        },
    }, {
        'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
        'md5': 'ef63c7a803e22315880ed182c10d1c5c',
        'info_dict': {
            'id': '1671',
            'ext': 'mp4',
            'title': 'Soodhu Kavvuum',
            'thumbnail': 're:^https?://.*\.jpg$',
            'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51',
        },
    }]

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # The title is the text of the heading link on the watch page.
        title = self._html_search_regex(
            r'<h1><a class="movie-title".*?>(.*?)</a></h1>', page, 'title')

        # The stream URL is embedded inside the jwplayer setup() call.
        stream_url = self._html_search_regex(
            r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''',
            page, 'video url')

        cover = self._html_search_regex(
            r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
            page, 'thumbnail url', fatal=False)
        if cover is not None:
            # Cover images use relative '..' paths; rewrite to absolute URLs.
            cover = cover.replace('..', 'http://www.einthusan.com')

        return {
            'id': video_id,
            'title': title,
            'url': stream_url,
            'thumbnail': cover,
            'description': self._html_search_meta('description', page),
        }
| @@ -10,13 +10,13 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class FlickrIE(InfoExtractor): | ||||
|     """Information Extractor for Flickr videos""" | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' | ||||
|     _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', | ||||
|         'file': '5645318632.mp4', | ||||
|         'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b', | ||||
|         'info_dict': { | ||||
|             'id': '5645318632', | ||||
|             'ext': 'mp4', | ||||
|             "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",  | ||||
|             "uploader_id": "forestwander-nature-pictures",  | ||||
|             "title": "Dark Hollow Waterfalls" | ||||
| @@ -49,12 +49,12 @@ class FlickrIE(InfoExtractor): | ||||
|             raise ExtractorError('Unable to extract video url') | ||||
|         video_url = mobj.group(1) + unescapeHTML(mobj.group(2)) | ||||
|  | ||||
|         return [{ | ||||
|             'id':          video_id, | ||||
|             'url':         video_url, | ||||
|             'ext':         'mp4', | ||||
|             'title':       self._og_search_title(webpage), | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail':   self._og_search_thumbnail(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'uploader_id': video_uploader_id, | ||||
|         }] | ||||
|         } | ||||
|   | ||||
| @@ -4,16 +4,21 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class FranceInterIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})' | ||||
|     _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.franceinter.fr/player/reecouter?play=793962', | ||||
|         'file': '793962.mp3', | ||||
|         'md5': '4764932e466e6f6c79c317d2e74f6884', | ||||
|         "info_dict": { | ||||
|             "title": "L’Histoire dans les jeux vidéo", | ||||
|             'id': '793962', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'L’Histoire dans les jeux vidéo', | ||||
|             'description': 'md5:7e93ddb4451e7530022792240a3049c7', | ||||
|             'timestamp': 1387369800, | ||||
|             'upload_date': '20131218', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -22,17 +27,26 @@ class FranceInterIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._html_search_regex( | ||||
|             r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title') | ||||
|  | ||||
|         path = self._search_regex( | ||||
|             r'&urlAOD=(.*?)&startTime', webpage, 'video url') | ||||
|             r'<a id="player".+?href="([^"]+)"', webpage, 'video url') | ||||
|         video_url = 'http://www.franceinter.fr/' + path | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<span class="title">(.+?)</span>', webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r'<span class="description">(.*?)</span>', | ||||
|             webpage, 'description', fatal=False) | ||||
|         timestamp = int_or_none(self._search_regex( | ||||
|             r'data-date="(\d+)"', webpage, 'upload date', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|             'formats': [{ | ||||
|                 'url': video_url, | ||||
|                 'vcodec': 'none', | ||||
|             }], | ||||
|             'title': title, | ||||
|         } | ||||
|   | ||||
| @@ -8,45 +8,68 @@ import json | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     ExtractorError, | ||||
|     clean_html, | ||||
|     parse_duration, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FranceTVBaseInfoExtractor(InfoExtractor): | ||||
|     def _extract_video(self, video_id): | ||||
|         info = self._download_xml( | ||||
|             'http://www.francetvinfo.fr/appftv/webservices/video/' | ||||
|             'getInfosOeuvre.php?id-diffusion=' | ||||
|             + video_id, video_id, 'Downloading XML config') | ||||
|     def _extract_video(self, video_id, catalogue): | ||||
|         info = self._download_json( | ||||
|             'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s' | ||||
|             % (video_id, catalogue), | ||||
|             video_id, 'Downloading video JSON') | ||||
|  | ||||
|         manifest_url = info.find('videos/video/url').text | ||||
|         manifest_url = manifest_url.replace('/z/', '/i/') | ||||
|          | ||||
|         if manifest_url.startswith('rtmp'): | ||||
|             formats = [{'url': manifest_url, 'ext': 'flv'}] | ||||
|         else: | ||||
|             formats = [] | ||||
|             available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats') | ||||
|             for index, format_descr in enumerate(available_formats.split(',')): | ||||
|                 format_info = { | ||||
|                     'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index), | ||||
|                     'ext': 'mp4', | ||||
|                 } | ||||
|                 m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr) | ||||
|                 if m_resolution is not None: | ||||
|                     format_info.update({ | ||||
|                         'width': int(m_resolution.group('width')), | ||||
|                         'height': int(m_resolution.group('height')), | ||||
|                     }) | ||||
|                 formats.append(format_info) | ||||
|         if info.get('status') == 'NOK': | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' % (self.IE_NAME, info['message']), expected=True) | ||||
|  | ||||
|         thumbnail_path = info.find('image').text | ||||
|         formats = [] | ||||
|         for video in info['videos']: | ||||
|             if video['statut'] != 'ONLINE': | ||||
|                 continue | ||||
|             video_url = video['url'] | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             format_id = video['format'] | ||||
|             if video_url.endswith('.f4m'): | ||||
|                 video_url_parsed = compat_urllib_parse_urlparse(video_url) | ||||
|                 f4m_url = self._download_webpage( | ||||
|                     'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path, | ||||
|                     video_id, 'Downloading f4m manifest token', fatal=False) | ||||
|                 if f4m_url: | ||||
|                     f4m_formats = self._extract_f4m_formats(f4m_url, video_id) | ||||
|                     for f4m_format in f4m_formats: | ||||
|                         f4m_format['preference'] = 1 | ||||
|                     formats.extend(f4m_formats) | ||||
|             elif video_url.endswith('.m3u8'): | ||||
|                 formats.extend(self._extract_m3u8_formats(video_url, video_id)) | ||||
|             elif video_url.startswith('rtmp'): | ||||
|                 formats.append({ | ||||
|                     'url': video_url, | ||||
|                     'format_id': 'rtmp-%s' % format_id, | ||||
|                     'ext': 'flv', | ||||
|                     'preference': 1, | ||||
|                 }) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': video_url, | ||||
|                     'format_id': format_id, | ||||
|                     'preference': 2, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info.find('titre').text, | ||||
|             'title': info['titre'], | ||||
|             'description': clean_html(info['synopsis']), | ||||
|             'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']), | ||||
|             'duration': parse_duration(info['duree']), | ||||
|             'timestamp': int_or_none(info['diffusion']['timestamp']), | ||||
|             'formats': formats, | ||||
|             'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path), | ||||
|             'description': info.find('synopsis').text, | ||||
|         } | ||||
|  | ||||
|  | ||||
| @@ -61,7 +84,7 @@ class PluzzIE(FranceTVBaseInfoExtractor): | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         video_id = self._search_regex( | ||||
|             r'data-diffusion="(\d+)"', webpage, 'ID') | ||||
|         return self._extract_video(video_id) | ||||
|         return self._extract_video(video_id, 'Pluzz') | ||||
|  | ||||
|  | ||||
| class FranceTvInfoIE(FranceTVBaseInfoExtractor): | ||||
| @@ -70,13 +93,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', | ||||
|         'md5': '9cecf35f99c4079c199e9817882a9a1c', | ||||
|         'info_dict': { | ||||
|             'id': '84981923', | ||||
|             'ext': 'mp4', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Soir 3', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|             'upload_date': '20130826', | ||||
|             'timestamp': 1377548400, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html', | ||||
| @@ -88,15 +111,17 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'HLS (reqires ffmpeg)' | ||||
|         } | ||||
|         }, | ||||
|         'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, page_title) | ||||
|         video_id = self._search_regex(r'id-video=((?:[^0-9]*?_)?[0-9]+)[@"]', webpage, 'video id') | ||||
|         return self._extract_video(video_id) | ||||
|         video_id, catalogue = self._search_regex( | ||||
|             r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@') | ||||
|         return self._extract_video(video_id, catalogue) | ||||
|  | ||||
|  | ||||
| class FranceTVIE(FranceTVBaseInfoExtractor): | ||||
| @@ -112,91 +137,77 @@ class FranceTVIE(FranceTVBaseInfoExtractor): | ||||
|         # france2 | ||||
|         { | ||||
|             'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', | ||||
|             'file': '75540104.mp4', | ||||
|             'md5': 'c03fc87cb85429ffd55df32b9fc05523', | ||||
|             'info_dict': { | ||||
|                 'title': '13h15, le samedi...', | ||||
|                 'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|                 'id': '109169362', | ||||
|                 'ext': 'flv', | ||||
|                 'title': '13h15, le dimanche...', | ||||
|                 'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7', | ||||
|                 'upload_date': '20140914', | ||||
|                 'timestamp': 1410693600, | ||||
|             }, | ||||
|         }, | ||||
|         # france3 | ||||
|         { | ||||
|             'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575', | ||||
|             'md5': '679bb8f8921f8623bd658fa2f8364da0', | ||||
|             'info_dict': { | ||||
|                 'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Le scandale du prix des médicaments', | ||||
|                 'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|                 'upload_date': '20131113', | ||||
|                 'timestamp': 1384380000, | ||||
|             }, | ||||
|         }, | ||||
|         # france4 | ||||
|         { | ||||
|             'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', | ||||
|             'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c', | ||||
|             'info_dict': { | ||||
|                 'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Hero Corp Making of - Extrait 1', | ||||
|                 'description': 'md5:c87d54871b1790679aec1197e73d650a', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|                 'upload_date': '20131106', | ||||
|                 'timestamp': 1383766500, | ||||
|             }, | ||||
|         }, | ||||
|         # france5 | ||||
|         { | ||||
|             'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968', | ||||
|             'md5': '78f0f4064f9074438e660785bbf2c5d9', | ||||
|             'info_dict': { | ||||
|                 'id': '92837968', | ||||
|                 'ext': 'mp4', | ||||
|                 'id': '108961659', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'C à dire ?!', | ||||
|                 'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|                 'description': 'md5:1a4aeab476eb657bf57c4ff122129f81', | ||||
|                 'upload_date': '20140915', | ||||
|                 'timestamp': 1410795000, | ||||
|             }, | ||||
|         }, | ||||
|         # franceo | ||||
|         { | ||||
|             'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013', | ||||
|             'md5': '52f0bfe202848b15915a2f39aaa8981b', | ||||
|             'info_dict': { | ||||
|                 'id': '92327925', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Infô-Afrique', | ||||
|                 'id': '108634970', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Infô Afrique', | ||||
|                 'description': 'md5:ebf346da789428841bee0fd2a935ea55', | ||||
|                 'upload_date': '20140915', | ||||
|                 'timestamp': 1410822000, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'The id changes frequently', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj.group('key'): | ||||
|             webpage = self._download_webpage(url, mobj.group('key')) | ||||
|             id_res = [ | ||||
|                 (r'''(?x)<div\s+class="video-player">\s* | ||||
|                     <a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+ | ||||
|                     class="francetv-video-player">'''), | ||||
|                 (r'<a id="player_direct" href="http://info\.francetelevisions' | ||||
|                  '\.fr/\?id-video=([^"/&]+)'), | ||||
|                 (r'<a class="video" id="ftv_player_(.+?)"'), | ||||
|             ] | ||||
|             video_id = self._html_search_regex(id_res, webpage, 'video ID') | ||||
|         else: | ||||
|             video_id = mobj.group('id') | ||||
|         return self._extract_video(video_id) | ||||
|         webpage = self._download_webpage(url, mobj.group('key') or mobj.group('id')) | ||||
|         video_id, catalogue = self._html_search_regex( | ||||
|             r'href="http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', | ||||
|             webpage, 'video ID').split('@') | ||||
|         return self._extract_video(video_id, catalogue) | ||||
|  | ||||
|  | ||||
| class GenerationQuoiIE(InfoExtractor): | ||||
| @@ -232,16 +243,15 @@ class CultureboxIE(FranceTVBaseInfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813', | ||||
|         'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553', | ||||
|         'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6', | ||||
|         'info_dict': { | ||||
|             'id': 'EV_6785', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Einstein on the beach au Théâtre du Châtelet', | ||||
|             'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|             'id': 'EV_22853', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Dans les jardins de William Christie - Le Camus', | ||||
|             'description': 'md5:4710c82315c40f0c865ca8b9a68b5299', | ||||
|             'upload_date': '20140829', | ||||
|             'timestamp': 1409317200, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -249,5 +259,7 @@ class CultureboxIE(FranceTVBaseInfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id') | ||||
|         return self._extract_video(video_id) | ||||
|         video_id, catalogue = self._search_regex( | ||||
|             r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@') | ||||
|  | ||||
|         return self._extract_video(video_id, catalogue) | ||||
|   | ||||
| @@ -584,7 +584,9 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Helper method | ||||
|         def _playlist_from_matches(matches, getter, ie=None): | ||||
|             urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches) | ||||
|             urlrs = orderedSet( | ||||
|                 self.url_result(self._proto_relative_url(getter(m)), ie) | ||||
|                 for m in matches) | ||||
|             return self.playlist_result( | ||||
|                 urlrs, playlist_id=video_id, playlist_title=video_title) | ||||
|  | ||||
| @@ -633,7 +635,7 @@ class GenericIE(InfoExtractor): | ||||
|             \1''', webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches( | ||||
|                 matches, lambda m: unescapeHTML(m[1]), ie='Youtube') | ||||
|                 matches, lambda m: unescapeHTML(m[1])) | ||||
|  | ||||
|         # Look for embedded Dailymotion player | ||||
|         matches = re.findall( | ||||
| @@ -877,7 +879,7 @@ class GenericIE(InfoExtractor): | ||||
|         if not found: | ||||
|             found = re.search( | ||||
|                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' | ||||
|                 r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"', | ||||
|                 r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)', | ||||
|                 webpage) | ||||
|             if found: | ||||
|                 new_url = found.group(1) | ||||
|   | ||||
| @@ -71,6 +71,7 @@ class IGNIE(InfoExtractor): | ||||
|  | ||||
|     def _find_video_id(self, webpage): | ||||
|         res_id = [ | ||||
|             r'"video_id"\s*:\s*"(.*?)"', | ||||
|             r'data-video-id="(.+?)"', | ||||
|             r'<object id="vid_(.+?)"', | ||||
|             r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"', | ||||
| @@ -85,7 +86,7 @@ class IGNIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, name_or_id) | ||||
|         if page_type != 'video': | ||||
|             multiple_urls = re.findall( | ||||
|                 '<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', | ||||
|                 '<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', | ||||
|                 webpage) | ||||
|             if multiple_urls: | ||||
|                 return [self.url_result(u, ie='IGN') for u in multiple_urls] | ||||
| @@ -111,13 +112,13 @@ class IGNIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class OneUPIE(IGNIE): | ||||
|     _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)' | ||||
|     _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html' | ||||
|     IE_NAME = '1up.com' | ||||
|  | ||||
|     _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://gamevideos.1up.com/video/id/34976', | ||||
|         'url': 'http://gamevideos.1up.com/video/id/34976.html', | ||||
|         'md5': '68a54ce4ebc772e4b71e3123d413163d', | ||||
|         'info_dict': { | ||||
|             'id': '34976', | ||||
|   | ||||
							
								
								
									
										87
									
								
								youtube_dl/extractor/mgoon.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								youtube_dl/extractor/mgoon.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,87 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     qualities, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class MgoonIE(InfoExtractor):
    """Extract videos from mgoon.com (m.mgoon.com, www.mgoon.com, video.mgoon.com)."""

    _VALID_URL = r'''(?x)https?://(?:www\.)?
    (?:(:?m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)|
        video\.mgoon\.com)/(?P<id>[0-9]+)'''
    # JSON metadata endpoint; formatted with the numeric video id.
    _API_URL = 'http://mpos.mgoon.com/player/video?id={0:}'
    _TESTS = [{
        'url': 'http://m.mgoon.com/ch/hi6618/v/5582148',
        'md5': 'dd46bb66ab35cf6d51cc812fd82da79d',
        'info_dict': {
            'id': '5582148',
            'uploader_id': 'hi6618',
            'duration': 240.419,
            'upload_date': '20131220',
            'ext': 'mp4',
            'title': 'md5:543aa4c27a4931d371c3f433e8cebebc',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'http://www.mgoon.com/play/view/5582148',
        'only_matching': True,
    }, {
        'url': 'http://video.mgoon.com/5582148',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')

        data = self._download_json(self._API_URL.format(video_id), video_id)

        # The API reports failures through errorInfo, not the HTTP status.
        if data.get('errorInfo', {}).get('code') != 'NONE':
            raise ExtractorError('%s encountered an error: %s' % (
                self.IE_NAME, data['errorInfo']['message']), expected=True)

        info = data['videoInfo']
        duration = info.get('v_duration')
        if duration:
            # v_duration appears to be in milliseconds (test expects 240.419 s).
            duration /= 1000.0

        age_limit = (
            18 if data.get('accessInfo', {}).get('code') == 'VIDEO_STATUS_ADULT'
            else None)

        get_quality = qualities(['360p', '480p', '720p', '1080p'])
        formats = [{
            'format_id': fmt['label'],
            'quality': get_quality(fmt['label']),
            'url': fmt['url'],
            'ext': fmt['format'],
        } for fmt in data['videoFiles']]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': info.get('v_title'),
            'formats': formats,
            'thumbnail': info.get('v_thumbnail'),
            'duration': duration,
            'upload_date': unified_strdate(info.get('v_reg_date')),
            'uploader_id': data.get('userInfo', {}).get('u_alias'),
            'age_limit': age_limit,
        }
							
								
								
									
										70
									
								
								youtube_dl/extractor/moniker.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								youtube_dl/extractor/moniker.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,70 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os.path | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
|  | ||||
class MonikerIE(InfoExtractor):
    """Extractor for videos hosted on allmyvideos.net and vidspot.net.

    Both hosts serve an interstitial page whose hidden form fields must be
    POSTed back before the page containing the media links is returned.
    """
    IE_DESC = 'allmyvideos.net and vidspot.net'
    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)'

    _TESTS = [{
        'url': 'http://allmyvideos.net/jih3nce3x6wn',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'jih3nce3x6wn',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'http://vidspot.net/l2ngsmhs8ci5',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'l2ngsmhs8ci5',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        orig_webpage = self._download_webpage(url, video_id)

        # Collect the hidden form fields from the interstitial page so they
        # can be POSTed back to obtain the real video page.
        fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
        data = dict(fields)

        post = compat_urllib_parse.urlencode(data)
        headers = {
            b'Content-Type': b'application/x-www-form-urlencoded',
        }
        # urlencode() returns text; on Python 3 urllib requires the POST body
        # to be bytes, so encode it explicitly before building the request.
        req = compat_urllib_request.Request(url, post.encode('utf-8'), headers)
        webpage = self._download_webpage(
            req, video_id, note='Downloading video page ...')

        # The uploaded file name (minus its extension) serves as the title.
        title = os.path.splitext(data['fname'])[0]

        # Could be several links with different quality
        links = re.findall(r'"file" : "?(.+?)",', webpage)
        # Assume the links are ordered in quality
        formats = [{
            'url': l,
            'quality': i,
        } for i, l in enumerate(links)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
							
								
								
									
										77
									
								
								youtube_dl/extractor/muenchentv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								youtube_dl/extractor/muenchentv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,77 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
| ) | ||||
|  | ||||
|  | ||||
class MuenchenTVIE(InfoExtractor):
    """Extractor for the münchen.tv live stream.

    The page embeds a JW-player-style playlist as a JavaScript object
    literal; it is converted to JSON and the first entry is used.
    """
    _VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream'
    IE_DESC = 'münchen.tv'
    _TEST = {
        'url': 'http://www.muenchen.tv/livestream/',
        'info_dict': {
            'id': '5334',
            'display_id': 'live',
            'ext': 'mp4',
            'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        display_id = 'live'
        webpage = self._download_webpage(url, display_id)

        # A live stream has no stable title; append the current local
        # time so repeated downloads get distinct titles.
        now = datetime.datetime.now()
        now_str = now.strftime("%Y-%m-%d %H:%M")
        title = self._og_search_title(webpage) + ' ' + now_str

        data_js = self._search_regex(
            r'(?s)\nplaylist:\s*(\[.*?}\]),related:',
            webpage, 'playlist configuration')
        data_json = js_to_json(data_js)
        data = json.loads(data_json)[0]

        video_id = data['mediaid']
        thumbnail = data.get('image')

        formats = []
        for format_num, s in enumerate(data['sources']):
            ext = determine_ext(s['file'], None)
            # Fall back to the source's position when it has no label.
            label_str = s.get('label')
            if label_str is None:
                label_str = '_%d' % format_num

            if ext is None:
                format_id = label_str
            else:
                format_id = '%s-%s' % (ext, label_str)

            formats.append({
                'url': s['file'],
                # The label usually carries the bitrate; int_or_none
                # yields None for non-numeric labels.
                'tbr': int_or_none(s.get('label')),
                'ext': 'mp4',
                'format_id': format_id,
                # Deprioritize SMIL manifests, which are not direct streams.
                'preference': -100 if '.smil' in s['file'] else 0,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'is_live': True,
            # Fix: the thumbnail was extracted above but never returned.
            'thumbnail': thumbnail,
        }
|  | ||||
| @@ -16,9 +16,9 @@ class NBCIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', | ||||
|         'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', | ||||
|         # md5 checksum is not stable | ||||
|         'info_dict': { | ||||
|             'id': 'u1RInQZRN7QJ', | ||||
|             'id': 'bTmnLCvIbaaH', | ||||
|             'ext': 'flv', | ||||
|             'title': 'I Am a Firefighter', | ||||
|             'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', | ||||
|   | ||||
							
								
								
									
										103
									
								
								youtube_dl/extractor/nfl.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								youtube_dl/extractor/nfl.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
class NFLIE(InfoExtractor):
    """Extractor for video pages on nfl.com."""
    IE_NAME = 'nfl.com'
    _VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P<id>\d..[0-9]+)'
    _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json'
    _TEST = {
        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
        # 'md5': '5eb8c40a727dda106d510e5d6ffa79e5',  # md5 checksum fluctuates
        'info_dict': {
            'id': '0ap3000000398478',
            'ext': 'mp4',
            'title': 'Week 3: Washington Redskins vs. Philadelphia Eagles highlights',
            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
            'upload_date': '20140921',
            'timestamp': 1411337580,
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')

        # The player config lists the CDNs and the video-details URL template.
        config = self._download_json(
            self._PLAYER_CONFIG_URL, video_id,
            note='Downloading player config')
        url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config)
        video_data = self._download_json(
            url_template.format(id=video_id), video_id)

        cdns = config.get('cdns')
        if not cdns:
            raise ExtractorError('Failed to get CDN data', expected=True)

        formats = []
        streams = video_data.get('cdnData', {}).get('bitrateInfo', [])
        for cdn_name, cdn in cdns.items():
            # LimeLight streams don't seem to work
            if cdn.get('name') == 'LIMELIGHT':
                continue

            scheme = cdn.get('protocol')
            host = remove_end(cdn.get('host', ''), '/')
            if not (scheme and host):
                continue

            prefix = cdn.get('pathprefix', '')
            if prefix and not prefix.endswith('/'):
                prefix = '%s/' % prefix

            # Bind the per-CDN values as defaults so each iteration keeps
            # its own scheme/host/prefix.
            def build_url(path, scheme=scheme, host=host, prefix=prefix):
                return '{protocol:s}://{host:s}/{prefix:s}{path:}'.format(
                    protocol=scheme,
                    host=host,
                    prefix=prefix,
                    path=path,
                )

            if scheme == 'rtmp':
                cdn_preference = -2
            elif 'prog' in cdn_name.lower():
                cdn_preference = -1
            else:
                cdn_preference = 0

            for stream in streams:
                stream_path = stream.get('path')
                if not stream_path:
                    continue

                formats.append({
                    'url': build_url(stream_path),
                    'vbr': int_or_none(stream.get('rate', 0), 1000),
                    'preference': cdn_preference,
                    'format_note': cdn_name,
                })

        self._sort_formats(formats)

        # Pick the largest available thumbnail.
        thumbnail = None
        for size in ('xl', 'l', 'm', 's', 'xs'):
            thumbnail = video_data.get('imagePaths', {}).get(size)
            if thumbnail:
                break

        return {
            'id': video_id,
            'title': video_data.get('storyHeadline'),
            'formats': formats,
            'description': video_data.get('caption'),
            'duration': video_data.get('duration'),
            'thumbnail': thumbnail,
            'timestamp': int_or_none(video_data.get('posted'), 1000),
        }
| @@ -46,9 +46,9 @@ class NHLBaseInfoExtractor(InfoExtractor): | ||||
|  | ||||
| class NHLIE(NHLBaseInfoExtractor): | ||||
|     IE_NAME = 'nhl.com' | ||||
|     _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', | ||||
|         'info_dict': { | ||||
|             'id': '453614', | ||||
| @@ -58,7 +58,10 @@ class NHLIE(NHLBaseInfoExtractor): | ||||
|             'duration': 18, | ||||
|             'upload_date': '20131006', | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://video.flames.nhl.com/videocenter/console?id=630616', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|   | ||||
| @@ -2,6 +2,8 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import time | ||||
| import hashlib | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| @@ -17,6 +19,7 @@ from ..utils import ( | ||||
| class NocoIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' | ||||
|     _LOGIN_URL = 'http://noco.tv/do.php' | ||||
|     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' | ||||
|     _NETRC_MACHINE = 'noco' | ||||
|  | ||||
|     _TEST = { | ||||
| @@ -55,33 +58,52 @@ class NocoIE(InfoExtractor): | ||||
|         login = self._download_json(request, None, 'Logging in as %s' % username) | ||||
|  | ||||
|         if 'erreur' in login: | ||||
|             raise  ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) | ||||
|             raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) | ||||
|  | ||||
|     def _call_api(self, path, video_id, note): | ||||
|         ts = compat_str(int(time.time() * 1000)) | ||||
|         tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest() | ||||
|         url = self._API_URL_TEMPLATE % (path, ts, tk) | ||||
|  | ||||
|         resp = self._download_json(url, video_id, note) | ||||
|  | ||||
|         if isinstance(resp, dict) and resp.get('error'): | ||||
|             self._raise_error(resp['error'], resp['description']) | ||||
|  | ||||
|         return resp | ||||
|  | ||||
|     def _raise_error(self, error, description): | ||||
|         raise ExtractorError( | ||||
|             '%s returned error: %s - %s' % (self.IE_NAME, error, description), | ||||
|             expected=True) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         medias = self._download_json( | ||||
|             'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON') | ||||
|         medias = self._call_api( | ||||
|             'shows/%s/medias' % video_id, | ||||
|             video_id, 'Downloading video JSON') | ||||
|  | ||||
|         qualities = self._call_api( | ||||
|             'qualities', | ||||
|             video_id, 'Downloading qualities JSON') | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for fmt in medias['fr']['video_list']['default']['quality_list']: | ||||
|             format_id = fmt['quality_key'] | ||||
|         for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items(): | ||||
|  | ||||
|             file = self._download_json( | ||||
|                 'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id), | ||||
|             video = self._call_api( | ||||
|                 'shows/%s/video/%s/fr' % (video_id, format_id.lower()), | ||||
|                 video_id, 'Downloading %s video JSON' % format_id) | ||||
|  | ||||
|             file_url = file['file'] | ||||
|             file_url = video['file'] | ||||
|             if not file_url: | ||||
|                 continue | ||||
|  | ||||
|             if file_url == 'forbidden': | ||||
|                 raise ExtractorError( | ||||
|                     '%s returned error: %s - %s' % ( | ||||
|                         self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']), | ||||
|                     expected=True) | ||||
|             if file_url in ['forbidden', 'not found']: | ||||
|                 popmessage = video['popmessage'] | ||||
|                 self._raise_error(popmessage['title'], popmessage['message']) | ||||
|  | ||||
|             formats.append({ | ||||
|                 'url': file_url, | ||||
| @@ -91,20 +113,31 @@ class NocoIE(InfoExtractor): | ||||
|                 'abr': fmt['audiobitrate'], | ||||
|                 'vbr': fmt['videobitrate'], | ||||
|                 'filesize': fmt['filesize'], | ||||
|                 'format_note': fmt['quality_name'], | ||||
|                 'preference': fmt['priority'], | ||||
|                 'format_note': qualities[format_id]['quality_name'], | ||||
|                 'preference': qualities[format_id]['priority'], | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         show = self._download_json( | ||||
|             'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0] | ||||
|         show = self._call_api( | ||||
|             'shows/by_id/%s' % video_id, | ||||
|             video_id, 'Downloading show JSON')[0] | ||||
|  | ||||
|         upload_date = unified_strdate(show['indexed']) | ||||
|         upload_date = unified_strdate(show['online_date_start_utc']) | ||||
|         uploader = show['partner_name'] | ||||
|         uploader_id = show['partner_key'] | ||||
|         duration = show['duration_ms'] / 1000.0 | ||||
|         thumbnail = show['screenshot'] | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for thumbnail_key, thumbnail_url in show.items(): | ||||
|             m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key) | ||||
|             if not m: | ||||
|                 continue | ||||
|             thumbnails.append({ | ||||
|                 'url': thumbnail_url, | ||||
|                 'width': int(m.group('width')), | ||||
|                 'height': int(m.group('height')), | ||||
|             }) | ||||
|  | ||||
|         episode = show.get('show_TT') or show.get('show_OT') | ||||
|         family = show.get('family_TT') or show.get('family_OT') | ||||
| @@ -124,7 +157,7 @@ class NocoIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'thumbnails': thumbnails, | ||||
|             'upload_date': upload_date, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|   | ||||
| @@ -8,11 +8,11 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     compat_urllib_request, | ||||
|     urlencode_postdata, | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
|  | ||||
| _x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'}) | ||||
| _find = lambda el, p: el.find(_x(p)).text.strip() | ||||
|  | ||||
|  | ||||
| class NosVideoIE(InfoExtractor): | ||||
| @@ -53,9 +53,15 @@ class NosVideoIE(InfoExtractor): | ||||
|         playlist = self._download_xml(playlist_url, video_id) | ||||
|  | ||||
|         track = playlist.find(_x('.//xspf:track')) | ||||
|         title = _find(track, './xspf:title') | ||||
|         url = _find(track, './xspf:file') | ||||
|         thumbnail = _find(track, './xspf:image') | ||||
|         if track is None: | ||||
|             raise ExtractorError( | ||||
|                 'XML playlist is missing the \'track\' element', | ||||
|                 expected=True) | ||||
|         title = xpath_text(track, _x('./xspf:title'), 'title') | ||||
|         url = xpath_text(track, _x('./xspf:file'), 'URL', fatal=True) | ||||
|         thumbnail = xpath_text(track, _x('./xspf:image'), 'thumbnail') | ||||
|         if title is not None: | ||||
|             title = title.strip() | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'sd', | ||||
|   | ||||
| @@ -5,7 +5,9 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     parse_duration, | ||||
|     qualities, | ||||
|     url_basename, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -13,22 +15,50 @@ class NPOIE(InfoExtractor): | ||||
|     IE_NAME = 'npo.nl' | ||||
|     _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', | ||||
|         'md5': '4b3f9c429157ec4775f2c9cb7b911016', | ||||
|         'info_dict': { | ||||
|             'id': 'VPWON_1220719', | ||||
|             'ext': 'm4v', | ||||
|             'title': 'Nieuwsuur', | ||||
|             'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', | ||||
|             'upload_date': '20140622', | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', | ||||
|             'md5': '4b3f9c429157ec4775f2c9cb7b911016', | ||||
|             'info_dict': { | ||||
|                 'id': 'VPWON_1220719', | ||||
|                 'ext': 'm4v', | ||||
|                 'title': 'Nieuwsuur', | ||||
|                 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', | ||||
|                 'upload_date': '20140622', | ||||
|             }, | ||||
|         }, | ||||
|     } | ||||
|         { | ||||
|             'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800', | ||||
|             'md5': 'da50a5787dbfc1603c4ad80f31c5120b', | ||||
|             'info_dict': { | ||||
|                 'id': 'VARA_101191800', | ||||
|                 'ext': 'm4v', | ||||
|                 'title': 'De Mega Mike & Mega Thomas show', | ||||
|                 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', | ||||
|                 'upload_date': '20090227', | ||||
|                 'duration': 2400, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289', | ||||
|             'md5': 'f8065e4e5a7824068ed3c7e783178f2c', | ||||
|             'info_dict': { | ||||
|                 'id': 'VPWON_1169289', | ||||
|                 'ext': 'm4v', | ||||
|                 'title': 'Tegenlicht', | ||||
|                 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1', | ||||
|                 'upload_date': '20130225', | ||||
|                 'duration': 3000, | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         return self._get_info(video_id) | ||||
|  | ||||
|     def _get_info(self, video_id): | ||||
|         metadata = self._download_json( | ||||
|             'http://e.omroep.nl/metadata/aflevering/%s' % video_id, | ||||
|             video_id, | ||||
| @@ -43,19 +73,28 @@ class NPOIE(InfoExtractor): | ||||
|         token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token') | ||||
|  | ||||
|         formats = [] | ||||
|         quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std']) | ||||
|         quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) | ||||
|         for format_id in metadata['pubopties']: | ||||
|             streams_info = self._download_json( | ||||
|             format_info = self._download_json( | ||||
|                 'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token), | ||||
|                 video_id, 'Downloading %s streams info' % format_id) | ||||
|             stream_info = self._download_json( | ||||
|                 streams_info['streams'][0] + '&type=json', | ||||
|                 video_id, 'Downloading %s stream info' % format_id) | ||||
|                 video_id, 'Downloading %s JSON' % format_id) | ||||
|             if format_info.get('error_code', 0) or format_info.get('errorcode', 0): | ||||
|                 continue | ||||
|             streams = format_info.get('streams') | ||||
|             if streams: | ||||
|                 video_info = self._download_json( | ||||
|                     streams[0] + '&type=json', | ||||
|                     video_id, 'Downloading %s stream JSON' % format_id) | ||||
|             else: | ||||
|                 video_info = format_info | ||||
|             video_url = video_info.get('url') | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             if format_id == 'adaptive': | ||||
|                 formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id)) | ||||
|                 formats.extend(self._extract_m3u8_formats(video_url, video_id)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': stream_info['url'], | ||||
|                     'url': video_url, | ||||
|                     'format_id': format_id, | ||||
|                     'quality': quality(format_id), | ||||
|                 }) | ||||
| @@ -65,7 +104,35 @@ class NPOIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'title': metadata['titel'], | ||||
|             'description': metadata['info'], | ||||
|             'thumbnail': metadata['images'][-1]['url'], | ||||
|             'upload_date': unified_strdate(metadata['gidsdatum']), | ||||
|             'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], | ||||
|             'upload_date': unified_strdate(metadata.get('gidsdatum')), | ||||
|             'duration': parse_duration(metadata.get('tijdsduur')), | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
class TegenlichtVproIE(NPOIE):
    """Extractor for tegenlicht.vpro.nl episode pages.

    Resolves the page's media URN to an NPO media id and delegates the
    actual extraction to the NPO extractor.
    """
    IE_NAME = 'tegenlicht.vpro.nl'
    _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'

    _TESTS = [
        {
            'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
            'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
            'info_dict': {
                'id': 'VPWON_1169289',
                'ext': 'm4v',
                'title': 'Tegenlicht',
                'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
                'upload_date': '20130225',
            },
        },
    ]

    def _real_extract(self, url):
        # The basename only serves as a display/log identifier.
        name = url_basename(url)
        page = self._download_webpage(url, name)
        # The 'mediaurn' meta tag ties the vpro page to the NPO backend.
        media_urn = self._html_search_meta('mediaurn', page)
        media_info = self._download_json(
            'http://rs.vpro.nl/v2/api/media/%s.json' % media_urn, name)
        return self._get_info(media_info['mid'])
|   | ||||
| @@ -10,6 +10,7 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -29,6 +30,7 @@ class PlayFMIE(InfoExtractor): | ||||
|             'duration': 5627.428, | ||||
|             'upload_date': '20140712', | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     } | ||||
| @@ -51,7 +53,8 @@ class PlayFMIE(InfoExtractor): | ||||
|  | ||||
|         recording = rec_doc.find('./recording') | ||||
|         title = recording.find('./title').text | ||||
|         view_count = int_or_none(recording.find('./stats/playcount').text) | ||||
|         view_count = str_to_int(recording.find('./stats/playcount').text) | ||||
|         comment_count = str_to_int(recording.find('./stats/comments').text) | ||||
|         duration = float_or_none(recording.find('./duration').text, scale=1000) | ||||
|         thumbnail = recording.find('./image').text | ||||
|  | ||||
| @@ -75,6 +78,7 @@ class PlayFMIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'upload_date': upload_date, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|             'duration': duration, | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
|   | ||||
| @@ -12,7 +12,7 @@ from ..utils import ( | ||||
|  | ||||
| class SBSIE(InfoExtractor): | ||||
|     IE_DESC = 'sbs.com.au' | ||||
|     _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/' | ||||
|     _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/(?:single/)?(?P<id>[0-9]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         # Original URL is handled by the generic IE which finds the iframe: | ||||
| @@ -21,12 +21,16 @@ class SBSIE(InfoExtractor): | ||||
|         'md5': '3150cf278965eeabb5b4cea1c963fe0a', | ||||
|         'info_dict': { | ||||
|             'id': '320403011771', | ||||
|             'ext': 'flv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Dingo Conservation', | ||||
|             'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         }, | ||||
|         'add_ies': ['generic'], | ||||
|     }, | ||||
|     { | ||||
|         'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -31,7 +31,8 @@ class SoundcloudIE(InfoExtractor): | ||||
|                             (?!sets/|likes/?(?:$|[?#])) | ||||
|                             (?P<title>[\w\d-]+)/? | ||||
|                             (?P<token>[^?]+?)?(?:[?].*)?$) | ||||
|                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) | ||||
|                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) | ||||
|                           (?:/?\?secret_token=(?P<secret_token>[^&]+?))?$) | ||||
|                        |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) | ||||
|                     ) | ||||
|                     ''' | ||||
| @@ -80,6 +81,20 @@ class SoundcloudIE(InfoExtractor): | ||||
|                 'duration': 9, | ||||
|             }, | ||||
|         }, | ||||
|         # private link (alt format) | ||||
|         { | ||||
|             'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp', | ||||
|             'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', | ||||
|             'info_dict': { | ||||
|                 'id': '123998367', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Youtube - Dl Test Video \'\' Ä↭', | ||||
|                 'uploader': 'jaimeMF', | ||||
|                 'description': 'test chars:  \"\'/\\ä↭', | ||||
|                 'upload_date': '20131209', | ||||
|                 'duration': 9, | ||||
|             }, | ||||
|         }, | ||||
|         # downloadable song | ||||
|         { | ||||
|             'url': 'https://soundcloud.com/oddsamples/bus-brakes', | ||||
| @@ -197,6 +212,9 @@ class SoundcloudIE(InfoExtractor): | ||||
|         if track_id is not None: | ||||
|             info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID | ||||
|             full_title = track_id | ||||
|             token = mobj.group('secret_token') | ||||
|             if token: | ||||
|                 info_json_url += "&secret_token=" + token | ||||
|         elif mobj.group('player'): | ||||
|             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) | ||||
|             return self.url_result(query['url'][0]) | ||||
| @@ -220,7 +238,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class SoundcloudSetIE(SoundcloudIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' | ||||
|     IE_NAME = 'soundcloud:set' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep', | ||||
| @@ -234,14 +252,19 @@ class SoundcloudSetIE(SoundcloudIE): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         # extract uploader (which is in the url) | ||||
|         uploader = mobj.group(1) | ||||
|         uploader = mobj.group('uploader') | ||||
|         # extract simple title (uploader + slug of song title) | ||||
|         slug_title = mobj.group(2) | ||||
|         slug_title = mobj.group('slug_title') | ||||
|         full_title = '%s/sets/%s' % (uploader, slug_title) | ||||
|         url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) | ||||
|  | ||||
|         token = mobj.group('token') | ||||
|         if token: | ||||
|             full_title += '/' + token | ||||
|             url += '/' + token | ||||
|  | ||||
|         self.report_resolve(full_title) | ||||
|  | ||||
|         url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) | ||||
|         resolv_url = self._resolv_url(url) | ||||
|         info = self._download_json(resolv_url, full_title) | ||||
|  | ||||
| @@ -252,7 +275,7 @@ class SoundcloudSetIE(SoundcloudIE): | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'entries': [self._extract_info_dict(track) for track in info['tracks']], | ||||
|             'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']], | ||||
|             'id': info['id'], | ||||
|             'title': info['title'], | ||||
|         } | ||||
| @@ -315,34 +338,38 @@ class SoundcloudUserIE(SoundcloudIE): | ||||
|  | ||||
|  | ||||
| class SoundcloudPlaylistIE(SoundcloudIE): | ||||
|     _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' | ||||
|     IE_NAME = 'soundcloud:playlist' | ||||
|     _TESTS = [ | ||||
|  | ||||
|         { | ||||
|             'url': 'http://api.soundcloud.com/playlists/4110309', | ||||
|             'info_dict': { | ||||
|                 'id': '4110309', | ||||
|                 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', | ||||
|                 'description': 're:.*?TILT Brass - Bowery Poetry Club', | ||||
|             }, | ||||
|             'playlist_count': 6, | ||||
|         } | ||||
|     ] | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://api.soundcloud.com/playlists/4110309', | ||||
|         'info_dict': { | ||||
|             'id': '4110309', | ||||
|             'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', | ||||
|             'description': 're:.*?TILT Brass - Bowery Poetry Club', | ||||
|         }, | ||||
|         'playlist_count': 6, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('id') | ||||
|         base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id) | ||||
|  | ||||
|         data = compat_urllib_parse.urlencode({ | ||||
|         data_dict = { | ||||
|             'client_id': self._CLIENT_ID, | ||||
|         }) | ||||
|         } | ||||
|         token = mobj.group('token') | ||||
|  | ||||
|         if token: | ||||
|             data_dict['secret_token'] = token | ||||
|  | ||||
|         data = compat_urllib_parse.urlencode(data_dict) | ||||
|         data = self._download_json( | ||||
|             base_url + data, playlist_id, 'Downloading playlist') | ||||
|  | ||||
|         entries = [ | ||||
|             self._extract_info_dict(t, quiet=True) for t in data['tracks']] | ||||
|             self._extract_info_dict(t, quiet=True, secret_token=token) | ||||
|                 for t in data['tracks']] | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     ExtractorError, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
| @@ -55,36 +56,44 @@ class ThePlatformIE(InfoExtractor): | ||||
|         body = meta.find(_x('smil:body')) | ||||
|  | ||||
|         f4m_node = body.find(_x('smil:seq//smil:video')) | ||||
|         if f4m_node is not None: | ||||
|         if f4m_node is not None and '.f4m' in f4m_node.attrib['src']: | ||||
|             f4m_url = f4m_node.attrib['src'] | ||||
|             if 'manifest.f4m?' not in f4m_url: | ||||
|                 f4m_url += '?' | ||||
|             # the parameters are from syfy.com, other sites may use others, | ||||
|             # they also work for nbc.com | ||||
|             f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' | ||||
|             formats = [{ | ||||
|                 'ext': 'flv', | ||||
|                 'url': f4m_url, | ||||
|             }] | ||||
|             formats = self._extract_f4m_formats(f4m_url, video_id) | ||||
|         else: | ||||
|             base_url = head.find(_x('smil:meta')).attrib['base'] | ||||
|             switch = body.find(_x('smil:switch')) | ||||
|             formats = [] | ||||
|             for f in switch.findall(_x('smil:video')): | ||||
|                 attr = f.attrib | ||||
|                 width = int(attr['width']) | ||||
|                 height = int(attr['height']) | ||||
|                 vbr = int(attr['system-bitrate']) // 1000 | ||||
|                 format_id = '%dx%d_%dk' % (width, height, vbr) | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'url': base_url, | ||||
|                     'play_path': 'mp4:' + attr['src'], | ||||
|                     'ext': 'flv', | ||||
|                     'width': width, | ||||
|                     'height': height, | ||||
|                     'vbr': vbr, | ||||
|                 }) | ||||
|             switch = body.find(_x('smil:switch')) | ||||
|             if switch is not None: | ||||
|                 base_url = head.find(_x('smil:meta')).attrib['base'] | ||||
|                 for f in switch.findall(_x('smil:video')): | ||||
|                     attr = f.attrib | ||||
|                     width = int(attr['width']) | ||||
|                     height = int(attr['height']) | ||||
|                     vbr = int(attr['system-bitrate']) // 1000 | ||||
|                     format_id = '%dx%d_%dk' % (width, height, vbr) | ||||
|                     formats.append({ | ||||
|                         'format_id': format_id, | ||||
|                         'url': base_url, | ||||
|                         'play_path': 'mp4:' + attr['src'], | ||||
|                         'ext': 'flv', | ||||
|                         'width': width, | ||||
|                         'height': height, | ||||
|                         'vbr': vbr, | ||||
|                     }) | ||||
|             else: | ||||
|                 switch = body.find(_x('smil:seq//smil:switch')) | ||||
|                 for f in switch.findall(_x('smil:video')): | ||||
|                     attr = f.attrib | ||||
|                     vbr = int(attr['system-bitrate']) // 1000 | ||||
|                     formats.append({ | ||||
|                         'format_id': compat_str(vbr), | ||||
|                         'url': attr['src'], | ||||
|                         'vbr': vbr, | ||||
|                     }) | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
							
								
								
									
										59
									
								
								youtube_dl/extractor/thvideo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								youtube_dl/extractor/thvideo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate | ||||
| ) | ||||
|  | ||||
|  | ||||
class THVideoIE(InfoExtractor):
    """Extractor for thvideo.tv (desktop /v/thNNNN and mobile.php?cid=NNNN URLs)."""
    _VALID_URL = r'http://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://thvideo.tv/v/th1987/',
        'md5': 'fa107b1f73817e325e9433505a70db50',
        'info_dict': {
            'id': '1987',
            'ext': 'mp4',
            'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览',
            'display_id': 'th1987',
            'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg',
            'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...',
            'upload_date': '20140722'
        }
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')
        display_id = 'th%s' % video_id

        # The mobile player page carries the direct media URL in a
        # <source> tag, so fetch it first.
        player_page = self._download_webpage(
            'http://thvideo.tv/mobile.php?cid=%s-0' % video_id,
            video_id, note='Downloading video source page')
        video_url = self._html_search_regex(
            r'<source src="(.*?)" type', player_page, 'video url')

        # All remaining metadata comes from the regular desktop page.
        main_page = self._download_webpage(
            'http://thvideo.tv/v/th%s' % video_id, video_id)
        upload_date = unified_strdate(self._html_search_regex(
            r'span itemprop="datePublished" content="(.*?)">', main_page,
            'upload date', fatal=False))

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': self._og_search_title(main_page),
            'display_id': display_id,
            'thumbnail': self._og_search_thumbnail(main_page),
            'description': self._og_search_description(main_page),
            'upload_date': upload_date
        }
| @@ -14,27 +14,35 @@ from ..aes import aes_decrypt_text | ||||
|  | ||||
|  | ||||
| class Tube8IE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', | ||||
|         'md5': '44bf12b98313827dd52d35b8706a4ea0', | ||||
|         'info_dict': { | ||||
|             'id': '229795', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'hot teen Kasia grinding', | ||||
|             'uploader': 'unknown', | ||||
|             'title': 'Kasia music video', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|     _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', | ||||
|             'md5': '44bf12b98313827dd52d35b8706a4ea0', | ||||
|             'info_dict': { | ||||
|                 'id': '229795', | ||||
|                 'display_id': 'kasia-music-video', | ||||
|                 'ext': 'mp4', | ||||
|                 'description': 'hot teen Kasia grinding', | ||||
|                 'uploader': 'unknown', | ||||
|                 'title': 'Kasia music video', | ||||
|                 'age_limit': 18, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') | ||||
|  | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|         webpage = self._download_webpage(req, display_id) | ||||
|  | ||||
|         flashvars = json.loads(self._html_search_regex( | ||||
|             r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars')) | ||||
| @@ -70,6 +78,7 @@ class Tube8IE(InfoExtractor): | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|   | ||||
							
								
								
									
										67
									
								
								youtube_dl/extractor/turbo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								youtube_dl/extractor/turbo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,67 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TurboIE(InfoExtractor):
    """Extractor for turbo.fr car videos, backed by the site's XML playlist API."""
    _VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P<id>[0-9]+)-'
    _API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}'
    _TEST = {
        'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
        'md5': '33f4b91099b36b5d5a91f84b5bcba600',
        'info_dict': {
            'id': '454443',
            'ext': 'mp4',
            'duration': 3715,
            'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
            'description': 'Retrouvez dans cette rubrique toutes les vidéos de l\'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)

        playlist = self._download_xml(self._API_URL.format(video_id), video_id)
        item = playlist.find('./channel/item')
        if item is None:
            raise ExtractorError('Playlist item was not found', expected=True)

        title = xpath_text(item, './title', 'title')
        # NOTE: 'durate' (sic) is the tag name the API actually uses.
        duration = int_or_none(xpath_text(item, './durate', 'duration'))
        thumbnail = xpath_text(item, './visuel_clip', 'thumbnail')
        description = self._og_search_description(webpage)

        # Each quality variant is published as a url_video_<quality> tag.
        quality_key = qualities(['3g', 'sd', 'hq'])
        formats = []
        for child in item:
            tag_match = re.search(r'url_video_(?P<quality>.+)', child.tag)
            if not tag_match:
                continue
            quality_id = tag_match.group('quality')
            formats.append({
                'format_id': quality_id,
                'url': child.text,
                'quality': quality_key(quality_id),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'thumbnail': thumbnail,
            'description': description,
            'formats': formats,
        }
| @@ -6,6 +6,7 @@ import xml.etree.ElementTree | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_HTTPError, | ||||
|     compat_urllib_request, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| @@ -69,6 +70,21 @@ class VevoIE(InfoExtractor): | ||||
|     }] | ||||
|     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         req = compat_urllib_request.Request( | ||||
|             'http://www.vevo.com/auth', data=b'') | ||||
|         webpage = self._download_webpage( | ||||
|             req, None, | ||||
|             note='Retrieving oauth token', | ||||
|             errnote='Unable to retrieve oauth token', | ||||
|             fatal=False) | ||||
|         if webpage is False: | ||||
|             self._oauth_token = None | ||||
|         else: | ||||
|             self._oauth_token = self._search_regex( | ||||
|                 r'access_token":\s*"([^"]+)"', | ||||
|                 webpage, 'access token', fatal=False) | ||||
|  | ||||
|     def _formats_from_json(self, video_info): | ||||
|         last_version = {'version': -1} | ||||
|         for version in video_info['videoVersions']: | ||||
| @@ -129,6 +145,26 @@ class VevoIE(InfoExtractor): | ||||
|             }) | ||||
|         return formats | ||||
|  | ||||
|     def _download_api_formats(self, video_id): | ||||
|         if not self._oauth_token: | ||||
|             self._downloader.report_warning( | ||||
|                 'No oauth token available, skipping API HLS download') | ||||
|             return [] | ||||
|  | ||||
|         api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % ( | ||||
|             video_id, self._oauth_token) | ||||
|         api_data = self._download_json( | ||||
|             api_url, video_id, | ||||
|             note='Downloading HLS formats', | ||||
|             errnote='Failed to download HLS format list', fatal=False) | ||||
|         if api_data is None: | ||||
|             return [] | ||||
|  | ||||
|         m3u8_url = api_data[0]['url'] | ||||
|         return self._extract_m3u8_formats( | ||||
|             m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4', | ||||
|             preference=0) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
| @@ -152,30 +188,8 @@ class VevoIE(InfoExtractor): | ||||
|         else: | ||||
|             age_limit = None | ||||
|  | ||||
|         # Download SMIL | ||||
|         smil_blocks = sorted(( | ||||
|             f for f in video_info['videoVersions'] | ||||
|             if f['sourceType'] == 13), | ||||
|             key=lambda f: f['version']) | ||||
|  | ||||
|         smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( | ||||
|             self._SMIL_BASE_URL, video_id, video_id.lower()) | ||||
|         if smil_blocks: | ||||
|             smil_url_m = self._search_regex( | ||||
|                 r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL', | ||||
|                 fatal=False) | ||||
|             if smil_url_m is not None: | ||||
|                 smil_url = smil_url_m | ||||
|  | ||||
|         try: | ||||
|             smil_xml = self._download_webpage(smil_url, video_id, | ||||
|                                               'Downloading SMIL info') | ||||
|             formats.extend(self._formats_from_smil(smil_xml)) | ||||
|         except ExtractorError as ee: | ||||
|             if not isinstance(ee.cause, compat_HTTPError): | ||||
|                 raise | ||||
|             self._downloader.report_warning( | ||||
|                 'Cannot download SMIL information, falling back to JSON ..') | ||||
|         # Download via HLS API | ||||
|         formats.extend(self._download_api_formats(video_id)) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|         timestamp_ms = int(self._search_regex( | ||||
|   | ||||
							
								
								
									
										57
									
								
								youtube_dl/extractor/videomega.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								youtube_dl/extractor/videomega.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     remove_start, | ||||
| ) | ||||
|  | ||||
|  | ||||
class VideoMegaIE(InfoExtractor):
    """Extractor for videomega.tv; the player config is URL-escaped in the page."""
    _VALID_URL = r'''(?x)https?://
        (?:www\.)?videomega\.tv/
        (?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
        '''
    _TEST = {
        'url': 'http://videomega.tv/?ref=GKeGPVedBe',
        'md5': '240fb5bcf9199961f48eb17839b084d6',
        'info_dict': {
            'id': 'GKeGPVedBe',
            'ext': 'mp4',
            'title': 'XXL - All Sports United',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')

        # Always go through the iframe player page, whichever URL form
        # the user supplied.
        iframe_url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
        webpage = self._download_webpage(iframe_url, video_id)

        # The player configuration is embedded as unescape("...").
        escaped_data = self._search_regex(
            r'unescape\("([^"]+)"\)', webpage, 'escaped data')
        playlist = compat_urllib_parse.unquote(escaped_data)

        video_url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL')
        thumbnail = self._search_regex(
            r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False)
        title = remove_start(self._html_search_regex(
            r'<title>(.*?)</title>', webpage, 'title'), 'VideoMega.tv - ')

        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }
| @@ -5,7 +5,10 @@ import re | ||||
| import hashlib | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class WatIE(InfoExtractor): | ||||
| @@ -57,6 +60,11 @@ class WatIE(InfoExtractor): | ||||
|  | ||||
|         video_info = self.download_video_info(real_id) | ||||
|  | ||||
|         error_desc = video_info.get('error_desc') | ||||
|         if error_desc: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True) | ||||
|  | ||||
|         geo_list = video_info.get('geoList') | ||||
|         country = geo_list[0] if geo_list else '' | ||||
|  | ||||
|   | ||||
							
								
								
									
										58
									
								
								youtube_dl/extractor/yourupload.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								youtube_dl/extractor/yourupload.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class YourUploadIE(InfoExtractor):
    """Extractor for yourupload.com and its yucache.net embed mirror."""
    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:yourupload\.com/watch|
           embed\.yourupload\.com|
           embed\.yucache\.net
        )/(?P<id>[A-Za-z0-9]+)
        '''
    _TESTS = [
        {
            'url': 'http://yourupload.com/watch/14i14h',
            'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
            'info_dict': {
                'id': '14i14h',
                'ext': 'mp4',
                'title': 'BigBuckBunny_320x180.mp4',
                'thumbnail': 're:^https?://.*\.jpe?g',
            }
        },
        {
            'url': 'http://embed.yourupload.com/14i14h',
            'only_matching': True,
        },
        {
            'url': 'http://embed.yucache.net/14i14h?client_file_id=803349',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')

        # Normalize every URL form to the yucache embed page, which holds
        # the OpenGraph metadata we need.
        embed_url = 'http://embed.yucache.net/{0:}'.format(video_id)
        webpage = self._download_webpage(embed_url, video_id)

        video_url = self._og_search_video_url(webpage)
        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
        }
| @@ -46,7 +46,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|     def _set_language(self): | ||||
|         return bool(self._download_webpage( | ||||
|             self._LANG_URL, None, | ||||
|             note=u'Setting language', errnote='unable to set language', | ||||
|             note='Setting language', errnote='unable to set language', | ||||
|             fatal=False)) | ||||
|  | ||||
|     def _login(self): | ||||
| @@ -61,13 +61,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|         # No authentication to be performed | ||||
|         if username is None: | ||||
|             if self._LOGIN_REQUIRED: | ||||
|                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) | ||||
|                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) | ||||
|             return True | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             self._LOGIN_URL, None, | ||||
|             note=u'Downloading login page', | ||||
|             errnote=u'unable to fetch login page', fatal=False) | ||||
|             note='Downloading login page', | ||||
|             errnote='unable to fetch login page', fatal=False) | ||||
|         if login_page is False: | ||||
|             return | ||||
|  | ||||
| @@ -105,12 +105,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|         req = compat_urllib_request.Request(self._LOGIN_URL, login_data) | ||||
|         login_results = self._download_webpage( | ||||
|             req, None, | ||||
|             note=u'Logging in', errnote=u'unable to log in', fatal=False) | ||||
|             note='Logging in', errnote='unable to log in', fatal=False) | ||||
|         if login_results is False: | ||||
|             return False | ||||
|  | ||||
|         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None: | ||||
|             raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True) | ||||
|             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True) | ||||
|  | ||||
|         # Two-Factor | ||||
|         # TODO add SMS and phone call support - these require making a request and then prompting the user | ||||
| @@ -119,19 +119,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|             tfa_code = self._get_tfa_info() | ||||
|  | ||||
|             if tfa_code is None: | ||||
|                 self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>') | ||||
|                 self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)') | ||||
|                 self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>') | ||||
|                 self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)') | ||||
|                 return False | ||||
|  | ||||
|             # Unlike the first login form, secTok and timeStmp are both required for the TFA form | ||||
|  | ||||
|             match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U) | ||||
|             if match is None: | ||||
|                 self._downloader.report_warning(u'Failed to get secTok - did the page structure change?') | ||||
|                 self._downloader.report_warning('Failed to get secTok - did the page structure change?') | ||||
|             secTok = match.group(1) | ||||
|             match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U) | ||||
|             if match is None: | ||||
|                 self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?') | ||||
|                 self._downloader.report_warning('Failed to get timeStmp - did the page structure change?') | ||||
|             timeStmp = match.group(1) | ||||
|  | ||||
|             tfa_form_strs = { | ||||
| @@ -155,23 +155,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|             tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data) | ||||
|             tfa_results = self._download_webpage( | ||||
|                 tfa_req, None, | ||||
|                 note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False) | ||||
|                 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False) | ||||
|  | ||||
|             if tfa_results is False: | ||||
|                 return False | ||||
|  | ||||
|             if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None: | ||||
|                 self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.') | ||||
|                 self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.') | ||||
|                 return False | ||||
|             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None: | ||||
|                 self._downloader.report_warning(u'unable to log in - did the page structure change?') | ||||
|                 self._downloader.report_warning('unable to log in - did the page structure change?') | ||||
|                 return False | ||||
|             if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None: | ||||
|                 self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.') | ||||
|                 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.') | ||||
|                 return False | ||||
|  | ||||
|         if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: | ||||
|             self._downloader.report_warning(u'unable to log in: bad username or password') | ||||
|             self._downloader.report_warning('unable to log in: bad username or password') | ||||
|             return False | ||||
|         return True | ||||
|  | ||||
| @@ -185,7 +185,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|  | ||||
|         self._download_webpage( | ||||
|             req, None, | ||||
|             note=u'Confirming age', errnote=u'Unable to confirm age') | ||||
|             note='Confirming age', errnote='Unable to confirm age') | ||||
|         return True | ||||
|  | ||||
|     def _real_initialize(self): | ||||
| @@ -211,7 +211,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains | ||||
|                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls | ||||
|                          (?:                                                  # the various things that can precede the ID: | ||||
|                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ | ||||
|                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/ | ||||
|                              |(?:                                             # or the v= param in all its forms | ||||
|                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx) | ||||
|                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #! | ||||
| @@ -307,69 +307,74 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     IE_NAME = 'youtube' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc", | ||||
|             u"file":  u"BaW_jenozKc.mp4", | ||||
|             u"info_dict": { | ||||
|                 u"title": u"youtube-dl test video \"'/\\ä↭𝕐", | ||||
|                 u"uploader": u"Philipp Hagemeister", | ||||
|                 u"uploader_id": u"phihag", | ||||
|                 u"upload_date": u"20121002", | ||||
|                 u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .", | ||||
|                 u"categories": [u'Science & Technology'], | ||||
|             'url': 'http://www.youtube.com/watch?v=BaW_jenozKc', | ||||
|             'info_dict': { | ||||
|                 'id': 'BaW_jenozKc', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'youtube-dl test video "\'/\\ä↭𝕐', | ||||
|                 'uploader': 'Philipp Hagemeister', | ||||
|                 'uploader_id': 'phihag', | ||||
|                 'upload_date': '20121002', | ||||
|                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', | ||||
|                 'categories': ['Science & Technology'], | ||||
|                 'like_count': int, | ||||
|                 'dislike_count': int, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY", | ||||
|             u"file":  u"UxxajLWwzqY.mp4", | ||||
|             u"note": u"Test generic use_cipher_signature video (#897)", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": u"20120506", | ||||
|                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", | ||||
|                 u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f", | ||||
|                 u"uploader": u"Icona Pop", | ||||
|                 u"uploader_id": u"IconaPop" | ||||
|             'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY', | ||||
|             'note': 'Test generic use_cipher_signature video (#897)', | ||||
|             'info_dict': { | ||||
|                 'id': 'UxxajLWwzqY', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20120506', | ||||
|                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', | ||||
|                 'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f', | ||||
|                 'uploader': 'Icona Pop', | ||||
|                 'uploader_id': 'IconaPop', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ", | ||||
|             u"file":  u"07FYdnEawAQ.mp4", | ||||
|             u"note": u"Test VEVO video with age protection (#956)", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": u"20130703", | ||||
|                 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)", | ||||
|                 u"description": u"md5:64249768eec3bc4276236606ea996373", | ||||
|                 u"uploader": u"justintimberlakeVEVO", | ||||
|                 u"uploader_id": u"justintimberlakeVEVO" | ||||
|             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ', | ||||
|             'note': 'Test VEVO video with age protection (#956)', | ||||
|             'info_dict': { | ||||
|                 'id': '07FYdnEawAQ', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20130703', | ||||
|                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', | ||||
|                 'description': 'md5:64249768eec3bc4276236606ea996373', | ||||
|                 'uploader': 'justintimberlakeVEVO', | ||||
|                 'uploader_id': 'justintimberlakeVEVO', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ", | ||||
|             u"file":  u"yZIXLfi8CZQ.mp4", | ||||
|             u"note": u"Embed-only video (#1746)", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": u"20120608", | ||||
|                 u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012", | ||||
|                 u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7", | ||||
|                 u"uploader": u"SET India", | ||||
|                 u"uploader_id": u"setindia" | ||||
|             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', | ||||
|             'note': 'Embed-only video (#1746)', | ||||
|             'info_dict': { | ||||
|                 'id': 'yZIXLfi8CZQ', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20120608', | ||||
|                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', | ||||
|                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', | ||||
|                 'uploader': 'SET India', | ||||
|                 'uploader_id': 'setindia' | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I", | ||||
|             u"file": u"a9LDPn-MO4I.m4a", | ||||
|             u"note": u"256k DASH audio (format 141) via DASH manifest", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": "20121002", | ||||
|                 u"uploader_id": "8KVIDEO", | ||||
|                 u"description": '', | ||||
|                 u"uploader": "8KVIDEO", | ||||
|                 u"title": "UHDTV TEST 8K VIDEO.mp4" | ||||
|             'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', | ||||
|             'note': '256k DASH audio (format 141) via DASH manifest', | ||||
|             'info_dict': { | ||||
|                 'id': 'a9LDPn-MO4I', | ||||
|                 'ext': 'm4a', | ||||
|                 'upload_date': '20121002', | ||||
|                 'uploader_id': '8KVIDEO', | ||||
|                 'description': '', | ||||
|                 'uploader': '8KVIDEO', | ||||
|                 'title': 'UHDTV TEST 8K VIDEO.mp4' | ||||
|             }, | ||||
|             u"params": { | ||||
|                 u"youtube_include_dash_manifest": True, | ||||
|                 u"format": "141", | ||||
|             'params': { | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '141', | ||||
|             }, | ||||
|         }, | ||||
|         # DASH manifest with encrypted signature | ||||
| @@ -384,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 'uploader_id': 'AfrojackVEVO', | ||||
|                 'upload_date': '20131011', | ||||
|             }, | ||||
|             u"params": { | ||||
|             'params': { | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '141', | ||||
|             }, | ||||
| @@ -397,19 +402,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|     def report_video_info_webpage_download(self, video_id): | ||||
|         """Report attempt to download video info webpage.""" | ||||
|         self.to_screen(u'%s: Downloading video info webpage' % video_id) | ||||
|         self.to_screen('%s: Downloading video info webpage' % video_id) | ||||
|  | ||||
|     def report_information_extraction(self, video_id): | ||||
|         """Report attempt to extract video information.""" | ||||
|         self.to_screen(u'%s: Extracting video information' % video_id) | ||||
|         self.to_screen('%s: Extracting video information' % video_id) | ||||
|  | ||||
|     def report_unavailable_format(self, video_id, format): | ||||
|         """Report extracted video URL.""" | ||||
|         self.to_screen(u'%s: Format %s not available' % (video_id, format)) | ||||
|         self.to_screen('%s: Format %s not available' % (video_id, format)) | ||||
|  | ||||
|     def report_rtmp_download(self): | ||||
|         """Indicate the download will use the RTMP protocol.""" | ||||
|         self.to_screen(u'RTMP download detected') | ||||
|         self.to_screen('RTMP download detected') | ||||
|  | ||||
|     def _signature_cache_id(self, example_sig): | ||||
|         """ Return a string representation of a signature """ | ||||
| @@ -429,21 +434,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             player_type, player_id, self._signature_cache_id(example_sig)) | ||||
|         assert os.path.basename(func_id) == func_id | ||||
|  | ||||
|         cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id) | ||||
|         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id) | ||||
|         if cache_spec is not None: | ||||
|             return lambda s: ''.join(s[i] for i in cache_spec) | ||||
|  | ||||
|         if player_type == 'js': | ||||
|             code = self._download_webpage( | ||||
|                 player_url, video_id, | ||||
|                 note=u'Downloading %s player %s' % (player_type, player_id), | ||||
|                 errnote=u'Download of %s failed' % player_url) | ||||
|                 note='Downloading %s player %s' % (player_type, player_id), | ||||
|                 errnote='Download of %s failed' % player_url) | ||||
|             res = self._parse_sig_js(code) | ||||
|         elif player_type == 'swf': | ||||
|             urlh = self._request_webpage( | ||||
|                 player_url, video_id, | ||||
|                 note=u'Downloading %s player %s' % (player_type, player_id), | ||||
|                 errnote=u'Download of %s failed' % player_url) | ||||
|                 note='Downloading %s player %s' % (player_type, player_id), | ||||
|                 errnote='Download of %s failed' % player_url) | ||||
|             code = urlh.read() | ||||
|             res = self._parse_sig_swf(code) | ||||
|         else: | ||||
| @@ -454,15 +459,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             cache_res = res(test_string) | ||||
|             cache_spec = [ord(c) for c in cache_res] | ||||
|  | ||||
|         self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec) | ||||
|         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec) | ||||
|         return res | ||||
|  | ||||
|     def _print_sig_code(self, func, example_sig): | ||||
|         def gen_sig_code(idxs): | ||||
|             def _genslice(start, end, step): | ||||
|                 starts = '' if start == 0 else str(start) | ||||
|                 ends = (u':%d' % (end+step)) if end + step >= 0 else ':' | ||||
|                 steps = '' if step == 1 else (u':%d' % step) | ||||
|                 ends = (':%d' % (end+step)) if end + step >= 0 else ':' | ||||
|                 steps = '' if step == 1 else (':%d' % step) | ||||
|                 return 's[%s%s%s]' % (starts, ends, steps) | ||||
|  | ||||
|             step = None | ||||
| @@ -492,9 +497,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         expr_code = ' + '.join(gen_sig_code(cache_spec)) | ||||
|         signature_id_tuple = '(%s)' % ( | ||||
|             ', '.join(compat_str(len(p)) for p in example_sig.split('.'))) | ||||
|         code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n' | ||||
|         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n' | ||||
|                 '    return %s\n') % (signature_id_tuple, expr_code) | ||||
|         self.to_screen(u'Extracted signature function:\n' + code) | ||||
|         self.to_screen('Extracted signature function:\n' + code) | ||||
|  | ||||
|     def _parse_sig_js(self, jscode): | ||||
|         funcname = self._search_regex( | ||||
| @@ -516,9 +521,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         """Turn the encrypted s field into a working signature""" | ||||
|  | ||||
|         if player_url is None: | ||||
|             raise ExtractorError(u'Cannot decrypt signature without player_url') | ||||
|             raise ExtractorError('Cannot decrypt signature without player_url') | ||||
|  | ||||
|         if player_url.startswith(u'//'): | ||||
|         if player_url.startswith('//'): | ||||
|             player_url = 'https:' + player_url | ||||
|         try: | ||||
|             player_id = (player_url, self._signature_cache_id(s)) | ||||
| @@ -542,7 +547,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, | ||||
|                 video_id, note=False) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) | ||||
|             self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err)) | ||||
|             return {} | ||||
|         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) | ||||
|  | ||||
| @@ -560,7 +565,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             url = 'https://www.youtube.com/api/timedtext?' + params | ||||
|             sub_lang_list[lang] = url | ||||
|         if not sub_lang_list: | ||||
|             self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||
|             self._downloader.report_warning('video doesn\'t have subtitles') | ||||
|             return {} | ||||
|         return sub_lang_list | ||||
|  | ||||
| @@ -568,7 +573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         """We need the webpage for getting the captions url, pass it as an | ||||
|            argument to speed up the process.""" | ||||
|         sub_format = self._downloader.params.get('subtitlesformat', 'srt') | ||||
|         self.to_screen(u'%s: Looking for automatic captions' % video_id) | ||||
|         self.to_screen('%s: Looking for automatic captions' % video_id) | ||||
|         mobj = re.search(r';ytplayer.config = ({.*?});', webpage) | ||||
|         err_msg = 'Couldn\'t find automatic captions for %s' % video_id | ||||
|         if mobj is None: | ||||
| @@ -589,7 +594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             caption_list = self._download_xml(list_url, video_id) | ||||
|             original_lang_node = caption_list.find('track') | ||||
|             if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : | ||||
|                 self._downloader.report_warning(u'Video doesn\'t have automatic captions') | ||||
|                 self._downloader.report_warning('Video doesn\'t have automatic captions') | ||||
|                 return {} | ||||
|             original_lang = original_lang_node.attrib['lang_code'] | ||||
|  | ||||
| @@ -615,7 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     def extract_id(cls, url): | ||||
|         mobj = re.match(cls._VALID_URL, url, re.VERBOSE) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|         video_id = mobj.group(2) | ||||
|         return video_id | ||||
|  | ||||
| @@ -635,7 +640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|     def _extract_annotations(self, video_id): | ||||
|         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id | ||||
|         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.') | ||||
|         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         proto = ( | ||||
| @@ -705,14 +710,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         # Check for "rental" videos | ||||
|         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: | ||||
|             raise ExtractorError(u'"rental" videos not supported') | ||||
|             raise ExtractorError('"rental" videos not supported') | ||||
|  | ||||
|         # Start extracting information | ||||
|         self.report_information_extraction(video_id) | ||||
|  | ||||
|         # uploader | ||||
|         if 'author' not in video_info: | ||||
|             raise ExtractorError(u'Unable to extract uploader name') | ||||
|             raise ExtractorError('Unable to extract uploader name') | ||||
|         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0]) | ||||
|  | ||||
|         # uploader_id | ||||
| @@ -721,13 +726,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         if mobj is not None: | ||||
|             video_uploader_id = mobj.group(1) | ||||
|         else: | ||||
|             self._downloader.report_warning(u'unable to extract uploader nickname') | ||||
|             self._downloader.report_warning('unable to extract uploader nickname') | ||||
|  | ||||
|         # title | ||||
|         if 'title' in video_info: | ||||
|             video_title = video_info['title'][0] | ||||
|         else: | ||||
|             self._downloader.report_warning(u'Unable to extract video title') | ||||
|             self._downloader.report_warning('Unable to extract video title') | ||||
|             video_title = '_' | ||||
|  | ||||
|         # thumbnail image | ||||
| @@ -737,7 +742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         if m_thumb is not None: | ||||
|             video_thumbnail = m_thumb.group(1) | ||||
|         elif 'thumbnail_url' not in video_info: | ||||
|             self._downloader.report_warning(u'unable to extract video thumbnail') | ||||
|             self._downloader.report_warning('unable to extract video thumbnail') | ||||
|             video_thumbnail = None | ||||
|         else:   # don't panic if we can't find it | ||||
|             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0]) | ||||
| @@ -791,8 +796,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             if count is not None: | ||||
|                 return int(count.replace(',', '')) | ||||
|             return None | ||||
|         like_count = _extract_count(u'like') | ||||
|         dislike_count = _extract_count(u'dislike') | ||||
|         like_count = _extract_count('like') | ||||
|         dislike_count = _extract_count('dislike') | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, video_webpage) | ||||
| @@ -802,7 +807,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             return | ||||
|  | ||||
|         if 'length_seconds' not in video_info: | ||||
|             self._downloader.report_warning(u'unable to extract video duration') | ||||
|             self._downloader.report_warning('unable to extract video duration') | ||||
|             video_duration = None | ||||
|         else: | ||||
|             video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])) | ||||
| @@ -823,11 +828,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map | ||||
|             # this signatures are encrypted | ||||
|             if 'url_encoded_fmt_stream_map' not in args: | ||||
|                 raise ValueError(u'No stream_map present')  # caught below | ||||
|                 raise ValueError('No stream_map present')  # caught below | ||||
|             re_signature = re.compile(r'[&,]s=') | ||||
|             m_s = re_signature.search(args['url_encoded_fmt_stream_map']) | ||||
|             if m_s is not None: | ||||
|                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id) | ||||
|                 self.to_screen('%s: Encrypted signatures detected.' % video_id) | ||||
|                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] | ||||
|             m_s = re_signature.search(args.get('adaptive_fmts', '')) | ||||
|             if m_s is not None: | ||||
| @@ -905,7 +910,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                                 player_desc = 'html5 player %s' % player_version | ||||
|  | ||||
|                         parts_sizes = self._signature_cache_id(encrypted_sig) | ||||
|                         self.to_screen(u'{%s} signature length %s, %s' % | ||||
|                         self.to_screen('{%s} signature length %s, %s' % | ||||
|                             (format_id, parts_sizes, player_desc)) | ||||
|  | ||||
|                     signature = self._decrypt_signature( | ||||
| @@ -920,7 +925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             url_map = self._extract_from_m3u8(manifest_url, video_id) | ||||
|             formats = _map_to_format_list(url_map) | ||||
|         else: | ||||
|             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') | ||||
|             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') | ||||
|  | ||||
|         # Look for the DASH manifest | ||||
|         if (self._downloader.params.get('youtube_include_dash_manifest', False)): | ||||
| @@ -941,9 +946,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) | ||||
|                 dash_doc = self._download_xml( | ||||
|                     dash_manifest_url, video_id, | ||||
|                     note=u'Downloading DASH manifest', | ||||
|                     errnote=u'Could not download DASH manifest') | ||||
|                 for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): | ||||
|                     note='Downloading DASH manifest', | ||||
|                     errnote='Could not download DASH manifest') | ||||
|                 for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): | ||||
|                     url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') | ||||
|                     if url_el is None: | ||||
|                         continue | ||||
| @@ -969,7 +974,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                         existing_format.update(f) | ||||
|  | ||||
|             except (ExtractorError, KeyError) as e: | ||||
|                 self.report_warning(u'Skipping DASH manifest: %s' % e, video_id) | ||||
|                 self.report_warning('Skipping DASH manifest: %s' % e, video_id) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -1000,7 +1005,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|                         (?:\w+\.)? | ||||
|                         youtube\.com/ | ||||
|                         (?: | ||||
|                            (?:course|view_play_list|my_playlists|artist|playlist|watch) | ||||
|                            (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) | ||||
|                            \? (?:.*?&)*? (?:p|a|list)= | ||||
|                         |  p/ | ||||
|                         ) | ||||
| @@ -1056,6 +1061,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|             'title': 'YDL_safe_search', | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
|     }, { | ||||
|         'note': 'embedded', | ||||
|         'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', | ||||
|         'playlist_count': 4, | ||||
|         'info_dict': { | ||||
|             'title': 'JODA15', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
| @@ -1090,7 +1102,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|         # Extract playlist id | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|         playlist_id = mobj.group(1) or mobj.group(2) | ||||
|  | ||||
|         # Check if it's a video-specific URL | ||||
| @@ -1098,16 +1110,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|         if 'v' in query_dict: | ||||
|             video_id = query_dict['v'][0] | ||||
|             if self._downloader.params.get('noplaylist'): | ||||
|                 self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) | ||||
|                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id) | ||||
|                 return self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|             else: | ||||
|                 self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | ||||
|                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | ||||
|  | ||||
|         if playlist_id.startswith('RD'): | ||||
|             # Mixes require a custom extraction process | ||||
|             return self._extract_mix(playlist_id) | ||||
|         if playlist_id.startswith('TL'): | ||||
|             raise ExtractorError(u'For downloading YouTube.com top lists, use ' | ||||
|             raise ExtractorError('For downloading YouTube.com top lists, use ' | ||||
|                 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) | ||||
|  | ||||
|         url = self._TEMPLATE_URL % playlist_id | ||||
| @@ -1152,19 +1164,28 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|  | ||||
| class YoutubeTopListIE(YoutubePlaylistIE): | ||||
|     IE_NAME = 'youtube:toplist' | ||||
|     IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"' | ||||
|     IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' | ||||
|         ' (Example: "yttoplist:music:Top Tracks")') | ||||
|     _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' | ||||
|     _TESTS = [] | ||||
|     _TESTS = [{ | ||||
|         'url': 'yttoplist:music:Trending', | ||||
|         'playlist_mincount': 5, | ||||
|         'skip': 'Only works for logged-in users', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         channel = mobj.group('chann') | ||||
|         title = mobj.group('title') | ||||
|         query = compat_urllib_parse.urlencode({'title': title}) | ||||
|         playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query) | ||||
|         channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title) | ||||
|         link = self._html_search_regex(playlist_re, channel_page, 'list') | ||||
|         channel_page = self._download_webpage( | ||||
|             'https://www.youtube.com/%s' % channel, title) | ||||
|         link = self._html_search_regex( | ||||
|             r'''(?x) | ||||
|                 <a\s+href="([^"]+)".*?>\s* | ||||
|                 <span\s+class="branded-page-module-title-text">\s* | ||||
|                 <span[^>]*>.*?%s.*?</span>''' % re.escape(query), | ||||
|             channel_page, 'list') | ||||
|         url = compat_urlparse.urljoin('https://www.youtube.com/', link) | ||||
|          | ||||
|         video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' | ||||
| @@ -1190,6 +1211,11 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|     _MORE_PAGES_INDICATOR = 'yt-uix-load-more' | ||||
|     _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' | ||||
|     IE_NAME = 'youtube:channel' | ||||
|     _TESTS = [{ | ||||
|         'note': 'paginated channel', | ||||
|         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', | ||||
|         'playlist_mincount': 91, | ||||
|     }] | ||||
|  | ||||
|     def extract_videos_from_page(self, page): | ||||
|         ids_in_page = [] | ||||
| @@ -1202,7 +1228,7 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|         # Extract channel id | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|  | ||||
|         # Download channel page | ||||
|         channel_id = mobj.group(1) | ||||
| @@ -1224,7 +1250,7 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|             for pagenum in itertools.count(1): | ||||
|                 url = self._MORE_PAGES_URL % (pagenum, channel_id) | ||||
|                 page = self._download_json( | ||||
|                     url, channel_id, note=u'Downloading page #%s' % pagenum, | ||||
|                     url, channel_id, note='Downloading page #%s' % pagenum, | ||||
|                     transform_source=uppercase_escape) | ||||
|  | ||||
|                 ids_in_page = self.extract_videos_from_page(page['content_html']) | ||||
| @@ -1233,7 +1259,7 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|                 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: | ||||
|                     break | ||||
|  | ||||
|         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) | ||||
|         self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) | ||||
|  | ||||
|         url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|                        for video_id in video_ids] | ||||
| @@ -1248,6 +1274,17 @@ class YoutubeUserIE(InfoExtractor): | ||||
|     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' | ||||
|     IE_NAME = 'youtube:user' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.youtube.com/user/TheLinuxFoundation', | ||||
|         'playlist_mincount': 320, | ||||
|         'info_dict': { | ||||
|             'title': 'TheLinuxFoundation', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'ytuser:phihag', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         # Don't return True if the url can be extracted with other youtube | ||||
| @@ -1260,7 +1297,7 @@ class YoutubeUserIE(InfoExtractor): | ||||
|         # Extract username | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|  | ||||
|         username = mobj.group(1) | ||||
|  | ||||
| @@ -1281,7 +1318,7 @@ class YoutubeUserIE(InfoExtractor): | ||||
|             try: | ||||
|                 response = json.loads(page) | ||||
|             except ValueError as err: | ||||
|                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) | ||||
|                 raise ExtractorError('Invalid JSON in API response: ' + compat_str(err)) | ||||
|             if 'entry' not in response['feed']: | ||||
|                 return | ||||
|  | ||||
| @@ -1322,9 +1359,9 @@ class YoutubeSearchIE(SearchInfoExtractor): | ||||
|                 compat_urllib_parse.quote_plus(query.encode('utf-8')), | ||||
|                 (PAGE_SIZE * pagenum) + 1) | ||||
|             data_json = self._download_webpage( | ||||
|                 result_url, video_id=u'query "%s"' % query, | ||||
|                 note=u'Downloading page %s' % (pagenum + 1), | ||||
|                 errnote=u'Unable to download API page') | ||||
|                 result_url, video_id='query "%s"' % query, | ||||
|                 note='Downloading page %s' % (pagenum + 1), | ||||
|                 errnote='Unable to download API page') | ||||
|             data = json.loads(data_json) | ||||
|             api_response = data['data'] | ||||
|  | ||||
| @@ -1356,6 +1393,13 @@ class YoutubeSearchURLIE(InfoExtractor): | ||||
|     IE_DESC = 'YouTube.com search URLs' | ||||
|     IE_NAME = 'youtube:search_url' | ||||
|     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', | ||||
|         'playlist_mincount': 5, | ||||
|         'info_dict': { | ||||
|             'title': 'youtube-dl test video', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -1390,17 +1434,38 @@ class YoutubeSearchURLIE(InfoExtractor): | ||||
|  | ||||
| class YoutubeShowIE(InfoExtractor): | ||||
|     IE_DESC = 'YouTube.com (multi-season) shows' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)' | ||||
|     IE_NAME = 'youtube:show' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.youtube.com/show/airdisasters', | ||||
|         'playlist_mincount': 3, | ||||
|         'info_dict': { | ||||
|             'id': 'airdisasters', | ||||
|             'title': 'Air Disasters', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         show_name = mobj.group(1) | ||||
|         webpage = self._download_webpage(url, show_name, 'Downloading show webpage') | ||||
|         playlist_id = mobj.group('id') | ||||
|         webpage = self._download_webpage( | ||||
|             url, playlist_id, 'Downloading show webpage') | ||||
|         # There's one playlist for each season of the show | ||||
|         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) | ||||
|         self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons))) | ||||
|         return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] | ||||
|         self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons))) | ||||
|         entries = [ | ||||
|             self.url_result( | ||||
|                 'https://www.youtube.com' + season.group(1), 'YoutubePlaylist') | ||||
|             for season in m_seasons | ||||
|         ] | ||||
|         title = self._og_search_title(webpage, fatal=False) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': playlist_id, | ||||
|             'title': title, | ||||
|             'entries': entries, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): | ||||
|   | ||||
| @@ -218,7 +218,7 @@ def parseOpts(overrideArguments=None): | ||||
|  | ||||
|     video_format.add_option('-f', '--format', | ||||
|             action='store', dest='format', metavar='FORMAT', default=None, | ||||
|             help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') | ||||
|             help='video format code, specify the order of preference using slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as  -f  136/137/mp4/bestvideo,140/m4a/bestaudio') | ||||
|     video_format.add_option('--all-formats', | ||||
|             action='store_const', dest='format', help='download all available video formats', const='all') | ||||
|     video_format.add_option('--prefer-free-formats', | ||||
|   | ||||
| @@ -1437,6 +1437,24 @@ def uppercase_escape(s): | ||||
|         lambda m: unicode_escape(m.group(0))[0], | ||||
|         s) | ||||
|  | ||||
|  | ||||
| def escape_rfc3986(s): | ||||
|     """Escape non-ASCII characters as suggested by RFC 3986""" | ||||
|     if sys.version_info < (3, 0) and isinstance(s, unicode): | ||||
|         s = s.encode('utf-8') | ||||
|     return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]") | ||||
|  | ||||
|  | ||||
| def escape_url(url): | ||||
|     """Escape URL as suggested by RFC 3986""" | ||||
|     url_parsed = compat_urllib_parse_urlparse(url) | ||||
|     return url_parsed._replace( | ||||
|         path=escape_rfc3986(url_parsed.path), | ||||
|         params=escape_rfc3986(url_parsed.params), | ||||
|         query=escape_rfc3986(url_parsed.query), | ||||
|         fragment=escape_rfc3986(url_parsed.fragment) | ||||
|     ).geturl() | ||||
|  | ||||
| try: | ||||
|     struct.pack(u'!I', 0) | ||||
| except TypeError: | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.09.15.1' | ||||
| __version__ = '2014.09.24.1' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user