Compare commits
	
		
			74 Commits
		
	
	
		
			2014.09.22
			...
			2014.09.29
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | a43ee88c6f | ||
|  | e2dce53781 | ||
|  | 1770ed9e86 | ||
|  | 457ac58cc7 | ||
|  | 9c44d2429b | ||
|  | d2e32f7df5 | ||
|  | 67077b182b | ||
|  | 5f4c318844 | ||
|  | dfee83234b | ||
|  | 7f5c0c4a19 | ||
|  | 4bc77c8417 | ||
|  | 22dd3fad86 | ||
|  | d6e6a42256 | ||
|  | 76e7d1e74b | ||
|  | 38c4d41b74 | ||
|  | f0b8e3607d | ||
|  | 51ee08c4bb | ||
|  | c841789772 | ||
|  | c121a75b36 | ||
|  | 5a8b77551d | ||
|  | 0217aee154 | ||
|  | b14f3a4c1d | ||
|  | 92f7963f6e | ||
|  | 88fbe4c2cc | ||
|  | 394599f422 | ||
|  | ed9266db90 | ||
|  | f4b1c7adb8 | ||
|  | c95eeb7b80 | ||
|  | 5e43e3803c | ||
|  | a89435a7a8 | ||
|  | a0a90b3ba1 | ||
|  | c664182323 | ||
|  | 6be1cd4ddb | ||
|  | ee0d90707a | ||
|  | f776d8f608 | ||
|  | b3ac3a51ac | ||
|  | 0b75c2a88b | ||
|  | 7b7518124e | ||
|  | 68b0973046 | ||
|  | 3a203b8bfa | ||
|  | 70752ccefd | ||
|  | 0155549d6c | ||
|  | b66745288e | ||
|  | 2a1325fdde | ||
|  | 2f9e8776df | ||
|  | 497339fa0e | ||
|  | 8e6f8051f0 | ||
|  | 11b3ce8509 | ||
|  | 6a5af6acb9 | ||
|  | 9a0d98bb40 | ||
|  | fbd3162e49 | ||
|  | 54e9a4af95 | ||
|  | 8a32b82e46 | ||
|  | fec02bcc90 | ||
|  | c6e90caaa6 | ||
|  | 4bbf157794 | ||
|  | 6b08cdf626 | ||
|  | b686fc18da | ||
|  | 0b97f3a936 | ||
|  | eb73f2649f | ||
|  | f0b5d6af74 | ||
|  | 2f771f6c99 | ||
|  | 3b2f933b01 | ||
|  | cc746841e7 | ||
|  | ac7553d031 | ||
|  | cdc628a498 | ||
|  | 69ea8ca42c | ||
|  | 4bc3a23ec5 | ||
|  | bd5650ac64 | ||
|  | 86916dae4b | ||
|  | f7d159cf95 | ||
|  | 632e5684ce | ||
|  | 746c67d72f | ||
|  | 5aa38e75b2 | 
| @@ -442,8 +442,6 @@ If you want to add support for a new site, you can follow this quick list (assum | ||||
|     # coding: utf-8 | ||||
|     from __future__ import unicode_literals | ||||
|  | ||||
|     import re | ||||
|  | ||||
|     from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -451,7 +449,7 @@ If you want to add support for a new site, you can follow this quick list (assum | ||||
|         _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)' | ||||
|         _TEST = { | ||||
|             'url': 'http://yourextractor.com/watch/42', | ||||
|             'md5': 'TODO: md5 sum of the first 10KiB of the video file', | ||||
|             'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', | ||||
|             'info_dict': { | ||||
|                 'id': '42', | ||||
|                 'ext': 'mp4', | ||||
| @@ -466,8 +464,7 @@ If you want to add support for a new site, you can follow this quick list (assum | ||||
|         } | ||||
|  | ||||
|         def _real_extract(self, url): | ||||
|             mobj = re.match(self._VALID_URL, url) | ||||
|             video_id = mobj.group('id') | ||||
|             video_id = self._match_id(url) | ||||
|  | ||||
|             # TODO more code goes here, for example ... | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|   | ||||
| @@ -139,7 +139,9 @@ def generator(test_case): | ||||
|  | ||||
|             if is_playlist: | ||||
|                 self.assertEqual(res_dict['_type'], 'playlist') | ||||
|                 self.assertTrue('entries' in res_dict) | ||||
|                 expect_info_dict(self, test_case.get('info_dict', {}), res_dict) | ||||
|  | ||||
|             if 'playlist_mincount' in test_case: | ||||
|                 assertGreaterEqual( | ||||
|                     self, | ||||
| @@ -188,7 +190,7 @@ def generator(test_case): | ||||
|                 expect_info_dict(self, tc.get('info_dict', {}), info_dict) | ||||
|         finally: | ||||
|             try_rm_tcs_files() | ||||
|             if is_playlist and res_dict is not None: | ||||
|             if is_playlist and res_dict is not None and res_dict.get('entries'): | ||||
|                 # Remove all other files that may have been extracted if the | ||||
|                 # extractor returns full results even with extract_flat | ||||
|                 res_tcs = [{'info_dict': e} for e in res_dict['entries']] | ||||
|   | ||||
| @@ -22,7 +22,8 @@ from youtube_dl.utils import ( | ||||
|     fix_xml_ampersands, | ||||
|     get_meta_content, | ||||
|     orderedSet, | ||||
|     PagedList, | ||||
|     OnDemandPagedList, | ||||
|     InAdvancePagedList, | ||||
|     parse_duration, | ||||
|     read_batch_urls, | ||||
|     sanitize_filename, | ||||
| @@ -246,10 +247,14 @@ class TestUtil(unittest.TestCase): | ||||
|                 for i in range(firstid, upto): | ||||
|                     yield i | ||||
|  | ||||
|             pl = PagedList(get_page, pagesize) | ||||
|             pl = OnDemandPagedList(get_page, pagesize) | ||||
|             got = pl.getslice(*sliceargs) | ||||
|             self.assertEqual(got, expected) | ||||
|  | ||||
|             iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize) | ||||
|             got = iapl.getslice(*sliceargs) | ||||
|             self.assertEqual(got, expected) | ||||
|  | ||||
|         testPL(5, 2, (), [0, 1, 2, 3, 4]) | ||||
|         testPL(5, 2, (1,), [1, 2, 3, 4]) | ||||
|         testPL(5, 2, (2,), [2, 3, 4]) | ||||
|   | ||||
| @@ -10,7 +10,6 @@ from test.helper import FakeYDL | ||||
|  | ||||
|  | ||||
| from youtube_dl.extractor import ( | ||||
|     YoutubeUserIE, | ||||
|     YoutubePlaylistIE, | ||||
|     YoutubeIE, | ||||
|     YoutubeChannelIE, | ||||
| @@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         self.assertEqual(len(entries), 25) | ||||
|         self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') | ||||
|  | ||||
|     def test_youtube_channel(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeChannelIE(dl) | ||||
|         #test paginated channel | ||||
|         result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w') | ||||
|         self.assertTrue(len(result['entries']) > 90) | ||||
|         #test autogenerated channel | ||||
|         result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') | ||||
|         self.assertTrue(len(result['entries']) >= 18) | ||||
|  | ||||
|     def test_youtube_user(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeUserIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') | ||||
|         self.assertTrue(len(result['entries']) >= 320) | ||||
|  | ||||
|     def test_youtube_show(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeShowIE(dl) | ||||
|         result = ie.extract('http://www.youtube.com/show/airdisasters') | ||||
|         self.assertTrue(len(result) >= 3) | ||||
|  | ||||
|     def test_youtube_mix(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
| @@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         entries = result['entries'] | ||||
|         self.assertEqual(len(entries), 100) | ||||
|  | ||||
|     def test_youtube_toplist(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeTopListIE(dl) | ||||
|         result = ie.extract('yttoplist:music:Trending') | ||||
|         entries = result['entries'] | ||||
|         self.assertTrue(len(entries) >= 5) | ||||
|  | ||||
|     def test_youtube_search_url(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeSearchURLIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video') | ||||
|         entries = result['entries'] | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], 'youtube-dl test video') | ||||
|         self.assertTrue(len(entries) >= 5) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -1250,12 +1250,13 @@ class YoutubeDL(object): | ||||
|         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) | ||||
|         # To work around aforementioned issue we will replace request's original URL with | ||||
|         # percent-encoded one | ||||
|         url = req if isinstance(req, compat_str) else req.get_full_url() | ||||
|         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str) | ||||
|         url = req if req_is_string else req.get_full_url() | ||||
|         url_escaped = escape_url(url) | ||||
|  | ||||
|         # Substitute URL if any change after escaping | ||||
|         if url != url_escaped: | ||||
|             if isinstance(req, compat_str): | ||||
|             if req_is_string: | ||||
|                 req = url_escaped | ||||
|             else: | ||||
|                 req = compat_urllib_request.Request( | ||||
|   | ||||
| @@ -78,6 +78,7 @@ __authors__  = ( | ||||
|     'Hari Padmanaban', | ||||
|     'Carlos Ramos', | ||||
|     '5moufl', | ||||
|     'lenaten', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
|   | ||||
| @@ -2,6 +2,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from .hls import HlsFD | ||||
| from .hls import NativeHlsFD | ||||
| from .http import HttpFD | ||||
| from .mplayer import MplayerFD | ||||
| from .rtmp import RtmpFD | ||||
| @@ -19,6 +20,8 @@ def get_suitable_downloader(info_dict): | ||||
|  | ||||
|     if url.startswith('rtmp'): | ||||
|         return RtmpFD | ||||
|     if protocol == 'm3u8_native': | ||||
|         return NativeHlsFD | ||||
|     if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'): | ||||
|         return HlsFD | ||||
|     if url.startswith('mms') or url.startswith('rtsp'): | ||||
|   | ||||
| @@ -42,6 +42,7 @@ class FileDownloader(object): | ||||
|     Subclasses of this one must re-define the real_download method. | ||||
|     """ | ||||
|  | ||||
|     _TEST_FILE_SIZE = 10241 | ||||
|     params = None | ||||
|  | ||||
|     def __init__(self, ydl, params): | ||||
|   | ||||
| @@ -1,8 +1,13 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| import subprocess | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     compat_urllib_request, | ||||
|     check_executable, | ||||
|     encodeFilename, | ||||
| ) | ||||
| @@ -43,3 +48,57 @@ class HlsFD(FileDownloader): | ||||
|             self.to_stderr(u"\n") | ||||
|             self.report_error(u'%s exited with code %d' % (program, retval)) | ||||
|             return False | ||||
|  | ||||
|  | ||||
| class NativeHlsFD(FileDownloader): | ||||
|     """ A more limited implementation that does not require ffmpeg """ | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         url = info_dict['url'] | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|  | ||||
|         self.to_screen( | ||||
|             '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id']) | ||||
|         data = self.ydl.urlopen(url).read() | ||||
|         s = data.decode('utf-8', 'ignore') | ||||
|         segment_urls = [] | ||||
|         for line in s.splitlines(): | ||||
|             line = line.strip() | ||||
|             if line and not line.startswith('#'): | ||||
|                 segment_url = ( | ||||
|                     line | ||||
|                     if re.match(r'^https?://', line) | ||||
|                     else compat_urlparse.urljoin(url, line)) | ||||
|                 segment_urls.append(segment_url) | ||||
|  | ||||
|         is_test = self.params.get('test', False) | ||||
|         remaining_bytes = self._TEST_FILE_SIZE if is_test else None | ||||
|         byte_counter = 0 | ||||
|         with open(tmpfilename, 'wb') as outf: | ||||
|             for i, segurl in enumerate(segment_urls): | ||||
|                 self.to_screen( | ||||
|                     '[hlsnative] %s: Downloading segment %d / %d' % | ||||
|                     (info_dict['id'], i + 1, len(segment_urls))) | ||||
|                 seg_req = compat_urllib_request.Request(segurl) | ||||
|                 if remaining_bytes is not None: | ||||
|                     seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) | ||||
|  | ||||
|                 segment = self.ydl.urlopen(seg_req).read() | ||||
|                 if remaining_bytes is not None: | ||||
|                     segment = segment[:remaining_bytes] | ||||
|                     remaining_bytes -= len(segment) | ||||
|                 outf.write(segment) | ||||
|                 byte_counter += len(segment) | ||||
|                 if remaining_bytes is not None and remaining_bytes <= 0: | ||||
|                     break | ||||
|  | ||||
|         self._hook_progress({ | ||||
|             'downloaded_bytes': byte_counter, | ||||
|             'total_bytes': byte_counter, | ||||
|             'filename': filename, | ||||
|             'status': 'finished', | ||||
|         }) | ||||
|         self.try_rename(tmpfilename, filename) | ||||
|         return True | ||||
|  | ||||
|   | ||||
| @@ -14,8 +14,6 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class HttpFD(FileDownloader): | ||||
|     _TEST_FILE_SIZE = 10241 | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         url = info_dict['url'] | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|   | ||||
| @@ -135,12 +135,14 @@ from .gametrailers import GametrailersIE | ||||
| from .gdcvault import GDCVaultIE | ||||
| from .generic import GenericIE | ||||
| from .godtube import GodTubeIE | ||||
| from .golem import GolemIE | ||||
| from .googleplus import GooglePlusIE | ||||
| from .googlesearch import GoogleSearchIE | ||||
| from .gorillavid import GorillaVidIE | ||||
| from .goshgay import GoshgayIE | ||||
| from .grooveshark import GroovesharkIE | ||||
| from .hark import HarkIE | ||||
| from .heise import HeiseIE | ||||
| from .helsinki import HelsinkiIE | ||||
| from .hentaistigma import HentaiStigmaIE | ||||
| from .hornbunny import HornBunnyIE | ||||
| @@ -240,6 +242,7 @@ from .ndtv import NDTVIE | ||||
| from .newgrounds import NewgroundsIE | ||||
| from .newstube import NewstubeIE | ||||
| from .nfb import NFBIE | ||||
| from .nfl import NFLIE | ||||
| from .nhl import NHLIE, NHLVideocenterIE | ||||
| from .niconico import NiconicoIE | ||||
| from .ninegag import NineGagIE | ||||
| @@ -260,6 +263,7 @@ from .nrk import ( | ||||
| from .ntv import NTVIE | ||||
| from .nytimes import NYTimesIE | ||||
| from .nuvid import NuvidIE | ||||
| from .oktoberfesttv import OktoberfestTVIE | ||||
| from .ooyala import OoyalaIE | ||||
| from .orf import ( | ||||
|     ORFTVthekIE, | ||||
| @@ -270,6 +274,7 @@ from .parliamentliveuk import ParliamentLiveUKIE | ||||
| from .patreon import PatreonIE | ||||
| from .pbs import PBSIE | ||||
| from .photobucket import PhotobucketIE | ||||
| from .played import PlayedIE | ||||
| from .playfm import PlayFMIE | ||||
| from .playvid import PlayvidIE | ||||
| from .podomatic import PodomaticIE | ||||
| @@ -338,6 +343,7 @@ from .spankwire import SpankwireIE | ||||
| from .spiegel import SpiegelIE, SpiegelArticleIE | ||||
| from .spiegeltv import SpiegeltvIE | ||||
| from .spike import SpikeIE | ||||
| from .sport5 import Sport5IE | ||||
| from .sportdeutschland import SportDeutschlandIE | ||||
| from .stanfordoc import StanfordOpenClassroomIE | ||||
| from .steam import SteamIE | ||||
| @@ -365,7 +371,10 @@ from .thisav import ThisAVIE | ||||
| from .tinypic import TinyPicIE | ||||
| from .tlc import TlcIE, TlcDeIE | ||||
| from .tnaflix import TNAFlixIE | ||||
| from .thvideo import THVideoIE | ||||
| from .thvideo import ( | ||||
|     THVideoIE, | ||||
|     THVideoPlaylistIE | ||||
| ) | ||||
| from .toutv import TouTvIE | ||||
| from .toypics import ToypicsUserIE, ToypicsIE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| @@ -406,11 +415,12 @@ from .videoweed import VideoWeedIE | ||||
| from .vidme import VidmeIE | ||||
| from .vimeo import ( | ||||
|     VimeoIE, | ||||
|     VimeoChannelIE, | ||||
|     VimeoUserIE, | ||||
|     VimeoAlbumIE, | ||||
|     VimeoChannelIE, | ||||
|     VimeoGroupsIE, | ||||
|     VimeoLikesIE, | ||||
|     VimeoReviewIE, | ||||
|     VimeoUserIE, | ||||
|     VimeoWatchLaterIE, | ||||
| ) | ||||
| from .vimple import VimpleIE | ||||
| @@ -449,6 +459,7 @@ from .yahoo import ( | ||||
|     YahooNewsIE, | ||||
|     YahooSearchIE, | ||||
| ) | ||||
| from .ynet import YnetIE | ||||
| from .youjizz import YouJizzIE | ||||
| from .youku import YoukuIE | ||||
| from .youporn import YouPornIE | ||||
|   | ||||
| @@ -22,8 +22,7 @@ class ABCIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         urls_info_json = self._search_regex( | ||||
|   | ||||
| @@ -35,7 +35,7 @@ class AnySexIE(InfoExtractor): | ||||
|  | ||||
|         title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False) | ||||
|             r'<div class="description"[^>]*>([^<]+)</div>', webpage, 'description', fatal=False) | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
| @@ -43,7 +43,7 @@ class AnySexIE(InfoExtractor): | ||||
|             r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage) | ||||
|  | ||||
|         duration = parse_duration(self._search_regex( | ||||
|             r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False)) | ||||
|             r'<b>Duration:</b> (?:<q itemprop="duration">)?(\d+:\d+)', webpage, 'duration', fatal=False)) | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False)) | ||||
|  | ||||
|   | ||||
| @@ -8,8 +8,6 @@ from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     qualities, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_parse, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     unified_strdate, | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import datetime | ||||
| import hashlib | ||||
| import json | ||||
| import netrc | ||||
| @@ -15,11 +16,13 @@ from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
|     compat_str, | ||||
|  | ||||
|     clean_html, | ||||
|     compiled_regex_type, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     RegexNotFoundError, | ||||
|     sanitize_filename, | ||||
| @@ -163,6 +166,14 @@ class InfoExtractor(object): | ||||
|             cls._VALID_URL_RE = re.compile(cls._VALID_URL) | ||||
|         return cls._VALID_URL_RE.match(url) is not None | ||||
|  | ||||
|     @classmethod | ||||
|     def _match_id(cls, url): | ||||
|         if '_VALID_URL_RE' not in cls.__dict__: | ||||
|             cls._VALID_URL_RE = re.compile(cls._VALID_URL) | ||||
|         m = cls._VALID_URL_RE.match(url) | ||||
|         assert m | ||||
|         return m.group('id') | ||||
|  | ||||
|     @classmethod | ||||
|     def working(cls): | ||||
|         """Getter method for _WORKING.""" | ||||
| @@ -640,7 +651,9 @@ class InfoExtractor(object): | ||||
|  | ||||
|         return formats | ||||
|  | ||||
|     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None): | ||||
|     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, | ||||
|                               entry_protocol='m3u8', preference=None): | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'm3u8-meta', | ||||
|             'url': m3u8_url, | ||||
| @@ -651,6 +664,11 @@ class InfoExtractor(object): | ||||
|             'format_note': 'Quality selection URL', | ||||
|         }] | ||||
|  | ||||
|         format_url = lambda u: ( | ||||
|             u | ||||
|             if re.match(r'^https?://', u) | ||||
|             else compat_urlparse.urljoin(m3u8_url, u)) | ||||
|  | ||||
|         m3u8_doc = self._download_webpage(m3u8_url, video_id) | ||||
|         last_info = None | ||||
|         kv_rex = re.compile( | ||||
| @@ -667,15 +685,17 @@ class InfoExtractor(object): | ||||
|                 continue | ||||
|             else: | ||||
|                 if last_info is None: | ||||
|                     formats.append({'url': line}) | ||||
|                     formats.append({'url': format_url(line)}) | ||||
|                     continue | ||||
|                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) | ||||
|  | ||||
|                 f = { | ||||
|                     'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), | ||||
|                     'url': line.strip(), | ||||
|                     'url': format_url(line.strip()), | ||||
|                     'tbr': tbr, | ||||
|                     'ext': ext, | ||||
|                     'protocol': entry_protocol, | ||||
|                     'preference': preference, | ||||
|                 } | ||||
|                 codecs = last_info.get('CODECS') | ||||
|                 if codecs: | ||||
| @@ -695,6 +715,34 @@ class InfoExtractor(object): | ||||
|         self._sort_formats(formats) | ||||
|         return formats | ||||
|  | ||||
|     def _live_title(self, name): | ||||
|         """ Generate the title for a live video """ | ||||
|         now = datetime.datetime.now() | ||||
|         now_str = now.strftime("%Y-%m-%d %H:%M") | ||||
|         return name + ' ' + now_str | ||||
|  | ||||
|     def _int(self, v, name, fatal=False, **kwargs): | ||||
|         res = int_or_none(v, **kwargs) | ||||
|         if 'get_attr' in kwargs: | ||||
|             print(getattr(v, kwargs['get_attr'])) | ||||
|         if res is None: | ||||
|             msg = 'Failed to extract %s: Could not parse value %r' % (name, v) | ||||
|             if fatal: | ||||
|                 raise ExtractorError(msg) | ||||
|             else: | ||||
|                 self._downloader.report_warning(msg) | ||||
|         return res | ||||
|  | ||||
|     def _float(self, v, name, fatal=False, **kwargs): | ||||
|         res = float_or_none(v, **kwargs) | ||||
|         if res is None: | ||||
|             msg = 'Failed to extract %s: Could not parse value %r' % (name, v) | ||||
|             if fatal: | ||||
|                 raise ExtractorError(msg) | ||||
|             else: | ||||
|                 self._downloader.report_warning(msg) | ||||
|         return res | ||||
|  | ||||
|  | ||||
| class SearchInfoExtractor(InfoExtractor): | ||||
|     """ | ||||
|   | ||||
| @@ -9,7 +9,7 @@ import xml.etree.ElementTree | ||||
|  | ||||
| from hashlib import sha1 | ||||
| from math import pow, sqrt, floor | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     compat_urllib_parse, | ||||
| @@ -26,7 +26,7 @@ from ..aes import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CrunchyrollIE(InfoExtractor): | ||||
| class CrunchyrollIE(SubtitlesInfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', | ||||
| @@ -271,6 +271,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|             else: | ||||
|                 subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         return { | ||||
|             'id':          video_id, | ||||
|             'title':       video_title, | ||||
|   | ||||
| @@ -1,4 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -7,20 +9,20 @@ from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class EitbIE(InfoExtractor): | ||||
|     IE_NAME = u'eitb.tv' | ||||
|     IE_NAME = 'eitb.tv' | ||||
|     _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'add_ie': ['Brightcove'], | ||||
|         u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/', | ||||
|         u'md5': u'edf4436247185adee3ea18ce64c47998', | ||||
|         u'info_dict': { | ||||
|             u'id': u'2743577154001', | ||||
|             u'ext': u'mp4', | ||||
|             u'title': u'60 minutos (Lasa y Zabala, 30 años)', | ||||
|         'add_ie': ['Brightcove'], | ||||
|         'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/', | ||||
|         'md5': 'edf4436247185adee3ea18ce64c47998', | ||||
|         'info_dict': { | ||||
|             'id': '2743577154001', | ||||
|             'ext': 'mp4', | ||||
|             'title': '60 minutos (Lasa y Zabala, 30 años)', | ||||
|             # All videos from eitb has this description in the brightcove info | ||||
|             u'description': u'.', | ||||
|             u'uploader': u'Euskal Telebista', | ||||
|             'description': '.', | ||||
|             'uploader': 'Euskal Telebista', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -30,7 +32,7 @@ class EitbIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, chapter_id) | ||||
|         bc_url = BrightcoveIE._extract_brightcove_url(webpage) | ||||
|         if bc_url is None: | ||||
|             raise ExtractorError(u'Could not extract the Brightcove url') | ||||
|             raise ExtractorError('Could not extract the Brightcove url') | ||||
|         # The BrightcoveExperience object doesn't contain the video id, we set | ||||
|         # it manually | ||||
|         bc_url += '&%40videoPlayer={0}'.format(chapter_id) | ||||
|   | ||||
| @@ -7,6 +7,7 @@ from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -20,6 +21,7 @@ class ExtremeTubeIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Music Video 14 british euro brit european cumshots swallow', | ||||
|             'uploader': 'unknown', | ||||
|             'view_count': int, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     }, { | ||||
| @@ -39,8 +41,12 @@ class ExtremeTubeIE(InfoExtractor): | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title') | ||||
|         uploader = self._html_search_regex( | ||||
|             r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader', | ||||
|             fatal=False) | ||||
|             r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         view_count = str_to_int(self._html_search_regex( | ||||
|             r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         video_url = compat_urllib_parse.unquote(self._html_search_regex( | ||||
|             r'video_url=(.+?)&', webpage, 'video_url')) | ||||
|         path = compat_urllib_parse_urlparse(video_url).path | ||||
| @@ -51,6 +57,7 @@ class ExtremeTubeIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'uploader': uploader, | ||||
|             'view_count': view_count, | ||||
|             'url': video_url, | ||||
|             'format': format, | ||||
|             'format_id': format, | ||||
|   | ||||
| @@ -10,13 +10,13 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class FlickrIE(InfoExtractor): | ||||
|     """Information Extractor for Flickr videos""" | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' | ||||
|     _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', | ||||
|         'file': '5645318632.mp4', | ||||
|         'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b', | ||||
|         'info_dict': { | ||||
|             'id': '5645318632', | ||||
|             'ext': 'mp4', | ||||
|             "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",  | ||||
|             "uploader_id": "forestwander-nature-pictures",  | ||||
|             "title": "Dark Hollow Waterfalls" | ||||
| @@ -49,12 +49,12 @@ class FlickrIE(InfoExtractor): | ||||
|             raise ExtractorError('Unable to extract video url') | ||||
|         video_url = mobj.group(1) + unescapeHTML(mobj.group(2)) | ||||
|  | ||||
|         return [{ | ||||
|             'id':          video_id, | ||||
|             'url':         video_url, | ||||
|             'ext':         'mp4', | ||||
|             'title':       self._og_search_title(webpage), | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail':   self._og_search_thumbnail(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'uploader_id': video_uploader_id, | ||||
|         }] | ||||
|         } | ||||
|   | ||||
| @@ -382,14 +382,21 @@ class GenericIE(InfoExtractor): | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|         }, | ||||
|         # Wistia embed | ||||
|         { | ||||
|             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', | ||||
|             'md5': '8788b683c777a5cf25621eaf286d0c23', | ||||
|             'info_dict': { | ||||
|                 'id': '1cfaf6b7ea', | ||||
|                 'ext': 'mov', | ||||
|                 'title': 'md5:51364a8d3d009997ba99656004b5e20d', | ||||
|                 'duration': 643.0, | ||||
|                 'filesize': 182808282, | ||||
|                 'uploader': 'education-portal.com', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_download_webpage(self, video_id): | ||||
|         """Report webpage download.""" | ||||
|         if not self._downloader.params.get('test', False): | ||||
|             self._downloader.report_warning('Falling back on generic information extractor.') | ||||
|         super(GenericIE, self).report_download_webpage(video_id) | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
|         """Report information extraction.""" | ||||
|         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url) | ||||
| @@ -489,6 +496,7 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         url, smuggled_data = unsmuggle_url(url) | ||||
|         force_videoid = None | ||||
|         is_intentional = smuggled_data and smuggled_data.get('to_generic') | ||||
|         if smuggled_data and 'force_videoid' in smuggled_data: | ||||
|             force_videoid = smuggled_data['force_videoid'] | ||||
|             video_id = force_videoid | ||||
| @@ -531,6 +539,9 @@ class GenericIE(InfoExtractor): | ||||
|                     'upload_date': upload_date, | ||||
|                 } | ||||
|  | ||||
|         if not self._downloader.params.get('test', False) and not is_intentional: | ||||
|             self._downloader.report_warning('Falling back on generic information extractor.') | ||||
|  | ||||
|         try: | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|         except ValueError: | ||||
| @@ -584,7 +595,9 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Helper method | ||||
|         def _playlist_from_matches(matches, getter, ie=None): | ||||
|             urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches) | ||||
|             urlrs = orderedSet( | ||||
|                 self.url_result(self._proto_relative_url(getter(m)), ie) | ||||
|                 for m in matches) | ||||
|             return self.playlist_result( | ||||
|                 urlrs, playlist_id=video_id, playlist_title=video_title) | ||||
|  | ||||
| @@ -629,11 +642,11 @@ class GenericIE(InfoExtractor): | ||||
|             ) | ||||
|             (["\']) | ||||
|                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ | ||||
|                 (?:embed|v)/.+?) | ||||
|                 (?:embed|v|p)/.+?) | ||||
|             \1''', webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches( | ||||
|                 matches, lambda m: unescapeHTML(m[1]), ie='Youtube') | ||||
|                 matches, lambda m: unescapeHTML(m[1])) | ||||
|  | ||||
|         # Look for embedded Dailymotion player | ||||
|         matches = re.findall( | ||||
| @@ -654,6 +667,16 @@ class GenericIE(InfoExtractor): | ||||
|                 'title': video_title, | ||||
|                 'id': video_id, | ||||
|             } | ||||
|         match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) | ||||
|         if match: | ||||
|             return { | ||||
|                 '_type': 'url_transparent', | ||||
|                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')), | ||||
|                 'ie_key': 'Wistia', | ||||
|                 'uploader': video_uploader, | ||||
|                 'title': video_title, | ||||
|                 'id': match.group('id') | ||||
|             } | ||||
|  | ||||
|         # Look for embedded blip.tv player | ||||
|         mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) | ||||
|   | ||||
							
								
								
									
										71
									
								
								youtube_dl/extractor/golem.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								youtube_dl/extractor/golem.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,71 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GolemIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/' | ||||
|     _TEST = { | ||||
|         'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html', | ||||
|         'md5': 'c1a2c0a3c863319651c7c992c5ee29bf', | ||||
|         'info_dict': { | ||||
|             'id': '14095', | ||||
|             'format_id': 'high', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'iPhone 6 und 6 Plus - Test', | ||||
|             'duration': 300.44, | ||||
|             'filesize': 65309548, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     _PREFIX = 'http://video.golem.de' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         config = self._download_xml( | ||||
|             'https://video.golem.de/xml/{0}.xml'.format(video_id), video_id) | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': config.findtext('./title', 'golem'), | ||||
|             'duration': self._float(config.findtext('./playtime'), 'duration'), | ||||
|         } | ||||
|  | ||||
|         formats = [] | ||||
|         for e in config.findall('./*[url]'): | ||||
|             url = e.findtext('./url') | ||||
|             if not url: | ||||
|                 self._downloader.report_warning( | ||||
|                     "{0}: url: empty, skipping".format(e.tag)) | ||||
|                 continue | ||||
|  | ||||
|             formats.append({ | ||||
|                 'format_id': e.tag, | ||||
|                 'url': compat_urlparse.urljoin(self._PREFIX, url), | ||||
|                 'height': self._int(e.get('height'), 'height'), | ||||
|                 'width': self._int(e.get('width'), 'width'), | ||||
|                 'filesize': self._int(e.findtext('filesize'), 'filesize'), | ||||
|                 'ext': determine_ext(e.findtext('./filename')), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|         info['formats'] = formats | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for e in config.findall('.//teaser[url]'): | ||||
|             url = e.findtext('./url') | ||||
|             if not url: | ||||
|                 continue | ||||
|             thumbnails.append({ | ||||
|                 'url': compat_urlparse.urljoin(self._PREFIX, url), | ||||
|                 'width': self._int(e.get('width'), 'thumbnail width'), | ||||
|                 'height': self._int(e.get('height'), 'thumbnail height'), | ||||
|             }) | ||||
|         info['thumbnails'] = thumbnails | ||||
|  | ||||
|         return info | ||||
							
								
								
									
										81
									
								
								youtube_dl/extractor/heise.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								youtube_dl/extractor/heise.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,81 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     get_meta_content, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class HeiseIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|         https?://(?:www\.)?heise\.de/video/artikel/ | ||||
|         .+?(?P<id>[0-9]+)\.html(?:$|[?#]) | ||||
|     ''' | ||||
|     _TEST = { | ||||
|         'url': ( | ||||
|             'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' | ||||
|         ), | ||||
|         'md5': 'ffed432483e922e88545ad9f2f15d30e', | ||||
|         'info_dict': { | ||||
|             'id': '2404147', | ||||
|             'ext': 'mp4', | ||||
|             'title': ( | ||||
|                 "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" | ||||
|             ), | ||||
|             'format_id': 'mp4_720', | ||||
|             'timestamp': 1411812600, | ||||
|             'upload_date': '20140927', | ||||
|             'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         json_url = self._search_regex( | ||||
|             r'json_url:\s*"([^"]+)"', webpage, 'json URL') | ||||
|         config = self._download_json(json_url, video_id) | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'thumbnail': config.get('poster'), | ||||
|             'timestamp': parse_iso8601(get_meta_content('date', webpage)), | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
|  | ||||
|         title = get_meta_content('fulltitle', webpage) | ||||
|         if title: | ||||
|             info['title'] = title | ||||
|         elif config.get('title'): | ||||
|             info['title'] = config['title'] | ||||
|         else: | ||||
|             info['title'] = self._og_search_title(webpage) | ||||
|  | ||||
|         formats = [] | ||||
|         for t, rs in config['formats'].items(): | ||||
|             if not rs or not hasattr(rs, 'items'): | ||||
|                 self._downloader.report_warning( | ||||
|                     'formats: {0}: no resolutions'.format(t)) | ||||
|                 continue | ||||
|  | ||||
|             for height_str, obj in rs.items(): | ||||
|                 format_id = '{0}_{1}'.format(t, height_str) | ||||
|  | ||||
|                 if not obj or not obj.get('url'): | ||||
|                     self._downloader.report_warning( | ||||
|                         'formats: {0}: no url'.format(format_id)) | ||||
|                     continue | ||||
|  | ||||
|                 formats.append({ | ||||
|                     'url': obj['url'], | ||||
|                     'format_id': format_id, | ||||
|                     'height': self._int(height_str, 'height'), | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|         info['formats'] = formats | ||||
|  | ||||
|         return info | ||||
| @@ -1,7 +1,6 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -23,6 +22,7 @@ class MuenchenTVIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'is_live': True, | ||||
|             'thumbnail': 're:^https?://.*\.jpg$' | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -33,9 +33,7 @@ class MuenchenTVIE(InfoExtractor): | ||||
|         display_id = 'live' | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         now = datetime.datetime.now() | ||||
|         now_str = now.strftime("%Y-%m-%d %H:%M") | ||||
|         title = self._og_search_title(webpage) + ' ' + now_str | ||||
|         title = self._live_title(self._og_search_title(webpage)) | ||||
|  | ||||
|         data_js = self._search_regex( | ||||
|             r'(?s)\nplaylist:\s*(\[.*?}\]),related:', | ||||
| @@ -73,5 +71,6 @@ class MuenchenTVIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'is_live': True, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|  | ||||
|   | ||||
							
								
								
									
										144
									
								
								youtube_dl/extractor/nfl.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										144
									
								
								youtube_dl/extractor/nfl.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,144 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     compat_urllib_parse, | ||||
|     int_or_none, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NFLIE(InfoExtractor): | ||||
|     IE_NAME = 'nfl.com' | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|         (?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/ | ||||
|         (?:.+?/)* | ||||
|         (?P<id>(?:\d[a-z]{2}\d{13}|\w{8}\-(?:\w{4}\-){3}\w{12}))''' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', | ||||
|             'md5': '394ef771ddcd1354f665b471d78ec4c6', | ||||
|             'info_dict': { | ||||
|                 'id': '0ap3000000398478', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Week 3: Redskins vs. Eagles highlights', | ||||
|                 'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', | ||||
|                 'upload_date': '20140921', | ||||
|                 'timestamp': 1411337580, | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266', | ||||
|             'md5': 'cf85bdb4bc49f6e9d3816d130c78279c', | ||||
|             'info_dict': { | ||||
|                 'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'LIVE: Post Game vs. Browns', | ||||
|                 'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8', | ||||
|                 'upload_date': '20131229', | ||||
|                 'timestamp': 1388354455, | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     @staticmethod | ||||
|     def prepend_host(host, url): | ||||
|         if not url.startswith('http'): | ||||
|             if not url.startswith('/'): | ||||
|                 url = '/%s' % url | ||||
|             url = 'http://{0:}{1:}'.format(host, url) | ||||
|         return url | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_from_stream(stream, protocol, host, path_prefix='', | ||||
|                            preference=0, note=None): | ||||
|         url = '{protocol:}://{host:}/{prefix:}{path:}'.format( | ||||
|             protocol=protocol, | ||||
|             host=host, | ||||
|             prefix=path_prefix, | ||||
|             path=stream.get('path'), | ||||
|         ) | ||||
|         return { | ||||
|             'url': url, | ||||
|             'vbr': int_or_none(stream.get('rate', 0), 1000), | ||||
|             'preference': preference, | ||||
|             'format_note': note, | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id, host = mobj.group('id'), mobj.group('host') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         config_url = NFLIE.prepend_host(host, self._search_regex( | ||||
|             r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL')) | ||||
|         config = self._download_json(config_url, video_id, | ||||
|                                      note='Downloading player config') | ||||
|         url_template = NFLIE.prepend_host( | ||||
|             host, '{contentURLTemplate:}'.format(**config)) | ||||
|         video_data = self._download_json( | ||||
|             url_template.format(id=video_id), video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         cdn_data = video_data.get('cdnData', {}) | ||||
|         streams = cdn_data.get('bitrateInfo', []) | ||||
|         if cdn_data.get('format') == 'EXTERNAL_HTTP_STREAM': | ||||
|             parts = compat_urllib_parse.urlparse(cdn_data.get('uri')) | ||||
|             protocol, host = parts.scheme, parts.netloc | ||||
|             for stream in streams: | ||||
|                 formats.append( | ||||
|                     NFLIE.format_from_stream(stream, protocol, host)) | ||||
|         else: | ||||
|             cdns = config.get('cdns') | ||||
|             if not cdns: | ||||
|                 raise ExtractorError('Failed to get CDN data', expected=True) | ||||
|  | ||||
|             for name, cdn in cdns.items(): | ||||
|                 # LimeLight streams don't seem to work | ||||
|                 if cdn.get('name') == 'LIMELIGHT': | ||||
|                     continue | ||||
|  | ||||
|                 protocol = cdn.get('protocol') | ||||
|                 host = remove_end(cdn.get('host', ''), '/') | ||||
|                 if not (protocol and host): | ||||
|                     continue | ||||
|  | ||||
|                 prefix = cdn.get('pathprefix', '') | ||||
|                 if prefix and not prefix.endswith('/'): | ||||
|                     prefix = '%s/' % prefix | ||||
|  | ||||
|                 preference = 0 | ||||
|                 if protocol == 'rtmp': | ||||
|                     preference = -2 | ||||
|                 elif 'prog' in name.lower(): | ||||
|                     preference = 1 | ||||
|  | ||||
|                 for stream in streams: | ||||
|                     formats.append( | ||||
|                         NFLIE.format_from_stream(stream, protocol, host, | ||||
|                                                  prefix, preference, name)) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnail = None | ||||
|         for q in ('xl', 'l', 'm', 's', 'xs'): | ||||
|             thumbnail = video_data.get('imagePaths', {}).get(q) | ||||
|             if thumbnail: | ||||
|                 break | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_data.get('headline'), | ||||
|             'formats': formats, | ||||
|             'description': video_data.get('caption'), | ||||
|             'duration': video_data.get('duration'), | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': int_or_none(video_data.get('posted'), 1000), | ||||
|         } | ||||
| @@ -62,7 +62,7 @@ class NocoIE(InfoExtractor): | ||||
|  | ||||
|     def _call_api(self, path, video_id, note): | ||||
|         ts = compat_str(int(time.time() * 1000)) | ||||
|         tk = hashlib.md5(hashlib.md5(ts).hexdigest() + '#8S?uCraTedap6a').hexdigest() | ||||
|         tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest() | ||||
|         url = self._API_URL_TEMPLATE % (path, ts, tk) | ||||
|  | ||||
|         resp = self._download_json(url, video_id, note) | ||||
|   | ||||
							
								
								
									
										47
									
								
								youtube_dl/extractor/oktoberfesttv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								youtube_dl/extractor/oktoberfesttv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class OktoberfestTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt', | ||||
|         'info_dict': { | ||||
|             'id': 'hb-zelt', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._live_title(self._html_search_regex( | ||||
|             r'<h1><strong>.*?</strong>(.*?)</h1>', webpage, 'title')) | ||||
|  | ||||
|         clip = self._search_regex( | ||||
|             r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip') | ||||
|         ncurl = self._search_regex( | ||||
|             r"netConnectionUrl:\s*'([^']+)'", webpage, 'rtmp base') | ||||
|         video_url = ncurl + clip | ||||
|         thumbnail = self._search_regex( | ||||
|             r"canvas:\s*\{\s*backgroundImage:\s*'url\(([^)]+)\)'", webpage, | ||||
|             'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'is_live': True, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
							
								
								
									
										55
									
								
								youtube_dl/extractor/played.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								youtube_dl/extractor/played.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import os.path | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class PlayedIE(InfoExtractor): | ||||
|     IE_NAME = 'played.to' | ||||
|     _VALID_URL = r'https?://(?:www\.)?played\.to/(?P<id>[a-zA-Z0-9_-]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://played.to/j2f2sfiiukgt', | ||||
|         'md5': 'c2bd75a368e82980e7257bf500c00637', | ||||
|         'info_dict': { | ||||
|             'id': 'j2f2sfiiukgt', | ||||
|             'ext': 'flv', | ||||
|             'title': 'youtube-dl_test_video.mp4', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         orig_webpage = self._download_webpage(url, video_id) | ||||
|         fields = re.findall( | ||||
|             r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage) | ||||
|         data = dict(fields) | ||||
|  | ||||
|         self._sleep(2, video_id) | ||||
|  | ||||
|         post = compat_urllib_parse.urlencode(data) | ||||
|         headers = { | ||||
|             b'Content-Type': b'application/x-www-form-urlencoded', | ||||
|         } | ||||
|         req = compat_urllib_request.Request(url, post, headers) | ||||
|         webpage = self._download_webpage( | ||||
|             req, video_id, note='Downloading video page ...') | ||||
|  | ||||
|         title = os.path.splitext(data['fname'])[0] | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'file: "?(.+?)",', webpage, 'video URL') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|         } | ||||
							
								
								
									
										92
									
								
								youtube_dl/extractor/sport5.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								youtube_dl/extractor/sport5.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,92 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class Sport5IE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1', | ||||
|             'info_dict': { | ||||
|                 'id': 's5-Y59xx1-GUh2', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'ולנסיה-קורדובה 0:3', | ||||
|                 'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה', | ||||
|                 'duration': 228, | ||||
|                 'categories': list, | ||||
|             }, | ||||
|             'skip': 'Blocked outside of Israel', | ||||
|         }, { | ||||
|             'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE', | ||||
|             'info_dict': { | ||||
|                 'id': 's5-SiXxx1-hKh2', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'GOALS_CELTIC_270914.mp4', | ||||
|                 'description': '', | ||||
|                 'duration': 87, | ||||
|                 'categories': list, | ||||
|             }, | ||||
|             'skip': 'Blocked outside of Israel', | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         media_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, media_id) | ||||
|  | ||||
|         video_id = self._html_search_regex('clipId=([\w-]+)', webpage, 'video id') | ||||
|  | ||||
|         metadata = self._download_xml( | ||||
|             'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id, | ||||
|             video_id) | ||||
|  | ||||
|         error = metadata.find('./Error') | ||||
|         if error is not None: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s - %s' % ( | ||||
|                     self.IE_NAME, | ||||
|                     error.find('./Name').text, | ||||
|                     error.find('./Description').text), | ||||
|                 expected=True) | ||||
|  | ||||
|         title = metadata.find('./Title').text | ||||
|         description = metadata.find('./Description').text | ||||
|         duration = int(metadata.find('./Duration').text) | ||||
|  | ||||
|         posters_el = metadata.find('./PosterLinks') | ||||
|         thumbnails = [{ | ||||
|             'url': thumbnail.text, | ||||
|             'width': int(thumbnail.get('width')), | ||||
|             'height': int(thumbnail.get('height')), | ||||
|         } for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else [] | ||||
|  | ||||
|         categories_el = metadata.find('./Categories') | ||||
|         categories = [ | ||||
|             cat.get('name') for cat in categories_el.findall('./Category') | ||||
|         ] if categories_el is not None else [] | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': fmt.text, | ||||
|             'ext': 'mp4', | ||||
|             'vbr': int(fmt.get('bitrate')), | ||||
|             'width': int(fmt.get('width')), | ||||
|             'height': int(fmt.get('height')), | ||||
|         } for fmt in metadata.findall('./PlaybackLinks/FileURL')] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnails': thumbnails, | ||||
|             'duration': duration, | ||||
|             'categories': categories, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -26,8 +26,7 @@ class THVideoIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         # extract download link from mobile player page | ||||
|         webpage_player = self._download_webpage( | ||||
| @@ -57,3 +56,29 @@ class THVideoIE(InfoExtractor): | ||||
|             'description': description, | ||||
|             'upload_date': upload_date | ||||
|         } | ||||
|  | ||||
|  | ||||
| class THVideoPlaylistIE(InfoExtractor): | ||||
|     _VALID_URL = r'http?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://thvideo.tv/mylist2', | ||||
|         'info_dict': { | ||||
|             'id': '2', | ||||
|             'title': '幻想万華鏡', | ||||
|         }, | ||||
|         'playlist_mincount': 23, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|         list_title = self._html_search_regex( | ||||
|             r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title', | ||||
|             fatal=False) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result('http://thvideo.tv/v/th' + id, 'THVideo') | ||||
|             for id in re.findall(r'<dd><a href="http://thvideo.tv/v/th(\d+)/" target=', webpage)] | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, list_title) | ||||
|   | ||||
| @@ -19,7 +19,7 @@ class Vbox7IE(InfoExtractor): | ||||
|         'md5': '99f65c0c9ef9b682b97313e052734c3f', | ||||
|         'info_dict': { | ||||
|             'id': '249bb972c2', | ||||
|             'ext': 'flv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Смях! Чудо - чист за секунди - Скрита камера', | ||||
|         }, | ||||
|     } | ||||
| @@ -50,7 +50,6 @@ class Vbox7IE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': final_url, | ||||
|             'ext': 'flv', | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail_url, | ||||
|         } | ||||
|   | ||||
| @@ -5,7 +5,7 @@ import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_HTTPError, | ||||
|     compat_urllib_request, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| @@ -24,7 +24,7 @@ class VevoIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', | ||||
|         "md5": "06bea460acb744eab74a9d7dcb4bfd61", | ||||
|         "md5": "95ee28ee45e70130e3ab02b0f579ae23", | ||||
|         'info_dict': { | ||||
|             'id': 'GB1101300280', | ||||
|             'ext': 'mp4', | ||||
| @@ -40,7 +40,7 @@ class VevoIE(InfoExtractor): | ||||
|     }, { | ||||
|         'note': 'v3 SMIL format', | ||||
|         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', | ||||
|         'md5': '893ec0e0d4426a1d96c01de8f2bdff58', | ||||
|         'md5': 'f6ab09b034f8c22969020b042e5ac7fc', | ||||
|         'info_dict': { | ||||
|             'id': 'USUV71302923', | ||||
|             'ext': 'mp4', | ||||
| @@ -69,6 +69,21 @@ class VevoIE(InfoExtractor): | ||||
|     }] | ||||
|     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         req = compat_urllib_request.Request( | ||||
|             'http://www.vevo.com/auth', data=b'') | ||||
|         webpage = self._download_webpage( | ||||
|             req, None, | ||||
|             note='Retrieving oauth token', | ||||
|             errnote='Unable to retrieve oauth token', | ||||
|             fatal=False) | ||||
|         if webpage is False: | ||||
|             self._oauth_token = None | ||||
|         else: | ||||
|             self._oauth_token = self._search_regex( | ||||
|                 r'access_token":\s*"([^"]+)"', | ||||
|                 webpage, 'access token', fatal=False) | ||||
|  | ||||
|     def _formats_from_json(self, video_info): | ||||
|         last_version = {'version': -1} | ||||
|         for version in video_info['videoVersions']: | ||||
| @@ -129,6 +144,26 @@ class VevoIE(InfoExtractor): | ||||
|             }) | ||||
|         return formats | ||||
|  | ||||
|     def _download_api_formats(self, video_id): | ||||
|         if not self._oauth_token: | ||||
|             self._downloader.report_warning( | ||||
|                 'No oauth token available, skipping API HLS download') | ||||
|             return [] | ||||
|  | ||||
|         api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % ( | ||||
|             video_id, self._oauth_token) | ||||
|         api_data = self._download_json( | ||||
|             api_url, video_id, | ||||
|             note='Downloading HLS formats', | ||||
|             errnote='Failed to download HLS format list', fatal=False) | ||||
|         if api_data is None: | ||||
|             return [] | ||||
|  | ||||
|         m3u8_url = api_data[0]['url'] | ||||
|         return self._extract_m3u8_formats( | ||||
|             m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4', | ||||
|             preference=0) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
| @@ -152,30 +187,8 @@ class VevoIE(InfoExtractor): | ||||
|         else: | ||||
|             age_limit = None | ||||
|  | ||||
|         # Download SMIL | ||||
|         smil_blocks = sorted(( | ||||
|             f for f in video_info['videoVersions'] | ||||
|             if f['sourceType'] == 13), | ||||
|             key=lambda f: f['version']) | ||||
|  | ||||
|         smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( | ||||
|             self._SMIL_BASE_URL, video_id, video_id.lower()) | ||||
|         if smil_blocks: | ||||
|             smil_url_m = self._search_regex( | ||||
|                 r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL', | ||||
|                 fatal=False) | ||||
|             if smil_url_m is not None: | ||||
|                 smil_url = smil_url_m | ||||
|  | ||||
|         try: | ||||
|             smil_xml = self._download_webpage(smil_url, video_id, | ||||
|                                               'Downloading SMIL info') | ||||
|             formats.extend(self._formats_from_smil(smil_xml)) | ||||
|         except ExtractorError as ee: | ||||
|             if not isinstance(ee.cause, compat_HTTPError): | ||||
|                 raise | ||||
|             self._downloader.report_warning( | ||||
|                 'Cannot download SMIL information, falling back to JSON ..') | ||||
|         # Download via HLS API | ||||
|         formats.extend(self._download_api_formats(video_id)) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|         timestamp_ms = int(self._search_regex( | ||||
|   | ||||
| @@ -8,17 +8,19 @@ import itertools | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     compat_HTTPError, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     clean_html, | ||||
|     get_element_by_attribute, | ||||
|     compat_urlparse, | ||||
|     ExtractorError, | ||||
|     get_element_by_attribute, | ||||
|     InAdvancePagedList, | ||||
|     int_or_none, | ||||
|     RegexNotFoundError, | ||||
|     std_headers, | ||||
|     unsmuggle_url, | ||||
|     urlencode_postdata, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -529,3 +531,58 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater') | ||||
|  | ||||
|  | ||||
| class VimeoLikesIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)' | ||||
|     IE_NAME = 'vimeo:likes' | ||||
|     IE_DESC = 'Vimeo user likes' | ||||
|     _TEST = { | ||||
|         'url': 'https://vimeo.com/user755559/likes/', | ||||
|         'playlist_mincount': 293, | ||||
|         "info_dict": { | ||||
|             "description": "See all the videos urza likes", | ||||
|             "title": 'Videos urza likes', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         user_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, user_id) | ||||
|         page_count = self._int( | ||||
|             self._search_regex( | ||||
|                 r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)"> | ||||
|                     .*?</a></li>\s*<li\s+class="pagination_next"> | ||||
|                 ''', webpage, 'page count'), | ||||
|             'page count', fatal=True) | ||||
|         PAGE_SIZE = 12 | ||||
|         title = self._html_search_regex( | ||||
|             r'(?s)<h1>(.+?)</h1>', webpage, 'title', fatal=False) | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|  | ||||
|         def _get_page(idx): | ||||
|             page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % ( | ||||
|                 self.http_scheme(), user_id, idx + 1) | ||||
|             webpage = self._download_webpage( | ||||
|                 page_url, user_id, | ||||
|                 note='Downloading page %d/%d' % (idx + 1, page_count)) | ||||
|             video_list = self._search_regex( | ||||
|                 r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>', | ||||
|                 webpage, 'video content') | ||||
|             paths = re.findall( | ||||
|                 r'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list) | ||||
|             for path in paths: | ||||
|                 yield { | ||||
|                     '_type': 'url', | ||||
|                     'url': compat_urlparse.urljoin(page_url, path), | ||||
|                 } | ||||
|  | ||||
|         pl = InAdvancePagedList(_get_page, page_count, PAGE_SIZE) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': 'user%s_likes' % user_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'entries': pl, | ||||
|         } | ||||
|   | ||||
| @@ -6,6 +6,7 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     compat_str, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -16,6 +17,24 @@ class VubeIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://vube.com/trending/William+Wei/Y8NUZ69Tf7?t=s', | ||||
|             'md5': 'e7aabe1f8f1aa826b9e4735e1f9cee42', | ||||
|             'info_dict': { | ||||
|                 'id': 'Y8NUZ69Tf7', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Best Drummer Ever [HD]', | ||||
|                 'description': 'md5:2d63c4b277b85c2277761c2cf7337d71', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|                 'uploader': 'William', | ||||
|                 'timestamp': 1406876915, | ||||
|                 'upload_date': '20140801', | ||||
|                 'duration': 258.051, | ||||
|                 'like_count': int, | ||||
|                 'dislike_count': int, | ||||
|                 'comment_count': int, | ||||
|                 'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'], | ||||
|             }, | ||||
|         }, { | ||||
|             'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', | ||||
|             'md5': 'db7aba89d4603dadd627e9d1973946fe', | ||||
|             'info_dict': { | ||||
| @@ -32,7 +51,8 @@ class VubeIE(InfoExtractor): | ||||
|                 'dislike_count': int, | ||||
|                 'comment_count': int, | ||||
|                 'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'], | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'Removed due to DMCA', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1', | ||||
| @@ -51,7 +71,8 @@ class VubeIE(InfoExtractor): | ||||
|                 'dislike_count': int, | ||||
|                 'comment_count': int, | ||||
|                 'categories': ['seraina', 'jessica', 'krewella', 'alive'], | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'Removed due to DMCA', | ||||
|         }, { | ||||
|             'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s', | ||||
|             'md5': '0584fc13b50f887127d9d1007589d27f', | ||||
| @@ -69,7 +90,8 @@ class VubeIE(InfoExtractor): | ||||
|                 'dislike_count': int, | ||||
|                 'comment_count': int, | ||||
|                 'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'], | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'Removed due to DMCA', | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -102,6 +124,11 @@ class VubeIE(InfoExtractor): | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         if not formats and video.get('vst') == 'dmca': | ||||
|             raise ExtractorError( | ||||
|                 'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.', | ||||
|                 expected=True) | ||||
|  | ||||
|         title = video['title'] | ||||
|         description = video.get('description') | ||||
|         thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:') | ||||
|   | ||||
| @@ -5,7 +5,10 @@ import re | ||||
| import hashlib | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class WatIE(InfoExtractor): | ||||
| @@ -37,6 +40,7 @@ class WatIE(InfoExtractor): | ||||
|                 'upload_date': '20140816', | ||||
|                 'duration': 2910, | ||||
|             }, | ||||
|             'skip': "Ce contenu n'est pas disponible pour l'instant.", | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
| @@ -57,6 +61,11 @@ class WatIE(InfoExtractor): | ||||
|  | ||||
|         video_info = self.download_video_info(real_id) | ||||
|  | ||||
|         error_desc = video_info.get('error_desc') | ||||
|         if error_desc: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True) | ||||
|  | ||||
|         geo_list = video_info.get('geoList') | ||||
|         country = geo_list[0] if geo_list else '' | ||||
|  | ||||
|   | ||||
| @@ -1,13 +1,14 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError, compat_urllib_request | ||||
|  | ||||
|  | ||||
| class WistiaIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' | ||||
|     _API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', | ||||
| @@ -24,11 +25,13 @@ class WistiaIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         data_json = self._html_search_regex( | ||||
|             r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data') | ||||
|  | ||||
|         data = json.loads(data_json) | ||||
|         request = compat_urllib_request.Request(self._API_URL.format(video_id)) | ||||
|         request.add_header('Referer', url)  # Some videos require this. | ||||
|         data_json = self._download_json(request, video_id) | ||||
|         if data_json.get('error'): | ||||
|             raise ExtractorError('Error while getting the playlist', | ||||
|                                  expected=True) | ||||
|         data = data_json['media'] | ||||
|  | ||||
|         formats = [] | ||||
|         thumbnails = [] | ||||
|   | ||||
							
								
								
									
										54
									
								
								youtube_dl/extractor/ynet.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								youtube_dl/extractor/ynet.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_parse | ||||
|  | ||||
|  | ||||
| class YnetIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', | ||||
|             'md5': '002b44ee2f33d50363a1c153bed524cf', | ||||
|             'info_dict': { | ||||
|                 'id': 'L-11659-99244', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'איש לא יודע מאיפה באנו', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|             } | ||||
|         }, { | ||||
|             'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html', | ||||
|             'md5': '6455046ae1b48cf7e2b7cae285e53a16', | ||||
|             'info_dict': { | ||||
|                 'id': 'L-8859-84418', | ||||
|                 'ext': 'flv', | ||||
|                 'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין", | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|          | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage)) | ||||
|         config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config')) | ||||
|         f4m_url = config['clip']['url'] | ||||
|         title = self._og_search_title(webpage) | ||||
|         m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title) | ||||
|         if m: | ||||
|             title = m.group('title') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': self._extract_f4m_formats(f4m_url, video_id), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|         } | ||||
| @@ -1,6 +1,7 @@ | ||||
| # coding: utf-8 | ||||
|  | ||||
| import json | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import math | ||||
| import random | ||||
| import re | ||||
| @@ -13,18 +14,25 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class YoukuIE(InfoExtractor): | ||||
|     _VALID_URL =  r'(?:(?:http://)?(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|youku:)(?P<ID>[A-Za-z0-9]+)(?:\.html|/v\.swf|)' | ||||
|     _TEST =   { | ||||
|         u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html", | ||||
|         u"file": u"XNDgyMDQ2NTQw_part00.flv", | ||||
|         u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b", | ||||
|         u"params": {u"test": False}, | ||||
|         u"info_dict": { | ||||
|             u"title": u"youtube-dl test video \"'/\\ä↭𝕐" | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?: | ||||
|             http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| | ||||
|             youku:) | ||||
|         (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|) | ||||
|     ''' | ||||
|     _TEST = { | ||||
|         'url': 'http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html', | ||||
|         'md5': 'ffe3f2e435663dc2d1eea34faeff5b5b', | ||||
|         'params': { | ||||
|             'test': False | ||||
|         }, | ||||
|         'info_dict': { | ||||
|             'id': 'XNDgyMDQ2NTQw_part00', | ||||
|             'ext': 'flv', | ||||
|             'title': 'youtube-dl test video "\'/\\ä↭𝕐' | ||||
|         } | ||||
|     } | ||||
|  | ||||
|  | ||||
|     def _gen_sid(self): | ||||
|         nowTime = int(time.time() * 1000) | ||||
|         random1 = random.randint(1000,1998) | ||||
| @@ -55,49 +63,42 @@ class YoukuIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group('ID') | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id | ||||
|  | ||||
|         jsondata = self._download_webpage(info_url, video_id) | ||||
|         config = self._download_json(info_url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         try: | ||||
|             config = json.loads(jsondata) | ||||
|             error_code = config['data'][0].get('error_code') | ||||
|             if error_code: | ||||
|                 # -8 means blocked outside China. | ||||
|                 error = config['data'][0].get('error')  # Chinese and English, separated by newline. | ||||
|                 raise ExtractorError(error or u'Server reported error %i' % error_code, | ||||
|                     expected=True) | ||||
|         error_code = config['data'][0].get('error_code') | ||||
|         if error_code: | ||||
|             # -8 means blocked outside China. | ||||
|             error = config['data'][0].get('error')  # Chinese and English, separated by newline. | ||||
|             raise ExtractorError(error or 'Server reported error %i' % error_code, | ||||
|                 expected=True) | ||||
|  | ||||
|             video_title =  config['data'][0]['title'] | ||||
|             seed = config['data'][0]['seed'] | ||||
|         video_title = config['data'][0]['title'] | ||||
|         seed = config['data'][0]['seed'] | ||||
|  | ||||
|             format = self._downloader.params.get('format', None) | ||||
|             supported_format = list(config['data'][0]['streamfileids'].keys()) | ||||
|         format = self._downloader.params.get('format', None) | ||||
|         supported_format = list(config['data'][0]['streamfileids'].keys()) | ||||
|  | ||||
|             if format is None or format == 'best': | ||||
|                 if 'hd2' in supported_format: | ||||
|                     format = 'hd2' | ||||
|                 else: | ||||
|                     format = 'flv' | ||||
|                 ext = u'flv' | ||||
|             elif format == 'worst': | ||||
|                 format = 'mp4' | ||||
|                 ext = u'mp4' | ||||
|         # TODO proper format selection | ||||
|         if format is None or format == 'best': | ||||
|             if 'hd2' in supported_format: | ||||
|                 format = 'hd2' | ||||
|             else: | ||||
|                 format = 'flv' | ||||
|                 ext = u'flv' | ||||
|             ext = 'flv' | ||||
|         elif format == 'worst': | ||||
|             format = 'mp4' | ||||
|             ext = 'mp4' | ||||
|         else: | ||||
|             format = 'flv' | ||||
|             ext = 'flv' | ||||
|  | ||||
|  | ||||
|             fileid = config['data'][0]['streamfileids'][format] | ||||
|             keys = [s['k'] for s in config['data'][0]['segs'][format]] | ||||
|             # segs is usually a dictionary, but an empty *list* if an error occured. | ||||
|         except (UnicodeDecodeError, ValueError, KeyError): | ||||
|             raise ExtractorError(u'Unable to extract info section') | ||||
|         fileid = config['data'][0]['streamfileids'][format] | ||||
|         keys = [s['k'] for s in config['data'][0]['segs'][format]] | ||||
|         # segs is usually a dictionary, but an empty *list* if an error occured. | ||||
|  | ||||
|         files_info=[] | ||||
|         sid = self._gen_sid() | ||||
| @@ -106,9 +107,8 @@ class YoukuIE(InfoExtractor): | ||||
|         #column 8,9 of fileid represent the segment number | ||||
|         #fileid[7:9] should be changed | ||||
|         for index, key in enumerate(keys): | ||||
|  | ||||
|             temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:]) | ||||
|             download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) | ||||
|             download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) | ||||
|  | ||||
|             info = { | ||||
|                 'id': '%s_part%02d' % (video_id, index), | ||||
|   | ||||
| @@ -26,7 +26,7 @@ from ..utils import ( | ||||
|     get_element_by_attribute, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     PagedList, | ||||
|     OnDemandPagedList, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     orderedSet, | ||||
| @@ -46,7 +46,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|     def _set_language(self): | ||||
|         return bool(self._download_webpage( | ||||
|             self._LANG_URL, None, | ||||
|             note=u'Setting language', errnote='unable to set language', | ||||
|             note='Setting language', errnote='unable to set language', | ||||
|             fatal=False)) | ||||
|  | ||||
|     def _login(self): | ||||
| @@ -61,13 +61,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|         # No authentication to be performed | ||||
|         if username is None: | ||||
|             if self._LOGIN_REQUIRED: | ||||
|                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) | ||||
|                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) | ||||
|             return True | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             self._LOGIN_URL, None, | ||||
|             note=u'Downloading login page', | ||||
|             errnote=u'unable to fetch login page', fatal=False) | ||||
|             note='Downloading login page', | ||||
|             errnote='unable to fetch login page', fatal=False) | ||||
|         if login_page is False: | ||||
|             return | ||||
|  | ||||
| @@ -105,12 +105,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|         req = compat_urllib_request.Request(self._LOGIN_URL, login_data) | ||||
|         login_results = self._download_webpage( | ||||
|             req, None, | ||||
|             note=u'Logging in', errnote=u'unable to log in', fatal=False) | ||||
|             note='Logging in', errnote='unable to log in', fatal=False) | ||||
|         if login_results is False: | ||||
|             return False | ||||
|  | ||||
|         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None: | ||||
|             raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True) | ||||
|             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True) | ||||
|  | ||||
|         # Two-Factor | ||||
|         # TODO add SMS and phone call support - these require making a request and then prompting the user | ||||
| @@ -119,19 +119,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|             tfa_code = self._get_tfa_info() | ||||
|  | ||||
|             if tfa_code is None: | ||||
|                 self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>') | ||||
|                 self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)') | ||||
|                 self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>') | ||||
|                 self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)') | ||||
|                 return False | ||||
|  | ||||
|             # Unlike the first login form, secTok and timeStmp are both required for the TFA form | ||||
|  | ||||
|             match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U) | ||||
|             if match is None: | ||||
|                 self._downloader.report_warning(u'Failed to get secTok - did the page structure change?') | ||||
|                 self._downloader.report_warning('Failed to get secTok - did the page structure change?') | ||||
|             secTok = match.group(1) | ||||
|             match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U) | ||||
|             if match is None: | ||||
|                 self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?') | ||||
|                 self._downloader.report_warning('Failed to get timeStmp - did the page structure change?') | ||||
|             timeStmp = match.group(1) | ||||
|  | ||||
|             tfa_form_strs = { | ||||
| @@ -155,23 +155,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|             tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data) | ||||
|             tfa_results = self._download_webpage( | ||||
|                 tfa_req, None, | ||||
|                 note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False) | ||||
|                 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False) | ||||
|  | ||||
|             if tfa_results is False: | ||||
|                 return False | ||||
|  | ||||
|             if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None: | ||||
|                 self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.') | ||||
|                 self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.') | ||||
|                 return False | ||||
|             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None: | ||||
|                 self._downloader.report_warning(u'unable to log in - did the page structure change?') | ||||
|                 self._downloader.report_warning('unable to log in - did the page structure change?') | ||||
|                 return False | ||||
|             if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None: | ||||
|                 self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.') | ||||
|                 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.') | ||||
|                 return False | ||||
|  | ||||
|         if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: | ||||
|             self._downloader.report_warning(u'unable to log in: bad username or password') | ||||
|             self._downloader.report_warning('unable to log in: bad username or password') | ||||
|             return False | ||||
|         return True | ||||
|  | ||||
| @@ -185,7 +185,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|  | ||||
|         self._download_webpage( | ||||
|             req, None, | ||||
|             note=u'Confirming age', errnote=u'Unable to confirm age') | ||||
|             note='Confirming age', errnote='Unable to confirm age') | ||||
|         return True | ||||
|  | ||||
|     def _real_initialize(self): | ||||
| @@ -211,7 +211,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains | ||||
|                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls | ||||
|                          (?:                                                  # the various things that can precede the ID: | ||||
|                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ | ||||
|                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/ | ||||
|                              |(?:                                             # or the v= param in all its forms | ||||
|                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx) | ||||
|                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #! | ||||
| @@ -307,69 +307,74 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     IE_NAME = 'youtube' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc", | ||||
|             u"file":  u"BaW_jenozKc.mp4", | ||||
|             u"info_dict": { | ||||
|                 u"title": u"youtube-dl test video \"'/\\ä↭𝕐", | ||||
|                 u"uploader": u"Philipp Hagemeister", | ||||
|                 u"uploader_id": u"phihag", | ||||
|                 u"upload_date": u"20121002", | ||||
|                 u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .", | ||||
|                 u"categories": [u'Science & Technology'], | ||||
|             'url': 'http://www.youtube.com/watch?v=BaW_jenozKc', | ||||
|             'info_dict': { | ||||
|                 'id': 'BaW_jenozKc', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'youtube-dl test video "\'/\\ä↭𝕐', | ||||
|                 'uploader': 'Philipp Hagemeister', | ||||
|                 'uploader_id': 'phihag', | ||||
|                 'upload_date': '20121002', | ||||
|                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', | ||||
|                 'categories': ['Science & Technology'], | ||||
|                 'like_count': int, | ||||
|                 'dislike_count': int, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY", | ||||
|             u"file":  u"UxxajLWwzqY.mp4", | ||||
|             u"note": u"Test generic use_cipher_signature video (#897)", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": u"20120506", | ||||
|                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", | ||||
|                 u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f", | ||||
|                 u"uploader": u"Icona Pop", | ||||
|                 u"uploader_id": u"IconaPop" | ||||
|             'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY', | ||||
|             'note': 'Test generic use_cipher_signature video (#897)', | ||||
|             'info_dict': { | ||||
|                 'id': 'UxxajLWwzqY', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20120506', | ||||
|                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', | ||||
|                 'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f', | ||||
|                 'uploader': 'Icona Pop', | ||||
|                 'uploader_id': 'IconaPop', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ", | ||||
|             u"file":  u"07FYdnEawAQ.mp4", | ||||
|             u"note": u"Test VEVO video with age protection (#956)", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": u"20130703", | ||||
|                 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)", | ||||
|                 u"description": u"md5:64249768eec3bc4276236606ea996373", | ||||
|                 u"uploader": u"justintimberlakeVEVO", | ||||
|                 u"uploader_id": u"justintimberlakeVEVO" | ||||
|             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ', | ||||
|             'note': 'Test VEVO video with age protection (#956)', | ||||
|             'info_dict': { | ||||
|                 'id': '07FYdnEawAQ', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20130703', | ||||
|                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', | ||||
|                 'description': 'md5:64249768eec3bc4276236606ea996373', | ||||
|                 'uploader': 'justintimberlakeVEVO', | ||||
|                 'uploader_id': 'justintimberlakeVEVO', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ", | ||||
|             u"file":  u"yZIXLfi8CZQ.mp4", | ||||
|             u"note": u"Embed-only video (#1746)", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": u"20120608", | ||||
|                 u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012", | ||||
|                 u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7", | ||||
|                 u"uploader": u"SET India", | ||||
|                 u"uploader_id": u"setindia" | ||||
|             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', | ||||
|             'note': 'Embed-only video (#1746)', | ||||
|             'info_dict': { | ||||
|                 'id': 'yZIXLfi8CZQ', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20120608', | ||||
|                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', | ||||
|                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', | ||||
|                 'uploader': 'SET India', | ||||
|                 'uploader_id': 'setindia' | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I", | ||||
|             u"file": u"a9LDPn-MO4I.m4a", | ||||
|             u"note": u"256k DASH audio (format 141) via DASH manifest", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": "20121002", | ||||
|                 u"uploader_id": "8KVIDEO", | ||||
|                 u"description": '', | ||||
|                 u"uploader": "8KVIDEO", | ||||
|                 u"title": "UHDTV TEST 8K VIDEO.mp4" | ||||
|             'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', | ||||
|             'note': '256k DASH audio (format 141) via DASH manifest', | ||||
|             'info_dict': { | ||||
|                 'id': 'a9LDPn-MO4I', | ||||
|                 'ext': 'm4a', | ||||
|                 'upload_date': '20121002', | ||||
|                 'uploader_id': '8KVIDEO', | ||||
|                 'description': '', | ||||
|                 'uploader': '8KVIDEO', | ||||
|                 'title': 'UHDTV TEST 8K VIDEO.mp4' | ||||
|             }, | ||||
|             u"params": { | ||||
|                 u"youtube_include_dash_manifest": True, | ||||
|                 u"format": "141", | ||||
|             'params': { | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '141', | ||||
|             }, | ||||
|         }, | ||||
|         # DASH manifest with encrypted signature | ||||
| @@ -384,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 'uploader_id': 'AfrojackVEVO', | ||||
|                 'upload_date': '20131011', | ||||
|             }, | ||||
|             u"params": { | ||||
|             'params': { | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '141', | ||||
|             }, | ||||
| @@ -397,19 +402,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|     def report_video_info_webpage_download(self, video_id): | ||||
|         """Report attempt to download video info webpage.""" | ||||
|         self.to_screen(u'%s: Downloading video info webpage' % video_id) | ||||
|         self.to_screen('%s: Downloading video info webpage' % video_id) | ||||
|  | ||||
|     def report_information_extraction(self, video_id): | ||||
|         """Report attempt to extract video information.""" | ||||
|         self.to_screen(u'%s: Extracting video information' % video_id) | ||||
|         self.to_screen('%s: Extracting video information' % video_id) | ||||
|  | ||||
|     def report_unavailable_format(self, video_id, format): | ||||
|         """Report extracted video URL.""" | ||||
|         self.to_screen(u'%s: Format %s not available' % (video_id, format)) | ||||
|         self.to_screen('%s: Format %s not available' % (video_id, format)) | ||||
|  | ||||
|     def report_rtmp_download(self): | ||||
|         """Indicate the download will use the RTMP protocol.""" | ||||
|         self.to_screen(u'RTMP download detected') | ||||
|         self.to_screen('RTMP download detected') | ||||
|  | ||||
|     def _signature_cache_id(self, example_sig): | ||||
|         """ Return a string representation of a signature """ | ||||
| @@ -429,21 +434,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             player_type, player_id, self._signature_cache_id(example_sig)) | ||||
|         assert os.path.basename(func_id) == func_id | ||||
|  | ||||
|         cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id) | ||||
|         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id) | ||||
|         if cache_spec is not None: | ||||
|             return lambda s: ''.join(s[i] for i in cache_spec) | ||||
|  | ||||
|         if player_type == 'js': | ||||
|             code = self._download_webpage( | ||||
|                 player_url, video_id, | ||||
|                 note=u'Downloading %s player %s' % (player_type, player_id), | ||||
|                 errnote=u'Download of %s failed' % player_url) | ||||
|                 note='Downloading %s player %s' % (player_type, player_id), | ||||
|                 errnote='Download of %s failed' % player_url) | ||||
|             res = self._parse_sig_js(code) | ||||
|         elif player_type == 'swf': | ||||
|             urlh = self._request_webpage( | ||||
|                 player_url, video_id, | ||||
|                 note=u'Downloading %s player %s' % (player_type, player_id), | ||||
|                 errnote=u'Download of %s failed' % player_url) | ||||
|                 note='Downloading %s player %s' % (player_type, player_id), | ||||
|                 errnote='Download of %s failed' % player_url) | ||||
|             code = urlh.read() | ||||
|             res = self._parse_sig_swf(code) | ||||
|         else: | ||||
| @@ -454,15 +459,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             cache_res = res(test_string) | ||||
|             cache_spec = [ord(c) for c in cache_res] | ||||
|  | ||||
|         self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec) | ||||
|         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec) | ||||
|         return res | ||||
|  | ||||
|     def _print_sig_code(self, func, example_sig): | ||||
|         def gen_sig_code(idxs): | ||||
|             def _genslice(start, end, step): | ||||
|                 starts = '' if start == 0 else str(start) | ||||
|                 ends = (u':%d' % (end+step)) if end + step >= 0 else ':' | ||||
|                 steps = '' if step == 1 else (u':%d' % step) | ||||
|                 ends = (':%d' % (end+step)) if end + step >= 0 else ':' | ||||
|                 steps = '' if step == 1 else (':%d' % step) | ||||
|                 return 's[%s%s%s]' % (starts, ends, steps) | ||||
|  | ||||
|             step = None | ||||
| @@ -492,9 +497,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         expr_code = ' + '.join(gen_sig_code(cache_spec)) | ||||
|         signature_id_tuple = '(%s)' % ( | ||||
|             ', '.join(compat_str(len(p)) for p in example_sig.split('.'))) | ||||
|         code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n' | ||||
|         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n' | ||||
|                 '    return %s\n') % (signature_id_tuple, expr_code) | ||||
|         self.to_screen(u'Extracted signature function:\n' + code) | ||||
|         self.to_screen('Extracted signature function:\n' + code) | ||||
|  | ||||
|     def _parse_sig_js(self, jscode): | ||||
|         funcname = self._search_regex( | ||||
| @@ -516,9 +521,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         """Turn the encrypted s field into a working signature""" | ||||
|  | ||||
|         if player_url is None: | ||||
|             raise ExtractorError(u'Cannot decrypt signature without player_url') | ||||
|             raise ExtractorError('Cannot decrypt signature without player_url') | ||||
|  | ||||
|         if player_url.startswith(u'//'): | ||||
|         if player_url.startswith('//'): | ||||
|             player_url = 'https:' + player_url | ||||
|         try: | ||||
|             player_id = (player_url, self._signature_cache_id(s)) | ||||
| @@ -542,7 +547,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, | ||||
|                 video_id, note=False) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) | ||||
|             self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err)) | ||||
|             return {} | ||||
|         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) | ||||
|  | ||||
| @@ -560,7 +565,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             url = 'https://www.youtube.com/api/timedtext?' + params | ||||
|             sub_lang_list[lang] = url | ||||
|         if not sub_lang_list: | ||||
|             self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||
|             self._downloader.report_warning('video doesn\'t have subtitles') | ||||
|             return {} | ||||
|         return sub_lang_list | ||||
|  | ||||
| @@ -568,7 +573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         """We need the webpage for getting the captions url, pass it as an | ||||
|            argument to speed up the process.""" | ||||
|         sub_format = self._downloader.params.get('subtitlesformat', 'srt') | ||||
|         self.to_screen(u'%s: Looking for automatic captions' % video_id) | ||||
|         self.to_screen('%s: Looking for automatic captions' % video_id) | ||||
|         mobj = re.search(r';ytplayer.config = ({.*?});', webpage) | ||||
|         err_msg = 'Couldn\'t find automatic captions for %s' % video_id | ||||
|         if mobj is None: | ||||
| @@ -589,7 +594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             caption_list = self._download_xml(list_url, video_id) | ||||
|             original_lang_node = caption_list.find('track') | ||||
|             if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : | ||||
|                 self._downloader.report_warning(u'Video doesn\'t have automatic captions') | ||||
|                 self._downloader.report_warning('Video doesn\'t have automatic captions') | ||||
|                 return {} | ||||
|             original_lang = original_lang_node.attrib['lang_code'] | ||||
|  | ||||
| @@ -615,7 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     def extract_id(cls, url): | ||||
|         mobj = re.match(cls._VALID_URL, url, re.VERBOSE) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|         video_id = mobj.group(2) | ||||
|         return video_id | ||||
|  | ||||
| @@ -635,7 +640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|     def _extract_annotations(self, video_id): | ||||
|         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id | ||||
|         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.') | ||||
|         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         proto = ( | ||||
| @@ -650,7 +655,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         # Get video webpage | ||||
|         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id | ||||
|         video_webpage = self._download_webpage(url, video_id) | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Cookie', 'PREF=hl=en') | ||||
|         video_webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         # Attempt to extract SWF player URL | ||||
|         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) | ||||
| @@ -705,14 +712,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         # Check for "rental" videos | ||||
|         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: | ||||
|             raise ExtractorError(u'"rental" videos not supported') | ||||
|             raise ExtractorError('"rental" videos not supported') | ||||
|  | ||||
|         # Start extracting information | ||||
|         self.report_information_extraction(video_id) | ||||
|  | ||||
|         # uploader | ||||
|         if 'author' not in video_info: | ||||
|             raise ExtractorError(u'Unable to extract uploader name') | ||||
|             raise ExtractorError('Unable to extract uploader name') | ||||
|         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0]) | ||||
|  | ||||
|         # uploader_id | ||||
| @@ -721,13 +728,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         if mobj is not None: | ||||
|             video_uploader_id = mobj.group(1) | ||||
|         else: | ||||
|             self._downloader.report_warning(u'unable to extract uploader nickname') | ||||
|             self._downloader.report_warning('unable to extract uploader nickname') | ||||
|  | ||||
|         # title | ||||
|         if 'title' in video_info: | ||||
|             video_title = video_info['title'][0] | ||||
|         else: | ||||
|             self._downloader.report_warning(u'Unable to extract video title') | ||||
|             self._downloader.report_warning('Unable to extract video title') | ||||
|             video_title = '_' | ||||
|  | ||||
|         # thumbnail image | ||||
| @@ -737,7 +744,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         if m_thumb is not None: | ||||
|             video_thumbnail = m_thumb.group(1) | ||||
|         elif 'thumbnail_url' not in video_info: | ||||
|             self._downloader.report_warning(u'unable to extract video thumbnail') | ||||
|             self._downloader.report_warning('unable to extract video thumbnail') | ||||
|             video_thumbnail = None | ||||
|         else:   # don't panic if we can't find it | ||||
|             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0]) | ||||
| @@ -791,8 +798,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             if count is not None: | ||||
|                 return int(count.replace(',', '')) | ||||
|             return None | ||||
|         like_count = _extract_count(u'like') | ||||
|         dislike_count = _extract_count(u'dislike') | ||||
|         like_count = _extract_count('like') | ||||
|         dislike_count = _extract_count('dislike') | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, video_webpage) | ||||
| @@ -802,7 +809,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             return | ||||
|  | ||||
|         if 'length_seconds' not in video_info: | ||||
|             self._downloader.report_warning(u'unable to extract video duration') | ||||
|             self._downloader.report_warning('unable to extract video duration') | ||||
|             video_duration = None | ||||
|         else: | ||||
|             video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])) | ||||
| @@ -823,11 +830,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map | ||||
|             # this signatures are encrypted | ||||
|             if 'url_encoded_fmt_stream_map' not in args: | ||||
|                 raise ValueError(u'No stream_map present')  # caught below | ||||
|                 raise ValueError('No stream_map present')  # caught below | ||||
|             re_signature = re.compile(r'[&,]s=') | ||||
|             m_s = re_signature.search(args['url_encoded_fmt_stream_map']) | ||||
|             if m_s is not None: | ||||
|                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id) | ||||
|                 self.to_screen('%s: Encrypted signatures detected.' % video_id) | ||||
|                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] | ||||
|             m_s = re_signature.search(args.get('adaptive_fmts', '')) | ||||
|             if m_s is not None: | ||||
| @@ -905,7 +912,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                                 player_desc = 'html5 player %s' % player_version | ||||
|  | ||||
|                         parts_sizes = self._signature_cache_id(encrypted_sig) | ||||
|                         self.to_screen(u'{%s} signature length %s, %s' % | ||||
|                         self.to_screen('{%s} signature length %s, %s' % | ||||
|                             (format_id, parts_sizes, player_desc)) | ||||
|  | ||||
|                     signature = self._decrypt_signature( | ||||
| @@ -920,7 +927,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             url_map = self._extract_from_m3u8(manifest_url, video_id) | ||||
|             formats = _map_to_format_list(url_map) | ||||
|         else: | ||||
|             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') | ||||
|             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') | ||||
|  | ||||
|         # Look for the DASH manifest | ||||
|         if (self._downloader.params.get('youtube_include_dash_manifest', False)): | ||||
| @@ -941,9 +948,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) | ||||
|                 dash_doc = self._download_xml( | ||||
|                     dash_manifest_url, video_id, | ||||
|                     note=u'Downloading DASH manifest', | ||||
|                     errnote=u'Could not download DASH manifest') | ||||
|                 for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): | ||||
|                     note='Downloading DASH manifest', | ||||
|                     errnote='Could not download DASH manifest') | ||||
|                 for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): | ||||
|                     url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') | ||||
|                     if url_el is None: | ||||
|                         continue | ||||
| @@ -969,7 +976,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                         existing_format.update(f) | ||||
|  | ||||
|             except (ExtractorError, KeyError) as e: | ||||
|                 self.report_warning(u'Skipping DASH manifest: %s' % e, video_id) | ||||
|                 self.report_warning('Skipping DASH manifest: %s' % e, video_id) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -1000,7 +1007,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|                         (?:\w+\.)? | ||||
|                         youtube\.com/ | ||||
|                         (?: | ||||
|                            (?:course|view_play_list|my_playlists|artist|playlist|watch) | ||||
|                            (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) | ||||
|                            \? (?:.*?&)*? (?:p|a|list)= | ||||
|                         |  p/ | ||||
|                         ) | ||||
| @@ -1056,6 +1063,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|             'title': 'YDL_safe_search', | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
|     }, { | ||||
|         'note': 'embedded', | ||||
|         'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', | ||||
|         'playlist_count': 4, | ||||
|         'info_dict': { | ||||
|             'title': 'JODA15', | ||||
|         } | ||||
|     }, { | ||||
|         'note': 'Embedded SWF player', | ||||
|         'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', | ||||
|         'playlist_count': 4, | ||||
|         'info_dict': { | ||||
|             'title': 'JODA7', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
| @@ -1090,7 +1111,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|         # Extract playlist id | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|         playlist_id = mobj.group(1) or mobj.group(2) | ||||
|  | ||||
|         # Check if it's a video-specific URL | ||||
| @@ -1098,16 +1119,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|         if 'v' in query_dict: | ||||
|             video_id = query_dict['v'][0] | ||||
|             if self._downloader.params.get('noplaylist'): | ||||
|                 self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) | ||||
|                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id) | ||||
|                 return self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|             else: | ||||
|                 self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | ||||
|                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | ||||
|  | ||||
|         if playlist_id.startswith('RD'): | ||||
|             # Mixes require a custom extraction process | ||||
|             return self._extract_mix(playlist_id) | ||||
|         if playlist_id.startswith('TL'): | ||||
|             raise ExtractorError(u'For downloading YouTube.com top lists, use ' | ||||
|             raise ExtractorError('For downloading YouTube.com top lists, use ' | ||||
|                 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) | ||||
|  | ||||
|         url = self._TEMPLATE_URL % playlist_id | ||||
| @@ -1152,19 +1173,28 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|  | ||||
| class YoutubeTopListIE(YoutubePlaylistIE): | ||||
|     IE_NAME = 'youtube:toplist' | ||||
|     IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"' | ||||
|     IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' | ||||
|         ' (Example: "yttoplist:music:Top Tracks")') | ||||
|     _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' | ||||
|     _TESTS = [] | ||||
|     _TESTS = [{ | ||||
|         'url': 'yttoplist:music:Trending', | ||||
|         'playlist_mincount': 5, | ||||
|         'skip': 'Only works for logged-in users', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         channel = mobj.group('chann') | ||||
|         title = mobj.group('title') | ||||
|         query = compat_urllib_parse.urlencode({'title': title}) | ||||
|         playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query) | ||||
|         channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title) | ||||
|         link = self._html_search_regex(playlist_re, channel_page, 'list') | ||||
|         channel_page = self._download_webpage( | ||||
|             'https://www.youtube.com/%s' % channel, title) | ||||
|         link = self._html_search_regex( | ||||
|             r'''(?x) | ||||
|                 <a\s+href="([^"]+)".*?>\s* | ||||
|                 <span\s+class="branded-page-module-title-text">\s* | ||||
|                 <span[^>]*>.*?%s.*?</span>''' % re.escape(query), | ||||
|             channel_page, 'list') | ||||
|         url = compat_urlparse.urljoin('https://www.youtube.com/', link) | ||||
|          | ||||
|         video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' | ||||
| @@ -1190,6 +1220,11 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|     _MORE_PAGES_INDICATOR = 'yt-uix-load-more' | ||||
|     _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' | ||||
|     IE_NAME = 'youtube:channel' | ||||
|     _TESTS = [{ | ||||
|         'note': 'paginated channel', | ||||
|         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', | ||||
|         'playlist_mincount': 91, | ||||
|     }] | ||||
|  | ||||
|     def extract_videos_from_page(self, page): | ||||
|         ids_in_page = [] | ||||
| @@ -1202,7 +1237,7 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|         # Extract channel id | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|  | ||||
|         # Download channel page | ||||
|         channel_id = mobj.group(1) | ||||
| @@ -1224,7 +1259,7 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|             for pagenum in itertools.count(1): | ||||
|                 url = self._MORE_PAGES_URL % (pagenum, channel_id) | ||||
|                 page = self._download_json( | ||||
|                     url, channel_id, note=u'Downloading page #%s' % pagenum, | ||||
|                     url, channel_id, note='Downloading page #%s' % pagenum, | ||||
|                     transform_source=uppercase_escape) | ||||
|  | ||||
|                 ids_in_page = self.extract_videos_from_page(page['content_html']) | ||||
| @@ -1233,7 +1268,7 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|                 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: | ||||
|                     break | ||||
|  | ||||
|         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) | ||||
|         self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) | ||||
|  | ||||
|         url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|                        for video_id in video_ids] | ||||
| @@ -1248,6 +1283,17 @@ class YoutubeUserIE(InfoExtractor): | ||||
|     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' | ||||
|     IE_NAME = 'youtube:user' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.youtube.com/user/TheLinuxFoundation', | ||||
|         'playlist_mincount': 320, | ||||
|         'info_dict': { | ||||
|             'title': 'TheLinuxFoundation', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'ytuser:phihag', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         # Don't return True if the url can be extracted with other youtube | ||||
| @@ -1260,7 +1306,7 @@ class YoutubeUserIE(InfoExtractor): | ||||
|         # Extract username | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|  | ||||
|         username = mobj.group(1) | ||||
|  | ||||
| @@ -1281,7 +1327,7 @@ class YoutubeUserIE(InfoExtractor): | ||||
|             try: | ||||
|                 response = json.loads(page) | ||||
|             except ValueError as err: | ||||
|                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) | ||||
|                 raise ExtractorError('Invalid JSON in API response: ' + compat_str(err)) | ||||
|             if 'entry' not in response['feed']: | ||||
|                 return | ||||
|  | ||||
| @@ -1297,7 +1343,7 @@ class YoutubeUserIE(InfoExtractor): | ||||
|                     'id': video_id, | ||||
|                     'title': title, | ||||
|                 } | ||||
|         url_results = PagedList(download_page, self._GDATA_PAGE_SIZE) | ||||
|         url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE) | ||||
|  | ||||
|         return self.playlist_result(url_results, playlist_title=username) | ||||
|  | ||||
| @@ -1322,9 +1368,9 @@ class YoutubeSearchIE(SearchInfoExtractor): | ||||
|                 compat_urllib_parse.quote_plus(query.encode('utf-8')), | ||||
|                 (PAGE_SIZE * pagenum) + 1) | ||||
|             data_json = self._download_webpage( | ||||
|                 result_url, video_id=u'query "%s"' % query, | ||||
|                 note=u'Downloading page %s' % (pagenum + 1), | ||||
|                 errnote=u'Unable to download API page') | ||||
|                 result_url, video_id='query "%s"' % query, | ||||
|                 note='Downloading page %s' % (pagenum + 1), | ||||
|                 errnote='Unable to download API page') | ||||
|             data = json.loads(data_json) | ||||
|             api_response = data['data'] | ||||
|  | ||||
| @@ -1356,6 +1402,13 @@ class YoutubeSearchURLIE(InfoExtractor): | ||||
|     IE_DESC = 'YouTube.com search URLs' | ||||
|     IE_NAME = 'youtube:search_url' | ||||
|     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', | ||||
|         'playlist_mincount': 5, | ||||
|         'info_dict': { | ||||
|             'title': 'youtube-dl test video', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -1390,17 +1443,38 @@ class YoutubeSearchURLIE(InfoExtractor): | ||||
|  | ||||
| class YoutubeShowIE(InfoExtractor): | ||||
|     IE_DESC = 'YouTube.com (multi-season) shows' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)' | ||||
|     IE_NAME = 'youtube:show' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.youtube.com/show/airdisasters', | ||||
|         'playlist_mincount': 3, | ||||
|         'info_dict': { | ||||
|             'id': 'airdisasters', | ||||
|             'title': 'Air Disasters', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         show_name = mobj.group(1) | ||||
|         webpage = self._download_webpage(url, show_name, 'Downloading show webpage') | ||||
|         playlist_id = mobj.group('id') | ||||
|         webpage = self._download_webpage( | ||||
|             url, playlist_id, 'Downloading show webpage') | ||||
|         # There's one playlist for each season of the show | ||||
|         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) | ||||
|         self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons))) | ||||
|         return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] | ||||
|         self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons))) | ||||
|         entries = [ | ||||
|             self.url_result( | ||||
|                 'https://www.youtube.com' + season.group(1), 'YoutubePlaylist') | ||||
|             for season in m_seasons | ||||
|         ] | ||||
|         title = self._og_search_title(webpage, fatal=False) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': playlist_id, | ||||
|             'title': title, | ||||
|             'entries': entries, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): | ||||
|   | ||||
| @@ -87,7 +87,7 @@ def parseOpts(overrideArguments=None): | ||||
|         for private_opt in ['-p', '--password', '-u', '--username', '--video-password']: | ||||
|             try: | ||||
|                 i = opts.index(private_opt) | ||||
|                 opts[i+1] = '<PRIVATE>' | ||||
|                 opts[i+1] = 'PRIVATE' | ||||
|             except ValueError: | ||||
|                 pass | ||||
|         return opts | ||||
|   | ||||
| @@ -1384,14 +1384,16 @@ def check_executable(exe, args=[]): | ||||
|  | ||||
|  | ||||
| class PagedList(object): | ||||
|     def __init__(self, pagefunc, pagesize): | ||||
|         self._pagefunc = pagefunc | ||||
|         self._pagesize = pagesize | ||||
|  | ||||
|     def __len__(self): | ||||
|         # This is only useful for tests | ||||
|         return len(self.getslice()) | ||||
|  | ||||
|  | ||||
| class OnDemandPagedList(PagedList): | ||||
|     def __init__(self, pagefunc, pagesize): | ||||
|         self._pagefunc = pagefunc | ||||
|         self._pagesize = pagesize | ||||
|  | ||||
|     def getslice(self, start=0, end=None): | ||||
|         res = [] | ||||
|         for pagenum in itertools.count(start // self._pagesize): | ||||
| @@ -1430,6 +1432,35 @@ class PagedList(object): | ||||
|         return res | ||||
|  | ||||
|  | ||||
| class InAdvancePagedList(PagedList): | ||||
|     def __init__(self, pagefunc, pagecount, pagesize): | ||||
|         self._pagefunc = pagefunc | ||||
|         self._pagecount = pagecount | ||||
|         self._pagesize = pagesize | ||||
|  | ||||
|     def getslice(self, start=0, end=None): | ||||
|         res = [] | ||||
|         start_page = start // self._pagesize | ||||
|         end_page = ( | ||||
|             self._pagecount if end is None else (end // self._pagesize + 1)) | ||||
|         skip_elems = start - start_page * self._pagesize | ||||
|         only_more = None if end is None else end - start | ||||
|         for pagenum in range(start_page, end_page): | ||||
|             page = list(self._pagefunc(pagenum)) | ||||
|             if skip_elems: | ||||
|                 page = page[skip_elems:] | ||||
|                 skip_elems = None | ||||
|             if only_more is not None: | ||||
|                 if len(page) < only_more: | ||||
|                     only_more -= len(page) | ||||
|                 else: | ||||
|                     page = page[:only_more] | ||||
|                     res.extend(page) | ||||
|                     break | ||||
|             res.extend(page) | ||||
|         return res | ||||
|  | ||||
|  | ||||
| def uppercase_escape(s): | ||||
|     unicode_escape = codecs.getdecoder('unicode_escape') | ||||
|     return re.sub( | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.09.22.1' | ||||
| __version__ = '2014.09.29' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user