Compare commits
	
		
			71 Commits
		
	
	
		
			2014.08.02
			...
			2014.08.21
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | af40ac054a | ||
|  | a36819731b | ||
|  | 181c8655c7 | ||
|  | 3b95347bb6 | ||
|  | 3b88ee9a7d | ||
|  | 55c49908d2 | ||
|  | db9b0b67b7 | ||
|  | 35f76e0061 | ||
|  | 3f338cd6de | ||
|  | 1d01f26ab1 | ||
|  | 266c71f971 | ||
|  | e8ee972c6e | ||
|  | f83dda12ad | ||
|  | 696d49815e | ||
|  | fe556f1b0c | ||
|  | d5638d974f | ||
|  | 938dd254e5 | ||
|  | 6493f5d704 | ||
|  | cd6b48365e | ||
|  | 4d9bd478f9 | ||
|  | c1d293cfa6 | ||
|  | 49807b4ac6 | ||
|  | c990bb3633 | ||
|  | af8322d2f9 | ||
|  | df866e7f2a | ||
|  | 664718ff63 | ||
|  | 3258263371 | ||
|  | 3cfafc4a9b | ||
|  | 6f600ff5d6 | ||
|  | 90e075da3a | ||
|  | 9572013de9 | ||
|  | 3a5beb0ca1 | ||
|  | a6da7b6b96 | ||
|  | 173a7026d5 | ||
|  | 40a90862f4 | ||
|  | 511c4325dc | ||
|  | 85a699246a | ||
|  | 4dc5286e13 | ||
|  | c767dc74b8 | ||
|  | 56ca04f662 | ||
|  | eb3680123a | ||
|  | f5273890ee | ||
|  | c7a088a816 | ||
|  | fb17b60811 | ||
|  | 1e58804260 | ||
|  | 31bf213032 | ||
|  | 1cccc41ddc | ||
|  | a91cf27767 | ||
|  | 64d02399d8 | ||
|  | 5961017202 | ||
|  | d9760fd43c | ||
|  | d42b2d2985 | ||
|  | cccfab6412 | ||
|  | 4665664c92 | ||
|  | 0adc996bc3 | ||
|  | b42a2a720b | ||
|  | 37edd7dd4a | ||
|  | f87b3500c5 | ||
|  | 66420a2db4 | ||
|  | 6b8492a782 | ||
|  | 6de0595eb8 | ||
|  | e48a2c646d | ||
|  | 0f831a1a92 | ||
|  | 3e510af38d | ||
|  | 5ecd7b0a92 | ||
|  | a229909fa6 | ||
|  | 548f31d99c | ||
|  | 78b296b0ff | ||
|  | be79b07907 | ||
|  | 5537dce84d | ||
|  | 493987fefe | 
							
								
								
									
										8
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								Makefile
									
									
									
									
									
								
							| @@ -6,10 +6,10 @@ clean: | ||||
| cleanall: clean | ||||
| 	rm -f youtube-dl youtube-dl.exe | ||||
|  | ||||
| PREFIX=/usr/local | ||||
| BINDIR=$(PREFIX)/bin | ||||
| MANDIR=$(PREFIX)/man | ||||
| PYTHON=/usr/bin/env python | ||||
| PREFIX ?= /usr/local | ||||
| BINDIR ?= $(PREFIX)/bin | ||||
| MANDIR ?= $(PREFIX)/man | ||||
| PYTHON ?= /usr/bin/env python | ||||
|  | ||||
| # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local | ||||
| ifeq ($(PREFIX),/usr) | ||||
|   | ||||
| @@ -17,6 +17,14 @@ If you do not have curl, you can alternatively use a recent wget: | ||||
|  | ||||
| Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). | ||||
|  | ||||
| OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). | ||||
|  | ||||
|     brew install youtube-dl | ||||
|  | ||||
| You can also use pip: | ||||
|  | ||||
|     sudo pip install youtube-dl | ||||
|  | ||||
| Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html . | ||||
|  | ||||
| # DESCRIPTION | ||||
|   | ||||
| @@ -117,8 +117,9 @@ def expect_info_dict(self, expected_dict, got_dict): | ||||
|                 u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) | ||||
|  | ||||
|     # Check for the presence of mandatory fields | ||||
|     for key in ('id', 'url', 'title', 'ext'): | ||||
|         self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) | ||||
|     if got_dict.get('_type') != 'playlist': | ||||
|         for key in ('id', 'url', 'title', 'ext'): | ||||
|             self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) | ||||
|     # Check for mandatory fields that are automatically set by YoutubeDL | ||||
|     for key in ['webpage_url', 'extractor', 'extractor_key']: | ||||
|         self.assertTrue(got_dict.get(key), u'Missing field: %s' % key) | ||||
|   | ||||
| @@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|  | ||||
|     def test_facebook_matching(self): | ||||
|         self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) | ||||
|         self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793')) | ||||
|  | ||||
|     def test_no_duplicates(self): | ||||
|         ies = gen_extractors() | ||||
|   | ||||
| @@ -63,15 +63,21 @@ def generator(test_case): | ||||
|     def test_template(self): | ||||
|         ie = youtube_dl.extractor.get_info_extractor(test_case['name']) | ||||
|         other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])] | ||||
|         is_playlist = any(k.startswith('playlist') for k in test_case) | ||||
|         test_cases = test_case.get( | ||||
|             'playlist', [] if is_playlist else [test_case]) | ||||
|  | ||||
|         def print_skipping(reason): | ||||
|             print('Skipping %s: %s' % (test_case['name'], reason)) | ||||
|         if not ie.working(): | ||||
|             print_skipping('IE marked as not _WORKING') | ||||
|             return | ||||
|         if 'playlist' not in test_case: | ||||
|             info_dict = test_case.get('info_dict', {}) | ||||
|             if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')): | ||||
|  | ||||
|         for tc in test_cases: | ||||
|             info_dict = tc.get('info_dict', {}) | ||||
|             if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')): | ||||
|                 raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?') | ||||
|  | ||||
|         if 'skip' in test_case: | ||||
|             print_skipping(test_case['skip']) | ||||
|             return | ||||
| @@ -81,6 +87,9 @@ def generator(test_case): | ||||
|                 return | ||||
|  | ||||
|         params = get_params(test_case.get('params', {})) | ||||
|         if is_playlist and 'playlist' not in test_case: | ||||
|             params.setdefault('extract_flat', True) | ||||
|             params.setdefault('skip_download', True) | ||||
|  | ||||
|         ydl = YoutubeDL(params) | ||||
|         ydl.add_default_info_extractors() | ||||
| @@ -93,7 +102,6 @@ def generator(test_case): | ||||
|         def get_tc_filename(tc): | ||||
|             return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) | ||||
|  | ||||
|         test_cases = test_case.get('playlist', [test_case]) | ||||
|         def try_rm_tcs_files(): | ||||
|             for tc in test_cases: | ||||
|                 tc_filename = get_tc_filename(tc) | ||||
| @@ -105,7 +113,10 @@ def generator(test_case): | ||||
|             try_num = 1 | ||||
|             while True: | ||||
|                 try: | ||||
|                     ydl.download([test_case['url']]) | ||||
|                     # We're not using .download here since that is just a shim | ||||
|                     # for outside error handling, and returns the exit code | ||||
|                     # instead of the result dict. | ||||
|                     res_dict = ydl.extract_info(test_case['url']) | ||||
|                 except (DownloadError, ExtractorError) as err: | ||||
|                     # Check if the exception is not a network related one | ||||
|                     if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): | ||||
| @@ -121,6 +132,17 @@ def generator(test_case): | ||||
|                 else: | ||||
|                     break | ||||
|  | ||||
|             if is_playlist: | ||||
|                 self.assertEqual(res_dict['_type'], 'playlist') | ||||
|                 expect_info_dict(self, test_case.get('info_dict', {}), res_dict) | ||||
|             if 'playlist_mincount' in test_case: | ||||
|                 self.assertGreaterEqual( | ||||
|                     len(res_dict['entries']), | ||||
|                     test_case['playlist_mincount'], | ||||
|                     'Expected at least %d in playlist %s, but got only %d' % ( | ||||
|                         test_case['playlist_mincount'], test_case['url'], | ||||
|                         len(res_dict['entries']))) | ||||
|  | ||||
|             for tc in test_cases: | ||||
|                 tc_filename = get_tc_filename(tc) | ||||
|                 if not test_case.get('params', {}).get('skip_download', False): | ||||
|   | ||||
| @@ -1,6 +1,17 @@ | ||||
| #!/usr/bin/env python | ||||
| # encoding: utf-8 | ||||
|  | ||||
| ## DEPRECATED FILE! | ||||
| # Add new tests to the extractors themselves, like this: | ||||
| # _TEST = { | ||||
| #    'url': 'http://example.com/playlist/42', | ||||
| #    'playlist_mincount': 99, | ||||
| #    'info_dict': { | ||||
| #        'id': '42', | ||||
| #        'title': 'Playlist number forty-two', | ||||
| #    } | ||||
| # } | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| # Allow direct execution | ||||
|   | ||||
| @@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase): | ||||
|         d = json.loads(stripped) | ||||
|         self.assertEqual(d, [{"id": "532cb", "x": 3}]) | ||||
|  | ||||
|     def test_uppercase_escpae(self): | ||||
|     def test_uppercase_escape(self): | ||||
|         self.assertEqual(uppercase_escape(u'aä'), u'aä') | ||||
|         self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐') | ||||
|  | ||||
|   | ||||
| @@ -162,6 +162,7 @@ class YoutubeDL(object): | ||||
|     default_search:    Prepend this string if an input url is not valid. | ||||
|                        'auto' for elaborate guessing | ||||
|     encoding:          Use this encoding instead of the system-specified. | ||||
|     extract_flat:      Do not resolve URLs, return the immediate result. | ||||
|  | ||||
|     The following parameters are not used by YoutubeDL itself, they are used by | ||||
|     the FileDownloader: | ||||
| @@ -558,7 +559,12 @@ class YoutubeDL(object): | ||||
|         Returns the resolved ie_result. | ||||
|         """ | ||||
|  | ||||
|         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system | ||||
|         result_type = ie_result.get('_type', 'video') | ||||
|  | ||||
|         if self.params.get('extract_flat', False): | ||||
|             if result_type in ('url', 'url_transparent'): | ||||
|                 return ie_result | ||||
|  | ||||
|         if result_type == 'video': | ||||
|             self.add_extra_info(ie_result, extra_info) | ||||
|             return self.process_video_result(ie_result, download=download) | ||||
|   | ||||
| @@ -66,6 +66,10 @@ __authors__  = ( | ||||
|     'Naglis Jonaitis', | ||||
|     'Charles Chen', | ||||
|     'Hassaan Ali', | ||||
|     'Dobrosław Żybort', | ||||
|     'David Fabijan', | ||||
|     'Sebastian Haas', | ||||
|     'Alexander Kirk', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
|   | ||||
| @@ -292,7 +292,7 @@ class FileDownloader(object): | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         """Real download process. Redefine in subclasses.""" | ||||
|         raise NotImplementedError(u'This method must be implemented by sublcasses') | ||||
|         raise NotImplementedError(u'This method must be implemented by subclasses') | ||||
|  | ||||
|     def _hook_progress(self, status): | ||||
|         for ph in self._progress_hooks: | ||||
|   | ||||
| @@ -77,6 +77,10 @@ from .ebaumsworld import EbaumsWorldIE | ||||
| from .ehow import EHowIE | ||||
| from .eighttracks import EightTracksIE | ||||
| from .eitb import EitbIE | ||||
| from .ellentv import ( | ||||
|     EllenTVIE, | ||||
|     EllenTVClipsIE, | ||||
| ) | ||||
| from .elpais import ElPaisIE | ||||
| from .empflix import EmpflixIE | ||||
| from .engadget import EngadgetIE | ||||
| @@ -126,6 +130,7 @@ from .helsinki import HelsinkiIE | ||||
| from .hentaistigma import HentaiStigmaIE | ||||
| from .hotnewhiphop import HotNewHipHopIE | ||||
| from .howcast import HowcastIE | ||||
| from .howstuffworks import HowStuffWorksIE | ||||
| from .huffpost import HuffPostIE | ||||
| from .hypem import HypemIE | ||||
| from .iconosquare import IconosquareIE | ||||
| @@ -146,6 +151,7 @@ from .ivi import ( | ||||
| from .izlesene import IzleseneIE | ||||
| from .jadorecettepub import JadoreCettePubIE | ||||
| from .jeuxvideo import JeuxVideoIE | ||||
| from .jove import JoveIE | ||||
| from .jukebox import JukeboxIE | ||||
| from .justintv import JustinTVIE | ||||
| from .jpopsukitv import JpopsukiIE | ||||
| @@ -177,10 +183,12 @@ from .mdr import MDRIE | ||||
| from .metacafe import MetacafeIE | ||||
| from .metacritic import MetacriticIE | ||||
| from .mit import TechTVMITIE, MITIE, OCWMITIE | ||||
| from .mitele import MiTeleIE | ||||
| from .mixcloud import MixcloudIE | ||||
| from .mlb import MLBIE | ||||
| from .mpora import MporaIE | ||||
| from .mofosex import MofosexIE | ||||
| from .mojvideo import MojvideoIE | ||||
| from .mooshare import MooshareIE | ||||
| from .morningstar import MorningstarIE | ||||
| from .motherless import MotherlessIE | ||||
| @@ -224,9 +232,12 @@ from .nrk import ( | ||||
| from .ntv import NTVIE | ||||
| from .nytimes import NYTimesIE | ||||
| from .nuvid import NuvidIE | ||||
| from .oe1 import OE1IE | ||||
| from .ooyala import OoyalaIE | ||||
| from .orf import ORFIE | ||||
| from .orf import ( | ||||
|     ORFTVthekIE, | ||||
|     ORFOE1IE, | ||||
|     ORFFM4IE, | ||||
| ) | ||||
| from .parliamentliveuk import ParliamentLiveUKIE | ||||
| from .pbs import PBSIE | ||||
| from .photobucket import PhotobucketIE | ||||
| @@ -247,6 +258,7 @@ from .ro220 import Ro220IE | ||||
| from .rottentomatoes import RottenTomatoesIE | ||||
| from .roxwel import RoxwelIE | ||||
| from .rtbf import RTBFIE | ||||
| from .rtlnl import RtlXlIE | ||||
| from .rtlnow import RTLnowIE | ||||
| from .rts import RTSIE | ||||
| from .rtve import RTVEALaCartaIE | ||||
| @@ -327,7 +339,7 @@ from .tutv import TutvIE | ||||
| from .tvigle import TvigleIE | ||||
| from .tvp import TvpIE | ||||
| from .tvplay import TVPlayIE | ||||
| from.ubu import UbuIE | ||||
| from .ubu import UbuIE | ||||
| from .udemy import ( | ||||
|     UdemyIE, | ||||
|     UdemyCourseIE | ||||
| @@ -383,6 +395,7 @@ from .wistia import WistiaIE | ||||
| from .worldstarhiphop import WorldStarHipHopIE | ||||
| from .wrzuta import WrzutaIE | ||||
| from .xbef import XBefIE | ||||
| from .xboxclips import XboxClipsIE | ||||
| from .xhamster import XHamsterIE | ||||
| from .xnxx import XNXXIE | ||||
| from .xvideos import XVideosIE | ||||
|   | ||||
| @@ -6,6 +6,7 @@ import json | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor): | ||||
|                 formats.append({ | ||||
|                     'url': format_url, | ||||
|                     'format': format['type'], | ||||
|                     'width': format['width'], | ||||
|                     'height': int(format['height']), | ||||
|                     'width': int_or_none(format['width']), | ||||
|                     'height': int_or_none(format['height']), | ||||
|                 }) | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|   | ||||
| @@ -51,6 +51,9 @@ class ARDIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage: | ||||
|             raise ExtractorError('Video %s is no longer available' % video_id, expected=True) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', | ||||
|              r'<meta name="dcterms.title" content="(.*?)"/>', | ||||
|   | ||||
| @@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] | ||||
|             return any(re.match(r, f['versionCode']) for r in regexes) | ||||
|         # Some formats may not be in the same language as the url | ||||
|         # TODO: Might want not to drop videos that do not match requested language | ||||
|         # but to process those formats with lower precedence | ||||
|         formats = filter(_match_lang, all_formats) | ||||
|         formats = list(formats) # in python3 filter returns an iterator | ||||
|         formats = list(formats)  # in python3 filter returns an iterator | ||||
|         if not formats: | ||||
|             # Some videos are only available in the 'Originalversion' | ||||
|             # they aren't tagged as being in French or German | ||||
|             if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats): | ||||
|                 formats = all_formats | ||||
|             else: | ||||
|                 raise ExtractorError(u'The formats list is empty') | ||||
|             # Sometimes there are neither videos of requested lang code | ||||
|             # nor original version videos available | ||||
|             # For such cases we just take all_formats as is | ||||
|             formats = all_formats | ||||
|             if not formats: | ||||
|                 raise ExtractorError('The formats list is empty') | ||||
|  | ||||
|         if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: | ||||
|             def sort_key(f): | ||||
|   | ||||
| @@ -463,8 +463,9 @@ class InfoExtractor(object): | ||||
|         return self._og_search_property('title', html, **kargs) | ||||
|  | ||||
|     def _og_search_video_url(self, html, name='video url', secure=True, **kargs): | ||||
|         regexes = self._og_regexes('video') | ||||
|         if secure: regexes = self._og_regexes('video:secure_url') + regexes | ||||
|         regexes = self._og_regexes('video') + self._og_regexes('video:url') | ||||
|         if secure: | ||||
|             regexes = self._og_regexes('video:secure_url') + regexes | ||||
|         return self._html_search_regex(regexes, html, name, **kargs) | ||||
|  | ||||
|     def _og_search_url(self, html, **kargs): | ||||
|   | ||||
| @@ -30,7 +30,7 @@ class DFBIE(InfoExtractor): | ||||
|             video_id) | ||||
|         video_info = player_info.find('video') | ||||
|  | ||||
|         f4m_info = self._download_xml(video_info.find('url').text, video_id) | ||||
|         f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id) | ||||
|         token_el = f4m_info.find('token') | ||||
|         manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' | ||||
|  | ||||
|   | ||||
							
								
								
									
										79
									
								
								youtube_dl/extractor/ellentv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								youtube_dl/extractor/ellentv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,79 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EllenTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.ellentv.com/videos/0-7jqrsr18/', | ||||
|         'md5': 'e4af06f3bf0d5f471921a18db5764642', | ||||
|         'info_dict': { | ||||
|             'id': '0-7jqrsr18', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'What\'s Wrong with These Photos? A Whole Lot', | ||||
|             'timestamp': 1406876400, | ||||
|             'upload_date': '20140801', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         timestamp = parse_iso8601(self._search_regex( | ||||
|             r'<span class="publish-date"><time datetime="([^"]+)">', | ||||
|             webpage, 'timestamp')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'url': self._html_search_meta('VideoURL', webpage, 'url'), | ||||
|             'timestamp': timestamp, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class EllenTVClipsIE(InfoExtractor): | ||||
|     IE_NAME = 'EllenTV:clips' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ellentv\.com/episodes/(?P<id>[a-z0-9_-]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.ellentv.com/episodes/meryl-streep-vanessa-hudgens/', | ||||
|         'info_dict': { | ||||
|             'id': 'meryl-streep-vanessa-hudgens', | ||||
|             'title': 'Meryl Streep, Vanessa Hudgens', | ||||
|         }, | ||||
|         'playlist_mincount': 9, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|         playlist = self._extract_playlist(webpage) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': playlist_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'entries': self._extract_entries(playlist) | ||||
|         } | ||||
|  | ||||
|     def _extract_playlist(self, webpage): | ||||
|         json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json') | ||||
|         try: | ||||
|             return json.loads("[{" + json_string + "}]") | ||||
|         except ValueError as ve: | ||||
|             raise ExtractorError('Failed to download JSON', cause=ve) | ||||
|  | ||||
|     def _extract_entries(self, playlist): | ||||
|         return [self.url_result(item['url'], 'EllenTV') for item in playlist] | ||||
| @@ -36,7 +36,7 @@ class EscapistIE(InfoExtractor): | ||||
|             r'<meta name="description" content="([^"]*)"', | ||||
|             webpage, 'description', fatal=False) | ||||
|  | ||||
|         playerUrl = self._og_search_video_url(webpage, name=u'player URL') | ||||
|         playerUrl = self._og_search_video_url(webpage, name='player URL') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<meta name="title" content="([^"]*)"', | ||||
|   | ||||
| @@ -20,7 +20,7 @@ from ..utils import ( | ||||
| class FacebookIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|         https?://(?:\w+\.)?facebook\.com/ | ||||
|         (?:[^#?]*\#!/)? | ||||
|         (?:[^#]*?\#!/)? | ||||
|         (?:video/video\.php|photo\.php|video/embed)\?(?:.*?) | ||||
|         (?:v|video_id)=(?P<id>[0-9]+) | ||||
|         (?:.*)''' | ||||
|   | ||||
| @@ -42,7 +42,6 @@ class FiredriveIE(InfoExtractor): | ||||
|         fields = dict(re.findall(r'''(?x)<input\s+ | ||||
|             type="hidden"\s+ | ||||
|             name="([^"]+)"\s+ | ||||
|             (?:id="[^"]+"\s+)? | ||||
|             value="([^"]*)" | ||||
|             ''', webpage)) | ||||
|  | ||||
| @@ -66,7 +65,7 @@ class FiredriveIE(InfoExtractor): | ||||
|         ext = self._search_regex(r'type:\s?\'([^\']+)\',', | ||||
|                                  webpage, 'extension', fatal=False) | ||||
|         video_url = self._search_regex( | ||||
|             r'file:\s?\'(http[^\']+)\',', webpage, 'file url') | ||||
|             r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url') | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'sd', | ||||
|   | ||||
| @@ -706,6 +706,13 @@ class GenericIE(InfoExtractor): | ||||
|             url = unescapeHTML(mobj.group('url')) | ||||
|             return self.url_result(url, ie='MTVServicesEmbedded') | ||||
|  | ||||
|         # Look for embedded yahoo player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Yahoo') | ||||
|  | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if not found: | ||||
|   | ||||
							
								
								
									
										134
									
								
								youtube_dl/extractor/howstuffworks.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								youtube_dl/extractor/howstuffworks.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,134 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import random | ||||
| import string | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import find_xpath_attr | ||||
|  | ||||
|  | ||||
| class HowStuffWorksIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm', | ||||
|             'info_dict': { | ||||
|                 'id': '450221', | ||||
|                 'display_id': 'cool-jobs-iditarod-musher', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Cool Jobs - Iditarod Musher', | ||||
|                 'description': 'md5:82bb58438a88027b8186a1fccb365f90', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # md5 is not consistent | ||||
|                 'skip_download': True | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://adventure.howstuffworks.com/39516-deadliest-catch-jakes-farewell-pots-video.htm', | ||||
|             'info_dict': { | ||||
|                 'id': '553470', | ||||
|                 'display_id': 'deadliest-catch-jakes-farewell-pots', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Deadliest Catch: Jake\'s Farewell Pots', | ||||
|                 'description': 'md5:9632c346d5e43ee238028c9cefd8dbbc', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # md5 is not consistent | ||||
|                 'skip_download': True | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm', | ||||
|             'info_dict': { | ||||
|                 'id': '440011', | ||||
|                 'display_id': 'sword-swallowing-1-by-dan-meyer', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Sword Swallowing #1 by Dan Meyer', | ||||
|                 'description': 'md5:b2409e88172913e2e7d3d1159b0ef735', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # md5 is not consistent | ||||
|                 'skip_download': True | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id') | ||||
|  | ||||
|         mp4 = self._search_regex( | ||||
|             r'''(?xs)var\s+clip\s*=\s*{\s* | ||||
|                 .+?\s* | ||||
|                 content_id\s*:\s*%s\s*,\s* | ||||
|                 .+?\s* | ||||
|                 mp4\s*:\s*\[(.*?),?\]\s* | ||||
|                 };\s* | ||||
|                 videoData\.push\(clip\);''' % content_id, | ||||
|             webpage, 'mp4', fatal=False, default=None) | ||||
|  | ||||
|         smil = self._download_xml( | ||||
|             'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id, | ||||
|             content_id, 'Downloading video SMIL') | ||||
|  | ||||
|         http_base = find_xpath_attr( | ||||
|             smil, | ||||
|             './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'), | ||||
|             'name', | ||||
|             'httpBase').get('content') | ||||
|  | ||||
|         def random_string(str_len=0): | ||||
|             return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)]) | ||||
|  | ||||
|         URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12)) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         if mp4: | ||||
|             for video in json.loads('[%s]' % mp4): | ||||
|                 bitrate = video['bitrate'] | ||||
|                 fmt = { | ||||
|                     'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX, | ||||
|                     'format_id': bitrate, | ||||
|                 } | ||||
|                 m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate) | ||||
|                 if m: | ||||
|                     fmt['vbr'] = int(m.group('vbr')) | ||||
|                 formats.append(fmt) | ||||
|         else: | ||||
|             for video in smil.findall( | ||||
|                     './/{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')): | ||||
|                 vbr = int(video.attrib['system-bitrate']) / 1000 | ||||
|                 formats.append({ | ||||
|                     'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX), | ||||
|                     'format_id': '%dk' % vbr, | ||||
|                     'vbr': vbr, | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         TITLE_SUFFIX = ' : HowStuffWorks' | ||||
|         if title.endswith(TITLE_SUFFIX): | ||||
|             title = title[:-len(TITLE_SUFFIX)] | ||||
|  | ||||
|         description = self._og_search_description(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         return { | ||||
|             'id': content_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										80
									
								
								youtube_dl/extractor/jove.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								youtube_dl/extractor/jove.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unified_strdate | ||||
| ) | ||||
|  | ||||
|  | ||||
| class JoveIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)' | ||||
|     _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current', | ||||
|             'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b', | ||||
|             'info_dict': { | ||||
|                 'id': '2744', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation', | ||||
|                 'description': 'md5:015dd4509649c0908bc27f049e0262c6', | ||||
|                 'thumbnail': 're:^https?://.*\.png$', | ||||
|                 'upload_date': '20110523', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation', | ||||
|             'md5': '914aeb356f416811d911996434811beb', | ||||
|             'info_dict': { | ||||
|                 'id': '51796', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment', | ||||
|                 'description': 'md5:35ff029261900583970c4023b70f1dc9', | ||||
|                 'thumbnail': 're:^https?://.*\.png$', | ||||
|                 'upload_date': '20140802', | ||||
|             } | ||||
|         }, | ||||
|  | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         chapters_id = self._html_search_regex( | ||||
|             r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id') | ||||
|  | ||||
|         chapters_xml = self._download_xml( | ||||
|             self._CHAPTERS_URL.format(video_id=chapters_id), | ||||
|             video_id, note='Downloading chapters XML', | ||||
|             errnote='Failed to download chapters XML') | ||||
|  | ||||
|         video_url = chapters_xml.attrib.get('video') | ||||
|         if not video_url: | ||||
|             raise ExtractorError('Failed to get the video URL') | ||||
|  | ||||
|         title = self._html_search_meta('citation_title', webpage, 'title') | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         description = self._html_search_regex( | ||||
|             r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>', | ||||
|             webpage, 'description', fatal=False) | ||||
|         publish_date = unified_strdate(self._html_search_meta( | ||||
|             'citation_publication_date', webpage, 'publish date', fatal=False)) | ||||
|         comment_count = self._html_search_regex( | ||||
|             r'<meta name="num_comments" content="(\d+) Comments?"', | ||||
|             webpage, 'comment count', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': description, | ||||
|             'upload_date': publish_date, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
| @@ -111,17 +111,28 @@ class LivestreamIE(InfoExtractor): | ||||
|         event_name = mobj.group('event_name') | ||||
|         webpage = self._download_webpage(url, video_id or event_name) | ||||
|  | ||||
|         if video_id is None: | ||||
|             # This is an event page: | ||||
|         og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None) | ||||
|         if og_video is None: | ||||
|             config_json = self._search_regex( | ||||
|                 r'window.config = ({.*?});', webpage, 'window config') | ||||
|             info = json.loads(config_json)['event'] | ||||
|  | ||||
|             def is_relevant(vdata, vid): | ||||
|                 result = vdata['type'] == 'video' | ||||
|                 if video_id is not None: | ||||
|                     result = result and compat_str(vdata['data']['id']) == vid | ||||
|                 return result | ||||
|  | ||||
|             videos = [self._extract_video_info(video_data['data']) | ||||
|                 for video_data in info['feed']['data'] | ||||
|                 if video_data['type'] == 'video'] | ||||
|             return self.playlist_result(videos, info['id'], info['full_name']) | ||||
|                       for video_data in info['feed']['data'] | ||||
|                       if is_relevant(video_data, video_id)] | ||||
|             if video_id is None: | ||||
|                 # This is an event page: | ||||
|                 return self.playlist_result(videos, info['id'], info['full_name']) | ||||
|             else: | ||||
|                 if videos: | ||||
|                     return videos[0] | ||||
|         else: | ||||
|             og_video = self._og_search_video_url(webpage, 'player url') | ||||
|             query_str = compat_urllib_parse_urlparse(og_video).query | ||||
|             query = compat_urlparse.parse_qs(query_str) | ||||
|             api_url = query['play_url'][0].replace('.smil', '') | ||||
|   | ||||
							
								
								
									
										60
									
								
								youtube_dl/extractor/mitele.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								youtube_dl/extractor/mitele.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     get_element_by_attribute, | ||||
|     parse_duration, | ||||
|     strip_jsonp, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MiTeleIE(InfoExtractor): | ||||
|     IE_NAME = 'mitele.es' | ||||
|     _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', | ||||
|         'md5': '6a75fe9d0d3275bead0cb683c616fddb', | ||||
|         'info_dict': { | ||||
|             'id': '0fce117d', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Programa 144 - Tor, la web invisible', | ||||
|             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', | ||||
|             'display_id': 'programa-144', | ||||
|             'duration': 2913, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         episode = mobj.group('episode') | ||||
|         webpage = self._download_webpage(url, episode) | ||||
|         embed_data_json = self._search_regex( | ||||
|             r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', | ||||
|             flags=re.DOTALL | ||||
|         ).replace('\'', '"') | ||||
|         embed_data = json.loads(embed_data_json) | ||||
|  | ||||
|         info_url = embed_data['flashvars']['host'] | ||||
|         info_el = self._download_xml(info_url, episode).find('./video/info') | ||||
|  | ||||
|         video_link = info_el.find('videoUrl/link').text | ||||
|         token_query = compat_urllib_parse.urlencode({'id': video_link}) | ||||
|         token_info = self._download_json( | ||||
|             'http://token.mitele.es/?' + token_query, episode, | ||||
|             transform_source=strip_jsonp | ||||
|         ) | ||||
|  | ||||
|         return { | ||||
|             'id': embed_data['videoId'], | ||||
|             'display_id': episode, | ||||
|             'title': info_el.find('title').text, | ||||
|             'url': token_info['tokenizedUrl'], | ||||
|             'description': get_element_by_attribute('class', 'text', webpage), | ||||
|             'thumbnail': info_el.find('thumb').text, | ||||
|             'duration': parse_duration(info_el.find('duration').text), | ||||
|         } | ||||
							
								
								
									
										58
									
								
								youtube_dl/extractor/mojvideo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								youtube_dl/extractor/mojvideo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MojvideoIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906', | ||||
|         'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7', | ||||
|         'info_dict': { | ||||
|             'id': '3d1ed4497707730b2906', | ||||
|             'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'V avtu pred mano rdečelaska - Alfi Nipič', | ||||
|             'thumbnail': 're:^http://.*\.jpg$', | ||||
|             'duration': 242, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') | ||||
|  | ||||
|         # XML is malformed | ||||
|         playerapi = self._download_webpage( | ||||
|             'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id, display_id) | ||||
|  | ||||
|         if '<error>true</error>' in playerapi: | ||||
|             error_desc = self._html_search_regex( | ||||
|                 r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False) | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>([^<]+)</title>', playerapi, 'title') | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<file>([^<]+)</file>', playerapi, 'video URL') | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'<preview>([^<]+)</preview>', playerapi, 'thumbnail', fatal=False) | ||||
|         duration = parse_duration(self._html_search_regex( | ||||
|             r'<duration>([^<]+)</duration>', playerapi, 'duration', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|         } | ||||
| @@ -1,3 +1,4 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| @@ -8,19 +9,34 @@ from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class NownessIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])' | ||||
|     _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation', | ||||
|         'md5': '068bc0202558c2e391924cb8cc470676', | ||||
|         'info_dict': { | ||||
|             'id': '2520295746001', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'Candor: The Art of Gesticulation', | ||||
|             'uploader': 'Nowness', | ||||
|             'title': 'Candor: The Art of Gesticulation', | ||||
|         } | ||||
|     } | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation', | ||||
|             'md5': '068bc0202558c2e391924cb8cc470676', | ||||
|             'info_dict': { | ||||
|                 'id': '2520295746001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Candor: The Art of Gesticulation', | ||||
|                 'description': 'Candor: The Art of Gesticulation', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|                 'uploader': 'Nowness', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr', | ||||
|             'md5': 'e79cf125e387216f86b2e0a5b5c63aa3', | ||||
|             'info_dict': { | ||||
|                 'id': '3716354522001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR', | ||||
|                 'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|                 'uploader': 'Nowness', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|   | ||||
| @@ -1,40 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import calendar | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| # audios on oe1.orf.at are only available for 7 days, so we can't | ||||
| # add tests. | ||||
|  | ||||
|  | ||||
| class OE1IE(InfoExtractor): | ||||
|     IE_DESC = 'oe1.orf.at' | ||||
|     _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         show_id = mobj.group('id') | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'http://oe1.orf.at/programm/%s/konsole' % show_id, | ||||
|             show_id | ||||
|         ) | ||||
|  | ||||
|         timestamp = datetime.datetime.strptime('%s %s' % ( | ||||
|             data['item']['day_label'], | ||||
|             data['item']['time'] | ||||
|         ), '%d.%m.%Y %H:%M') | ||||
|         unix_timestamp = calendar.timegm(timestamp.utctimetuple()) | ||||
|  | ||||
|         return { | ||||
|             'id': show_id, | ||||
|             'title': data['item']['title'], | ||||
|             'url': data['item']['url_stream'], | ||||
|             'ext': 'mp3', | ||||
|             'description': data['item'].get('info'), | ||||
|             'timestamp': unix_timestamp | ||||
|         } | ||||
| @@ -3,23 +3,38 @@ import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unescapeHTML | ||||
| from ..utils import ( | ||||
|     unescapeHTML, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class OoyalaIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video | ||||
|         'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', | ||||
|         'md5': '3f5cceb3a7bf461d6c29dc466cf8033c', | ||||
|         'info_dict': { | ||||
|             'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Explaining Data Recovery from Hard Drives and SSDs', | ||||
|             'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video | ||||
|             'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', | ||||
|             'md5': '3f5cceb3a7bf461d6c29dc466cf8033c', | ||||
|             'info_dict': { | ||||
|                 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Explaining Data Recovery from Hard Drives and SSDs', | ||||
|                 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', | ||||
|             }, | ||||
|         }, { | ||||
|             # Only available for ipad | ||||
|             'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', | ||||
|             'md5': '4b9754921fddb68106e48c142e2a01e6', | ||||
|             'info_dict': { | ||||
|                 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Simulation Overview - Levels of Simulation', | ||||
|                 'description': '', | ||||
|             }, | ||||
|         }, | ||||
|     } | ||||
|     ] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _url_for_embed_code(embed_code): | ||||
| @@ -47,13 +62,30 @@ class OoyalaIE(InfoExtractor): | ||||
|         player = self._download_webpage(player_url, embedCode) | ||||
|         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', | ||||
|                                         player, 'mobile player url') | ||||
|         mobile_player = self._download_webpage(mobile_url, embedCode) | ||||
|         videos_info = self._search_regex( | ||||
|             r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', | ||||
|             mobile_player, 'info').replace('\\"','"') | ||||
|         videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"') | ||||
|         # Looks like some videos are only available for particular devices | ||||
|         # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0 | ||||
|         # is only available for ipad) | ||||
|         # Work around this by fetching URLs for every device found, starting with 'unknown', | ||||
|         # until one succeeds or all of them have failed. | ||||
|         devices = re.findall(r'device\s*=\s*"([^"]+)";', player) | ||||
|         devices.remove('unknown') | ||||
|         devices.insert(0, 'unknown') | ||||
|         for device in devices: | ||||
|             mobile_player = self._download_webpage( | ||||
|                 '%s&device=%s' % (mobile_url, device), embedCode, | ||||
|                 'Downloading mobile player JS for %s device' % device) | ||||
|             videos_info = self._search_regex( | ||||
|                 r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', | ||||
|                 mobile_player, 'info', fatal=False, default=None) | ||||
|             if videos_info: | ||||
|                 break | ||||
|         if not videos_info: | ||||
|             raise ExtractorError('Unable to extract info') | ||||
|         videos_info = videos_info.replace('\\"', '"') | ||||
|         videos_more_info = self._search_regex( | ||||
|             r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"') | ||||
|         videos_info = json.loads(videos_info) | ||||
|         videos_more_info =json.loads(videos_more_info) | ||||
|         videos_more_info = json.loads(videos_more_info) | ||||
|  | ||||
|         if videos_more_info.get('lineup'): | ||||
|             videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] | ||||
|   | ||||
| @@ -3,6 +3,8 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
| import calendar | ||||
| import datetime | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| @@ -12,7 +14,9 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ORFIE(InfoExtractor): | ||||
| class ORFTVthekIE(InfoExtractor): | ||||
|     IE_NAME = 'orf:tvthek' | ||||
|     IE_DESC = 'ORF TVthek' | ||||
|     _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
| @@ -105,3 +109,73 @@ class ORFIE(InfoExtractor): | ||||
|             'entries': entries, | ||||
|             'id': playlist_id, | ||||
|         } | ||||
|  | ||||
|  | ||||
| # Audios on ORF radio are only available for 7 days, so we can't add tests. | ||||
|  | ||||
|  | ||||
| class ORFOE1IE(InfoExtractor): | ||||
|     IE_NAME = 'orf:oe1' | ||||
|     IE_DESC = 'Radio Österreich 1' | ||||
|     _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         show_id = mobj.group('id') | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'http://oe1.orf.at/programm/%s/konsole' % show_id, | ||||
|             show_id | ||||
|         ) | ||||
|  | ||||
|         timestamp = datetime.datetime.strptime('%s %s' % ( | ||||
|             data['item']['day_label'], | ||||
|             data['item']['time'] | ||||
|         ), '%d.%m.%Y %H:%M') | ||||
|         unix_timestamp = calendar.timegm(timestamp.utctimetuple()) | ||||
|  | ||||
|         return { | ||||
|             'id': show_id, | ||||
|             'title': data['item']['title'], | ||||
|             'url': data['item']['url_stream'], | ||||
|             'ext': 'mp3', | ||||
|             'description': data['item'].get('info'), | ||||
|             'timestamp': unix_timestamp | ||||
|         } | ||||
|  | ||||
|  | ||||
| class ORFFM4IE(InfoExtractor): | ||||
|     IE_DESC = 'orf:fm4' | ||||
|     IE_DESC = 'radio FM4' | ||||
|     _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         show_date = mobj.group('date') | ||||
|         show_id = mobj.group('show') | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id), | ||||
|             show_id | ||||
|         ) | ||||
|  | ||||
|         def extract_entry_dict(info, title, subtitle): | ||||
|             return { | ||||
|                 'id': info['loopStreamId'].replace('.mp3', ''), | ||||
|                 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'], | ||||
|                 'title': title, | ||||
|                 'description': subtitle, | ||||
|                 'duration': (info['end'] - info['start']) / 1000, | ||||
|                 'timestamp': info['start'] / 1000, | ||||
|                 'ext': 'mp3' | ||||
|             } | ||||
|  | ||||
|         entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']] | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': show_id, | ||||
|             'title': data['title'], | ||||
|             'description': data['subtitle'], | ||||
|             'entries': entries | ||||
|         } | ||||
| @@ -20,17 +20,41 @@ class PBSIE(InfoExtractor): | ||||
|         ) | ||||
|     ''' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', | ||||
|         'md5': 'ce1888486f0908d555a8093cac9a7362', | ||||
|         'info_dict': { | ||||
|             'id': '2365006249', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'A More Perfect Union', | ||||
|             'description': 'md5:ba0c207295339c8d6eced00b7c363c6a', | ||||
|             'duration': 3190, | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', | ||||
|             'md5': 'ce1888486f0908d555a8093cac9a7362', | ||||
|             'info_dict': { | ||||
|                 'id': '2365006249', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'A More Perfect Union', | ||||
|                 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a', | ||||
|                 'duration': 3190, | ||||
|             }, | ||||
|         }, | ||||
|     } | ||||
|         { | ||||
|             'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/', | ||||
|             'md5': '143c98aa54a346738a3d78f54c925321', | ||||
|             'info_dict': { | ||||
|                 'id': '2365297690', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Losing Iraq', | ||||
|                 'description': 'md5:f5bfbefadf421e8bb8647602011caf8e', | ||||
|                 'duration': 5050, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/', | ||||
|             'md5': 'b19856d7f5351b17a5ab1dc6a64be633', | ||||
|             'info_dict': { | ||||
|                 'id': '2201174722', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist', | ||||
|                 'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28', | ||||
|                 'duration': 801, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _extract_ids(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -40,10 +64,13 @@ class PBSIE(InfoExtractor): | ||||
|         if presumptive_id: | ||||
|             webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|             # frontline video embed | ||||
|             MEDIA_ID_REGEXES = [ | ||||
|                 r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed | ||||
|                 r'class="coveplayerid">([^<]+)<',                       # coveplayer | ||||
|             ] | ||||
|  | ||||
|             media_id = self._search_regex( | ||||
|                 r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", | ||||
|                 webpage, 'frontline video ID', fatal=False, default=None) | ||||
|                 MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None) | ||||
|             if media_id: | ||||
|                 return media_id, presumptive_id | ||||
|  | ||||
|   | ||||
| @@ -1,23 +1,23 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import strip_jsonp | ||||
| from ..utils import str_or_none | ||||
|  | ||||
|  | ||||
| class ReverbNationIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', | ||||
|         'file': '16965047.mp3', | ||||
|         'md5': '3da12ebca28c67c111a7f8b262d3f7a7', | ||||
|         'info_dict': { | ||||
|             "id": "16965047", | ||||
|             "ext": "mp3", | ||||
|             "title": "MONA LISA", | ||||
|             "uploader": "ALKILADOS", | ||||
|             "uploader_id": 216429, | ||||
|             "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg" | ||||
|             "uploader_id": "216429", | ||||
|             "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$" | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
| @@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor): | ||||
|         song_id = mobj.group('id') | ||||
|  | ||||
|         api_res = self._download_json( | ||||
|             'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d' | ||||
|                 % (song_id, int(time.time() * 1000)), | ||||
|             'https://api.reverbnation.com/song/%s' % song_id, | ||||
|             song_id, | ||||
|             transform_source=strip_jsonp, | ||||
|             note='Downloading information of song %s' % song_id | ||||
|         ) | ||||
|  | ||||
| @@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor): | ||||
|             'title': api_res.get('name'), | ||||
|             'url': api_res.get('url'), | ||||
|             'uploader': api_res.get('artist', {}).get('name'), | ||||
|             'uploader_id': api_res.get('artist', {}).get('id'), | ||||
|             'thumbnail': api_res.get('image', api_res.get('thumbnail')), | ||||
|             'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), | ||||
|             'thumbnail': self._proto_relative_url( | ||||
|                 api_res.get('image', api_res.get('thumbnail'))), | ||||
|             'ext': 'mp3', | ||||
|             'vcodec': 'none', | ||||
|         } | ||||
|   | ||||
							
								
								
									
										52
									
								
								youtube_dl/extractor/rtlnl.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								youtube_dl/extractor/rtlnl.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,52 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class RtlXlIE(InfoExtractor): | ||||
|     IE_NAME = 'rtlxl.nl' | ||||
|     _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', | ||||
|         'info_dict': { | ||||
|             'id': '6e4203a6-0a5e-3596-8424-c599a59e0677', | ||||
|             'ext': 'flv', | ||||
|             'title': 'RTL Nieuws - Laat', | ||||
|             'description': 'Dagelijks het laatste nieuws uit binnen- en ' | ||||
|                 'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van ' | ||||
|                 'onze mobiele apps.', | ||||
|             'timestamp': 1408051800, | ||||
|             'upload_date': '20140814', | ||||
|         }, | ||||
|         'params': { | ||||
|             # We download the first bytes of the first fragment, it can't be | ||||
|             # processed by the f4m downloader because it isn't complete | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         uuid = mobj.group('uuid') | ||||
|  | ||||
|         info = self._download_json( | ||||
|             'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid, | ||||
|             uuid) | ||||
|         meta = info['meta'] | ||||
|         material = info['material'][0] | ||||
|         episode_info = info['episodes'][0] | ||||
|  | ||||
|         f4m_url = 'http://manifest.us.rtl.nl' + material['videopath'] | ||||
|         progname = info['abstracts'][0]['name'] | ||||
|         subtitle = material['title'] or info['episodes'][0]['name'] | ||||
|  | ||||
|         return { | ||||
|             'id': uuid, | ||||
|             'title': '%s - %s' % (progname, subtitle),  | ||||
|             'formats': self._extract_f4m_formats(f4m_url, uuid), | ||||
|             'timestamp': material['original_date'], | ||||
|             'description': episode_info['synopsis'], | ||||
|         } | ||||
| @@ -17,11 +17,11 @@ class SharedIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://shared.sx/0060718775', | ||||
|         'md5': '53e1c58fc3e777ae1dfe9e57ba2f9c72', | ||||
|         'md5': '106fefed92a8a2adb8c98e6a0652f49b', | ||||
|         'info_dict': { | ||||
|             'id': '0060718775', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Big Buck Bunny Trailer', | ||||
|             'title': 'Bmp4', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -37,7 +37,7 @@ class TeamcocoIE(InfoExtractor): | ||||
|         video_id = mobj.group("video_id") | ||||
|         if not video_id: | ||||
|             video_id = self._html_search_regex( | ||||
|                 r'<article class="video" data-id="(\d+?)"', | ||||
|                 r'data-node-id="(\d+?)"', | ||||
|                 webpage, 'video id') | ||||
|  | ||||
|         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id | ||||
|   | ||||
| @@ -121,6 +121,21 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 'videopassword': 'youtube-dl', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vimeo.com/channels/keypeele/75629013', | ||||
|             'md5': '2f86a05afe9d7abc0b9126d229bbe15d', | ||||
|             'note': 'Video is freely available via original URL ' | ||||
|                     'and protected with password when accessed via http://vimeo.com/75629013', | ||||
|             'info_dict': { | ||||
|                 'id': '75629013', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Key & Peele: Terrorist Interrogation', | ||||
|                 'description': 'md5:8678b246399b070816b12313e8b4eb5c', | ||||
|                 'uploader_id': 'atencio', | ||||
|                 'uploader': 'Peter Atencio', | ||||
|                 'duration': 187, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vimeo.com/76979871', | ||||
|             'md5': '3363dd6ffebe3784d56f4132317fd446', | ||||
| @@ -196,8 +211,6 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|         if mobj.group('pro') or mobj.group('player'): | ||||
|             url = 'http://player.vimeo.com/video/' + video_id | ||||
|         else: | ||||
|             url = 'https://vimeo.com/' + video_id | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         request = compat_urllib_request.Request(url, None, headers) | ||||
| @@ -263,7 +276,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         if video_thumbnail is None: | ||||
|             video_thumbs = config["video"].get("thumbs") | ||||
|             if video_thumbs and isinstance(video_thumbs, dict): | ||||
|                 _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1] | ||||
|                 _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] | ||||
|  | ||||
|         # Extract video description | ||||
|         video_description = None | ||||
|   | ||||
| @@ -1,10 +1,12 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class VubeIE(InfoExtractor): | ||||
| @@ -29,6 +31,7 @@ class VubeIE(InfoExtractor): | ||||
|                 'like_count': int, | ||||
|                 'dislike_count': int, | ||||
|                 'comment_count': int, | ||||
|                 'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'], | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
| @@ -47,6 +50,7 @@ class VubeIE(InfoExtractor): | ||||
|                 'like_count': int, | ||||
|                 'dislike_count': int, | ||||
|                 'comment_count': int, | ||||
|                 'categories': ['seraina', 'jessica', 'krewella', 'alive'], | ||||
|             } | ||||
|         }, { | ||||
|             'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s', | ||||
| @@ -56,13 +60,15 @@ class VubeIE(InfoExtractor): | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Frozen - Let It Go Cover by Siren Gene', | ||||
|                 'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.', | ||||
|                 'uploader': 'Siren Gene', | ||||
|                 'uploader_id': 'Siren', | ||||
|                 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$', | ||||
|                 'uploader': 'Siren', | ||||
|                 'timestamp': 1395448018, | ||||
|                 'upload_date': '20140322', | ||||
|                 'duration': 221.788, | ||||
|                 'like_count': int, | ||||
|                 'dislike_count': int, | ||||
|                 'comment_count': int, | ||||
|                 'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'], | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
| @@ -71,47 +77,40 @@ class VubeIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         data_json = self._search_regex( | ||||
|             r'(?s)window\["(?:tapiVideoData|vubeOriginalVideoData)"\]\s*=\s*(\{.*?\n});\n', | ||||
|             webpage, 'video data' | ||||
|         ) | ||||
|         data = json.loads(data_json) | ||||
|         video = ( | ||||
|             data.get('video') or | ||||
|             data) | ||||
|         assert isinstance(video, dict) | ||||
|         video = self._download_json( | ||||
|             'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON') | ||||
|  | ||||
|         public_id = video['public_id'] | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id), | ||||
|                 'height': int(fmt['height']), | ||||
|                 'abr': int(fmt['audio_bitrate']), | ||||
|                 'vbr': int(fmt['video_bitrate']), | ||||
|                 'format_id': fmt['media_resolution_id'] | ||||
|             } for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed' | ||||
|         ] | ||||
|         formats = [] | ||||
|  | ||||
|         for media in video['media'].get('video', []) + video['media'].get('audio', []): | ||||
|             if media['transcoding_status'] != 'processed': | ||||
|                 continue | ||||
|             fmt = { | ||||
|                 'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id), | ||||
|                 'abr': int(media['audio_bitrate']), | ||||
|                 'format_id': compat_str(media['media_resolution_id']), | ||||
|             } | ||||
|             vbr = int(media['video_bitrate']) | ||||
|             if vbr: | ||||
|                 fmt.update({ | ||||
|                     'vbr': vbr, | ||||
|                     'height': int(media['height']), | ||||
|                 }) | ||||
|             formats.append(fmt) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = video['title'] | ||||
|         description = video.get('description') | ||||
|         thumbnail = self._proto_relative_url( | ||||
|             video.get('thumbnail') or video.get('thumbnail_src'), | ||||
|             scheme='http:') | ||||
|         uploader = data.get('user', {}).get('channel', {}).get('name') or video.get('user_alias') | ||||
|         uploader_id = data.get('user', {}).get('name') | ||||
|         thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:') | ||||
|         uploader = video.get('user_alias') or video.get('channel') | ||||
|         timestamp = int_or_none(video.get('upload_time')) | ||||
|         duration = video['duration'] | ||||
|         view_count = video.get('raw_view_count') | ||||
|         like_count = video.get('rlikes') | ||||
|         if like_count is None: | ||||
|             like_count = video.get('total_likes') | ||||
|         dislike_count = video.get('rhates') | ||||
|         if dislike_count is None: | ||||
|             dislike_count = video.get('total_hates') | ||||
|         like_count = video.get('total_likes') | ||||
|         dislike_count = video.get('total_hates') | ||||
|  | ||||
|         comments = video.get('comments') | ||||
|         comment_count = None | ||||
| @@ -124,6 +123,8 @@ class VubeIE(InfoExtractor): | ||||
|         else: | ||||
|             comment_count = len(comments) | ||||
|  | ||||
|         categories = [tag['text'] for tag in video['tags']] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
| @@ -131,11 +132,11 @@ class VubeIE(InfoExtractor): | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'timestamp': timestamp, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'like_count': like_count, | ||||
|             'dislike_count': dislike_count, | ||||
|             'comment_count': comment_count, | ||||
|             'categories': categories, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										57
									
								
								youtube_dl/extractor/xboxclips.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								youtube_dl/extractor/xboxclips.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class XboxClipsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})' | ||||
|     _TEST = { | ||||
|         'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325', | ||||
|         'md5': 'fbe1ec805e920aeb8eced3c3e657df5d', | ||||
|         'info_dict': { | ||||
|             'id': '074a69a9-5faf-46aa-b93b-9909c1720325', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Iabdulelah playing Upload Studio', | ||||
|             'filesize_approx': 28101836.8, | ||||
|             'timestamp': 1407388500, | ||||
|             'upload_date': '20140807', | ||||
|             'duration': 56, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'>Link: <a href="([^"]+)">', webpage, 'video URL') | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title') | ||||
|         timestamp = parse_iso8601(self._html_search_regex( | ||||
|             r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False)) | ||||
|         filesize = float_or_none(self._html_search_regex( | ||||
|             r'>Size: ([\d\.]+)MB<', webpage, 'file size', fatal=False), invscale=1024 * 1024) | ||||
|         duration = int_or_none(self._html_search_regex( | ||||
|             r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False)) | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'>Views: (\d+)<', webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'timestamp': timestamp, | ||||
|             'filesize_approx': filesize, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|         } | ||||
| @@ -15,7 +15,7 @@ from ..utils import ( | ||||
|  | ||||
| class YahooIE(InfoExtractor): | ||||
|     IE_DESC = 'Yahoo screen and movies' | ||||
|     _VALID_URL = r'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html' | ||||
|     _VALID_URL = r'(?P<url>https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', | ||||
| @@ -46,12 +46,23 @@ class YahooIE(InfoExtractor): | ||||
|                 'title': 'The World Loves Spider-Man', | ||||
|                 'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''', | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', | ||||
|             'md5': '60e8ac193d8fb71997caa8fce54c6460', | ||||
|             'info_dict': { | ||||
|                 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': "Yahoo Saves 'Community'", | ||||
|                 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         url = mobj.group('url') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         items_json = self._search_regex( | ||||
|   | ||||
| @@ -374,6 +374,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 return lambda s: u''.join(s[i] for i in cache_spec) | ||||
|             except IOError: | ||||
|                 pass  # No cache available | ||||
|             except ValueError: | ||||
|                 try: | ||||
|                     file_size = os.path.getsize(cache_fn) | ||||
|                 except (OSError, IOError) as oe: | ||||
|                     file_size = str(oe) | ||||
|                 self._downloader.report_warning( | ||||
|                     u'Cache %s failed (%s)' % (cache_fn, file_size)) | ||||
|  | ||||
|         if player_type == 'js': | ||||
|             code = self._download_webpage( | ||||
|   | ||||
| @@ -24,6 +24,7 @@ import socket | ||||
| import struct | ||||
| import subprocess | ||||
| import sys | ||||
| import tempfile | ||||
| import traceback | ||||
| import xml.etree.ElementTree | ||||
| import zlib | ||||
| @@ -228,18 +229,36 @@ else: | ||||
|         assert type(s) == type(u'') | ||||
|         print(s) | ||||
|  | ||||
| # In Python 2.x, json.dump expects a bytestream. | ||||
| # In Python 3.x, it writes to a character stream | ||||
| if sys.version_info < (3,0): | ||||
|     def write_json_file(obj, fn): | ||||
|         with open(fn, 'wb') as f: | ||||
|             json.dump(obj, f) | ||||
| else: | ||||
|     def write_json_file(obj, fn): | ||||
|         with open(fn, 'w', encoding='utf-8') as f: | ||||
|             json.dump(obj, f) | ||||
|  | ||||
| if sys.version_info >= (2,7): | ||||
| def write_json_file(obj, fn): | ||||
|     """ Encode obj as JSON and write it to fn, atomically """ | ||||
|  | ||||
|     # In Python 2.x, json.dump expects a bytestream. | ||||
|     # In Python 3.x, it writes to a character stream | ||||
|     if sys.version_info < (3, 0): | ||||
|         mode = 'wb' | ||||
|         encoding = None | ||||
|     else: | ||||
|         mode = 'w' | ||||
|         encoding = 'utf-8' | ||||
|     tf = tempfile.NamedTemporaryFile( | ||||
|         suffix='.tmp', prefix=os.path.basename(fn) + '.', | ||||
|         dir=os.path.dirname(fn), | ||||
|         delete=False) | ||||
|  | ||||
|     try: | ||||
|         with tf: | ||||
|             json.dump(obj, tf) | ||||
|         os.rename(tf.name, fn) | ||||
|     except: | ||||
|         try: | ||||
|             os.remove(tf.name) | ||||
|         except OSError: | ||||
|             pass | ||||
|         raise | ||||
|  | ||||
|  | ||||
| if sys.version_info >= (2, 7): | ||||
|     def find_xpath_attr(node, xpath, key, val): | ||||
|         """ Find the xpath xpath[@key=val] """ | ||||
|         assert re.match(r'^[a-zA-Z-]+$', key) | ||||
| @@ -827,6 +846,7 @@ def unified_strdate(date_str): | ||||
|         '%b %dnd %Y %I:%M%p', | ||||
|         '%b %dth %Y %I:%M%p', | ||||
|         '%Y-%m-%d', | ||||
|         '%Y/%m/%d', | ||||
|         '%d.%m.%Y', | ||||
|         '%d/%m/%Y', | ||||
|         '%Y/%m/%d %H:%M:%S', | ||||
| @@ -1273,9 +1293,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): | ||||
|     if get_attr: | ||||
|         if v is not None: | ||||
|             v = getattr(v, get_attr, None) | ||||
|     if v == '': | ||||
|         v = None | ||||
|     return default if v is None else (int(v) * invscale // scale) | ||||
|  | ||||
|  | ||||
| def str_or_none(v, default=None): | ||||
|     return default if v is None else compat_str(v) | ||||
|  | ||||
|  | ||||
| def str_to_int(int_str): | ||||
|     if int_str is None: | ||||
|         return None | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.08.02.1' | ||||
| __version__ = '2014.08.21.2' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user