Compare commits
	
		
			70 Commits
		
	
	
		
			2014.02.26
			...
			2014.03.04
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | d63516e9cd | ||
|  | e477dcf649 | ||
|  | 9d3f7781f3 | ||
|  | c7095dada3 | ||
|  | 607dbbad76 | ||
|  | 17b75c0de1 | ||
|  | ab24f4f3be | ||
|  | e1a52d9e10 | ||
|  | d0ff838433 | ||
|  | b37b94501c | ||
|  | cb3bb2cfef | ||
|  | e2cc7983e9 | ||
|  | c9ae7b9565 | ||
|  | 86fb4347f7 | ||
|  | 2fcec131f5 | ||
|  | 9f62eaf4ef | ||
|  | f92259c026 | ||
|  | 0afef30b23 | ||
|  | dcdfd1c711 | ||
|  | 2acc1f8f50 | ||
|  | 2c39b0c695 | ||
|  | e77c5b4f63 | ||
|  | 409a16cb72 | ||
|  | 94d5e90b4f | ||
|  | 2d73b45805 | ||
|  | 271a2dbfa2 | ||
|  | bf4adcac66 | ||
|  | fb8b8fdd62 | ||
|  | 5a0b26252e | ||
|  | 7d78f0cc48 | ||
|  | f00fc78674 | ||
|  | 392017874c | ||
|  | c3cb92d1ab | ||
|  | aa5590fa07 | ||
|  | 8cfb5bbf92 | ||
|  | 69bb54ebf9 | ||
|  | ca97a56e4b | ||
|  | fc26f3b4c2 | ||
|  | f604c93c64 | ||
|  | dc3727b65c | ||
|  | aba3231de1 | ||
|  | 9193bab91d | ||
|  | fbcf3e416d | ||
|  | c0e5d85631 | ||
|  | ca7fa3dcb3 | ||
|  | 4ccfba28d9 | ||
|  | abb82f1ddc | ||
|  | cda008cff1 | ||
|  | 1877a14049 | ||
|  | 546582ec3e | ||
|  | 4534485586 | ||
|  | a9ab8855e4 | ||
|  | 8a44ef6868 | ||
|  | 0c7214c404 | ||
|  | 4cf9654693 | ||
|  | 50a138d95c | ||
|  | 91346358b0 | ||
|  | f3783d4b77 | ||
|  | 89ef304bed | ||
|  | 83cebb8b7a | ||
|  | 9e68f9fdf1 | ||
|  | 2acea5c03d | ||
|  | 978177527e | ||
|  | 2648c436f3 | ||
|  | 33f1f2c455 | ||
|  | 995befe0e9 | ||
|  | 1bb92aff55 | ||
|  | b8e1471d3a | ||
|  | 60daf7f0bb | ||
|  | a83a3139d1 | ||
| @@ -124,8 +124,12 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      video id, %(playlist)s for the playlist the | ||||
|                                      video is in, %(playlist_index)s for the | ||||
|                                      position in the playlist and %% for a | ||||
|                                      literal percent. Use - to output to stdout. | ||||
|                                      Can also be used to download to a different | ||||
|                                      literal percent. %(width)s and %(height)s | ||||
|                                      for the width and height of the video | ||||
|                                      format. %(resolution)s for a textual | ||||
|                                      description of the resolution of the video | ||||
|                                      format. Use - to output to stdout. Can also | ||||
|                                      be used to download to a different | ||||
|                                      directory, for example with -o '/my/downloa | ||||
|                                      ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' . | ||||
|     --autonumber-size NUMBER         Specifies the number of digits in | ||||
|   | ||||
| @@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|     def test_youtube_truncated(self): | ||||
|         self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) | ||||
|  | ||||
|     def test_youtube_search_matching(self): | ||||
|         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) | ||||
|         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) | ||||
|  | ||||
|     def test_justin_tv_channelid_matching(self): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) | ||||
|   | ||||
| @@ -16,6 +16,7 @@ from youtube_dl.extractor import ( | ||||
|     YoutubeChannelIE, | ||||
|     YoutubeShowIE, | ||||
|     YoutubeTopListIE, | ||||
|     YoutubeSearchURLIE, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -133,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         entries = result['entries'] | ||||
|         self.assertTrue(len(entries) >= 5) | ||||
|  | ||||
|     def test_youtube_search_url(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeSearchURLIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video') | ||||
|         entries = result['entries'] | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], 'youtube-dl test video') | ||||
|         self.assertTrue(len(entries) >= 5) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -409,6 +409,13 @@ class YoutubeDL(object): | ||||
|             template_dict['autonumber'] = autonumber_templ % self._num_downloads | ||||
|             if template_dict.get('playlist_index') is not None: | ||||
|                 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index'] | ||||
|             if template_dict.get('resolution') is None: | ||||
|                 if template_dict.get('width') and template_dict.get('height'): | ||||
|                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) | ||||
|                 elif template_dict.get('height'): | ||||
|                     res = '%sp' % template_dict['height'] | ||||
|                 elif template_dict.get('width'): | ||||
|                     res = '?x%d' % template_dict['width'] | ||||
|  | ||||
|             sanitize = lambda k, v: sanitize_filename( | ||||
|                 compat_str(v), | ||||
| @@ -675,6 +682,9 @@ class YoutubeDL(object): | ||||
|             info_dict['playlist'] = None | ||||
|             info_dict['playlist_index'] = None | ||||
|  | ||||
|         if 'display_id' not in info_dict and 'id' in info_dict: | ||||
|             info_dict['display_id'] = info_dict['id'] | ||||
|  | ||||
|         # These extractors handle format selection themselves | ||||
|         if info_dict['extractor'] in ['Youku']: | ||||
|             if download: | ||||
|   | ||||
| @@ -48,6 +48,8 @@ __authors__  = ( | ||||
|     'Niklas Laxström', | ||||
|     'David Triendl', | ||||
|     'Anthony Weems', | ||||
|     'David Wagner', | ||||
|     'Juan C. Olivares', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
| @@ -428,6 +430,8 @@ def parseOpts(overrideArguments=None): | ||||
|                   '%(extractor)s for the provider (youtube, metacafe, etc), ' | ||||
|                   '%(id)s for the video id, %(playlist)s for the playlist the video is in, ' | ||||
|                   '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' | ||||
|                   '%(height)s and %(width)s for the width and height of the video format. ' | ||||
|                   '%(resolution)s for a textual description of the resolution of the video format. ' | ||||
|                   'Use - to output to stdout. Can also be used to download to a different directory, ' | ||||
|                   'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) | ||||
|     filesystem.add_option('--autonumber-size', | ||||
|   | ||||
| @@ -85,6 +85,7 @@ class HttpFD(FileDownloader): | ||||
|                         else: | ||||
|                             # The length does not match, we start the download over | ||||
|                             self.report_unable_to_resume() | ||||
|                             resume_len = 0 | ||||
|                             open_mode = 'wb' | ||||
|                             break | ||||
|             # Retry | ||||
|   | ||||
| @@ -23,9 +23,11 @@ from .br import BRIE | ||||
| from .breakcom import BreakIE | ||||
| from .brightcove import BrightcoveIE | ||||
| from .c56 import C56IE | ||||
| from .canal13cl import Canal13clIE | ||||
| from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .cbs import CBSIE | ||||
| from .ceskatelevize import CeskaTelevizeIE | ||||
| from .channel9 import Channel9IE | ||||
| from .chilloutzone import ChilloutzoneIE | ||||
| from .cinemassacre import CinemassacreIE | ||||
| @@ -89,6 +91,7 @@ from .funnyordie import FunnyOrDieIE | ||||
| from .gamekings import GamekingsIE | ||||
| from .gamespot import GameSpotIE | ||||
| from .gametrailers import GametrailersIE | ||||
| from .gdcvault import GDCVaultIE | ||||
| from .generic import GenericIE | ||||
| from .googleplus import GooglePlusIE | ||||
| from .googlesearch import GoogleSearchIE | ||||
| @@ -133,6 +136,7 @@ from .lynda import ( | ||||
| ) | ||||
| from .m6 import M6IE | ||||
| from .macgamestore import MacGameStoreIE | ||||
| from .mailru import MailRuIE | ||||
| from .malemotion import MalemotionIE | ||||
| from .mdr import MDRIE | ||||
| from .metacafe import MetacafeIE | ||||
| @@ -175,6 +179,7 @@ from .podomatic import PodomaticIE | ||||
| from .pornhd import PornHdIE | ||||
| from .pornhub import PornHubIE | ||||
| from .pornotube import PornotubeIE | ||||
| from .prosiebensat1 import ProSiebenSat1IE | ||||
| from .pyvideo import PyvideoIE | ||||
| from .radiofrance import RadioFranceIE | ||||
| from .rbmaradio import RBMARadioIE | ||||
| @@ -234,6 +239,7 @@ from .tube8 import Tube8IE | ||||
| from .tudou import TudouIE | ||||
| from .tumblr import TumblrIE | ||||
| from .tutv import TutvIE | ||||
| from .tvigle import TvigleIE | ||||
| from .tvp import TvpIE | ||||
| from .unistra import UnistraIE | ||||
| from .ustream import UstreamIE, UstreamChannelIE | ||||
| @@ -279,19 +285,20 @@ from .youku import YoukuIE | ||||
| from .youporn import YouPornIE | ||||
| from .youtube import ( | ||||
|     YoutubeIE, | ||||
|     YoutubePlaylistIE, | ||||
|     YoutubeSearchIE, | ||||
|     YoutubeSearchDateIE, | ||||
|     YoutubeUserIE, | ||||
|     YoutubeChannelIE, | ||||
|     YoutubeShowIE, | ||||
|     YoutubeSubscriptionsIE, | ||||
|     YoutubeRecommendedIE, | ||||
|     YoutubeTruncatedURLIE, | ||||
|     YoutubeWatchLaterIE, | ||||
|     YoutubeFavouritesIE, | ||||
|     YoutubeHistoryIE, | ||||
|     YoutubePlaylistIE, | ||||
|     YoutubeRecommendedIE, | ||||
|     YoutubeSearchDateIE, | ||||
|     YoutubeSearchIE, | ||||
|     YoutubeSearchURLIE, | ||||
|     YoutubeShowIE, | ||||
|     YoutubeSubscriptionsIE, | ||||
|     YoutubeTopListIE, | ||||
|     YoutubeTruncatedURLIE, | ||||
|     YoutubeUserIE, | ||||
|     YoutubeWatchLaterIE, | ||||
| ) | ||||
| from .zdf import ZDFIE | ||||
|  | ||||
|   | ||||
							
								
								
									
										48
									
								
								youtube_dl/extractor/canal13cl.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								youtube_dl/extractor/canal13cl.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class Canal13clIE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', | ||||
|         'md5': '4cb1fa38adcad8fea88487a078831755', | ||||
|         'info_dict': { | ||||
|             'id': '1403022125', | ||||
|             'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda', | ||||
|             'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         title = self._html_search_meta( | ||||
|             'twitter:title', webpage, 'title', fatal=True) | ||||
|         description = self._html_search_meta( | ||||
|             'twitter:description', webpage, 'description') | ||||
|         url = self._html_search_regex( | ||||
|             r'articuloVideo = \"(.*?)\"', webpage, 'url') | ||||
|         real_id = self._search_regex( | ||||
|             r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id) | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail') | ||||
|  | ||||
|         return { | ||||
|             'id': real_id, | ||||
|             'display_id': display_id, | ||||
|             'url': url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'ext': 'mp4', | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
							
								
								
									
										126
									
								
								youtube_dl/extractor/ceskatelevize.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										126
									
								
								youtube_dl/extractor/ceskatelevize.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,126 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CeskaTelevizeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka', | ||||
|             'info_dict': { | ||||
|                 'id': '213512120230004', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'První republika: Španělská chřipka', | ||||
|                 'duration': 3107.4, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # requires rtmpdump | ||||
|             }, | ||||
|             'skip': 'Works only from Czech Republic.', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt', | ||||
|             'info_dict': { | ||||
|                 'id': '20138143440', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Tsatsiki, maminka a policajt', | ||||
|                 'duration': 6754.1, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # requires rtmpdump | ||||
|             }, | ||||
|             'skip': 'Works only from Czech Republic.', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', | ||||
|             'info_dict': { | ||||
|                 'id': '14716', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'První republika: Zpěvačka z Dupárny Bobina', | ||||
|                 'duration': 90, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # requires rtmpdump | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url = url.replace('/porady/', '/ivysilani/').replace('/video/', '') | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' | ||||
|         if '%s</p>' % NOT_AVAILABLE_STRING in webpage: | ||||
|             raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) | ||||
|  | ||||
|         typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type') | ||||
|         episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id') | ||||
|  | ||||
|         data = { | ||||
|             'playlist[0][type]': typ, | ||||
|             'playlist[0][id]': episode_id, | ||||
|             'requestUrl': compat_urllib_parse_urlparse(url).path, | ||||
|             'requestSource': 'iVysilani', | ||||
|         } | ||||
|  | ||||
|         req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url', | ||||
|                                             data=compat_urllib_parse.urlencode(data)) | ||||
|  | ||||
|         req.add_header('Content-type', 'application/x-www-form-urlencoded') | ||||
|         req.add_header('x-addr', '127.0.0.1') | ||||
|         req.add_header('X-Requested-With', 'XMLHttpRequest') | ||||
|         req.add_header('Referer', url) | ||||
|  | ||||
|         playlistpage = self._download_json(req, video_id) | ||||
|  | ||||
|         req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url'])) | ||||
|         req.add_header('Referer', url) | ||||
|  | ||||
|         playlist = self._download_xml(req, video_id) | ||||
|          | ||||
|         formats = [] | ||||
|         for i in playlist.find('smilRoot/body'): | ||||
|             if 'AD' not in i.attrib['id']: | ||||
|                 base_url = i.attrib['base'] | ||||
|                 parsedurl = compat_urllib_parse_urlparse(base_url) | ||||
|                 duration = i.attrib['duration'] | ||||
|  | ||||
|                 for video in i.findall('video'): | ||||
|                     if video.attrib['label'] != 'AD': | ||||
|                         format_id = video.attrib['label'] | ||||
|                         play_path = video.attrib['src'] | ||||
|                         vbr = int(video.attrib['system-bitrate']) | ||||
|  | ||||
|                         formats.append({ | ||||
|                             'format_id': format_id, | ||||
|                             'url': base_url, | ||||
|                             'vbr': vbr, | ||||
|                             'play_path': play_path, | ||||
|                             'app': parsedurl.path[1:] + '?' + parsedurl.query, | ||||
|                             'rtmp_live': True, | ||||
|                             'ext': 'flv', | ||||
|                         }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': episode_id, | ||||
|             'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'), | ||||
|             'duration': float(duration), | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -40,7 +40,7 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             'id': 'W5gMp3ZjYg4', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', | ||||
|             'uploader': 'Funnyplox TV', | ||||
|             'uploader': 'FunnyPlox TV', | ||||
|             'uploader_id': 'funnyploxtv', | ||||
|             'description': 'md5:7ded37421526d54afdf005e25bc2b7a3', | ||||
|             'upload_date': '20140128', | ||||
|   | ||||
| @@ -88,6 +88,10 @@ class InfoExtractor(object): | ||||
|  | ||||
|     The following fields are optional: | ||||
|  | ||||
|     display_id      An alternative identifier for the video, not necessarily | ||||
|                     unique, but available before title. Typically, id is | ||||
|                     something like "4234987", title "Dancing naked mole rats", | ||||
|                     and display_id "dancing-naked-mole-rats" | ||||
|     thumbnails:     A list of dictionaries (with the entries "resolution" and | ||||
|                     "url") for the varying thumbnails | ||||
|     thumbnail:      Full URL to a video thumbnail image. | ||||
| @@ -432,14 +436,14 @@ class InfoExtractor(object): | ||||
|         if secure: regexes = self._og_regexes('video:secure_url') + regexes | ||||
|         return self._html_search_regex(regexes, html, name, **kargs) | ||||
|  | ||||
|     def _html_search_meta(self, name, html, display_name=None): | ||||
|     def _html_search_meta(self, name, html, display_name=None, fatal=False): | ||||
|         if display_name is None: | ||||
|             display_name = name | ||||
|         return self._html_search_regex( | ||||
|             r'''(?ix)<meta | ||||
|                     (?=[^>]+(?:itemprop|name|property)=["\']%s["\']) | ||||
|                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), | ||||
|             html, display_name, fatal=False) | ||||
|             html, display_name, fatal=fatal) | ||||
|  | ||||
|     def _dc_search_uploader(self, html): | ||||
|         return self._html_search_meta('dc.creator', html, 'uploader') | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
| import socket | ||||
| @@ -26,20 +28,21 @@ class FacebookIE(InfoExtractor): | ||||
|     _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' | ||||
|     _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' | ||||
|     _NETRC_MACHINE = 'facebook' | ||||
|     IE_NAME = u'facebook' | ||||
|     IE_NAME = 'facebook' | ||||
|     _TEST = { | ||||
|         u'url': u'https://www.facebook.com/photo.php?v=120708114770723', | ||||
|         u'file': u'120708114770723.mp4', | ||||
|         u'md5': u'48975a41ccc4b7a581abd68651c1a5a8', | ||||
|         u'info_dict': { | ||||
|             u"duration": 279, | ||||
|             u"title": u"PEOPLE ARE AWESOME 2013" | ||||
|         'url': 'https://www.facebook.com/photo.php?v=120708114770723', | ||||
|         'md5': '48975a41ccc4b7a581abd68651c1a5a8', | ||||
|         'info_dict': { | ||||
|             'id': '120708114770723', | ||||
|             'ext': 'mp4', | ||||
|             'duration': 279, | ||||
|             'title': 'PEOPLE ARE AWESOME 2013' | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def report_login(self): | ||||
|         """Report attempt to log in.""" | ||||
|         self.to_screen(u'Logging in') | ||||
|         self.to_screen('Logging in') | ||||
|  | ||||
|     def _login(self): | ||||
|         (useremail, password) = self._get_login_info() | ||||
| @@ -50,9 +53,11 @@ class FacebookIE(InfoExtractor): | ||||
|         login_page_req.add_header('Cookie', 'locale=en_US') | ||||
|         self.report_login() | ||||
|         login_page = self._download_webpage(login_page_req, None, note=False, | ||||
|             errnote=u'Unable to download login page') | ||||
|         lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd') | ||||
|         lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd') | ||||
|             errnote='Unable to download login page') | ||||
|         lsd = self._search_regex( | ||||
|             r'<input type="hidden" name="lsd" value="([^"]*)"', | ||||
|             login_page, 'lsd') | ||||
|         lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd') | ||||
|  | ||||
|         login_form = { | ||||
|             'email': useremail, | ||||
| @@ -70,22 +75,22 @@ class FacebookIE(InfoExtractor): | ||||
|         try: | ||||
|             login_results = compat_urllib_request.urlopen(request).read() | ||||
|             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: | ||||
|                 self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') | ||||
|                 self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') | ||||
|                 return | ||||
|  | ||||
|             check_form = { | ||||
|                 'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'), | ||||
|                 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'), | ||||
|                 'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'), | ||||
|                 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'), | ||||
|                 'name_action_selected': 'dont_save', | ||||
|                 'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'), | ||||
|                 'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'), | ||||
|             } | ||||
|             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form)) | ||||
|             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|             check_response = compat_urllib_request.urlopen(check_req).read() | ||||
|             if re.search(r'id="checkpointSubmitButton"', check_response) is not None: | ||||
|                 self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.') | ||||
|                 self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) | ||||
|             self._downloader.report_warning('unable to log in: %s' % compat_str(err)) | ||||
|             return | ||||
|  | ||||
|     def _real_initialize(self): | ||||
| @@ -94,7 +99,7 @@ class FacebookIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id | ||||
| @@ -107,10 +112,10 @@ class FacebookIE(InfoExtractor): | ||||
|             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) | ||||
|             if m_msg is not None: | ||||
|                 raise ExtractorError( | ||||
|                     u'The video is not available, Facebook said: "%s"' % m_msg.group(1), | ||||
|                     'The video is not available, Facebook said: "%s"' % m_msg.group(1), | ||||
|                     expected=True) | ||||
|             else: | ||||
|                 raise ExtractorError(u'Cannot parse data') | ||||
|                 raise ExtractorError('Cannot parse data') | ||||
|         data = dict(json.loads(m.group(1))) | ||||
|         params_raw = compat_urllib_parse.unquote(data['params']) | ||||
|         params = json.loads(params_raw) | ||||
| @@ -119,12 +124,12 @@ class FacebookIE(InfoExtractor): | ||||
|         if not video_url: | ||||
|             video_url = video_data['sd_src'] | ||||
|         if not video_url: | ||||
|             raise ExtractorError(u'Cannot find video URL') | ||||
|             raise ExtractorError('Cannot find video URL') | ||||
|         video_duration = int(video_data['video_duration']) | ||||
|         thumbnail = video_data['thumbnail_src'] | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title') | ||||
|             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title') | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -8,8 +8,8 @@ from ..utils import ( | ||||
|     unified_strdate, | ||||
|     str_to_int, | ||||
|     parse_duration, | ||||
|     clean_html, | ||||
| ) | ||||
| from youtube_dl.utils import clean_html | ||||
|  | ||||
|  | ||||
| class FourTubeIE(InfoExtractor): | ||||
|   | ||||
							
								
								
									
										134
									
								
								youtube_dl/extractor/gdcvault.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								youtube_dl/extractor/gdcvault.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,134 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
| class GDCVaultIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', | ||||
|             'md5': '7ce8388f544c88b7ac11c7ab1b593704', | ||||
|             'info_dict': { | ||||
|                 'id': '1019721', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', | ||||
|             'info_dict': { | ||||
|                 'id': '1015683', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # Requires rtmpdump | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _parse_mp4(self, xml_description): | ||||
|         video_formats = [] | ||||
|         mp4_video = xml_description.find('./metadata/mp4video') | ||||
|         if mp4_video is None: | ||||
|             return None | ||||
|  | ||||
|         mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text) | ||||
|         video_root = mobj.group('root') | ||||
|         formats = xml_description.findall('./metadata/MBRVideos/MBRVideo') | ||||
|         for format in formats: | ||||
|             mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text) | ||||
|             url = video_root + mobj.group('path') | ||||
|             vbr = format.find('bitrate').text | ||||
|             video_formats.append({ | ||||
|                 'url': url, | ||||
|                 'vbr': int(vbr), | ||||
|             }) | ||||
|         return video_formats | ||||
|  | ||||
|     def _parse_flv(self, xml_description): | ||||
|         video_formats = [] | ||||
|         akami_url = xml_description.find('./metadata/akamaiHost').text | ||||
|         slide_video_path = xml_description.find('./metadata/slideVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + slide_video_path, | ||||
|             'format_note': 'slide deck video', | ||||
|             'quality': -2, | ||||
|             'preference': -2, | ||||
|             'format_id': 'slides', | ||||
|         }) | ||||
|         speaker_video_path = xml_description.find('./metadata/speakerVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + speaker_video_path, | ||||
|             'format_note': 'speaker video', | ||||
|             'quality': -1, | ||||
|             'preference': -1, | ||||
|             'format_id': 'speaker', | ||||
|         }) | ||||
|         return video_formats | ||||
|  | ||||
|     def _login(self, webpage_url, video_id): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None or password is None: | ||||
|             self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.') | ||||
|             return None | ||||
|  | ||||
|         mobj = re.match(r'(?P<root_url>https?://.*?/).*', webpage_url) | ||||
|         login_url = mobj.group('root_url') + 'api/login.php' | ||||
|         logout_url = mobj.group('root_url') + 'logout' | ||||
|  | ||||
|         login_form = { | ||||
|             'email': username, | ||||
|             'password': password, | ||||
|         } | ||||
|  | ||||
|         request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form)) | ||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         self._download_webpage(request, video_id, 'Logging in') | ||||
|         start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page') | ||||
|         self._download_webpage(logout_url, video_id, 'Logging out') | ||||
|  | ||||
|         return start_page | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         webpage_url = 'http://www.gdcvault.com/play/' + video_id | ||||
|         start_page = self._download_webpage(webpage_url, video_id) | ||||
|  | ||||
|         xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False) | ||||
|  | ||||
|         if xml_root is None: | ||||
|             # Probably need to authenticate | ||||
|             start_page = self._login(webpage_url, video_id) | ||||
|             if start_page is None: | ||||
|                 self.report_warning('Could not login.') | ||||
|             else: | ||||
|                 # Grab the url from the authenticated page | ||||
|                 xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root') | ||||
|  | ||||
|         xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False) | ||||
|         if xml_name is None: | ||||
|             # Fallback to the older format | ||||
|             xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename') | ||||
|  | ||||
|         xml_decription_url = xml_root + 'xml/' + xml_name | ||||
|         xml_description = self._download_xml(xml_decription_url, video_id) | ||||
|  | ||||
|         video_title = xml_description.find('./metadata/title').text | ||||
|         video_formats = self._parse_mp4(xml_description) | ||||
|         if video_formats is None: | ||||
|             video_formats = self._parse_flv(xml_description) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'formats': video_formats, | ||||
|         } | ||||
| @@ -83,10 +83,10 @@ class GenericIE(InfoExtractor): | ||||
|         # Direct link to a video | ||||
|         { | ||||
|             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', | ||||
|             'file': 'trailer.mp4', | ||||
|             'md5': '67d406c2bcb6af27fa886f31aa934bbe', | ||||
|             'info_dict': { | ||||
|                 'id': 'trailer', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'trailer', | ||||
|                 'upload_date': '20100513', | ||||
|             } | ||||
| @@ -94,7 +94,6 @@ class GenericIE(InfoExtractor): | ||||
|         # ooyala video | ||||
|         { | ||||
|             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', | ||||
|             'file': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ.mp4', | ||||
|             'md5': '5644c6ca5d5782c1d0d350dad9bd840c', | ||||
|             'info_dict': { | ||||
|                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', | ||||
| @@ -102,6 +101,22 @@ class GenericIE(InfoExtractor): | ||||
|                 'title': '2cc213299525360.mov',  # that's what we get | ||||
|             }, | ||||
|         }, | ||||
|         # google redirect | ||||
|         { | ||||
|             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', | ||||
|             'info_dict': { | ||||
|                 'id': 'cmQHVoWB5FY', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20130224', | ||||
|                 'uploader_id': 'TheVerge', | ||||
|                 'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.', | ||||
|                 'uploader': 'The Verge', | ||||
|                 'title': 'First Firefox OS phones side-by-side', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': False, | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def report_download_webpage(self, video_id): | ||||
| @@ -381,6 +396,11 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Facebook') | ||||
|  | ||||
|         # Look for embedded VK player | ||||
|         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'VK') | ||||
|  | ||||
|         # Look for embedded Huffington Post player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) | ||||
| @@ -411,6 +431,18 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is None: | ||||
|             # HTML5 video | ||||
|             mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL) | ||||
|         if mobj is None: | ||||
|             mobj = re.search( | ||||
|                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' | ||||
|                 r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"', | ||||
|                 webpage) | ||||
|             if mobj: | ||||
|                 new_url = mobj.group(1) | ||||
|                 self.report_following_redirect(new_url) | ||||
|                 return { | ||||
|                     '_type': 'url', | ||||
|                     'url': new_url, | ||||
|                 } | ||||
|         if mobj is None: | ||||
|             raise ExtractorError('Unsupported URL: %s' % url) | ||||
|  | ||||
|   | ||||
| @@ -6,7 +6,8 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     unified_strdate | ||||
|     unified_strdate, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -32,13 +33,11 @@ class LifeNewsIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page') | ||||
|         webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page') | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL') | ||||
|  | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail') | ||||
|         videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage) | ||||
|         if not videos: | ||||
|             raise ExtractorError('No media links available for %s' % video_id) | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS' | ||||
| @@ -50,20 +49,26 @@ class LifeNewsIE(InfoExtractor): | ||||
|         view_count = self._html_search_regex( | ||||
|             r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False) | ||||
|         comment_count = self._html_search_regex( | ||||
|             r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count', fatal=False) | ||||
|             r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False) | ||||
|  | ||||
|         upload_date = self._html_search_regex( | ||||
|             r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False) | ||||
|         if upload_date is not None: | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'thumbnail': thumbnail, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'view_count': int_or_none(view_count), | ||||
|             'comment_count': int_or_none(comment_count), | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
|         def make_entry(video_id, media, video_number=None): | ||||
|             return { | ||||
|                 'id': video_id, | ||||
|                 'url': media[1], | ||||
|                 'thumbnail': media[0], | ||||
|                 'title': title if video_number is None else '%s-video%s' % (title, video_number), | ||||
|                 'description': description, | ||||
|                 'view_count': int_or_none(view_count), | ||||
|                 'comment_count': int_or_none(comment_count), | ||||
|                 'upload_date': upload_date, | ||||
|             } | ||||
|  | ||||
|         if len(videos) == 1: | ||||
|             return make_entry(video_id, videos[0]) | ||||
|         else: | ||||
|             return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)] | ||||
							
								
								
									
										66
									
								
								youtube_dl/extractor/mailru.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								youtube_dl/extractor/mailru.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import datetime | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class MailRuIE(InfoExtractor): | ||||
|     IE_NAME = 'mailru' | ||||
|     IE_DESC = 'Видео@Mail.Ru' | ||||
|     _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', | ||||
|         'md5': 'dea205f03120046894db4ebb6159879a', | ||||
|         'info_dict': { | ||||
|             'id': '46301138', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', | ||||
|             'upload_date': '20140224', | ||||
|             'uploader': 'sonypicturesrus', | ||||
|             'uploader_id': 'sonypicturesrus@mail.ru', | ||||
|             'duration': 184, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_data = self._download_json( | ||||
|             'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON') | ||||
|  | ||||
|         author = video_data['author'] | ||||
|         uploader = author['name'] | ||||
|         uploader_id = author['id'] | ||||
|  | ||||
|         movie = video_data['movie'] | ||||
|         content_id = str(movie['contentId']) | ||||
|         title = movie['title'] | ||||
|         thumbnail = movie['poster'] | ||||
|         duration = movie['duration'] | ||||
|  | ||||
|         upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d') | ||||
|         view_count = video_data['views_count'] | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': video['url'], | ||||
|                 'format_id': video['name'], | ||||
|             } for video in video_data['videos'] | ||||
|         ] | ||||
|  | ||||
|         return { | ||||
|             'id': content_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -8,6 +8,7 @@ from .youtube import YoutubeIE | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     get_element_by_id, | ||||
| ) | ||||
|  | ||||
| @@ -72,7 +73,6 @@ class MITIE(TechTVMITIE): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', | ||||
|         'file': '.mp4', | ||||
|         'md5': '7db01d5ccc1895fc5010e9c9e13648da', | ||||
|         'info_dict': { | ||||
|             'id': '21783', | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     compat_urllib_parse, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| @@ -15,8 +16,9 @@ class MixcloudIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', | ||||
|         'file': 'dholbach-cryptkeeper.mp3', | ||||
|         'info_dict': { | ||||
|             'id': 'dholbach-cryptkeeper', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Cryptkeeper', | ||||
|             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', | ||||
|             'uploader': 'Daniel Holbach', | ||||
| @@ -45,7 +47,7 @@ class MixcloudIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         uploader = mobj.group(1) | ||||
|         cloudcast_name = mobj.group(2) | ||||
|         track_id = '-'.join((uploader, cloudcast_name)) | ||||
|         track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name))) | ||||
|  | ||||
|         webpage = self._download_webpage(url, track_id) | ||||
|  | ||||
|   | ||||
| @@ -8,6 +8,7 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     HEADRequest, | ||||
|     unified_strdate, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -35,7 +36,15 @@ class ORFIE(InfoExtractor): | ||||
|         data_json = self._search_regex( | ||||
|             r'initializeAdworx\((.+?)\);\n', webpage, 'video info') | ||||
|         all_data = json.loads(data_json) | ||||
|         sdata = all_data[0]['values']['segments'] | ||||
|  | ||||
|         def get_segments(all_data): | ||||
|             for data in all_data: | ||||
|                 if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM': | ||||
|                     return data['values']['segments'] | ||||
|  | ||||
|         sdata = get_segments(all_data) | ||||
|         if not sdata: | ||||
|             raise ExtractorError('Unable to extract segments') | ||||
|  | ||||
|         def quality_to_int(s): | ||||
|             m = re.search('([0-9]+)', s) | ||||
|   | ||||
							
								
								
									
										297
									
								
								youtube_dl/extractor/prosiebensat1.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										297
									
								
								youtube_dl/extractor/prosiebensat1.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,297 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from hashlib import sha1 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     unified_strdate, | ||||
|     clean_html, | ||||
|     RegexNotFoundError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ProSiebenSat1IE(InfoExtractor): | ||||
|     IE_NAME = 'prosiebensat1' | ||||
|     IE_DESC = 'ProSiebenSat.1 Digital' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|ran|the-voice-of-germany)\.de|fem\.com)/(?P<id>.+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', | ||||
|             'info_dict': { | ||||
|                 'id': '2104602', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Staffel 2, Episode 18 - Jahresrückblick', | ||||
|                 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', | ||||
|                 'upload_date': '20131231', | ||||
|                 'duration': 5845.04, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html', | ||||
|             'info_dict': { | ||||
|                 'id': '2570327', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Lady-Umstyling für Audrina', | ||||
|                 'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d', | ||||
|                 'upload_date': '20131014', | ||||
|                 'duration': 606.76, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Seems to be broken', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.prosiebenmaxx.de/yep/one-piece/video/148-folge-48-gold-rogers-heimat-ganze-folge', | ||||
|             'info_dict': { | ||||
|                 'id': '2437108', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Folge 48: Gold Rogers Heimat', | ||||
|                 'description': 'Ruffy erreicht die Insel, auf der der berühmte Gold Roger lebte und hingerichtet wurde.', | ||||
|                 'upload_date': '20140226', | ||||
|                 'duration': 1401.48, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip', | ||||
|             'info_dict': { | ||||
|                 'id': '2904997', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Sexy laufen in Ugg Boots', | ||||
|                 'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6', | ||||
|                 'upload_date': '20140122', | ||||
|                 'duration': 245.32, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip', | ||||
|             'info_dict': { | ||||
|                 'id': '2906572', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Im Interview: Kai Wiesinger', | ||||
|                 'description': 'md5:e4e5370652ec63b95023e914190b4eb9', | ||||
|                 'upload_date': '20140225', | ||||
|                 'duration': 522.56, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge', | ||||
|             'info_dict': { | ||||
|                 'id': '2992323', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2', | ||||
|                 'description': 'md5:2669cde3febe9bce13904f701e774eb6', | ||||
|                 'upload_date': '20140225', | ||||
|                 'duration': 2410.44, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge', | ||||
|             'info_dict': { | ||||
|                 'id': '3004256', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Schalke: Tönnies möchte Raul zurück', | ||||
|                 'description': 'md5:4b5b271d9bcde223b54390754c8ece3f', | ||||
|                 'upload_date': '20140226', | ||||
|                 'duration': 228.96, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', | ||||
|             'info_dict': { | ||||
|                 'id': '2572814', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Andreas Kümmert: Rocket Man', | ||||
|                 'description': 'md5:6ddb02b0781c6adf778afea606652e38', | ||||
|                 'upload_date': '20131017', | ||||
|                 'duration': 469.88, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html', | ||||
|             'info_dict': { | ||||
|                 'id': '2156342', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Kurztrips zum Valentinstag', | ||||
|                 'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528', | ||||
|                 'upload_date': '20130206', | ||||
|                 'duration': 307.24, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     _CLIPID_REGEXES = [ | ||||
|         r'"clip_id"\s*:\s+"(\d+)"', | ||||
|         r'clipid: "(\d+)"', | ||||
|     ] | ||||
|     _TITLE_REGEXES = [ | ||||
|         r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', | ||||
|         r'<header class="clearfix">\s*<h3>(.+?)</h3>', | ||||
|         r'<!-- start video -->\s*<h1>(.+?)</h1>', | ||||
|         r'<div class="ep-femvideos-pi4-video-txt">\s*<h2>(.+?)</h2>', | ||||
|     ] | ||||
|     _DESCRIPTION_REGEXES = [ | ||||
|         r'<p itemprop="description">\s*(.+?)</p>', | ||||
|         r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>', | ||||
|         r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>', | ||||
|         r'<p>(.+?)</p>\s*<div class="ep-femvideos-pi4-video-footer">', | ||||
|     ] | ||||
|     _UPLOAD_DATE_REGEXES = [ | ||||
|         r'<meta property="og:published_time" content="(.+?)">', | ||||
|         r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"', | ||||
|         r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr', | ||||
|         r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>', | ||||
|         r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>', | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         page = self._download_webpage(url, video_id, 'Downloading page') | ||||
|  | ||||
|         def extract(patterns, name, page, fatal=False): | ||||
|             for pattern in patterns: | ||||
|                 mobj = re.search(pattern, page) | ||||
|                 if mobj: | ||||
|                     return clean_html(mobj.group(1)) | ||||
|             if fatal: | ||||
|                 raise RegexNotFoundError(u'Unable to extract %s' % name) | ||||
|             return None | ||||
|  | ||||
|         clip_id = extract(self._CLIPID_REGEXES, 'clip id', page, fatal=True) | ||||
|  | ||||
|         access_token = 'testclient' | ||||
|         client_name = 'kolibri-1.2.5' | ||||
|         client_location = url | ||||
|  | ||||
|         videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({ | ||||
|             'access_token': access_token, | ||||
|             'client_location': client_location, | ||||
|             'client_name': client_name, | ||||
|             'ids': clip_id, | ||||
|         }) | ||||
|  | ||||
|         videos = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON') | ||||
|  | ||||
|         duration = float(videos[0]['duration']) | ||||
|         source_ids = [source['id'] for source in videos[0]['sources']] | ||||
|         source_ids_str = ','.join(map(str, source_ids)) | ||||
|  | ||||
|         g = '01!8d8F_)r9]4s[qeuXfP%' | ||||
|  | ||||
|         client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]) | ||||
|                                  .encode('utf-8')).hexdigest() | ||||
|  | ||||
|         sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse.urlencode({ | ||||
|             'access_token': access_token, | ||||
|             'client_id': client_id, | ||||
|             'client_location': client_location, | ||||
|             'client_name': client_name, | ||||
|         })) | ||||
|  | ||||
|         sources = self._download_json(sources_api_url, clip_id, 'Downloading sources JSON') | ||||
|         server_id = sources['server_id'] | ||||
|  | ||||
|         client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, | ||||
|                                           client_location, source_ids_str, g, client_name]) | ||||
|                                  .encode('utf-8')).hexdigest() | ||||
|  | ||||
|         url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse.urlencode({ | ||||
|             'access_token': access_token, | ||||
|             'client_id': client_id, | ||||
|             'client_location': client_location, | ||||
|             'client_name': client_name, | ||||
|             'server_id': server_id, | ||||
|             'source_ids': source_ids_str, | ||||
|         })) | ||||
|  | ||||
|         urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON') | ||||
|  | ||||
|         title = extract(self._TITLE_REGEXES, 'title', page, fatal=True) | ||||
|         description = extract(self._DESCRIPTION_REGEXES, 'description', page) | ||||
|         thumbnail = self._og_search_thumbnail(page) | ||||
|  | ||||
|         upload_date = extract(self._UPLOAD_DATE_REGEXES, 'upload date', page) | ||||
|         if upload_date: | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         urls_sources = urls['sources'] | ||||
|         if isinstance(urls_sources, dict): | ||||
|             urls_sources = urls_sources.values() | ||||
|  | ||||
|         def fix_bitrate(bitrate): | ||||
|             return bitrate / 1000 if bitrate % 1000 == 0 else bitrate | ||||
|  | ||||
|         for source in urls_sources: | ||||
|             protocol = source['protocol'] | ||||
|             if protocol == 'rtmp' or protocol == 'rtmpe': | ||||
|                 mobj = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', source['url']) | ||||
|                 if not mobj: | ||||
|                     continue | ||||
|                 formats.append({ | ||||
|                     'url': mobj.group('url'), | ||||
|                     'app': mobj.group('app'), | ||||
|                     'play_path': mobj.group('playpath'), | ||||
|                     'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', | ||||
|                     'page_url': 'http://www.prosieben.de', | ||||
|                     'vbr': fix_bitrate(source['bitrate']), | ||||
|                     'ext': 'mp4', | ||||
|                     'format_id': '%s_%s' % (source['cdn'], source['bitrate']), | ||||
|                 }) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': source['url'], | ||||
|                     'vbr': fix_bitrate(source['bitrate']), | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': clip_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -1,148 +1,165 @@ | ||||
| # encoding: utf-8 | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     clean_html, | ||||
|     unified_strdate, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RTLnowIE(InfoExtractor): | ||||
|     """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" | ||||
|     _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', | ||||
|         'file': '90419.flv', | ||||
|         'info_dict': { | ||||
|             'upload_date': '20070416', | ||||
|             'title': 'Ahornallee - Folge 1 - Der Einzug', | ||||
|             'description': 'Folge 1 - Der Einzug', | ||||
|     _VALID_URL = r'''(?x) | ||||
|                         (?:https?://)? | ||||
|                         (?P<url> | ||||
|                             (?P<domain> | ||||
|                                 rtl-now\.rtl\.de| | ||||
|                                 rtl2now\.rtl2\.de| | ||||
|                                 (?:www\.)?voxnow\.de| | ||||
|                                 (?:www\.)?rtlnitronow\.de| | ||||
|                                 (?:www\.)?superrtlnow\.de| | ||||
|                                 (?:www\.)?n-tvnow\.de) | ||||
|                             /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\? | ||||
|                             (?:container_id|film_id)=(?P<video_id>[0-9]+)& | ||||
|                             player=1(?:&season=[0-9]+)?(?:&.*)? | ||||
|                         )''' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', | ||||
|             'info_dict': { | ||||
|                 'id': '90419', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Ahornallee - Folge 1 - Der Einzug', | ||||
|                 'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de', | ||||
|                 'upload_date': '20070416', | ||||
|                 'duration': 1685, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Only works from Germany', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         { | ||||
|             'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', | ||||
|             'info_dict': { | ||||
|                 'id': '69756', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', | ||||
|                 'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0', | ||||
|                 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', | ||||
|                 'upload_date': '20120519', | ||||
|                 'duration': 1245, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Only works from Germany', | ||||
|         }, | ||||
|         'skip': 'Only works from Germany', | ||||
|     }, | ||||
|     { | ||||
|         'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', | ||||
|         'file': '69756.flv', | ||||
|         'info_dict': { | ||||
|             'upload_date': '20120519', | ||||
|             'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', | ||||
|             'description': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', | ||||
|             'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', | ||||
|         { | ||||
|             'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', | ||||
|             'info_dict': { | ||||
|                 'id': '13883', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Voxtours - Südafrika-Reporter II', | ||||
|                 'description': 'md5:de7f8d56be6fd4fed10f10f57786db00', | ||||
|                 'upload_date': '20090627', | ||||
|                 'duration': 1800, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         { | ||||
|             'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', | ||||
|             'info_dict': { | ||||
|                 'id': '99205', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Medicopter 117 - Angst!', | ||||
|                 'description': 'md5:895b1df01639b5f61a04fc305a5cb94d', | ||||
|                 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg', | ||||
|                 'upload_date': '20080928', | ||||
|                 'duration': 2691, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         'skip': 'Only works from Germany', | ||||
|     }, | ||||
|     { | ||||
|         'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', | ||||
|         'file': '13883.flv', | ||||
|         'info_dict': { | ||||
|             'upload_date': '20090627', | ||||
|             'title': 'Voxtours - Südafrika-Reporter II', | ||||
|             'description': 'Südafrika-Reporter II', | ||||
|         { | ||||
|             'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0', | ||||
|             'info_dict': { | ||||
|                 'id': '153819', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Deluxe - Alles was Spaß macht - Thema u.a.: Luxushotel für Vierbeiner', | ||||
|                 'description': 'md5:c3705e1bb32e1a5b2bcd634fc065c631', | ||||
|                 'thumbnail': 'http://autoimg.static-fra.de/ntvnow/383157/1500x1500/image2.jpg', | ||||
|                 'upload_date': '20140221', | ||||
|                 'duration': 2429, | ||||
|             }, | ||||
|             'skip': 'Only works from Germany', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', | ||||
|         'file': '99205.flv', | ||||
|         'info_dict': { | ||||
|             'upload_date': '20080928',  | ||||
|             'title': 'Medicopter 117 - Angst!', | ||||
|             'description': 'Angst!', | ||||
|             'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', | ||||
|         'file': '124903.flv', | ||||
|         'info_dict': { | ||||
|             'upload_date': '20130101', | ||||
|             'title': 'Top Gear vom 01.01.2013', | ||||
|             'description': 'Episode 1', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Only works from Germany', | ||||
|     }] | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         webpage_url = 'http://' + mobj.group('url') | ||||
|         video_page_url = 'http://' + mobj.group('domain') + '/' | ||||
|         video_page_url = 'http://%s/' % mobj.group('domain') | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|         webpage = self._download_webpage('http://' + mobj.group('url'), video_id) | ||||
|  | ||||
|         note_m = re.search(r'''(?sx) | ||||
|             <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?) | ||||
|             <div[ ]id="playerteaser">''', webpage) | ||||
|         if note_m: | ||||
|             msg = clean_html(note_m.group(1)) | ||||
|             raise ExtractorError(msg) | ||||
|         mobj = re.search(r'(?s)<div style="margin-left: 20px; font-size: 13px;">(.*?)<div id="playerteaser">', webpage) | ||||
|         if mobj: | ||||
|             raise ExtractorError(clean_html(mobj.group(1)), expected=True) | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage, default=None) | ||||
|  | ||||
|         upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date')) | ||||
|  | ||||
|         mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage) | ||||
|         duration = int(mobj.group('seconds')) if mobj else None | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>', | ||||
|             webpage, 'title') | ||||
|         playerdata_url = self._html_search_regex( | ||||
|             r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'', | ||||
|             webpage, 'playerdata_url') | ||||
|             r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url') | ||||
|  | ||||
|         playerdata = self._download_webpage(playerdata_url, video_id) | ||||
|         mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata) | ||||
|         if mobj: | ||||
|             video_description = mobj.group('description') | ||||
|             if mobj.group('upload_date_Y'): | ||||
|                 video_upload_date = mobj.group('upload_date_Y') | ||||
|             elif mobj.group('upload_date_y'): | ||||
|                 video_upload_date = '20' + mobj.group('upload_date_y') | ||||
|         playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML') | ||||
|  | ||||
|         videoinfo = playerdata.find('./playlist/videoinfo') | ||||
|          | ||||
|         formats = [] | ||||
|         for filename in videoinfo.findall('filename'): | ||||
|             mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text) | ||||
|             if mobj: | ||||
|                 fmt = { | ||||
|                     'url': mobj.group('url'), | ||||
|                     'play_path': 'mp4:' + mobj.group('play_path'), | ||||
|                     'page_url': video_page_url, | ||||
|                     'player_url': video_page_url + 'includes/vodplayer.swf', | ||||
|                 } | ||||
|             else: | ||||
|                 video_upload_date = None | ||||
|             if video_upload_date: | ||||
|                 video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d') | ||||
|         else: | ||||
|             video_description = None | ||||
|             video_upload_date = None | ||||
|             self._downloader.report_warning('Unable to extract description and upload date') | ||||
|  | ||||
|         # Thumbnail: not every video has an thumbnail | ||||
|         mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage) | ||||
|         if mobj: | ||||
|             video_thumbnail = mobj.group('thumbnail') | ||||
|         else: | ||||
|             video_thumbnail = None | ||||
|  | ||||
|         mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError('Unable to extract media URL') | ||||
|         video_url = mobj.group('url') | ||||
|         video_play_path = 'mp4:' + mobj.group('play_path') | ||||
|         video_player_url = video_page_url + 'includes/vodplayer.swf' | ||||
|                 fmt = { | ||||
|                     'url': filename.text, | ||||
|                 } | ||||
|             fmt.update({ | ||||
|                 'width': int_or_none(filename.get('width')), | ||||
|                 'height': int_or_none(filename.get('height')), | ||||
|                 'vbr': int_or_none(filename.get('bitrate')), | ||||
|                 'ext': 'flv', | ||||
|             }) | ||||
|             formats.append(fmt) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'play_path': video_play_path, | ||||
|             'page_url': video_page_url, | ||||
|             'player_url': video_player_url, | ||||
|             'ext': 'flv', | ||||
|             'title': video_title, | ||||
|             'description': video_description, | ||||
|             'upload_date': video_upload_date, | ||||
|             'thumbnail': video_thumbnail, | ||||
|         } | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -217,7 +217,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|         return self._extract_info_dict(info, full_title, secret_token=token) | ||||
|  | ||||
| class SoundcloudSetIE(SoundcloudIE): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' | ||||
|     IE_NAME = 'soundcloud:set' | ||||
|     # it's in tests/test_playlists.py | ||||
|     _TESTS = [] | ||||
|   | ||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from youtube_dl.utils import ExtractorError | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class TinyPicIE(InfoExtractor): | ||||
|   | ||||
							
								
								
									
										84
									
								
								youtube_dl/extractor/tvigle.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/tvigle.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     clean_html, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TvigleIE(InfoExtractor):
    """Information extractor for videos on tvigle.ru.

    The numeric id is read from the ``v``/``video`` query parameter of a
    ``/category/`` URL and used to request an XML description of the clip,
    from which the metadata and the available stream URLs are taken.
    """

    IE_NAME = 'tvigle'
    IE_DESC = 'Интернет-телевидение Tvigle.ru'
    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
            'md5': '09afba4616666249f087efc6dcf83cb3',
            'info_dict': {
                'id': '503081',
                'ext': 'flv',
                'title': 'Брат 2 ',
                'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
                'upload_date': '20110919',
            },
        },
        {
            'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
            'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
            'info_dict': {
                'id': '676433',
                'ext': 'flv',
                'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
                'description': 'md5:027f7dc872948f14c96d19b4178428a4',
                'upload_date': '20121218',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Downloads the per-video XML document, reads the metadata from the
        attributes of its ``<video>`` element and emits one format per stream
        attribute that is present and non-empty.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        xml_root = self._download_xml(
            'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id,
            video_id, 'Downloading video XML')
        video = xml_root.find('./video')

        title = video.get('name')
        description = video.get('anons')
        if description:
            # The announcement text is passed through clean_html before use.
            description = clean_html(description)
        # NOTE(review): 'img' is read from the XML root, not from <video>
        # like every other attribute -- confirm this is intentional.
        thumbnail = xml_root.get('img')
        upload_date = unified_strdate(video.get('date'))
        like_count = int_or_none(video.get('vtp'))

        # (attribute on <video>, human readable note); the position in this
        # sequence is stored as the format's 'quality'. Presumably ordered
        # from lowest to highest quality -- verify against the site.
        stream_attributes = (
            ('low_file', 'SQ'),
            ('file', 'HQ'),
            ('hd', 'HD 720'),
        )

        formats = []
        for quality, (format_id, format_note) in enumerate(stream_attributes):
            stream_url = video.get(format_id)
            if stream_url:
                formats.append({
                    'url': stream_url,
                    'format_id': format_id,
                    'format_note': format_note,
                    'quality': quality,
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'like_count': like_count,
            # Every video is reported with a fixed age limit of 18.
            'age_limit': 18,
            'formats': formats,
        }
| @@ -113,8 +113,8 @@ class VestiIE(InfoExtractor): | ||||
|         priority_transport = playlist['priority_transport'] | ||||
|  | ||||
|         thumbnail = media['picture'] | ||||
|         width = media['width'] | ||||
|         height = media['height'] | ||||
|         width = int_or_none(media['width']) | ||||
|         height = int_or_none(media['height']) | ||||
|         description = media['anons'] | ||||
|         title = media['title'] | ||||
|         duration = int_or_none(media.get('duration')) | ||||
|   | ||||
| @@ -221,7 +221,9 @@ class VimeoIE(SubtitlesInfoExtractor): | ||||
|         # Extract video thumbnail | ||||
|         video_thumbnail = config["video"].get("thumbnail") | ||||
|         if video_thumbnail is None: | ||||
|             _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1] | ||||
|             video_thumbs = config["video"].get("thumbs") | ||||
|             if video_thumbs and isinstance(video_thumbs, dict): | ||||
|                 _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1] | ||||
|  | ||||
|         # Extract video description | ||||
|         video_description = None | ||||
|   | ||||
| @@ -16,7 +16,7 @@ from ..utils import ( | ||||
|  | ||||
| class VKIE(InfoExtractor): | ||||
|     IE_NAME = 'vk.com' | ||||
|     _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)' | ||||
|     _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))' | ||||
|     _NETRC_MACHINE = 'vk' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -42,6 +42,18 @@ class VKIE(InfoExtractor): | ||||
|                 'duration': 558, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'note': 'Embedded video', | ||||
|             'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1', | ||||
|             'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a', | ||||
|             'info_dict': { | ||||
|                 'id': '162925554', | ||||
|                 'ext': 'mp4', | ||||
|                 'uploader': 'Vladimir Gavrin', | ||||
|                 'title': 'Lin Dan', | ||||
|                 'duration': 101, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vk.com/video-8871596_164049491', | ||||
|             'md5': 'a590bcaf3d543576c9bd162812387666', | ||||
| @@ -54,7 +66,7 @@ class VKIE(InfoExtractor): | ||||
|                 'duration': 8352, | ||||
|             }, | ||||
|             'skip': 'Requires vk account credentials', | ||||
|         } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _login(self): | ||||
| @@ -82,7 +94,10 @@ class VKIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = mobj.group('videoid') | ||||
|  | ||||
|         if not video_id: | ||||
|             video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) | ||||
|  | ||||
|         info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id | ||||
|         info_page = self._download_webpage(info_url, video_id) | ||||
|   | ||||
| @@ -103,6 +103,7 @@ class XHamsterIE(InfoExtractor): | ||||
|         }] | ||||
|  | ||||
|         if not hd: | ||||
|             mrss_url = self._search_regex(r'<link rel="canonical" href="([^"]+)', webpage, 'mrss_url') | ||||
|             webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage') | ||||
|             if is_hd(webpage): | ||||
|                 video_url = extract_video_url(webpage) | ||||
|   | ||||
| @@ -7,19 +7,24 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     parse_duration, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class XTubeIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_', | ||||
|         'file': 'kVTUy_G222_.mp4', | ||||
|         'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab', | ||||
|         'info_dict': { | ||||
|             "title": "strange erotica", | ||||
|             "description": "surreal gay themed erotica...almost an ET kind of thing", | ||||
|             "uploader": "greenshowers", | ||||
|             "age_limit": 18, | ||||
|             'id': 'kVTUy_G222_', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'strange erotica', | ||||
|             'description': 'surreal gay themed erotica...almost an ET kind of thing', | ||||
|             'uploader': 'greenshowers', | ||||
|             'duration': 450, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -32,10 +37,23 @@ class XTubeIE(InfoExtractor): | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, 'title') | ||||
|         video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False) | ||||
|         video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, 'description', fatal=False) | ||||
|         video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/') | ||||
|         video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title') | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False) | ||||
|         video_description = self._html_search_regex( | ||||
|             r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False) | ||||
|         video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/') | ||||
|         duration = parse_duration(self._html_search_regex( | ||||
|             r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False)) | ||||
|         view_count = self._html_search_regex( | ||||
|             r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False) | ||||
|         if view_count: | ||||
|             view_count = str_to_int(view_count) | ||||
|         comment_count = self._html_search_regex( | ||||
|             r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False) | ||||
|         if comment_count: | ||||
|             comment_count = str_to_int(comment_count) | ||||
|  | ||||
|         path = compat_urllib_parse_urlparse(video_url).path | ||||
|         extension = os.path.splitext(path)[1][1:] | ||||
|         format = path.split('/')[5].split('_')[:2] | ||||
| @@ -48,6 +66,9 @@ class XTubeIE(InfoExtractor): | ||||
|             'title': video_title, | ||||
|             'uploader': video_uploader, | ||||
|             'description': video_description, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|             'url': video_url, | ||||
|             'ext': extension, | ||||
|             'format': format, | ||||
|   | ||||
| @@ -1645,7 +1645,7 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|  | ||||
| class YoutubeUserIE(InfoExtractor): | ||||
|     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' | ||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' | ||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' | ||||
|     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s' | ||||
|     _GDATA_PAGE_SIZE = 50 | ||||
|     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' | ||||
| @@ -1744,12 +1744,50 @@ class YoutubeSearchIE(SearchInfoExtractor): | ||||
|                   for video_id in video_ids] | ||||
|         return self.playlist_result(videos, query) | ||||
|  | ||||
|  | ||||
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE whose API query adds ``orderby=published``.

    Only the class attributes differ from the parent; all behaviour is
    inherited.
    """

    IE_DESC = u'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
|  | ||||
|  | ||||
class YoutubeSearchURLIE(InfoExtractor):
    """Turn a ``youtube.com/results?search_query=...`` page into a playlist.

    The result page is downloaded and every ``yt-lockup-title`` heading
    inside the ``search-results`` list becomes one ``url`` entry of the
    returned playlist.
    """

    IE_DESC = u'YouTube.com search URLs'
    IE_NAME = u'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'

    def _result_entry(self, heading_html):
        """Build one playlist entry from the HTML inside a result heading."""
        # The title is optional (fatal=False); a missing href is an error.
        item_title = self._html_search_regex(
            r'(?s)title="([^"]+)"', heading_html, 'item title', fatal=False)
        item_href = self._html_search_regex(
            r'(?s)href="([^"]+)"', heading_html, 'item URL')
        return {
            '_type': 'url',
            # The extracted href is joined onto the site root.
            'url': compat_urlparse.urljoin(
                'https://www.youtube.com/', item_href),
            'title': item_title,
        }

    def _real_extract(self, url):
        """Return a playlist dict whose title is the decoded search query."""
        query = compat_urllib_parse.unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        results_html = self._search_regex(
            r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')

        headings = re.findall(
            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', results_html)

        return {
            '_type': 'playlist',
            'entries': [self._result_entry(heading) for heading in headings],
            'title': query,
        }
|  | ||||
|  | ||||
| class YoutubeShowIE(InfoExtractor): | ||||
|     IE_DESC = u'YouTube.com (multi-season) shows' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' | ||||
|   | ||||
| @@ -772,6 +772,7 @@ def unified_strdate(date_str): | ||||
|         '%B %d %Y', | ||||
|         '%b %d %Y', | ||||
|         '%Y-%m-%d', | ||||
|         '%d.%m.%Y', | ||||
|         '%d/%m/%Y', | ||||
|         '%Y/%m/%d %H:%M:%S', | ||||
|         '%Y-%m-%d %H:%M:%S', | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.02.26' | ||||
| __version__ = '2014.03.04.2' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user