Compare commits
	
		
			13 Commits
		
	
	
		
			2014.03.24
			...
			2014.03.24
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | fb8ae2d438 | ||
|  | 893f8832b5 | ||
|  | 878d11ec29 | ||
|  | 515bbe4b5b | ||
|  | 75f2e25ba9 | ||
|  | 0d466d34a3 | ||
|  | 6949d81095 | ||
|  | f847ca02d3 | ||
|  | 510243ba58 | ||
|  | b540697a8a | ||
|  | 0d3641e589 | ||
|  | 72546c831e | ||
|  | 410afb2003 | 
| @@ -28,6 +28,9 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --user-agent UA                  specify a custom user agent | ||||
|     --referer REF                    specify a custom referer, use if the video | ||||
|                                      access is restricted to one domain | ||||
|     --add-header FIELD:VALUE         specify a custom HTTP header and its value, | ||||
|                                      separated by a colon ':'. You can use this | ||||
|                                      option multiple times | ||||
|     --list-extractors                List all supported extractors and the URLs | ||||
|                                      they would handle | ||||
|     --extractor-descriptions         Output descriptions of all supported | ||||
|   | ||||
| @@ -227,6 +227,9 @@ def parseOpts(overrideArguments=None): | ||||
|     general.add_option('--referer', | ||||
|             dest='referer', help='specify a custom referer, use if the video access is restricted to one domain', | ||||
|             metavar='REF', default=None) | ||||
|     general.add_option('--add-header', | ||||
|             dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append", | ||||
|             metavar='FIELD:VALUE') | ||||
|     general.add_option('--list-extractors', | ||||
|             action='store_true', dest='list_extractors', | ||||
|             help='List all supported extractors and the URLs they would handle', default=False) | ||||
| @@ -556,6 +559,16 @@ def _real_main(argv=None): | ||||
|     if opts.referer is not None: | ||||
|         std_headers['Referer'] = opts.referer | ||||
|  | ||||
|     # Custom HTTP headers | ||||
|     if opts.headers is not None: | ||||
|         for h in opts.headers: | ||||
|             if h.find(':', 1) < 0: | ||||
|                 parser.error(u'wrong header formatting, it should be key:value, not "%s"'%h) | ||||
|             key, value = h.split(':', 2) | ||||
|             if opts.verbose: | ||||
|                 write_string(u'[debug] Adding header from command line option %s:%s\n'%(key, value)) | ||||
|             std_headers[key] = value | ||||
|  | ||||
|     # Dump user agent | ||||
|     if opts.dump_user_agent: | ||||
|         compat_print(std_headers['User-Agent']) | ||||
|   | ||||
| @@ -13,8 +13,10 @@ class HlsFD(FileDownloader): | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|  | ||||
|         args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy', | ||||
|             '-bsf:a', 'aac_adtstoasc', tmpfilename] | ||||
|         args = [ | ||||
|             '-y', '-i', url, '-f', 'mp4', '-c', 'copy', | ||||
|             '-bsf:a', 'aac_adtstoasc', | ||||
|             encodeFilename(tmpfilename, for_subprocess=True)] | ||||
|  | ||||
|         for program in ['avconv', 'ffmpeg']: | ||||
|             try: | ||||
|   | ||||
| @@ -14,6 +14,7 @@ from .arte import ( | ||||
|     ArteTVConcertIE, | ||||
|     ArteTVFutureIE, | ||||
|     ArteTVDDCIE, | ||||
|     ArteTVEmbedIE, | ||||
| ) | ||||
| from .auengine import AUEngineIE | ||||
| from .bambuser import BambuserIE, BambuserChannelIE | ||||
| @@ -25,6 +26,7 @@ from .bloomberg import BloombergIE | ||||
| from .br import BRIE | ||||
| from .breakcom import BreakIE | ||||
| from .brightcove import BrightcoveIE | ||||
| from .byutv import BYUtvIE | ||||
| from .c56 import C56IE | ||||
| from .canal13cl import Canal13clIE | ||||
| from .canalplus import CanalplusIE | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -14,14 +16,14 @@ from ..utils import ( | ||||
| class AddAnimeIE(InfoExtractor): | ||||
|  | ||||
|     _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)' | ||||
|     IE_NAME = u'AddAnime' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', | ||||
|         u'file': u'24MR3YO5SAS9.mp4', | ||||
|         u'md5': u'72954ea10bc979ab5e2eb288b21425a0', | ||||
|         u'info_dict': { | ||||
|             u"description": u"One Piece 606", | ||||
|             u"title": u"One Piece 606" | ||||
|         'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', | ||||
|         'md5': '72954ea10bc979ab5e2eb288b21425a0', | ||||
|         'info_dict': { | ||||
|             'id': '24MR3YO5SAS9', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'One Piece 606', | ||||
|             'title': 'One Piece 606', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -38,10 +40,10 @@ class AddAnimeIE(InfoExtractor): | ||||
|             redir_webpage = ee.cause.read().decode('utf-8') | ||||
|             action = self._search_regex( | ||||
|                 r'<form id="challenge-form" action="([^"]+)"', | ||||
|                 redir_webpage, u'Redirect form') | ||||
|                 redir_webpage, 'Redirect form') | ||||
|             vc = self._search_regex( | ||||
|                 r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>', | ||||
|                 redir_webpage, u'redirect vc value') | ||||
|                 redir_webpage, 'redirect vc value') | ||||
|             av = re.search( | ||||
|                 r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);', | ||||
|                 redir_webpage) | ||||
| @@ -52,19 +54,19 @@ class AddAnimeIE(InfoExtractor): | ||||
|             parsed_url = compat_urllib_parse_urlparse(url) | ||||
|             av_val = av_res + len(parsed_url.netloc) | ||||
|             confirm_url = ( | ||||
|                 parsed_url.scheme + u'://' + parsed_url.netloc + | ||||
|                 parsed_url.scheme + '://' + parsed_url.netloc + | ||||
|                 action + '?' + | ||||
|                 compat_urllib_parse.urlencode({ | ||||
|                     'jschl_vc': vc, 'jschl_answer': compat_str(av_val)})) | ||||
|             self._download_webpage( | ||||
|                 confirm_url, video_id, | ||||
|                 note=u'Confirming after redirect') | ||||
|                 note='Confirming after redirect') | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id in ('normal', 'hq'): | ||||
|             rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) | ||||
|             video_url = self._search_regex(rex, webpage, u'video file URLx', | ||||
|             video_url = self._search_regex(rex, webpage, 'video file URLx', | ||||
|                                            fatal=False) | ||||
|             if not video_url: | ||||
|                 continue | ||||
| @@ -72,14 +74,13 @@ class AddAnimeIE(InfoExtractor): | ||||
|                 'format_id': format_id, | ||||
|                 'url': video_url, | ||||
|             }) | ||||
|         if not formats: | ||||
|             raise ExtractorError(u'Cannot find any video format!') | ||||
|         self._sort_formats(formats) | ||||
|         video_title = self._og_search_title(webpage) | ||||
|         video_description = self._og_search_description(webpage) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id':  video_id, | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'title': video_title, | ||||
|             'description': video_description | ||||
|   | ||||
| @@ -19,114 +19,37 @@ from ..utils import ( | ||||
| # is different for each one. The videos usually expire in 7 days, so we can't | ||||
| # add tests. | ||||
|  | ||||
| class ArteTvIE(InfoExtractor): | ||||
|     _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html' | ||||
|     _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)' | ||||
|     _LIVE_URL = r'index-[0-9]+\.html$' | ||||
|  | ||||
| class ArteTvIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html' | ||||
|     IE_NAME = 'arte.tv' | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL)) | ||||
|  | ||||
|     # TODO implement Live Stream | ||||
|     # from ..utils import compat_urllib_parse | ||||
|     # def extractLiveStream(self, url): | ||||
|     #     video_lang = url.split('/')[-4] | ||||
|     #     info = self.grep_webpage( | ||||
|     #         url, | ||||
|     #         r'src="(.*?/videothek_js.*?\.js)', | ||||
|     #         0, | ||||
|     #         [ | ||||
|     #             (1, 'url', 'Invalid URL: %s' % url) | ||||
|     #         ] | ||||
|     #     ) | ||||
|     #     http_host = url.split('/')[2] | ||||
|     #     next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url'))) | ||||
|     #     info = self.grep_webpage( | ||||
|     #         next_url, | ||||
|     #         r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' + | ||||
|     #             '(http://.*?\.swf).*?' + | ||||
|     #             '(rtmp://.*?)\'', | ||||
|     #         re.DOTALL, | ||||
|     #         [ | ||||
|     #             (1, 'path',   'could not extract video path: %s' % url), | ||||
|     #             (2, 'player', 'could not extract video player: %s' % url), | ||||
|     #             (3, 'url',    'could not extract video url: %s' % url) | ||||
|     #         ] | ||||
|     #     ) | ||||
|     #     video_url = '%s/%s' % (info.get('url'), info.get('path')) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VIDEOS_URL, url) | ||||
|         if mobj is not None: | ||||
|             id = mobj.group('id') | ||||
|             lang = mobj.group('lang') | ||||
|             return self._extract_video(url, id, lang) | ||||
|  | ||||
|         mobj = re.match(self._LIVEWEB_URL, url) | ||||
|         if mobj is not None: | ||||
|             name = mobj.group('name') | ||||
|             lang = mobj.group('lang') | ||||
|             return self._extract_liveweb(url, name, lang) | ||||
|  | ||||
|         if re.search(self._LIVE_URL, url) is not None: | ||||
|             raise ExtractorError('Arte live streams are not yet supported, sorry') | ||||
|             # self.extractLiveStream(url) | ||||
|             # return | ||||
|  | ||||
|         raise ExtractorError('No video found') | ||||
|  | ||||
|     def _extract_video(self, url, video_id, lang): | ||||
|         """Extract from videos.arte.tv""" | ||||
|         ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') | ||||
|         ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') | ||||
|         ref_xml_doc = self._download_xml( | ||||
|             ref_xml_url, video_id, note='Downloading metadata') | ||||
|         config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) | ||||
|         config_xml_url = config_node.attrib['ref'] | ||||
|         config_xml = self._download_webpage( | ||||
|         config = self._download_xml( | ||||
|             config_xml_url, video_id, note='Downloading configuration') | ||||
|  | ||||
|         video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml)) | ||||
|         def _key(m): | ||||
|             quality = m.group('quality') | ||||
|             if quality == 'hd': | ||||
|                 return 2 | ||||
|             else: | ||||
|                 return 1 | ||||
|         # We pick the best quality | ||||
|         video_urls = sorted(video_urls, key=_key) | ||||
|         video_url = list(video_urls)[-1].group('url') | ||||
|          | ||||
|         title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title') | ||||
|         thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>', | ||||
|                                             config_xml, 'thumbnail') | ||||
|         return {'id': video_id, | ||||
|                 'title': title, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'url': video_url, | ||||
|                 'ext': 'flv', | ||||
|                 } | ||||
|         formats = [{ | ||||
|             'forma_id': q.attrib['quality'], | ||||
|             'url': q.text, | ||||
|             'quality': 2 if q.attrib['quality'] == 'hd' else 1, | ||||
|         } for q in config.findall('.//quality')] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|     def _extract_liveweb(self, url, name, lang): | ||||
|         """Extract form http://liveweb.arte.tv/""" | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id') | ||||
|         config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, | ||||
|                                             video_id, 'Downloading information') | ||||
|         event_doc = config_doc.find('event') | ||||
|         url_node = event_doc.find('video').find('urlHd') | ||||
|         if url_node is None: | ||||
|             url_node = event_doc.find('urlSd') | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': event_doc.find('name%s' % lang.capitalize()).text, | ||||
|                 'url': url_node.text.replace('MP4', 'mp4'), | ||||
|                 'ext': 'flv', | ||||
|                 'thumbnail': self._og_search_thumbnail(webpage), | ||||
|                 } | ||||
|         title = config.find('.//name').text | ||||
|         thumbnail = config.find('.//firstThumbnailUrl').text | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'url': video_url, | ||||
|             'ext': 'flv', | ||||
|         } | ||||
|  | ||||
|  | ||||
| class ArteTVPlus7IE(InfoExtractor): | ||||
| @@ -152,9 +75,7 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|         return self._extract_from_json_url(json_url, video_id, lang) | ||||
|  | ||||
|     def _extract_from_json_url(self, json_url, video_id, lang): | ||||
|         json_info = self._download_webpage(json_url, video_id, 'Downloading info json') | ||||
|         self.report_extraction(video_id) | ||||
|         info = json.loads(json_info) | ||||
|         info = self._download_json(json_url, video_id) | ||||
|         player_info = info['videoJsonPlayer'] | ||||
|  | ||||
|         info_dict = { | ||||
| @@ -176,6 +97,8 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|                 l = 'F' | ||||
|             elif lang == 'de': | ||||
|                 l = 'A' | ||||
|             else: | ||||
|                 l = lang | ||||
|             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] | ||||
|             return any(re.match(r, f['versionCode']) for r in regexes) | ||||
|         # Some formats may not be in the same language as the url | ||||
| @@ -305,3 +228,22 @@ class ArteTVConcertIE(ArteTVPlus7IE): | ||||
|             'description': 'md5:486eb08f991552ade77439fe6d82c305', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|  | ||||
| class ArteTVEmbedIE(ArteTVPlus7IE): | ||||
|     IE_NAME = 'arte.tv:embed' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         http://www\.arte\.tv | ||||
|         /playerv2/embed\.php\?json_url= | ||||
|         (?P<json_url> | ||||
|             http://arte\.tv/papi/tvguide/videos/stream/player/ | ||||
|             (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]* | ||||
|         ) | ||||
|     ''' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         lang = mobj.group('lang') | ||||
|         json_url = mobj.group('json_url') | ||||
|         return self._extract_from_json_url(json_url, video_id, lang) | ||||
|   | ||||
							
								
								
									
										50
									
								
								youtube_dl/extractor/byutv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								youtube_dl/extractor/byutv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BYUtvIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking', | ||||
|         'info_dict': { | ||||
|             'id': 'granite-flats-talking', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f', | ||||
|             'title': 'Talking', | ||||
|             'thumbnail': 're:^https?://.*promo.*' | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         episode_code = self._search_regex( | ||||
|             r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') | ||||
|         episode_json = re.sub( | ||||
|             r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code) | ||||
|         ep = json.loads(episode_json) | ||||
|  | ||||
|         if ep['providerType'] == 'Ooyala': | ||||
|             return { | ||||
|                 '_type': 'url_transparent', | ||||
|                 'ie_key': 'Ooyala', | ||||
|                 'url': 'ooyala:%s' % ep['providerId'], | ||||
|                 'id': video_id, | ||||
|                 'title': ep['title'], | ||||
|                 'description': ep.get('description'), | ||||
|                 'thumbnail': ep.get('imageThumbnail'), | ||||
|             } | ||||
|         else: | ||||
|             raise ExtractorError('Unsupported provider %s' % ep['provider']) | ||||
| @@ -9,12 +9,12 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class CinemassacreIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?' | ||||
|     _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', | ||||
|             'file': '19911.mp4', | ||||
|             'md5': 'fde81fbafaee331785f58cd6c0d46190', | ||||
|             'md5': '782f8504ca95a0eba8fc9177c373eec7', | ||||
|             'info_dict': { | ||||
|                 'upload_date': '20121110', | ||||
|                 'title': '“Angry Video Game Nerd: The Movie” – Trailer', | ||||
| @@ -24,7 +24,7 @@ class CinemassacreIE(InfoExtractor): | ||||
|         { | ||||
|             'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', | ||||
|             'file': '521be8ef82b16.mp4', | ||||
|             'md5': 'd72f10cd39eac4215048f62ab477a511', | ||||
|             'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35', | ||||
|             'info_dict': { | ||||
|                 'upload_date': '20131002', | ||||
|                 'title': 'The Mummy’s Hand (1940)', | ||||
| @@ -34,8 +34,9 @@ class CinemassacreIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('display_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, None)  # Don't know video id yet | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') | ||||
|         mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) | ||||
|         if not mobj: | ||||
| @@ -43,33 +44,36 @@ class CinemassacreIE(InfoExtractor): | ||||
|         playerdata_url = mobj.group('embed_url') | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|', | ||||
|             webpage, 'title') | ||||
|         video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>', | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<title>(?P<title>.+?)\|', webpage, 'title') | ||||
|         video_description = self._html_search_regex( | ||||
|             r'<div class="entry-content">(?P<description>.+?)</div>', | ||||
|             webpage, 'description', flags=re.DOTALL, fatal=False) | ||||
|         if len(video_description) == 0: | ||||
|             video_description = None | ||||
|  | ||||
|         playerdata = self._download_webpage(playerdata_url, video_id) | ||||
|  | ||||
|         sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file') | ||||
|         hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file') | ||||
|         sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file') | ||||
|         hd_url = self._html_search_regex( | ||||
|             r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file', | ||||
|             default=None) | ||||
|         video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': sd_url, | ||||
|                 'ext': 'mp4', | ||||
|                 'format': 'sd', | ||||
|                 'format_id': 'sd', | ||||
|             }, | ||||
|             { | ||||
|         formats = [{ | ||||
|             'url': sd_url, | ||||
|             'ext': 'mp4', | ||||
|             'format': 'sd', | ||||
|             'format_id': 'sd', | ||||
|             'quality': 1, | ||||
|         }] | ||||
|         if hd_url: | ||||
|             formats.append({ | ||||
|                 'url': hd_url, | ||||
|                 'ext': 'mp4', | ||||
|                 'format': 'hd', | ||||
|                 'format_id': 'hd', | ||||
|             }, | ||||
|         ] | ||||
|                 'quality': 2, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -197,6 +197,21 @@ class GenericIE(InfoExtractor): | ||||
|                 'description': 'No description', | ||||
|             }, | ||||
|         }, | ||||
|         # arte embed | ||||
|         { | ||||
|             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html', | ||||
|             'md5': '7653032cbb25bf6c80d80f217055fa43', | ||||
|             'info_dict': { | ||||
|                 'id': '048195-004_PLUS7-F', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'X:enius', | ||||
|                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168', | ||||
|                 'upload_date': '20140320', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': 'Requires rtmpdump' | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_download_webpage(self, video_id): | ||||
| @@ -525,6 +540,13 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'TED') | ||||
|  | ||||
|         # Look for embedded arte.tv player | ||||
|         mobj = re.search( | ||||
|             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'ArteTVEmbed') | ||||
|  | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|   | ||||
| @@ -7,7 +7,7 @@ from ..utils import unescapeHTML | ||||
|  | ||||
|  | ||||
| class OoyalaIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)' | ||||
|     _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video | ||||
|   | ||||
| @@ -4,26 +4,99 @@ import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_request | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class VeohIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/v(?P<id>\d*)' | ||||
|     _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3', | ||||
|         'file': '56314296.mp4', | ||||
|         'md5': '620e68e6a3cff80086df3348426c9ca3', | ||||
|         'info_dict': { | ||||
|             'title': 'Straight Backs Are Stronger', | ||||
|             'uploader': 'LUMOback', | ||||
|             'description': 'At LUMOback, we believe straight backs are stronger.  The LUMOback Posture & Movement Sensor:  It gently vibrates when you slouch, inspiring improved posture and mobility.  Use the app to track your data and improve your posture over time. ', | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3', | ||||
|             'md5': '620e68e6a3cff80086df3348426c9ca3', | ||||
|             'info_dict': { | ||||
|                 'id': '56314296', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Straight Backs Are Stronger', | ||||
|                 'uploader': 'LUMOback', | ||||
|                 'description': 'At LUMOback, we believe straight backs are stronger.  The LUMOback Posture & Movement Sensor:  It gently vibrates when you slouch, inspiring improved posture and mobility.  Use the app to track your data and improve your posture over time. ', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage', | ||||
|             'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa', | ||||
|             'info_dict': { | ||||
|                 'id': '27701988', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Chile workers cover up to avoid skin damage', | ||||
|                 'description': 'md5:2bd151625a60a32822873efc246ba20d', | ||||
|                 'uploader': 'afp-news', | ||||
|                 'duration': 123, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', | ||||
|             'md5': '4fde7b9e33577bab2f2f8f260e30e979', | ||||
|             'note': 'Embedded ooyala video', | ||||
|             'info_dict': { | ||||
|                 'id': '69525809', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery', | ||||
|                 'description': 'md5:f5a11c51f8fb51d2315bca0937526891', | ||||
|                 'uploader': 'newsy-videos', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _extract_formats(self, source): | ||||
|         formats = [] | ||||
|         link = source.get('aowPermalink') | ||||
|         if link: | ||||
|             formats.append({ | ||||
|                 'url': link, | ||||
|                 'ext': 'mp4', | ||||
|                 'format_id': 'aow', | ||||
|             }) | ||||
|         link = source.get('fullPreviewHashLowPath') | ||||
|         if link: | ||||
|             formats.append({ | ||||
|                 'url': link, | ||||
|                 'format_id': 'low', | ||||
|             }) | ||||
|         link = source.get('fullPreviewHashHighPath') | ||||
|         if link: | ||||
|             formats.append({ | ||||
|                 'url': link, | ||||
|                 'format_id': 'high', | ||||
|             }) | ||||
|         return formats | ||||
|  | ||||
|     def _extract_video(self, source): | ||||
|         return { | ||||
|             'id': source.get('videoId'), | ||||
|             'title': source.get('title'), | ||||
|             'description': source.get('description'), | ||||
|             'thumbnail': source.get('highResImage') or source.get('medResImage'), | ||||
|             'uploader': source.get('username'), | ||||
|             'duration': int_or_none(source.get('length')), | ||||
|             'view_count': int_or_none(source.get('views')), | ||||
|             'age_limit': 18 if source.get('isMature') == 'true' or source.get('isSexy') == 'true' else 0, | ||||
|             'formats': self._extract_formats(source), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         if video_id.startswith('v'): | ||||
|             rsp = self._download_xml( | ||||
|                 r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML') | ||||
|             if rsp.get('stat') == 'ok': | ||||
|                 return self._extract_video(rsp.find('./videoList/video')) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         age_limit = 0 | ||||
|         if 'class="adultwarning-container"' in webpage: | ||||
| @@ -33,24 +106,16 @@ class VeohIE(InfoExtractor): | ||||
|             request.add_header('Cookie', 'confirmedAdult=true') | ||||
|             webpage = self._download_webpage(request, video_id) | ||||
|  | ||||
|         m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage) | ||||
|         m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|"|\?)', webpage) | ||||
|         if m_youtube is not None: | ||||
|             youtube_id = m_youtube.group(1) | ||||
|             self.to_screen('%s: detected Youtube video.' % video_id) | ||||
|             return self.url_result(youtube_id, 'Youtube') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         info = self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info') | ||||
|         info = json.loads(info) | ||||
|         video_url = info.get('fullPreviewHashHighPath') or info.get('fullPreviewHashLowPath') | ||||
|         info = json.loads( | ||||
|             self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info').replace('\\\'', '\'')) | ||||
|  | ||||
|         return { | ||||
|             'id': info['videoId'], | ||||
|             'title': info['title'], | ||||
|             'url': video_url, | ||||
|             'uploader': info['username'], | ||||
|             'thumbnail': info.get('highResImage') or info.get('medResImage'), | ||||
|             'description': info['description'], | ||||
|             'view_count': info['views'], | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
|         video = self._extract_video(info) | ||||
|         video['age_limit'] = age_limit | ||||
|  | ||||
|         return video | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.03.24.1' | ||||
| __version__ = '2014.03.24.4' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user