ArteTvIE: support videos from videos.arte.tv
Each source of videos has a different extraction process; each one is handled in its own method of the extractor. Changed the extension of videos from mp4 to flv.
This commit is contained in:
		| @@ -11,11 +11,21 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
| class ArteTvIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' | ||||
|     """ | ||||
|     There are two sources of video in arte.tv: videos.arte.tv and | ||||
|     www.arte.tv/guide, the extraction process is different for each one. | ||||
|     The videos expire in 7 days, so we can't add tests. | ||||
|     """ | ||||
|     _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' | ||||
|     _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P<id>.*?).html' | ||||
|     _LIVE_URL = r'index-[0-9]+\.html$' | ||||
|  | ||||
|     IE_NAME = u'arte.tv' | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL)) | ||||
|  | ||||
|     # TODO implement Live Stream | ||||
|     # def extractLiveStream(self, url): | ||||
|     #     video_lang = url.split('/')[-4] | ||||
| @@ -44,17 +54,26 @@ class ArteTvIE(InfoExtractor): | ||||
|     #     video_url = u'%s/%s' % (info.get('url'), info.get('path')) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         # This is not a real id, it can be for example AJT for the news | ||||
|         # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal | ||||
|         video_id = mobj.group('id') | ||||
|         mobj = re.match(self._EMISSION_URL, url) | ||||
|         if mobj is not None: | ||||
|             name = mobj.group('name') | ||||
|             # This is not a real id, it can be for example AJT for the news | ||||
|             # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal | ||||
|             video_id = mobj.group('id') | ||||
|             return self._extract_emission(url, video_id) | ||||
|  | ||||
|         mobj = re.match(self._VIDEOS_URL, url) | ||||
|         if mobj is not None: | ||||
|             id = mobj.group('id') | ||||
|             return self._extract_video(url, id) | ||||
|  | ||||
|         if re.search(self._LIVE_URL, video_id) is not None: | ||||
|             raise ExtractorError(u'Arte live streams are not yet supported, sorry') | ||||
|             # self.extractLiveStream(url) | ||||
|             # return | ||||
|  | ||||
|     def _extract_emission(self, url, video_id): | ||||
|         """Extract from www.arte.tv/guide""" | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') | ||||
|  | ||||
| @@ -68,6 +87,7 @@ class ArteTvIE(InfoExtractor): | ||||
|                      'description': player_info['VDE'], | ||||
|                      'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]), | ||||
|                      'thumbnail': player_info['programImage'], | ||||
|                      'ext': 'flv', | ||||
|                      } | ||||
|  | ||||
|         formats = player_info['VSR'].values() | ||||
| @@ -78,9 +98,36 @@ class ArteTvIE(InfoExtractor): | ||||
|         if format_info['mediaType'] == u'rtmp': | ||||
|             info_dict['url'] = format_info['streamer'] | ||||
|             info_dict['play_path'] = 'mp4:' + format_info['url'] | ||||
|             info_dict['ext'] = 'mp4' | ||||
|         else: | ||||
|             info_dict['url'] = format_info['url'] | ||||
|             info_dict['ext'] = 'mp4' | ||||
|  | ||||
|         return info_dict | ||||
|  | ||||
|     def _extract_video(self, url, video_id): | ||||
|         """Extract from videos.arte.tv""" | ||||
|         config_xml_url = url.replace('/videos/', '/do_delegate/videos/') | ||||
|         config_xml_url = config_xml_url.replace('.html', ',view,asPlayerXml.xml') | ||||
|         config_xml = self._download_webpage(config_xml_url, video_id) | ||||
|         config_xml_url = self._html_search_regex(r'<video lang=".*?" ref="(.*?)"', config_xml, 'config xml url') | ||||
|         config_xml = self._download_webpage(config_xml_url, video_id) | ||||
|  | ||||
|         video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml)) | ||||
|         def _key(m): | ||||
|             quality = m.group('quality') | ||||
|             if quality == 'hd': | ||||
|                 return 2 | ||||
|             else: | ||||
|                 return 1 | ||||
|         # We pick the best quality | ||||
|         video_urls = sorted(video_urls, key=_key) | ||||
|         video_url = list(video_urls)[-1].group('url') | ||||
|          | ||||
|         title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title') | ||||
|         thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>', | ||||
|                                             config_xml, 'thumbnail') | ||||
|         return {'id': video_id, | ||||
|                 'title': title, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'url': video_url, | ||||
|                 'ext': 'flv', | ||||
|                 } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user