[itv] Make SOAP request non-fatal and extract metadata from a webpage (closes #16780)
This commit is contained in:
		| @@ -18,6 +18,7 @@ from ..utils import ( | ||||
|     xpath_element, | ||||
|     xpath_text, | ||||
|     int_or_none, | ||||
|     merge_dicts, | ||||
|     parse_duration, | ||||
|     smuggle_url, | ||||
|     ExtractorError, | ||||
| @@ -129,64 +130,65 @@ class ITVIE(InfoExtractor): | ||||
|  | ||||
|         resp_env = self._download_xml( | ||||
|             params['data-playlist-url'], video_id, | ||||
|             headers=headers, data=etree.tostring(req_env)) | ||||
|         playlist = xpath_element(resp_env, './/Playlist') | ||||
|         if playlist is None: | ||||
|             fault_code = xpath_text(resp_env, './/faultcode') | ||||
|             fault_string = xpath_text(resp_env, './/faultstring') | ||||
|             if fault_code == 'InvalidGeoRegion': | ||||
|                 self.raise_geo_restricted( | ||||
|                     msg=fault_string, countries=self._GEO_COUNTRIES) | ||||
|             elif fault_code not in ( | ||||
|                     'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'): | ||||
|                 raise ExtractorError( | ||||
|                     '%s said: %s' % (self.IE_NAME, fault_string), expected=True) | ||||
|             info.update({ | ||||
|                 'title': self._og_search_title(webpage), | ||||
|                 'episode_title': params.get('data-video-episode'), | ||||
|                 'series': params.get('data-video-title'), | ||||
|             }) | ||||
|         else: | ||||
|             title = xpath_text(playlist, 'EpisodeTitle', default=None) | ||||
|             info.update({ | ||||
|                 'title': title, | ||||
|                 'episode_title': title, | ||||
|                 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), | ||||
|                 'series': xpath_text(playlist, 'ProgrammeTitle'), | ||||
|                 'duration': parse_duration(xpath_text(playlist, 'Duration')), | ||||
|             }) | ||||
|             video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) | ||||
|             media_files = xpath_element(video_element, 'MediaFiles', fatal=True) | ||||
|             rtmp_url = media_files.attrib['base'] | ||||
|             headers=headers, data=etree.tostring(req_env), fatal=False) | ||||
|         if resp_env: | ||||
|             playlist = xpath_element(resp_env, './/Playlist') | ||||
|             if playlist is None: | ||||
|                 fault_code = xpath_text(resp_env, './/faultcode') | ||||
|                 fault_string = xpath_text(resp_env, './/faultstring') | ||||
|                 if fault_code == 'InvalidGeoRegion': | ||||
|                     self.raise_geo_restricted( | ||||
|                         msg=fault_string, countries=self._GEO_COUNTRIES) | ||||
|                 elif fault_code not in ( | ||||
|                         'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'): | ||||
|                     raise ExtractorError( | ||||
|                         '%s said: %s' % (self.IE_NAME, fault_string), expected=True) | ||||
|                 info.update({ | ||||
|                     'title': self._og_search_title(webpage), | ||||
|                     'episode_title': params.get('data-video-episode'), | ||||
|                     'series': params.get('data-video-title'), | ||||
|                 }) | ||||
|             else: | ||||
|                 title = xpath_text(playlist, 'EpisodeTitle', default=None) | ||||
|                 info.update({ | ||||
|                     'title': title, | ||||
|                     'episode_title': title, | ||||
|                     'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), | ||||
|                     'series': xpath_text(playlist, 'ProgrammeTitle'), | ||||
|                     'duration': parse_duration(xpath_text(playlist, 'Duration')), | ||||
|                 }) | ||||
|                 video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) | ||||
|                 media_files = xpath_element(video_element, 'MediaFiles', fatal=True) | ||||
|                 rtmp_url = media_files.attrib['base'] | ||||
|  | ||||
|             for media_file in media_files.findall('MediaFile'): | ||||
|                 play_path = xpath_text(media_file, 'URL') | ||||
|                 if not play_path: | ||||
|                     continue | ||||
|                 tbr = int_or_none(media_file.get('bitrate'), 1000) | ||||
|                 f = { | ||||
|                     'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), | ||||
|                     'play_path': play_path, | ||||
|                     # Providing this swfVfy allows to avoid truncated downloads | ||||
|                     'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', | ||||
|                     'page_url': url, | ||||
|                     'tbr': tbr, | ||||
|                     'ext': 'flv', | ||||
|                 } | ||||
|                 app = self._search_regex( | ||||
|                     'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) | ||||
|                 if app: | ||||
|                     f.update({ | ||||
|                         'url': rtmp_url.split('?', 1)[0], | ||||
|                         'app': app, | ||||
|                     }) | ||||
|                 else: | ||||
|                     f['url'] = rtmp_url | ||||
|                 formats.append(f) | ||||
|                 for media_file in media_files.findall('MediaFile'): | ||||
|                     play_path = xpath_text(media_file, 'URL') | ||||
|                     if not play_path: | ||||
|                         continue | ||||
|                     tbr = int_or_none(media_file.get('bitrate'), 1000) | ||||
|                     f = { | ||||
|                         'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), | ||||
|                         'play_path': play_path, | ||||
|                         # Providing this swfVfy allows to avoid truncated downloads | ||||
|                         'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', | ||||
|                         'page_url': url, | ||||
|                         'tbr': tbr, | ||||
|                         'ext': 'flv', | ||||
|                     } | ||||
|                     app = self._search_regex( | ||||
|                         'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) | ||||
|                     if app: | ||||
|                         f.update({ | ||||
|                             'url': rtmp_url.split('?', 1)[0], | ||||
|                             'app': app, | ||||
|                         }) | ||||
|                     else: | ||||
|                         f['url'] = rtmp_url | ||||
|                     formats.append(f) | ||||
|  | ||||
|             for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): | ||||
|                 if caption_url.text: | ||||
|                     extract_subtitle(caption_url.text) | ||||
|                 for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): | ||||
|                     if caption_url.text: | ||||
|                         extract_subtitle(caption_url.text) | ||||
|  | ||||
|         ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id') | ||||
|         hmac = params.get('data-video-hmac') | ||||
| @@ -261,7 +263,17 @@ class ITVIE(InfoExtractor): | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
|         webpage_info = self._search_json_ld(webpage, video_id, default={}) | ||||
|         if not webpage_info.get('title'): | ||||
|             webpage_info['title'] = self._html_search_regex( | ||||
|                 r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<', | ||||
|                 webpage, 'title', default=None) or self._og_search_title( | ||||
|                 webpage, default=None) or self._html_search_meta( | ||||
|                 'twitter:title', webpage, 'title', | ||||
|                 default=None) or webpage_info['episode'] | ||||
|  | ||||
|         return merge_dicts(info, webpage_info) | ||||
|  | ||||
|  | ||||
| class ITVBTCCIE(InfoExtractor): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user