[ard:beta] Improve extraction robustness, fix subtitle extraction, and improve extraction of geo-restricted videos
This commit is contained in:
		| @@ -8,13 +8,16 @@ from .generic import GenericIE | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     qualities, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     qualities, | ||||
|     str_or_none, | ||||
|     try_get, | ||||
|     unified_strdate, | ||||
|     xpath_text, | ||||
|     unified_timestamp, | ||||
|     update_url_query, | ||||
|     url_or_none, | ||||
|     xpath_text, | ||||
| ) | ||||
| from ..compat import compat_etree_fromstring | ||||
|  | ||||
| @@ -336,43 +339,62 @@ class ARDBetaMediathekIE(InfoExtractor): | ||||
|             'display_id': display_id, | ||||
|         } | ||||
|         formats = [] | ||||
|         subtitles = {} | ||||
|         geoblocked = False | ||||
|         for widget in data.values(): | ||||
|             if widget.get('_geoblocked'): | ||||
|                 raise ExtractorError('This video is not available due to geoblocking', expected=True) | ||||
|  | ||||
|             if widget.get('_geoblocked') is True: | ||||
|                 geoblocked = True | ||||
|             if '_duration' in widget: | ||||
|                 res['duration'] = widget['_duration'] | ||||
|                 res['duration'] = int_or_none(widget['_duration']) | ||||
|             if 'clipTitle' in widget: | ||||
|                 res['title'] = widget['clipTitle'] | ||||
|             if '_previewImage' in widget: | ||||
|                 res['thumbnail'] = widget['_previewImage'] | ||||
|             if 'broadcastedOn' in widget: | ||||
|                 res['upload_date'] = unified_strdate(widget['broadcastedOn']) | ||||
|                 res['timestamp'] = unified_timestamp(widget['broadcastedOn']) | ||||
|             if 'synopsis' in widget: | ||||
|                 res['description'] = widget['synopsis'] | ||||
|             if '_subtitleUrl' in widget: | ||||
|                 res['subtitles'] = {'de': [{ | ||||
|             subtitle_url = url_or_none(widget.get('_subtitleUrl')) | ||||
|             if subtitle_url: | ||||
|                 subtitles.setdefault('de', []).append({ | ||||
|                     'ext': 'ttml', | ||||
|                     'url': widget['_subtitleUrl'], | ||||
|                 }]} | ||||
|                     'url': subtitle_url, | ||||
|                 }) | ||||
|             if '_quality' in widget: | ||||
|                 format_url = widget['_stream']['json'][0] | ||||
|  | ||||
|                 if format_url.endswith('.f4m'): | ||||
|                 format_url = url_or_none(try_get( | ||||
|                     widget, lambda x: x['_stream']['json'][0])) | ||||
|                 if not format_url: | ||||
|                     continue | ||||
|                 ext = determine_ext(format_url) | ||||
|                 if ext == 'f4m': | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         format_url + '?hdcore=3.11.0', | ||||
|                         video_id, f4m_id='hds', fatal=False)) | ||||
|                 elif format_url.endswith('m3u8'): | ||||
|                 elif ext == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) | ||||
|                         format_url, video_id, 'mp4', m3u8_id='hls', | ||||
|                         fatal=False)) | ||||
|                 else: | ||||
|                     # HTTP formats are not available when geoblocked is True, | ||||
|                     # other formats are fine though | ||||
|                     if geoblocked: | ||||
|                         continue | ||||
|                     quality = str_or_none(widget.get('_quality')) | ||||
|                     formats.append({ | ||||
|                         'format_id': 'http-' + widget['_quality'], | ||||
|                         'format_id': ('http-' + quality) if quality else 'http', | ||||
|                         'url': format_url, | ||||
|                         'preference': 10,  # Plain HTTP, that's nice | ||||
|                     }) | ||||
|  | ||||
|         if not formats and geoblocked: | ||||
|             self.raise_geo_restricted( | ||||
|                 msg='This video is not available due to geoblocking', | ||||
|                 countries=['DE']) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|         res['formats'] = formats | ||||
|         res.update({ | ||||
|             'subtitles': subtitles, | ||||
|             'formats': formats, | ||||
|         }) | ||||
|  | ||||
|         return res | ||||
|   | ||||
		Reference in New Issue
	
	Block a user