[mitele] Fix extraction (Closes #6414)
This commit is contained in:
		| @@ -1,17 +1,11 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_unquote, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..compat import compat_urllib_parse | ||||
| from ..utils import ( | ||||
|     encode_dict, | ||||
|     get_element_by_attribute, | ||||
|     parse_duration, | ||||
|     strip_jsonp, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -21,54 +15,71 @@ class MiTeleIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', | ||||
|         'md5': 'ace7635b2a0b286aaa37d3ff192d2a8a', | ||||
|         'info_dict': { | ||||
|             'id': '0fce117d', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Programa 144 - Tor, la web invisible', | ||||
|             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', | ||||
|             'id': '0NF1jJnxS1Wu3pHrmvFyw2', | ||||
|             'display_id': 'programa-144', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Tor, la web invisible', | ||||
|             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', | ||||
|             'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|             'duration': 2913, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         episode = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, episode) | ||||
|         embed_data_json = self._search_regex( | ||||
|             r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', | ||||
|         ).replace('\'', '"') | ||||
|         embed_data = json.loads(embed_data_json) | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         domain = embed_data['mediaUrl'] | ||||
|         if not domain.startswith('http'): | ||||
|             # only happens in telecinco.es videos | ||||
|             domain = 'http://' + domain | ||||
|         info_url = compat_urlparse.urljoin( | ||||
|             domain, | ||||
|             compat_urllib_parse_unquote(embed_data['flashvars']['host']) | ||||
|         ) | ||||
|         info_el = self._download_xml(info_url, episode).find('./video/info') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_link = info_el.find('videoUrl/link').text | ||||
|         token_query = compat_urllib_parse.urlencode({'id': video_link}) | ||||
|         token_info = self._download_json( | ||||
|             embed_data['flashvars']['ov_tk'] + '?' + token_query, | ||||
|             episode, | ||||
|             transform_source=strip_jsonp | ||||
|         ) | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             token_info['tokenizedUrl'], episode, ext='mp4') | ||||
|         config_url = self._search_regex( | ||||
|             r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url') | ||||
|  | ||||
|         config = self._download_json( | ||||
|             config_url, display_id, 'Downloading config JSON') | ||||
|  | ||||
|         mmc = self._download_json( | ||||
|             config['services']['mmc'], display_id, 'Downloading mmc JSON') | ||||
|  | ||||
|         formats = [] | ||||
|         for location in mmc['locations']: | ||||
|             gat = self._proto_relative_url(location.get('gat'), 'http:') | ||||
|             bas = location.get('bas') | ||||
|             loc = location.get('loc') | ||||
|             ogn = location.get('ogn') | ||||
|             if None in (gat, bas, loc, ogn): | ||||
|                 continue | ||||
|             token_data = { | ||||
|                 'bas': bas, | ||||
|                 'icd': loc, | ||||
|                 'ogn': ogn, | ||||
|                 'sta': '0', | ||||
|             } | ||||
|             media = self._download_json( | ||||
|                 '%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data)).encode('utf-8')), | ||||
|                 display_id, 'Downloading %s JSON' % location['loc']) | ||||
|             file_ = media.get('file') | ||||
|             if not file_: | ||||
|                 continue | ||||
|             formats.extend(self._extract_f4m_formats( | ||||
|                 file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', | ||||
|                 display_id, f4m_id=loc)) | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title') | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'data-media-id\s*=\s*"([^"]+)"', webpage, | ||||
|             'data media id', default=None) or display_id | ||||
|         thumbnail = config.get('poster', {}).get('imageUrl') | ||||
|         duration = int_or_none(mmc.get('duration')) | ||||
|  | ||||
|         return { | ||||
|             'id': embed_data['videoId'], | ||||
|             'display_id': episode, | ||||
|             'title': info_el.find('title').text, | ||||
|             'formats': formats, | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': get_element_by_attribute('class', 'text', webpage), | ||||
|             'thumbnail': info_el.find('thumb').text, | ||||
|             'duration': parse_duration(info_el.find('duration').text), | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user