[crunchyroll] parse vilos media data(closes #17343)
This commit is contained in:
		| @@ -7,7 +7,7 @@ import zlib | ||||
|  | ||||
| from hashlib import sha1 | ||||
| from math import pow, sqrt, floor | ||||
| from .common import InfoExtractor | ||||
| from .vrv import VRVIE | ||||
| from ..compat import ( | ||||
|     compat_b64decode, | ||||
|     compat_etree_fromstring, | ||||
| @@ -18,6 +18,8 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     bytes_to_intlist, | ||||
|     extract_attributes, | ||||
|     float_or_none, | ||||
|     intlist_to_bytes, | ||||
|     int_or_none, | ||||
|     lowercase_escape, | ||||
| @@ -26,14 +28,13 @@ from ..utils import ( | ||||
|     unified_strdate, | ||||
|     urlencode_postdata, | ||||
|     xpath_text, | ||||
|     extract_attributes, | ||||
| ) | ||||
| from ..aes import ( | ||||
|     aes_cbc_decrypt, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CrunchyrollBaseIE(InfoExtractor): | ||||
| class CrunchyrollBaseIE(VRVIE): | ||||
|     _LOGIN_URL = 'https://www.crunchyroll.com/login' | ||||
|     _LOGIN_FORM = 'login_form' | ||||
|     _NETRC_MACHINE = 'crunchyroll' | ||||
| @@ -148,7 +149,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', | ||||
|             'description': 'md5:2d17137920c64f2f49981a7797d275ef', | ||||
|             'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'uploader': 'Yomiuri Telecasting Corporation (YTV)', | ||||
|             'upload_date': '20131013', | ||||
|             'url': 're:(?!.*&)', | ||||
| @@ -221,7 +222,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): | ||||
|         'info_dict': { | ||||
|             'id': '535080', | ||||
|             'ext': 'mp4', | ||||
|             'title': '11eyes Episode 1 – Piros éjszaka - Red Night', | ||||
|             'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka', | ||||
|             'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".', | ||||
|             'uploader': 'Marvelous AQL Inc.', | ||||
|             'upload_date': '20091021', | ||||
| @@ -437,13 +438,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|         if 'To view this, please log in to verify you are 18 or older.' in webpage: | ||||
|             self.raise_login_required() | ||||
|  | ||||
|         media = self._parse_json(self._search_regex( | ||||
|             r'vilos\.config\.media\s*=\s*({.+?});', | ||||
|             webpage, 'vilos media', default='{}'), video_id) | ||||
|         media_metadata = media.get('metadata') or {} | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>', | ||||
|             webpage, 'video_title') | ||||
|         video_title = re.sub(r' {2,}', ' ', video_title) | ||||
|         video_description = self._parse_json(self._html_search_regex( | ||||
|         video_description = (self._parse_json(self._html_search_regex( | ||||
|             r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id, | ||||
|             webpage, 'description', default='{}'), video_id).get('description') | ||||
|             webpage, 'description', default='{}'), video_id) or media_metadata).get('description') | ||||
|         if video_description: | ||||
|             video_description = lowercase_escape(video_description.replace(r'\r\n', '\n')) | ||||
|         video_upload_date = self._html_search_regex( | ||||
| @@ -456,91 +462,99 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|             [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'], | ||||
|             webpage, 'video_uploader', fatal=False) | ||||
|  | ||||
|         available_fmts = [] | ||||
|         for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): | ||||
|             attrs = extract_attributes(a) | ||||
|             href = attrs.get('href') | ||||
|             if href and '/freetrial' in href: | ||||
|                 continue | ||||
|             available_fmts.append(fmt) | ||||
|         if not available_fmts: | ||||
|             for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'): | ||||
|                 available_fmts = re.findall(p, webpage) | ||||
|                 if available_fmts: | ||||
|                     break | ||||
|         video_encode_ids = [] | ||||
|         formats = [] | ||||
|         for fmt in available_fmts: | ||||
|             stream_quality, stream_format = self._FORMAT_IDS[fmt] | ||||
|             video_format = fmt + 'p' | ||||
|             stream_infos = [] | ||||
|             streamdata = self._call_rpc_api( | ||||
|                 'VideoPlayer_GetStandardConfig', video_id, | ||||
|                 'Downloading media info for %s' % video_format, data={ | ||||
|                     'media_id': video_id, | ||||
|                     'video_format': stream_format, | ||||
|                     'video_quality': stream_quality, | ||||
|                     'current_page': url, | ||||
|                 }) | ||||
|             if streamdata is not None: | ||||
|                 stream_info = streamdata.find('./{default}preload/stream_info') | ||||
|         for stream in media.get('streams', []): | ||||
|             formats.extend(self._extract_vrv_formats( | ||||
|                 stream.get('url'), video_id, stream.get('format'), | ||||
|                 stream.get('audio_lang'), stream.get('hardsub_lang'))) | ||||
|         if not formats: | ||||
|             available_fmts = [] | ||||
|             for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): | ||||
|                 attrs = extract_attributes(a) | ||||
|                 href = attrs.get('href') | ||||
|                 if href and '/freetrial' in href: | ||||
|                     continue | ||||
|                 available_fmts.append(fmt) | ||||
|             if not available_fmts: | ||||
|                 for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'): | ||||
|                     available_fmts = re.findall(p, webpage) | ||||
|                     if available_fmts: | ||||
|                         break | ||||
|             if not available_fmts: | ||||
|                 available_fmts = self._FORMAT_IDS.keys() | ||||
|             video_encode_ids = [] | ||||
|  | ||||
|             for fmt in available_fmts: | ||||
|                 stream_quality, stream_format = self._FORMAT_IDS[fmt] | ||||
|                 video_format = fmt + 'p' | ||||
|                 stream_infos = [] | ||||
|                 streamdata = self._call_rpc_api( | ||||
|                     'VideoPlayer_GetStandardConfig', video_id, | ||||
|                     'Downloading media info for %s' % video_format, data={ | ||||
|                         'media_id': video_id, | ||||
|                         'video_format': stream_format, | ||||
|                         'video_quality': stream_quality, | ||||
|                         'current_page': url, | ||||
|                     }) | ||||
|                 if streamdata is not None: | ||||
|                     stream_info = streamdata.find('./{default}preload/stream_info') | ||||
|                     if stream_info is not None: | ||||
|                         stream_infos.append(stream_info) | ||||
|                 stream_info = self._call_rpc_api( | ||||
|                     'VideoEncode_GetStreamInfo', video_id, | ||||
|                     'Downloading stream info for %s' % video_format, data={ | ||||
|                         'media_id': video_id, | ||||
|                         'video_format': stream_format, | ||||
|                         'video_encode_quality': stream_quality, | ||||
|                     }) | ||||
|                 if stream_info is not None: | ||||
|                     stream_infos.append(stream_info) | ||||
|             stream_info = self._call_rpc_api( | ||||
|                 'VideoEncode_GetStreamInfo', video_id, | ||||
|                 'Downloading stream info for %s' % video_format, data={ | ||||
|                     'media_id': video_id, | ||||
|                     'video_format': stream_format, | ||||
|                     'video_encode_quality': stream_quality, | ||||
|                 }) | ||||
|             if stream_info is not None: | ||||
|                 stream_infos.append(stream_info) | ||||
|             for stream_info in stream_infos: | ||||
|                 video_encode_id = xpath_text(stream_info, './video_encode_id') | ||||
|                 if video_encode_id in video_encode_ids: | ||||
|                     continue | ||||
|                 video_encode_ids.append(video_encode_id) | ||||
|                 for stream_info in stream_infos: | ||||
|                     video_encode_id = xpath_text(stream_info, './video_encode_id') | ||||
|                     if video_encode_id in video_encode_ids: | ||||
|                         continue | ||||
|                     video_encode_ids.append(video_encode_id) | ||||
|  | ||||
|                 video_file = xpath_text(stream_info, './file') | ||||
|                 if not video_file: | ||||
|                     continue | ||||
|                 if video_file.startswith('http'): | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         video_file, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                         m3u8_id='hls', fatal=False)) | ||||
|                     continue | ||||
|  | ||||
|                 video_url = xpath_text(stream_info, './host') | ||||
|                 if not video_url: | ||||
|                     continue | ||||
|                 metadata = stream_info.find('./metadata') | ||||
|                 format_info = { | ||||
|                     'format': video_format, | ||||
|                     'height': int_or_none(xpath_text(metadata, './height')), | ||||
|                     'width': int_or_none(xpath_text(metadata, './width')), | ||||
|                 } | ||||
|  | ||||
|                 if '.fplive.net/' in video_url: | ||||
|                     video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) | ||||
|                     parsed_video_url = compat_urlparse.urlparse(video_url) | ||||
|                     direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( | ||||
|                         netloc='v.lvlt.crcdn.net', | ||||
|                         path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) | ||||
|                     if self._is_valid_url(direct_video_url, video_id, video_format): | ||||
|                         format_info.update({ | ||||
|                             'format_id': 'http-' + video_format, | ||||
|                             'url': direct_video_url, | ||||
|                         }) | ||||
|                         formats.append(format_info) | ||||
|                     video_file = xpath_text(stream_info, './file') | ||||
|                     if not video_file: | ||||
|                         continue | ||||
|                     if video_file.startswith('http'): | ||||
|                         formats.extend(self._extract_m3u8_formats( | ||||
|                             video_file, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                             m3u8_id='hls', fatal=False)) | ||||
|                         continue | ||||
|  | ||||
|                 format_info.update({ | ||||
|                     'format_id': 'rtmp-' + video_format, | ||||
|                     'url': video_url, | ||||
|                     'play_path': video_file, | ||||
|                     'ext': 'flv', | ||||
|                 }) | ||||
|                 formats.append(format_info) | ||||
|                     video_url = xpath_text(stream_info, './host') | ||||
|                     if not video_url: | ||||
|                         continue | ||||
|                     metadata = stream_info.find('./metadata') | ||||
|                     format_info = { | ||||
|                         'format': video_format, | ||||
|                         'height': int_or_none(xpath_text(metadata, './height')), | ||||
|                         'width': int_or_none(xpath_text(metadata, './width')), | ||||
|                     } | ||||
|  | ||||
|                     if '.fplive.net/' in video_url: | ||||
|                         video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) | ||||
|                         parsed_video_url = compat_urlparse.urlparse(video_url) | ||||
|                         direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( | ||||
|                             netloc='v.lvlt.crcdn.net', | ||||
|                             path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) | ||||
|                         if self._is_valid_url(direct_video_url, video_id, video_format): | ||||
|                             format_info.update({ | ||||
|                                 'format_id': 'http-' + video_format, | ||||
|                                 'url': direct_video_url, | ||||
|                             }) | ||||
|                             formats.append(format_info) | ||||
|                             continue | ||||
|  | ||||
|                     format_info.update({ | ||||
|                         'format_id': 'rtmp-' + video_format, | ||||
|                         'url': video_url, | ||||
|                         'play_path': video_file, | ||||
|                         'ext': 'flv', | ||||
|                     }) | ||||
|                     formats.append(format_info) | ||||
|         self._sort_formats(formats, ('height', 'width', 'tbr', 'fps')) | ||||
|  | ||||
|         metadata = self._call_rpc_api( | ||||
| @@ -549,7 +563,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|                 'media_id': video_id, | ||||
|             }) | ||||
|  | ||||
|         subtitles = self.extract_subtitles(video_id, webpage) | ||||
|         subtitles = {} | ||||
|         for subtitle in media.get('subtitles', []): | ||||
|             subtitle_url = subtitle.get('url') | ||||
|             if not subtitle_url: | ||||
|                 continue | ||||
|             subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({ | ||||
|                 'url': subtitle_url, | ||||
|                 'ext': subtitle.get('format', 'ass'), | ||||
|             }) | ||||
|         if not subtitles: | ||||
|             subtitles = self.extract_subtitles(video_id, webpage) | ||||
|  | ||||
|         # webpage provide more accurate data than series_title from XML | ||||
|         series = self._html_search_regex( | ||||
| @@ -557,8 +581,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|             webpage, 'series', fatal=False) | ||||
|         season = xpath_text(metadata, 'series_title') | ||||
|  | ||||
|         episode = xpath_text(metadata, 'episode_title') | ||||
|         episode_number = int_or_none(xpath_text(metadata, 'episode_number')) | ||||
|         episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title') | ||||
|         episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number')) | ||||
|  | ||||
|         season_number = int_or_none(self._search_regex( | ||||
|             r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)', | ||||
| @@ -568,7 +592,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'description': video_description, | ||||
|             'thumbnail': xpath_text(metadata, 'episode_image_url'), | ||||
|             'duration': float_or_none(media_metadata.get('duration'), 1000), | ||||
|             'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'), | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date': video_upload_date, | ||||
|             'series': series, | ||||
|   | ||||
| @@ -72,7 +72,7 @@ class VRVBaseIE(InfoExtractor): | ||||
| class VRVIE(VRVBaseIE): | ||||
|     IE_NAME = 'vrv' | ||||
|     _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT', | ||||
|         'info_dict': { | ||||
|             'id': 'GR9PNZ396', | ||||
| @@ -85,7 +85,28 @@ class VRVIE(VRVBaseIE): | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|     }] | ||||
|  | ||||
|     def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): | ||||
|         if not url or stream_format not in ('hls', 'dash'): | ||||
|             return [] | ||||
|         stream_id = hardsub_lang or audio_lang | ||||
|         format_id = '%s-%s' % (stream_format, stream_id) | ||||
|         if stream_format == 'hls': | ||||
|             adaptive_formats = self._extract_m3u8_formats( | ||||
|                 url, video_id, 'mp4', m3u8_id=format_id, | ||||
|                 note='Downloading %s m3u8 information' % stream_id, | ||||
|                 fatal=False) | ||||
|         elif stream_format == 'dash': | ||||
|             adaptive_formats = self._extract_mpd_formats( | ||||
|                 url, video_id, mpd_id=format_id, | ||||
|                 note='Downloading %s MPD information' % stream_id, | ||||
|                 fatal=False) | ||||
|         if audio_lang: | ||||
|             for f in adaptive_formats: | ||||
|                 if f.get('acodec') != 'none': | ||||
|                     f['language'] = audio_lang | ||||
|         return adaptive_formats | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| @@ -115,26 +136,9 @@ class VRVIE(VRVBaseIE): | ||||
|         for stream_type, streams in streams_json.get('streams', {}).items(): | ||||
|             if stream_type in ('adaptive_hls', 'adaptive_dash'): | ||||
|                 for stream in streams.values(): | ||||
|                     stream_url = stream.get('url') | ||||
|                     if not stream_url: | ||||
|                         continue | ||||
|                     stream_id = stream.get('hardsub_locale') or audio_locale | ||||
|                     format_id = '%s-%s' % (stream_type.split('_')[1], stream_id) | ||||
|                     if stream_type == 'adaptive_hls': | ||||
|                         adaptive_formats = self._extract_m3u8_formats( | ||||
|                             stream_url, video_id, 'mp4', m3u8_id=format_id, | ||||
|                             note='Downloading %s m3u8 information' % stream_id, | ||||
|                             fatal=False) | ||||
|                     else: | ||||
|                         adaptive_formats = self._extract_mpd_formats( | ||||
|                             stream_url, video_id, mpd_id=format_id, | ||||
|                             note='Downloading %s MPD information' % stream_id, | ||||
|                             fatal=False) | ||||
|                     if audio_locale: | ||||
|                         for f in adaptive_formats: | ||||
|                             if f.get('acodec') != 'none': | ||||
|                                 f['language'] = audio_locale | ||||
|                     formats.extend(adaptive_formats) | ||||
|                     formats.extend(self._extract_vrv_formats( | ||||
|                         stream.get('url'), video_id, stream_type.split('_')[1], | ||||
|                         audio_locale, stream.get('hardsub_locale'))) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|   | ||||
		Reference in New Issue
	
	Block a user