[mtv,cc,cmt,spike] Improve and refactor
- Eliminate _transform_rtmp_url
* Generalize triforce mgid extraction
+ [cmt] Add support for full-episodes (closes #11623)
This commit is contained in:
		| @@ -1,13 +1,11 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .mtv import MTVIE | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class CMTIE(MTVIE): | ||||
|     IE_NAME = 'cmt.com' | ||||
|     _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)' | ||||
|     _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' | ||||
|     _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes)/(?P<id>[^/]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', | ||||
| @@ -35,15 +33,16 @@ class CMTIE(MTVIE): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
|     def _transform_rtmp_url(cls, rtmp_video_url): | ||||
|         if 'error_not_available.swf' in rtmp_video_url: | ||||
|             raise ExtractorError( | ||||
|                 '%s said: video is not available' % cls.IE_NAME, expected=True) | ||||
|  | ||||
|         return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url) | ||||
|  | ||||
|     def _extract_mgid(self, webpage): | ||||
|         return self._search_regex( | ||||
|         mgid = self._search_regex( | ||||
|             r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1', | ||||
|             webpage, 'mgid', group='mgid') | ||||
|             webpage, 'mgid', group='mgid', default=None) | ||||
|         if not mgid: | ||||
|             mgid = self._extract_triforce_mgid(webpage) | ||||
|         return mgid | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         mgid = self._extract_mgid(webpage) | ||||
|         return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) | ||||
|   | ||||
| @@ -48,17 +48,8 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed') | ||||
|         feed = self._parse_json(feed_json, playlist_id) | ||||
|         zones = feed['manifest']['zones'] | ||||
|  | ||||
|         video_zone = zones['t2_lc_promo1'] | ||||
|         feed = self._download_json(video_zone['feed'], playlist_id) | ||||
|         mgid = feed['result']['data']['id'] | ||||
|  | ||||
|         mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1') | ||||
|         videos_info = self._get_videos_info(mgid) | ||||
|  | ||||
|         return videos_info | ||||
|  | ||||
|  | ||||
| @@ -94,12 +85,6 @@ class ToshIE(MTVServicesInfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
|     def _transform_rtmp_url(cls, rtmp_video_url): | ||||
|         new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url) | ||||
|         new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm') | ||||
|         return new_urls | ||||
|  | ||||
|  | ||||
| class ComedyCentralTVIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)' | ||||
|   | ||||
| @@ -13,11 +13,11 @@ from ..utils import ( | ||||
|     fix_xml_ampersands, | ||||
|     float_or_none, | ||||
|     HEADRequest, | ||||
|     NO_DEFAULT, | ||||
|     RegexNotFoundError, | ||||
|     sanitized_Request, | ||||
|     strip_or_none, | ||||
|     timeconvert, | ||||
|     try_get, | ||||
|     unescapeHTML, | ||||
|     update_url_query, | ||||
|     url_basename, | ||||
| @@ -42,15 +42,6 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         # Remove the templates, like &device={device} | ||||
|         return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) | ||||
|  | ||||
|     # This was originally implemented for ComedyCentral, but it also works here | ||||
|     @classmethod | ||||
|     def _transform_rtmp_url(cls, rtmp_video_url): | ||||
|         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url) | ||||
|         if not m: | ||||
|             return {'rtmp': rtmp_video_url} | ||||
|         base = 'http://viacommtvstrmfs.fplive.net/' | ||||
|         return {'http': base + m.group('finalid')} | ||||
|  | ||||
|     def _get_feed_url(self, uri): | ||||
|         return self._FEED_URL | ||||
|  | ||||
| @@ -91,22 +82,28 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|             if rendition.get('method') == 'hls': | ||||
|                 hls_url = rendition.find('./src').text | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     hls_url, video_id, ext='mp4', entry_protocol='m3u8_native')) | ||||
|                     hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls')) | ||||
|             else: | ||||
|                 # fms | ||||
|                 try: | ||||
|                     _, _, ext = rendition.attrib['type'].partition('/') | ||||
|                     rtmp_video_url = rendition.find('./src').text | ||||
|                     if 'error_not_available.swf' in rtmp_video_url: | ||||
|                         raise ExtractorError( | ||||
|                             '%s said: video is not available' % self.IE_NAME, | ||||
|                             expected=True) | ||||
|                     if rtmp_video_url.endswith('siteunavail.png'): | ||||
|                         continue | ||||
|                     new_urls = self._transform_rtmp_url(rtmp_video_url) | ||||
|                     formats.extend([{ | ||||
|                         'ext': 'flv' if new_url.startswith('rtmp') else ext, | ||||
|                         'url': new_url, | ||||
|                         'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])), | ||||
|                         'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext, | ||||
|                         'url': rtmp_video_url, | ||||
|                         'format_id': '-'.join(filter(None, [ | ||||
|                             'rtmp' if rtmp_video_url.startswith('rtmp') else None, | ||||
|                             rendition.get('bitrate')])), | ||||
|                         'width': int(rendition.get('width')), | ||||
|                         'height': int(rendition.get('height')), | ||||
|                     } for kind, new_url in new_urls.items()]) | ||||
|                     }]) | ||||
|                 except (KeyError, TypeError): | ||||
|                     raise ExtractorError('Invalid rendition field.') | ||||
|         self._sort_formats(formats) | ||||
| @@ -212,7 +209,28 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|             [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')], | ||||
|             playlist_title=title, playlist_description=description) | ||||
|  | ||||
|     def _extract_mgid(self, webpage, default=NO_DEFAULT): | ||||
|     def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): | ||||
|         triforce_feed = self._parse_json(self._search_regex( | ||||
|             r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, | ||||
|             'triforce feed', default='{}'), video_id, fatal=False) | ||||
|  | ||||
|         data_zone = self._search_regex( | ||||
|             r'data-zone=(["\'])(?P<zone>.+?_lc_promo.*?)\1', webpage, | ||||
|             'data zone', default=data_zone, group='zone') | ||||
|  | ||||
|         feed_url = try_get( | ||||
|             triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'], | ||||
|             compat_str) | ||||
|         if not feed_url: | ||||
|             return | ||||
|  | ||||
|         feed = self._download_json(feed_url, video_id, fatal=False) | ||||
|         if not feed: | ||||
|             return | ||||
|  | ||||
|         return try_get(feed, lambda x: x['result']['data']['id'], compat_str) | ||||
|  | ||||
|     def _extract_mgid(self, webpage): | ||||
|         try: | ||||
|             # the url can be http://media.mtvnservices.com/fb/{mgid}.swf | ||||
|             # or http://media.mtvnservices.com/{mgid} | ||||
| @@ -232,7 +250,11 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|             sm4_embed = self._html_search_meta( | ||||
|                 'sm4:video:embed', webpage, 'sm4 embed', default='') | ||||
|             mgid = self._search_regex( | ||||
|                 r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default) | ||||
|                 r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None) | ||||
|  | ||||
|         if not mgid: | ||||
|             mgid = self._extract_triforce_mgid(webpage) | ||||
|  | ||||
|         return mgid | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -46,7 +46,7 @@ class SpikeIE(MTVServicesInfoExtractor): | ||||
|     _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') | ||||
|  | ||||
|     def _extract_mgid(self, webpage): | ||||
|         mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None) | ||||
|         mgid = super(SpikeIE, self)._extract_mgid(webpage) | ||||
|         if mgid is None: | ||||
|             url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') | ||||
|             video_type, episode_id = url_parts.split('/', 1) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user