[espn] Fix extraction (closes #11041)
This commit is contained in:
		| @@ -1,38 +1,117 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import remove_end | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     unified_timestamp, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ESPNIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)' | ||||
|     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://espn.go.com/video/clip?id=10365079', | ||||
|         'md5': '60e5d097a523e767d06479335d1bdc58', | ||||
|         'info_dict': { | ||||
|             'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', | ||||
|             'id': '10365079', | ||||
|             'ext': 'mp4', | ||||
|             'title': '30 for 30 Shorts: Judging Jewell', | ||||
|             'description': None, | ||||
|             'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f', | ||||
|             'timestamp': 1390936111, | ||||
|             'upload_date': '20140128', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['OoyalaExternal'], | ||||
|     }, { | ||||
|         # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season | ||||
|         'url': 'http://espn.go.com/video/clip?id=2743663', | ||||
|         'md5': 'f4ac89b59afc7e2d7dbb049523df6768', | ||||
|         'info_dict': { | ||||
|             'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', | ||||
|             'id': '2743663', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Must-See Moments: Best of the MLS season', | ||||
|             'description': 'md5:4c2d7232beaea572632bec41004f0aeb', | ||||
|             'timestamp': 1449446454, | ||||
|             'upload_date': '20151207', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['OoyalaExternal'], | ||||
|         'expected_warnings': ['Unable to download f4m manifest'], | ||||
|     }, { | ||||
|         'url': 'http://www.espn.com/video/clip?id=10365079', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.espn.com/video/clip/_/id/17989860', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Build the info dict for a single clip: fetch the clip JSON, collect | ||||
|         # every source URL found under clip['links']['source'] as formats, and | ||||
|         # return them together with the clip's metadata fields. | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         # Only the first entry of the 'videos' list in the response is used. | ||||
|         clip = self._download_json( | ||||
|             'http://api-app.espn.com/v1/video/clips/%s' % video_id, | ||||
|             video_id)['videos'][0] | ||||
|  | ||||
|         # Direct indexing: a clip without 'headline' raises KeyError here. | ||||
|         title = clip['headline'] | ||||
|  | ||||
|         # format_urls remembers every source URL already handled so that the | ||||
|         # same URL reached through different keys is only processed once. | ||||
|         format_urls = set() | ||||
|         formats = [] | ||||
|  | ||||
|         # Recursively walk a nested dict of sources. String values are passed | ||||
|         # to extract_source; dict values are descended into. Values of any | ||||
|         # other type are ignored. | ||||
|         def traverse_source(source, base_source_id=None): | ||||
|             # NOTE: the loop variable reuses the name of the 'source' | ||||
|             # parameter; inside the loop 'source' is the current value. | ||||
|             for source_id, source in source.items(): | ||||
|                 if isinstance(source, compat_str): | ||||
|                     # The id handed on is the key path of the enclosing | ||||
|                     # dicts; the string's own key (source_id) is not included. | ||||
|                     extract_source(source, base_source_id) | ||||
|                 elif isinstance(source, dict): | ||||
|                     # Extend the id with this key, joined by '-', unless this | ||||
|                     # is the top level (no base id yet). | ||||
|                     traverse_source( | ||||
|                         source, | ||||
|                         '%s-%s' % (base_source_id, source_id) | ||||
|                         if base_source_id else source_id) | ||||
|  | ||||
|         # Append the format(s) for one source URL to 'formats', skipping URLs | ||||
|         # that have already been seen. The branch taken depends on the value | ||||
|         # determine_ext returns for the URL. | ||||
|         def extract_source(source_url, source_id=None): | ||||
|             if source_url in format_urls: | ||||
|                 return | ||||
|             format_urls.add(source_url) | ||||
|             ext = determine_ext(source_url) | ||||
|             # The manifest helpers below are called with fatal=False -- | ||||
|             # presumably so one failing manifest does not abort the whole | ||||
|             # extraction; confirm against the InfoExtractor helpers. | ||||
|             if ext == 'smil': | ||||
|                 # Unlike the f4m/m3u8 branches, source_id is not passed here. | ||||
|                 formats.extend(self._extract_smil_formats( | ||||
|                     source_url, video_id, fatal=False)) | ||||
|             elif ext == 'f4m': | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     source_url, video_id, f4m_id=source_id, fatal=False)) | ||||
|             elif ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     source_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id=source_id, fatal=False)) | ||||
|             else: | ||||
|                 # Any other extension: use the URL itself as a single format. | ||||
|                 formats.append({ | ||||
|                     'url': source_url, | ||||
|                     'format_id': source_id, | ||||
|                 }) | ||||
|  | ||||
|         # Direct indexing: missing 'links' or 'source' raises KeyError. | ||||
|         traverse_source(clip['links']['source']) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         # 'caption' is preferred; 'description' is used when 'caption' is | ||||
|         # missing or falsy. The remaining fields are optional (.get). | ||||
|         description = clip.get('caption') or clip.get('description') | ||||
|         thumbnail = clip.get('thumbnail') | ||||
|         duration = int_or_none(clip.get('duration')) | ||||
|         timestamp = unified_timestamp(clip.get('originalPublishDate')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class ESPNArticleIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
| @@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.espn.com/video/clip?id=10365079', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         # Decline any URL that ESPNIE reports as suitable; for every other | ||||
|         # URL, defer to the inherited suitable() check. | ||||
|         return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
| @@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor): | ||||
|             r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)', | ||||
|             webpage, 'video id', group='id') | ||||
|  | ||||
|         cms = 'espn' | ||||
|         if 'data-source="intl"' in webpage: | ||||
|             cms = 'intl' | ||||
|         player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms) | ||||
|         player = self._download_webpage( | ||||
|             player_url, video_id) | ||||
|  | ||||
|         pcode = self._search_regex( | ||||
|             r'["\']pcode=([^"\']+)["\']', player, 'pcode') | ||||
|  | ||||
|         title = remove_end( | ||||
|             self._og_search_title(webpage), | ||||
|             '- ESPN Video').strip() | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode), | ||||
|             'ie_key': 'OoyalaExternal', | ||||
|             'title': title, | ||||
|         } | ||||
|         return self.url_result( | ||||
|             'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user