Generalize XML manifest processing code and improve XSPF parsing (closes #15794)
This commit is contained in:
		| @@ -698,40 +698,47 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ | ||||
|         _TEST_CASES = [ | ||||
|             ( | ||||
|                 'foo_xspf', | ||||
|                 'https://example.org/src/', | ||||
|                 'https://example.org/src/foo_xspf.xspf', | ||||
|                 [{ | ||||
|                     'id': 'foo_xspf', | ||||
|                     'title': 'Pandemonium', | ||||
|                     'description': 'Visit http://bigbrother404.bandcamp.com', | ||||
|                     'duration': 202.416, | ||||
|                     'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}], | ||||
|                     'formats': [{ | ||||
|                         'manifest_url': 'https://example.org/src/foo_xspf.xspf', | ||||
|                         'url': 'https://example.org/src/cd1/track%201.mp3', | ||||
|                     }], | ||||
|                 }, { | ||||
|                     'id': 'foo_xspf', | ||||
|                     'title': 'Pandemonium' | ||||
|                 }, | ||||
|                 { | ||||
|                     'title': 'Final Cartridge (Nichico Twelve Remix)', | ||||
|                     'description': 'Visit http://bigbrother404.bandcamp.com', | ||||
|                     'duration': 255.857, | ||||
|                     'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}], | ||||
|                     'formats': [{ | ||||
|                         'manifest_url': 'https://example.org/src/foo_xspf.xspf', | ||||
|                         'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3', | ||||
|                     }], | ||||
|                 }, { | ||||
|                     'id': 'foo_xspf', | ||||
|                     'title': 'Final Cartridge (Nichico Twelve Remix)' | ||||
|                 }, | ||||
|                 { | ||||
|                     'title': 'Rebuilding Nightingale', | ||||
|                     'description': 'Visit http://bigbrother404.bandcamp.com', | ||||
|                     'duration': 287.915, | ||||
|                     'formats': [ | ||||
|                         {'url': 'https://example.org/src/track3.mp3'}, | ||||
|                         {'url': 'https://example.com/track3.mp3'} | ||||
|                     ], | ||||
|                     'id': 'foo_xspf', | ||||
|                     'title': 'Rebuilding Nightingale' | ||||
|                     'formats': [{ | ||||
|                         'manifest_url': 'https://example.org/src/foo_xspf.xspf', | ||||
|                         'url': 'https://example.org/src/track3.mp3', | ||||
|                     }, { | ||||
|                         'manifest_url': 'https://example.org/src/foo_xspf.xspf', | ||||
|                         'url': 'https://example.com/track3.mp3', | ||||
|                     }] | ||||
|                 }] | ||||
|             ), | ||||
|         ] | ||||
|  | ||||
|         for xspf_file, xspf_base_url, expected_entries in _TEST_CASES: | ||||
|         for xspf_file, xspf_url, expected_entries in _TEST_CASES: | ||||
|             with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, | ||||
|                          mode='r', encoding='utf-8') as f: | ||||
|                 entries = self.ie._parse_xspf( | ||||
|                     compat_etree_fromstring(f.read().encode('utf-8')), | ||||
|                         xspf_file, xspf_base_url) | ||||
|                     xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) | ||||
|                 expect_value(self, entries, expected_entries, None) | ||||
|                 for i in range(len(entries)): | ||||
|                     expect_dict(self, entries[i], expected_entries[i]) | ||||
|   | ||||
| @@ -1706,22 +1706,24 @@ class InfoExtractor(object): | ||||
|             }) | ||||
|         return subtitles | ||||
|  | ||||
|     def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True): | ||||
|     def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True): | ||||
|         xspf = self._download_xml( | ||||
|             playlist_url, playlist_id, 'Downloading xpsf playlist', | ||||
|             xspf_url, playlist_id, 'Downloading xpsf playlist', | ||||
|             'Unable to download xspf manifest', fatal=fatal) | ||||
|         if xspf is False: | ||||
|             return [] | ||||
|         return self._parse_xspf(xspf, playlist_id, base_url(playlist_url)) | ||||
|         return self._parse_xspf( | ||||
|             xspf, playlist_id, xspf_url=xspf_url, | ||||
|             xspf_base_url=base_url(xspf_url)) | ||||
|  | ||||
|     def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''): | ||||
|     def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None): | ||||
|         NS_MAP = { | ||||
|             'xspf': 'http://xspf.org/ns/0/', | ||||
|             's1': 'http://static.streamone.nl/player/ns/0', | ||||
|         } | ||||
|  | ||||
|         entries = [] | ||||
|         for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): | ||||
|         for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): | ||||
|             title = xpath_text( | ||||
|                 track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id) | ||||
|             description = xpath_text( | ||||
| @@ -1731,12 +1733,18 @@ class InfoExtractor(object): | ||||
|             duration = float_or_none( | ||||
|                 xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) | ||||
|  | ||||
|             formats = [{ | ||||
|                 'url': urljoin(playlist_base_url, location.text), | ||||
|                 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), | ||||
|                 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), | ||||
|                 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), | ||||
|             } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] | ||||
|             formats = [] | ||||
|             for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)): | ||||
|                 format_url = urljoin(xspf_base_url, location.text) | ||||
|                 if not format_url: | ||||
|                     continue | ||||
|                 formats.append({ | ||||
|                     'url': format_url, | ||||
|                     'manifest_url': xspf_url, | ||||
|                     'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), | ||||
|                     'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), | ||||
|                     'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), | ||||
|                 }) | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|             entries.append({ | ||||
| @@ -1750,18 +1758,18 @@ class InfoExtractor(object): | ||||
|         return entries | ||||
|  | ||||
|     def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}): | ||||
|         res = self._download_webpage_handle( | ||||
|         res = self._download_xml_handle( | ||||
|             mpd_url, video_id, | ||||
|             note=note or 'Downloading MPD manifest', | ||||
|             errnote=errnote or 'Failed to download MPD manifest', | ||||
|             fatal=fatal) | ||||
|         if res is False: | ||||
|             return [] | ||||
|         mpd, urlh = res | ||||
|         mpd_doc, urlh = res | ||||
|         mpd_base_url = base_url(urlh.geturl()) | ||||
|  | ||||
|         return self._parse_mpd_formats( | ||||
|             compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, | ||||
|             mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url, | ||||
|             formats_dict=formats_dict, mpd_url=mpd_url) | ||||
|  | ||||
|     def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None): | ||||
| @@ -2035,17 +2043,16 @@ class InfoExtractor(object): | ||||
|         return formats | ||||
|  | ||||
|     def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True): | ||||
|         res = self._download_webpage_handle( | ||||
|         res = self._download_xml_handle( | ||||
|             ism_url, video_id, | ||||
|             note=note or 'Downloading ISM manifest', | ||||
|             errnote=errnote or 'Failed to download ISM manifest', | ||||
|             fatal=fatal) | ||||
|         if res is False: | ||||
|             return [] | ||||
|         ism, urlh = res | ||||
|         ism_doc, urlh = res | ||||
|  | ||||
|         return self._parse_ism_formats( | ||||
|             compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id) | ||||
|         return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id) | ||||
|  | ||||
|     def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None): | ||||
|         """ | ||||
|   | ||||
| @@ -2233,7 +2233,9 @@ class GenericIE(InfoExtractor): | ||||
|                 return smil | ||||
|             elif doc.tag == '{http://xspf.org/ns/0/}playlist': | ||||
|                 return self.playlist_result( | ||||
|                     self._parse_xspf(doc, video_id, compat_str(full_response.geturl())), | ||||
|                     self._parse_xspf( | ||||
|                         doc, video_id, xspf_url=url, | ||||
|                         xspf_base_url=compat_str(full_response.geturl())), | ||||
|                     video_id) | ||||
|             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): | ||||
|                 info_dict['formats'] = self._parse_mpd_formats( | ||||
|   | ||||
		Reference in New Issue
	
	Block a user