[youtube] improve m3u8 format extraction
This commit is contained in:
		@@ -1253,21 +1253,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
        video_id = mobj.group(2)
 | 
			
		||||
        return video_id
 | 
			
		||||
 | 
			
		||||
    def _extract_from_m3u8(self, manifest_url, video_id):
        """Download a formats manifest and map each itag to its stream URL.

        Every manifest line that is non-empty and does not start with '#'
        is treated as a format URL. The itag is read from the
        'itag/<digits>/' segment of that URL.

        Returns a dict of {itag (str): format URL}. When the same itag
        occurs more than once, the last URL wins.

        NOTE(review): no default is passed to _search_regex, so a URL
        without an itag segment presumably aborts the extraction - confirm
        against the helper's contract.
        """
        manifest = self._download_webpage(
            manifest_url, video_id, 'Downloading formats manifest')

        url_map = {}
        # splitlines() + strip() instead of split('\n'): playlists may be
        # CRLF-terminated, and a trailing '\r' must not end up in the URL.
        for raw_line in manifest.splitlines():
            format_url = raw_line.strip()
            if not format_url or format_url.startswith('#'):
                # Blank line or playlist tag/comment, not a stream URL.
                continue
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
        return url_map
 | 
			
		||||
 | 
			
		||||
    def _extract_annotations(self, video_id):
        """Download the in-video annotations document for video_id.

        Returns whatever self._download_webpage returns for the
        annotations_invideo endpoint.
        """
        annotations_url = (
            'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s'
            % video_id)
        return self._download_webpage(
            annotations_url,
            video_id,
            note='Searching for annotations.',
            errnote='Unable to download video annotations.')
 | 
			
		||||
@@ -1573,19 +1558,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
        if self._downloader.params.get('writeannotations', False):
 | 
			
		||||
            video_annotations = self._extract_annotations(video_id)
 | 
			
		||||
 | 
			
		||||
        def _map_to_format_list(urlmap):
            """Turn an {itag: url} mapping into a list of format dicts.

            Each entry carries 'format_id', 'url' and 'player_url' (taken
            from the enclosing scope). When the itag is present in
            self._formats, that table's entry is merged on top.
            """
            def _describe(format_id, real_url):
                # Base fields first, so that table values override them.
                entry = {
                    'format_id': format_id,
                    'url': real_url,
                    'player_url': player_url,
                }
                if format_id in self._formats:
                    entry.update(self._formats[format_id])
                return entry

            return [
                _describe(format_id, real_url)
                for format_id, real_url in urlmap.items()
            ]
 | 
			
		||||
 | 
			
		||||
        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
 | 
			
		||||
            self.report_rtmp_download()
 | 
			
		||||
            formats = [{
 | 
			
		||||
@@ -1718,11 +1690,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                formats.append(dct)
 | 
			
		||||
        elif video_info.get('hlsvp'):
 | 
			
		||||
            manifest_url = video_info['hlsvp'][0]
 | 
			
		||||
            url_map = self._extract_from_m3u8(manifest_url, video_id)
 | 
			
		||||
            formats = _map_to_format_list(url_map)
 | 
			
		||||
            # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
 | 
			
		||||
            for a_format in formats:
 | 
			
		||||
            formats = []
 | 
			
		||||
            m3u8_formats = self._extract_m3u8_formats(
 | 
			
		||||
                manifest_url, video_id, 'mp4', fatal=False)
 | 
			
		||||
            for a_format in m3u8_formats:
 | 
			
		||||
                itag = self._search_regex(
 | 
			
		||||
                    r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
 | 
			
		||||
                if itag:
 | 
			
		||||
                    a_format['format_id'] = itag
 | 
			
		||||
                    if itag in self._formats:
 | 
			
		||||
                        dct = self._formats[itag].copy()
 | 
			
		||||
                        dct.update(a_format)
 | 
			
		||||
                        a_format = dct
 | 
			
		||||
                a_format['player_url'] = player_url
 | 
			
		||||
                # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
 | 
			
		||||
                a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
 | 
			
		||||
                formats.append(a_format)
 | 
			
		||||
        else:
 | 
			
		||||
            unavailable_message = self._html_search_regex(
 | 
			
		||||
                r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user