[arte] Clean up format sorting mess
We now use our standard sorting facilities. As a side effect, it's finally possible to download German videos from French URLs and vice versa.
This commit is contained in:
		@@ -8,10 +8,10 @@ from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    find_xpath_attr,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    get_element_by_id,
 | 
			
		||||
    get_element_by_attribute,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    qualities,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
# There are different sources of video in arte.tv, the extraction process 
 | 
			
		||||
@@ -102,79 +102,54 @@ class ArteTVPlus7IE(InfoExtractor):
 | 
			
		||||
            'upload_date': unified_strdate(upload_date_str),
 | 
			
		||||
            'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
 | 
			
		||||
        }
 | 
			
		||||
        qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
 | 
			
		||||
 | 
			
		||||
        all_formats = []
 | 
			
		||||
        formats = []
 | 
			
		||||
        for format_id, format_dict in player_info['VSR'].items():
 | 
			
		||||
            fmt = dict(format_dict)
 | 
			
		||||
            fmt['format_id'] = format_id
 | 
			
		||||
            all_formats.append(fmt)
 | 
			
		||||
        # Some formats use the m3u8 protocol
 | 
			
		||||
        all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
 | 
			
		||||
        def _match_lang(f):
 | 
			
		||||
            if f.get('versionCode') is None:
 | 
			
		||||
                return True
 | 
			
		||||
            # Return true if that format is in the language of the url
 | 
			
		||||
            if lang == 'fr':
 | 
			
		||||
                l = 'F'
 | 
			
		||||
            elif lang == 'de':
 | 
			
		||||
                l = 'A'
 | 
			
		||||
            else:
 | 
			
		||||
                l = lang
 | 
			
		||||
            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
 | 
			
		||||
            return any(re.match(r, f['versionCode']) for r in regexes)
 | 
			
		||||
        # Some formats may not be in the same language as the url
 | 
			
		||||
        # TODO: Might want not to drop videos that do not match the requested language
 | 
			
		||||
        # but to process those formats with lower precedence
 | 
			
		||||
        formats = filter(_match_lang, all_formats)
 | 
			
		||||
        formats = list(formats)  # in python3 filter returns an iterator
 | 
			
		||||
        if not formats:
 | 
			
		||||
            # Some videos are only available in the 'Originalversion'
 | 
			
		||||
            # they aren't tagged as being in French or German
 | 
			
		||||
            # Sometimes there are neither videos of requested lang code
 | 
			
		||||
            # nor original version videos available
 | 
			
		||||
            # For such cases we just take all_formats as is
 | 
			
		||||
            formats = all_formats
 | 
			
		||||
            if not formats:
 | 
			
		||||
                raise ExtractorError('The formats list is empty')
 | 
			
		||||
            f = dict(format_dict)
 | 
			
		||||
            versionCode = f.get('versionCode')
 | 
			
		||||
 | 
			
		||||
        if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
 | 
			
		||||
            def sort_key(f):
 | 
			
		||||
                return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
 | 
			
		||||
        else:
 | 
			
		||||
            def sort_key(f):
 | 
			
		||||
                versionCode = f.get('versionCode')
 | 
			
		||||
                if versionCode is None:
 | 
			
		||||
                    versionCode = ''
 | 
			
		||||
                return (
 | 
			
		||||
                    # Sort first by quality
 | 
			
		||||
                    int(f.get('height', -1)),
 | 
			
		||||
                    int(f.get('bitrate', -1)),
 | 
			
		||||
                    # The original version with subtitles has lower relevance
 | 
			
		||||
                    re.match(r'VO-ST(F|A)', versionCode) is None,
 | 
			
		||||
                    # The version with sourds/mal subtitles has also lower relevance
 | 
			
		||||
                    re.match(r'VO?(F|A)-STM\1', versionCode) is None,
 | 
			
		||||
                    # Prefer http downloads over m3u8
 | 
			
		||||
                    0 if f['url'].endswith('m3u8') else 1,
 | 
			
		||||
                )
 | 
			
		||||
        formats = sorted(formats, key=sort_key)
 | 
			
		||||
        def _format(format_info):
 | 
			
		||||
            info = {
 | 
			
		||||
                'format_id': format_info['format_id'],
 | 
			
		||||
                'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
 | 
			
		||||
                'width': int_or_none(format_info.get('width')),
 | 
			
		||||
                'height': int_or_none(format_info.get('height')),
 | 
			
		||||
                'tbr': int_or_none(format_info.get('bitrate')),
 | 
			
		||||
            langcode = {
 | 
			
		||||
                'fr': 'F',
 | 
			
		||||
                'de': 'A',
 | 
			
		||||
            }.get(lang, lang)
 | 
			
		||||
            lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode]
 | 
			
		||||
            lang_pref = (
 | 
			
		||||
                None if versionCode is None else (
 | 
			
		||||
                    10 if any(re.match(r, versionCode) for r in lang_rexs)
 | 
			
		||||
                    else -10))
 | 
			
		||||
            source_pref = 0
 | 
			
		||||
            if versionCode is not None:
 | 
			
		||||
                # The original version with subtitles has lower relevance
 | 
			
		||||
                if re.match(r'VO-ST(F|A)', versionCode):
 | 
			
		||||
                    source_pref -= 10
 | 
			
		||||
                # The version with sourds/mal subtitles has also lower relevance
 | 
			
		||||
                elif re.match(r'VO?(F|A)-STM\1', versionCode):
 | 
			
		||||
                    source_pref -= 9
 | 
			
		||||
            format = {
 | 
			
		||||
                'format_id': format_id,
 | 
			
		||||
                'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
 | 
			
		||||
                'language_preference': lang_pref,
 | 
			
		||||
                'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
 | 
			
		||||
                'width': int_or_none(f.get('width')),
 | 
			
		||||
                'height': int_or_none(f.get('height')),
 | 
			
		||||
                'tbr': int_or_none(f.get('bitrate')),
 | 
			
		||||
                'quality': qfunc(f['quality']),
 | 
			
		||||
                'source_preference': source_pref,
 | 
			
		||||
            }
 | 
			
		||||
            if format_info['mediaType'] == 'rtmp':
 | 
			
		||||
                info['url'] = format_info['streamer']
 | 
			
		||||
                info['play_path'] = 'mp4:' + format_info['url']
 | 
			
		||||
                info['ext'] = 'flv'
 | 
			
		||||
            else:
 | 
			
		||||
                info['url'] = format_info['url']
 | 
			
		||||
                info['ext'] = determine_ext(info['url'])
 | 
			
		||||
            return info
 | 
			
		||||
        info_dict['formats'] = [_format(f) for f in formats]
 | 
			
		||||
 | 
			
		||||
            if f.get('mediaType') == 'rtmp':
 | 
			
		||||
                format['url'] = f['streamer']
 | 
			
		||||
                format['play_path'] = 'mp4:' + f['url']
 | 
			
		||||
                format['ext'] = 'flv'
 | 
			
		||||
            else:
 | 
			
		||||
                format['url'] = f['url']
 | 
			
		||||
 | 
			
		||||
            formats.append(format)
 | 
			
		||||
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        info_dict['formats'] = formats
 | 
			
		||||
        return info_dict
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -87,6 +87,11 @@ class InfoExtractor(object):
 | 
			
		||||
                                 by this field, regardless of all other values.
 | 
			
		||||
                                 -1 for default (order by other properties),
 | 
			
		||||
                                 -2 or smaller for less than default.
 | 
			
		||||
                    * language_preference  Is this in the correct requested
 | 
			
		||||
                                 language?
 | 
			
		||||
                                 10 if it's what the URL is about,
 | 
			
		||||
                                 -1 for default (don't know),
 | 
			
		||||
                                 -10 otherwise, other values reserved for now.
 | 
			
		||||
                    * quality    Order number of the video quality of this
 | 
			
		||||
                                 format, irrespective of the file format.
 | 
			
		||||
                                 -1 for default (order by other properties),
 | 
			
		||||
@@ -615,6 +620,7 @@ class InfoExtractor(object):
 | 
			
		||||
 | 
			
		||||
            return (
 | 
			
		||||
                preference,
 | 
			
		||||
                f.get('language_preference') if f.get('language_preference') is not None else -1,
 | 
			
		||||
                f.get('quality') if f.get('quality') is not None else -1,
 | 
			
		||||
                f.get('height') if f.get('height') is not None else -1,
 | 
			
		||||
                f.get('width') if f.get('width') is not None else -1,
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user