[generic] Allow multiple matches for generic hits (Fixes #2818)
This commit is contained in:
		@@ -637,70 +637,77 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            return self.url_result(smotri_url, 'Smotri')
 | 
			
		||||
 | 
			
		||||
        # Start with something easy: JW Player in SWFObject
 | 
			
		||||
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
        found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
 | 
			
		||||
        if not found:
 | 
			
		||||
            # Look for gorilla-vid style embedding
 | 
			
		||||
            mobj = re.search(r'''(?sx)
 | 
			
		||||
            found = re.findall(r'''(?sx)
 | 
			
		||||
                (?:
 | 
			
		||||
                    jw_plugins|
 | 
			
		||||
                    JWPlayerOptions|
 | 
			
		||||
                    jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
 | 
			
		||||
                )
 | 
			
		||||
                .*?file\s*:\s*["\'](.*?)["\']''', webpage)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
        if not found:
 | 
			
		||||
            # Broaden the search a little bit
 | 
			
		||||
            mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            # Broaden the search a little bit: JWPlayer JS loader
 | 
			
		||||
            mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
 | 
			
		||||
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
 | 
			
		||||
        if not found:
 | 
			
		||||
            # Broaden the search a little bit: JWPlayer JS loader
 | 
			
		||||
            found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
 | 
			
		||||
        if not found:
 | 
			
		||||
            # Try to find twitter cards info
 | 
			
		||||
            mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
 | 
			
		||||
        if not found:
 | 
			
		||||
            # We look for Open Graph info:
 | 
			
		||||
            # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
 | 
			
		||||
            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
 | 
			
		||||
            m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
 | 
			
		||||
            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
 | 
			
		||||
            if m_video_type is not None:
 | 
			
		||||
                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
                found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
 | 
			
		||||
        if not found:
 | 
			
		||||
            # HTML5 video
 | 
			
		||||
            mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            mobj = re.search(
 | 
			
		||||
            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)
 | 
			
		||||
        if not found:
 | 
			
		||||
            found = re.findall(
 | 
			
		||||
                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
 | 
			
		||||
                r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
 | 
			
		||||
                webpage)
 | 
			
		||||
            if mobj:
 | 
			
		||||
                new_url = mobj.group(1)
 | 
			
		||||
            if found:
 | 
			
		||||
                new_url = found.group(1)
 | 
			
		||||
                self.report_following_redirect(new_url)
 | 
			
		||||
                return {
 | 
			
		||||
                    '_type': 'url',
 | 
			
		||||
                    'url': new_url,
 | 
			
		||||
                }
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
        if not found:
 | 
			
		||||
            raise ExtractorError('Unsupported URL: %s' % url)
 | 
			
		||||
 | 
			
		||||
        # It's possible that one of the regexes
 | 
			
		||||
        # matched, but returned an empty group:
 | 
			
		||||
        if mobj.group(1) is None:
 | 
			
		||||
            raise ExtractorError('Did not find a valid video URL at %s' % url)
 | 
			
		||||
        entries = []
 | 
			
		||||
        for video_url in found:
 | 
			
		||||
            video_url = compat_urlparse.urljoin(url, video_url)
 | 
			
		||||
            video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
 | 
			
		||||
 | 
			
		||||
        video_url = mobj.group(1)
 | 
			
		||||
        video_url = compat_urlparse.urljoin(url, video_url)
 | 
			
		||||
        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
 | 
			
		||||
            # Sometimes, jwplayer extraction will result in a YouTube URL
 | 
			
		||||
            if YoutubeIE.suitable(video_url):
 | 
			
		||||
                entries.append(self.url_result(video_url, 'Youtube'))
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
        # Sometimes, jwplayer extraction will result in a YouTube URL
 | 
			
		||||
        if YoutubeIE.suitable(video_url):
 | 
			
		||||
            return self.url_result(video_url, 'Youtube')
 | 
			
		||||
            # here's a fun little line of code for you:
 | 
			
		||||
            video_id = os.path.splitext(video_id)[0]
 | 
			
		||||
 | 
			
		||||
        # here's a fun little line of code for you:
 | 
			
		||||
        video_id = os.path.splitext(video_id)[0]
 | 
			
		||||
            entries.append({
 | 
			
		||||
                'id': video_id,
 | 
			
		||||
                'url': video_url,
 | 
			
		||||
                'uploader': video_uploader,
 | 
			
		||||
                'title': video_title,
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        if len(entries) == 1:
 | 
			
		||||
            return entries[1]
 | 
			
		||||
        else:
 | 
			
		||||
            for num, e in enumerate(entries, start=1):
 | 
			
		||||
                e['title'] = '%s (%d)' % (e['title'], num)
 | 
			
		||||
            return {
 | 
			
		||||
                '_type': 'playlist',
 | 
			
		||||
                'entries': entries,
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
            'uploader': video_uploader,
 | 
			
		||||
            'title': video_title,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user