[tnaflix] Improve and make generic
This commit is contained in:
		@@ -5,11 +5,17 @@ import re
 | 
				
			|||||||
from .common import InfoExtractor
 | 
					from .common import InfoExtractor
 | 
				
			||||||
from ..utils import (
 | 
					from ..utils import (
 | 
				
			||||||
    parse_duration,
 | 
					    parse_duration,
 | 
				
			||||||
    str_to_int,
 | 
					    fix_xml_ampersands,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TNAFlixIE(InfoExtractor):
 | 
					class TNAFlixIE(InfoExtractor):
 | 
				
			||||||
    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
 | 
					    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    _TITLE_REGEX = None
 | 
				
			||||||
 | 
					    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
 | 
				
			||||||
 | 
					    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    _TEST = {
 | 
					    _TEST = {
 | 
				
			||||||
        'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
 | 
					        'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
 | 
				
			||||||
        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
 | 
					        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
 | 
				
			||||||
@@ -18,8 +24,9 @@ class TNAFlixIE(InfoExtractor):
 | 
				
			|||||||
            'display_id': 'Carmella-Decesare-striptease',
 | 
					            'display_id': 'Carmella-Decesare-striptease',
 | 
				
			||||||
            'ext': 'mp4',
 | 
					            'ext': 'mp4',
 | 
				
			||||||
            'title': 'Carmella Decesare - striptease',
 | 
					            'title': 'Carmella Decesare - striptease',
 | 
				
			||||||
 | 
					            'description': '',
 | 
				
			||||||
            'thumbnail': 're:https?://.*\.jpg$',
 | 
					            'thumbnail': 're:https?://.*\.jpg$',
 | 
				
			||||||
            #'duration': 84,
 | 
					            'duration': 91,
 | 
				
			||||||
            'age_limit': 18,
 | 
					            'age_limit': 18,
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -31,14 +38,30 @@ class TNAFlixIE(InfoExtractor):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        webpage = self._download_webpage(url, display_id)
 | 
					        webpage = self._download_webpage(url, display_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        redir_url = self._html_search_regex(
 | 
					        title = self._html_search_regex(
 | 
				
			||||||
            r'flashvars.config = escape\("([^"]+)"', webpage, 'redirection URL')
 | 
					            self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
 | 
				
			||||||
        redirection_webpage = self._download_webpage(redir_url, display_id)
 | 
					        description = self._html_search_regex(
 | 
				
			||||||
        sources = self._search_regex(
 | 
					            self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='')
 | 
				
			||||||
            r'<quality>(.+)</quality>', redirection_webpage, 'sources', flags=re.MULTILINE|re.DOTALL)
 | 
					
 | 
				
			||||||
 | 
					        age_limit = self._rta_search(webpage)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        duration = self._html_search_meta('duration', webpage, 'duration', default=None)
 | 
				
			||||||
 | 
					        if duration:
 | 
				
			||||||
 | 
					            duration = parse_duration(duration[1:])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        cfg_url = self._html_search_regex(
 | 
				
			||||||
 | 
					            self._CONFIG_REGEX, webpage, 'flashvars.config')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        cfg_xml = self._download_xml(
 | 
				
			||||||
 | 
					            cfg_url, display_id, note='Downloading metadata',
 | 
				
			||||||
 | 
					            transform_source=fix_xml_ampersands)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        thumbnail = cfg_xml.find('./startThumb').text
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        formats = []
 | 
					        formats = []
 | 
				
			||||||
        for format_id, video_url in re.findall(r'<res>([^<]+)</res>\s*<videoLink>([^<]+)</videoLink>', sources, flags=re.MULTILINE|re.DOTALL):
 | 
					        for item in cfg_xml.findall('./quality/item'):
 | 
				
			||||||
 | 
					            video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text)
 | 
				
			||||||
 | 
					            format_id = item.find('res').text
 | 
				
			||||||
            fmt = {
 | 
					            fmt = {
 | 
				
			||||||
                'url': video_url,
 | 
					                'url': video_url,
 | 
				
			||||||
                'format_id': format_id,
 | 
					                'format_id': format_id,
 | 
				
			||||||
@@ -49,20 +72,13 @@ class TNAFlixIE(InfoExtractor):
 | 
				
			|||||||
            formats.append(fmt)
 | 
					            formats.append(fmt)
 | 
				
			||||||
        self._sort_formats(formats)
 | 
					        self._sort_formats(formats)
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        title = self._og_search_title(webpage)
 | 
					 | 
				
			||||||
        
 | 
					 | 
				
			||||||
        #duration = self._html_search_regex(r'<meta itemprop="duration" content="T(\d+)M(\d+)S"', webpage, 'duration')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        thumbnail = self._html_search_regex(
 | 
					 | 
				
			||||||
            r'<meta\s+itemprop="thumbnailUrl"\s+content="([^"]+)"',
 | 
					 | 
				
			||||||
            webpage, 'thumbnail', fatal=False)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        return {
 | 
					        return {
 | 
				
			||||||
            'id': video_id,
 | 
					            'id': video_id,
 | 
				
			||||||
            'display_id': display_id,
 | 
					            'display_id': display_id,
 | 
				
			||||||
            'url': video_url,
 | 
					 | 
				
			||||||
            'title': title,
 | 
					            'title': title,
 | 
				
			||||||
 | 
					            'description': description,
 | 
				
			||||||
            'thumbnail': thumbnail,
 | 
					            'thumbnail': thumbnail,
 | 
				
			||||||
            #'duration': duration,
 | 
					            'duration': duration,
 | 
				
			||||||
            'age_limit': self._rta_search(webpage),
 | 
					            'age_limit': age_limit,
 | 
				
			||||||
 | 
					            'formats': formats,
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user