[wistia] Add extractor
This commit is contained in:
		@@ -488,7 +488,8 @@ class YoutubeDL(object):
 | 
				
			|||||||
                new_result = ie_result.copy()
 | 
					                new_result = ie_result.copy()
 | 
				
			||||||
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 | 
					                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 | 
				
			||||||
                          'entries', 'urlhandle', 'ie_key', 'duration',
 | 
					                          'entries', 'urlhandle', 'ie_key', 'duration',
 | 
				
			||||||
                          'subtitles', 'annotations', 'format'):
 | 
					                          'subtitles', 'annotations', 'format',
 | 
				
			||||||
 | 
					                          'thumbnail', 'thumbnails'):
 | 
				
			||||||
                    if f in new_result:
 | 
					                    if f in new_result:
 | 
				
			||||||
                        del new_result[f]
 | 
					                        del new_result[f]
 | 
				
			||||||
                    if f in embedded_info:
 | 
					                    if f in embedded_info:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -178,6 +178,7 @@ from .wat import WatIE
 | 
				
			|||||||
from .websurg import WeBSurgIE
 | 
					from .websurg import WeBSurgIE
 | 
				
			||||||
from .weibo import WeiboIE
 | 
					from .weibo import WeiboIE
 | 
				
			||||||
from .wimp import WimpIE
 | 
					from .wimp import WimpIE
 | 
				
			||||||
 | 
					from .wistia import WistiaIE
 | 
				
			||||||
from .worldstarhiphop import WorldStarHipHopIE
 | 
					from .worldstarhiphop import WorldStarHipHopIE
 | 
				
			||||||
from .xhamster import XHamsterIE
 | 
					from .xhamster import XHamsterIE
 | 
				
			||||||
from .xnxx import XNXXIE
 | 
					from .xnxx import XNXXIE
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -169,8 +169,13 @@ class GenericIE(InfoExtractor):
 | 
				
			|||||||
        #   Site Name | Video Title
 | 
					        #   Site Name | Video Title
 | 
				
			||||||
        #   Video Title - Tagline | Site Name
 | 
					        #   Video Title - Tagline | Site Name
 | 
				
			||||||
        # and so on and so forth; it's just not practical
 | 
					        # and so on and so forth; it's just not practical
 | 
				
			||||||
        video_title = self._html_search_regex(r'<title>(.*)</title>',
 | 
					        video_title = self._html_search_regex(
 | 
				
			||||||
            webpage, u'video title', default=u'video', flags=re.DOTALL)
 | 
					            r'(?s)<title>(.*?)</title>', webpage, u'video title',
 | 
				
			||||||
 | 
					            default=u'video')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # video uploader is domain name
 | 
				
			||||||
 | 
					        video_uploader = self._search_regex(
 | 
				
			||||||
 | 
					            r'^(?:https?://)?([^/]*)/.*', url, u'video uploader')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Look for BrightCove:
 | 
					        # Look for BrightCove:
 | 
				
			||||||
        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
 | 
					        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
 | 
				
			||||||
@@ -188,7 +193,7 @@ class GenericIE(InfoExtractor):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        # Look for embedded YouTube player
 | 
					        # Look for embedded YouTube player
 | 
				
			||||||
        matches = re.findall(
 | 
					        matches = re.findall(
 | 
				
			||||||
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
 | 
					            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
 | 
				
			||||||
        if matches:
 | 
					        if matches:
 | 
				
			||||||
            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
 | 
					            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
 | 
				
			||||||
                     for tuppl in matches]
 | 
					                     for tuppl in matches]
 | 
				
			||||||
@@ -197,13 +202,26 @@ class GenericIE(InfoExtractor):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        # Look for embedded Dailymotion player
 | 
					        # Look for embedded Dailymotion player
 | 
				
			||||||
        matches = re.findall(
 | 
					        matches = re.findall(
 | 
				
			||||||
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion.com/embed/video/.+?)\1', webpage)
 | 
					            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
 | 
				
			||||||
        if matches:
 | 
					        if matches:
 | 
				
			||||||
            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
 | 
					            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
 | 
				
			||||||
                     for tuppl in matches]
 | 
					                     for tuppl in matches]
 | 
				
			||||||
            return self.playlist_result(
 | 
					            return self.playlist_result(
 | 
				
			||||||
                urlrs, playlist_id=video_id, playlist_title=video_title)
 | 
					                urlrs, playlist_id=video_id, playlist_title=video_title)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Look for embedded Wistia player
 | 
				
			||||||
 | 
					        match = re.search(
 | 
				
			||||||
 | 
					            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
 | 
				
			||||||
 | 
					        if match:
 | 
				
			||||||
 | 
					            return {
 | 
				
			||||||
 | 
					                '_type': 'url_transparent',
 | 
				
			||||||
 | 
					                'url': unescapeHTML(match.group('url')),
 | 
				
			||||||
 | 
					                'ie_key': 'Wistia',
 | 
				
			||||||
 | 
					                'uploader': video_uploader,
 | 
				
			||||||
 | 
					                'title': video_title,
 | 
				
			||||||
 | 
					                'id': video_id,
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Look for Bandcamp pages with custom domain
 | 
					        # Look for Bandcamp pages with custom domain
 | 
				
			||||||
        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
 | 
					        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
 | 
				
			||||||
        if mobj is not None:
 | 
					        if mobj is not None:
 | 
				
			||||||
@@ -247,14 +265,9 @@ class GenericIE(InfoExtractor):
 | 
				
			|||||||
        # here's a fun little line of code for you:
 | 
					        # here's a fun little line of code for you:
 | 
				
			||||||
        video_id = os.path.splitext(video_id)[0]
 | 
					        video_id = os.path.splitext(video_id)[0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # video uploader is domain name
 | 
					 | 
				
			||||||
        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
 | 
					 | 
				
			||||||
            url, u'video uploader')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        return {
 | 
					        return {
 | 
				
			||||||
            'id':       video_id,
 | 
					            'id':       video_id,
 | 
				
			||||||
            'url':      video_url,
 | 
					            'url':      video_url,
 | 
				
			||||||
            'uploader': video_uploader,
 | 
					            'uploader': video_uploader,
 | 
				
			||||||
            'upload_date':  None,
 | 
					 | 
				
			||||||
            'title':    video_title,
 | 
					            'title':    video_title,
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										55
									
								
								youtube_dl/extractor/wistia.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								youtube_dl/extractor/wistia.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,55 @@
 | 
				
			|||||||
 | 
					import json
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from .common import InfoExtractor
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class WistiaIE(InfoExtractor):
					    """Extractor for Wistia iframe embeds (``wistia.net/embed/iframe/<id>``).

					    The embed page passes a JSON document to ``Wistia.iframeInit(...)``.
					    Its ``name`` entry is used as the title and its ``assets`` mapping
					    supplies the media formats and still-image thumbnails.
					    """

					    _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'

					    _TEST = {
					        u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt",
					        u"file": u"sh7fpupwlt.mov",
					        u"md5": u"cafeb56ec0c53c18c97405eecb3133df",
					        u"info_dict": {
					            u"title": u"cfh_resourceful_zdkh_final_1"
					        },
					    }

					    def _real_extract(self, url):
					        """Build the info dict for a single Wistia embed URL.

					        Args:
					            url: An embed URL matching ``_VALID_URL``.

					        Returns:
					            A dict with ``id``, ``title``, ``formats`` (sorted by ascending
					            file size, unknown sizes first) and ``thumbnails``.

					        Raises:
					            KeyError: if the embedded JSON lacks ``assets`` or ``name``, or
					                an asset lacks ``url`` (or a media asset lacks ``ext``).
					            ValueError: if the captured text is not valid JSON.
					        """
					        mobj = re.match(self._VALID_URL, url)
					        video_id = mobj.group('id')

					        webpage = self._download_webpage(url, video_id)
					        # The dot is escaped so that only the literal call
					        # "Wistia.iframeInit(" is matched, not any character in its place.
					        # NOTE(review): _html_search_regex presumably HTML-cleans the
					        # captured text; confirm that cannot corrupt the JSON payload.
					        data_json = self._html_search_regex(
					            r'Wistia\.iframeInit\((.*?), {}\);', webpage, u'video data')

					        data = json.loads(data_json)

					        formats = []
					        thumbnails = []
					        for atype, asset in data['assets'].items():
					            if atype == 'preview':
					                # Preview assets are neither offered as formats nor
					                # as thumbnails.
					                continue
					            if atype == 'still':
					                thumbnail = {'url': asset['url']}
					                width = asset.get('width')
					                height = asset.get('height')
					                # Only advertise a resolution when both dimensions exist,
					                # instead of failing the whole extraction.
					                if width is not None and height is not None:
					                    thumbnail['resolution'] = '%dx%d' % (width, height)
					                thumbnails.append(thumbnail)
					                continue
					            formats.append({
					                'format_id': atype,
					                'url': asset['url'],
					                # Dimensions and size are optional metadata; a missing
					                # value must not abort extraction of the other assets.
					                'width': asset.get('width'),
					                'height': asset.get('height'),
					                'filesize': asset.get('size'),
					                'ext': asset['ext'],
					            })
					        # Smallest file first; an unknown size is ranked as 0 so that the
					        # comparison never mixes None with integers.
					        formats.sort(key=lambda f: f['filesize'] or 0)

					        return {
					            'id': video_id,
					            'title': data['name'],
					            'formats': formats,
					            'thumbnails': thumbnails,
					        }
 | 
				
			||||||
		Reference in New Issue
	
	Block a user