Merge remote-tracking branch 'upstream/master'
This commit is contained in:
		@@ -26,9 +26,9 @@ tests = [
 | 
			
		||||
    # 85
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
 | 
			
		||||
     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
 | 
			
		||||
    # 84
 | 
			
		||||
    # 84 - vflh9ybst 2013/08/23 (sporadic)
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
 | 
			
		||||
     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
 | 
			
		||||
     "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
 | 
			
		||||
    # 83
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
 | 
			
		||||
     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
 | 
			
		||||
 
 | 
			
		||||
@@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE
 | 
			
		||||
from .generic import GenericIE
 | 
			
		||||
from .googleplus import GooglePlusIE
 | 
			
		||||
from .googlesearch import GoogleSearchIE
 | 
			
		||||
from .hark import HarkIE
 | 
			
		||||
from .hotnewhiphop import HotNewHipHopIE
 | 
			
		||||
from .howcast import HowcastIE
 | 
			
		||||
from .hypem import HypemIE
 | 
			
		||||
@@ -57,6 +58,7 @@ from .pornotube import PornotubeIE
 | 
			
		||||
from .rbmaradio import RBMARadioIE
 | 
			
		||||
from .redtube import RedTubeIE
 | 
			
		||||
from .ringtv import RingTVIE
 | 
			
		||||
from .ro220 import Ro220IE
 | 
			
		||||
from .roxwel import RoxwelIE
 | 
			
		||||
from .rtlnow import RTLnowIE
 | 
			
		||||
from .sina import SinaIE
 | 
			
		||||
@@ -116,12 +118,14 @@ _ALL_CLASSES = [
 | 
			
		||||
]
 | 
			
		||||
_ALL_CLASSES.append(GenericIE)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def gen_extractors():
    """Instantiate every class listed in _ALL_CLASSES.

    Returns a list holding one new instance per class, in the same order
    as _ALL_CLASSES. The order does matter; the first extractor matched
    is the one handling the URL.
    """
    extractors = []
    for ie_class in _ALL_CLASSES:
        extractors.append(ie_class())
    return extractors
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_info_extractor(ie_name):
    """Return the info extractor class registered under *ie_name*.

    The class is looked up in this module's global namespace under the
    name ``ie_name + 'IE'``; a KeyError propagates when no such global
    exists.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]
 | 
			
		||||
 
 | 
			
		||||
@@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    _TEST ={
 | 
			
		||||
        u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
 | 
			
		||||
        u'file': u'93440716.mp4',
 | 
			
		||||
        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
 | 
			
		||||
        u'file': u'93440716.flv',
 | 
			
		||||
        u'md5': u'e59995ac63d0457783ea05f93f12a866',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'title': u'网事知多少 第32期:车怒',
 | 
			
		||||
        },
 | 
			
		||||
 
 | 
			
		||||
@@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
 | 
			
		||||
        u'file': u'x33vw9.mp4',
 | 
			
		||||
        u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"uploader": u"Alex and Van .", 
 | 
			
		||||
            u"uploader": u"Amphora Alex and Van .", 
 | 
			
		||||
            u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
@@ -7,12 +7,14 @@ from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_error,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_parse_urlparse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
from .brightcove import BrightcoveIE
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GenericIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = u'Generic downloader that works on some sites'
 | 
			
		||||
    _VALID_URL = r'.*'
 | 
			
		||||
@@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            u'file': u'13601338388002.mp4',
 | 
			
		||||
            u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
 | 
			
		||||
            u'info_dict': {
 | 
			
		||||
                u"uploader": u"www.hodiho.fr", 
 | 
			
		||||
                u"uploader": u"www.hodiho.fr",
 | 
			
		||||
                u"title": u"R\u00e9gis plante sa Jeep"
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
@@ -124,7 +126,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            raise ExtractorError(u'Invalid URL: %s' % url)
 | 
			
		||||
 | 
			
		||||
        self.report_extraction(video_id)
 | 
			
		||||
        # Look for BrigthCove:
 | 
			
		||||
        # Look for BrightCove:
 | 
			
		||||
        m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
 | 
			
		||||
        if m_brightcove is not None:
 | 
			
		||||
            self.to_screen(u'Brightcove video detected.')
 | 
			
		||||
@@ -161,6 +163,10 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            raise ExtractorError(u'Invalid URL: %s' % url)
 | 
			
		||||
 | 
			
		||||
        video_url = compat_urllib_parse.unquote(mobj.group(1))
 | 
			
		||||
        if video_url.startswith('//'):
 | 
			
		||||
            video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
 | 
			
		||||
        if '://' not in video_url:
 | 
			
		||||
            video_url = url + ('' if url.endswith('/') else '/') + video_url
 | 
			
		||||
        video_id = os.path.basename(video_url)
 | 
			
		||||
 | 
			
		||||
        # here's a fun little line of code for you:
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										35
									
								
								youtube_dl/extractor/hark.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								youtube_dl/extractor/hark.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,35 @@
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import determine_ext
 | 
			
		||||
 | 
			
		||||
class HarkIE(InfoExtractor):
    """Information extractor for audio clips hosted on www.hark.com."""

    # Group 1 is the clip id: the slug text before its first hyphen.
    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
    _TEST = {
        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        u'file': u'mmbzyhkgny.mp3',
        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
        u'info_dict': {
            u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the clip addressed by *url*.

        The clip id comes from the URL, the MP3 address from a
        ``src="....mp3"`` attribute on the clip's ``homepage_embed`` page,
        and the title from that page's ``<title>`` element with the site
        suffix removed.

        Returns a dict with the keys 'id', 'url', 'title' and 'ext'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        embed_url = "http://www.hark.com/clips/%s/homepage_embed" % video_id
        webpage = self._download_webpage(embed_url, video_id)

        # The dot is escaped so only a literal ".mp3" extension matches, and
        # the extension is captured directly instead of being re-appended
        # after an "any character" match.
        final_url = self._search_regex(
            r'src="(.+?\.mp3)"', webpage, 'video url')

        title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'video title')
        # Drop whichever of the two known site suffixes the page title has.
        for site_suffix in (' Sound Clip and Quote - Hark',
                            'Sound Clip , Quote, MP3, and Ringtone - Hark'):
            title = title.replace(site_suffix, '')

        return {'id': video_id,
                'url': final_url,
                'title': title,
                'ext': determine_ext(final_url),
                }
 | 
			
		||||
							
								
								
									
										42
									
								
								youtube_dl/extractor/ro220.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								youtube_dl/extractor/ro220.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,42 @@
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    clean_html,
 | 
			
		||||
    compat_parse_qs,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Ro220IE(InfoExtractor):
    """Information extractor for video pages on 220.ro."""

    IE_NAME = '220.ro'
    # Path layout: /<category>/<shorttitle>/<video_id>
    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
        u'file': u'LYV6doKo7f.mp4',
        u'md5': u'03af18b73a07b4088753930db7a34add',
        u'info_dict': {
            u"title": u"Luati-le Banii sez 4 ep 1",
            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
        }
    }

    def _real_extract(self, url):
        """Return the info dictionary for the video page at *url*.

        The page's ``flashVars`` <param> value is parsed with
        compat_parse_qs (presumably a parse_qs-style mapping of name to a
        list of values - confirm against utils), and the first value of
        'videoURL', 'title', 'desc' and 'preview' fills the result.
        """
        video_id = re.match(self._VALID_URL, url).group('video_id')

        page = self._download_webpage(url, video_id)
        raw_flash_vars = self._search_regex(
            r'<param name="flashVars" value="([^"]+)"',
            page, u'flashVars')
        flash_params = compat_parse_qs(raw_flash_vars)

        def first_value(key):
            # Each entry is indexed with [0], i.e. only its first value is used.
            return flash_params[key][0]

        return {
            '_type': 'video',
            'id': video_id,
            'ext': 'mp4',
            'url': first_value('videoURL'),
            'title': first_value('title'),
            'description': clean_html(first_value('desc')),
            'thumbnail': first_value('preview'),
        }
 | 
			
		||||
@@ -8,8 +8,8 @@ from ..utils import (
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
class RTLnowIE(InfoExtractor):
 | 
			
		||||
    """Information Extractor for RTLnow, RTL2now and VOXnow"""
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
 | 
			
		||||
    """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
 | 
			
		||||
        u'file': u'90419.flv',
 | 
			
		||||
@@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):
 | 
			
		||||
        u'params': {
 | 
			
		||||
            u'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
 | 
			
		||||
        u'file': u'99205.flv',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'upload_date': u'20080928', 
 | 
			
		||||
            u'title': u'Medicopter 117 - Angst!',
 | 
			
		||||
            u'description': u'Angst!',
 | 
			
		||||
            u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
 | 
			
		||||
        },
 | 
			
		||||
        u'params': {
 | 
			
		||||
            u'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self,url):
 | 
			
		||||
 
 | 
			
		||||
@@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
        elif len(s) == 85:
 | 
			
		||||
            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
 | 
			
		||||
        elif len(s) == 84:
 | 
			
		||||
            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
 | 
			
		||||
            return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
 | 
			
		||||
        elif len(s) == 83:
 | 
			
		||||
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
 | 
			
		||||
        elif len(s) == 82:
 | 
			
		||||
 
 | 
			
		||||
@@ -476,7 +476,7 @@ def formatSeconds(secs):
 | 
			
		||||
def make_HTTPS_handler(opts):
 | 
			
		||||
    if sys.version_info < (3,2):
 | 
			
		||||
        # Python's 2.x handler is very simplistic
 | 
			
		||||
        return compat_urllib_request.HTTPSHandler()
 | 
			
		||||
        return YoutubeDLHandlerHTTPS()
 | 
			
		||||
    else:
 | 
			
		||||
        import ssl
 | 
			
		||||
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 | 
			
		||||
@@ -485,7 +485,7 @@ def make_HTTPS_handler(opts):
 | 
			
		||||
        context.verify_mode = (ssl.CERT_NONE
 | 
			
		||||
                               if opts.no_check_certificate
 | 
			
		||||
                               else ssl.CERT_REQUIRED)
 | 
			
		||||
        return compat_urllib_request.HTTPSHandler(context=context)
 | 
			
		||||
        return YoutubeDLHandlerHTTPS(context=context)
 | 
			
		||||
 | 
			
		||||
class ExtractorError(Exception):
 | 
			
		||||
    """Error during info extraction."""
 | 
			
		||||
@@ -569,7 +569,8 @@ class ContentTooShortError(Exception):
 | 
			
		||||
        self.downloaded = downloaded
 | 
			
		||||
        self.expected = expected
 | 
			
		||||
 | 
			
		||||
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 | 
			
		||||
 | 
			
		||||
class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
 | 
			
		||||
    """Handler for HTTP requests and responses.
 | 
			
		||||
 | 
			
		||||
    This class, when installed with an OpenerDirector, automatically adds
 | 
			
		||||
@@ -602,8 +603,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 | 
			
		||||
        ret.code = code
 | 
			
		||||
        return ret
 | 
			
		||||
 | 
			
		||||
    def http_request(self, req):
 | 
			
		||||
        for h,v in std_headers.items():
 | 
			
		||||
    def _http_request(self, req):
 | 
			
		||||
        for h, v in std_headers.items():
 | 
			
		||||
            if h in req.headers:
 | 
			
		||||
                del req.headers[h]
 | 
			
		||||
            req.add_header(h, v)
 | 
			
		||||
@@ -618,7 +619,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 | 
			
		||||
            del req.headers['Youtubedl-user-agent']
 | 
			
		||||
        return req
 | 
			
		||||
 | 
			
		||||
    def http_response(self, req, resp):
 | 
			
		||||
    def _http_response(self, req, resp):
 | 
			
		||||
        old_resp = resp
 | 
			
		||||
        # gzip
 | 
			
		||||
        if resp.headers.get('Content-encoding', '') == 'gzip':
 | 
			
		||||
@@ -632,8 +633,16 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 | 
			
		||||
            resp.msg = old_resp.msg
 | 
			
		||||
        return resp
 | 
			
		||||
 | 
			
		||||
    https_request = http_request
 | 
			
		||||
    https_response = http_response
 | 
			
		||||
 | 
			
		||||
class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):
    """HTTP flavour of the shared handler template.

    Binds the template's private _http_request/_http_response
    implementations to the http_request/http_response attribute names.
    """
    http_response = YoutubeDLHandler_Template._http_response
    http_request = YoutubeDLHandler_Template._http_request
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):
    """HTTPS flavour of the shared handler template.

    Binds the template's private _http_request/_http_response
    implementations to the https_request/https_response attribute names.
    """
    https_response = YoutubeDLHandler_Template._http_response
    https_request = YoutubeDLHandler_Template._http_request
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def unified_strdate(date_str):
 | 
			
		||||
    """Return a string with the date in the format YYYYMMDD"""
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user