Compare commits
95 Commits
2013.07.24
...
2013.08.21
Author | SHA1 | Date | |
---|---|---|---|
|
4b2d7cae11 | ||
|
7fea7156cb | ||
|
3093468977 | ||
|
79cb25776f | ||
|
87f78946a5 | ||
|
d741e55a42 | ||
|
17d3aaaf16 | ||
|
ea55b2a4ca | ||
|
3f0537dd4a | ||
|
01b32990da | ||
|
dbda1b5147 | ||
|
ddf3bd328b | ||
|
b9c37b92cf | ||
|
f9c3c90ca8 | ||
|
6daccbe317 | ||
|
71ea844c0e | ||
|
3a7256697e | ||
|
d1ba998274 | ||
|
718ced8d8c | ||
|
e1842025d0 | ||
|
0577177e3e | ||
|
298f833b16 | ||
|
0f399e6e5e | ||
|
5b075e27cb | ||
|
8a9d86a2a7 | ||
|
d468a09789 | ||
|
9f4ab73d7f | ||
|
02cf62e240 | ||
|
67fb0c5495 | ||
|
4efba05c56 | ||
|
0f90943e45 | ||
|
526e638c8a | ||
|
356e067390 | ||
|
e2f48f9643 | ||
|
b513a251f8 | ||
|
36cb11f068 | ||
|
7a4c6cc92f | ||
|
7edcb8f39c | ||
|
39b782b390 | ||
|
577664c8e8 | ||
|
bba12cec89 | ||
|
70c4c03cb8 | ||
|
f5791ed136 | ||
|
fbf189a6ee | ||
|
09825cb5c0 | ||
|
ed27d35674 | ||
|
fd5539eb41 | ||
|
04bca64bde | ||
|
03cc7c20c1 | ||
|
4075311d94 | ||
|
86fe61c8f9 | ||
|
9bb6d2f21d | ||
|
e3f4593e76 | ||
|
1d043b93cf | ||
|
b15d4f624f | ||
|
4aa16a50f5 | ||
|
bbcbf4d459 | ||
|
930ad9eecc | ||
|
b072a9defd | ||
|
75952c6e3d | ||
|
05afc96b73 | ||
|
fa80026915 | ||
|
2bc3de0f28 | ||
|
99c7bc94af | ||
|
152c8f349d | ||
|
d75654c15e | ||
|
0725f584e1 | ||
|
8cda9241d1 | ||
|
a3124ba49f | ||
|
579e2691fe | ||
|
63f05de10b | ||
|
caeefc29eb | ||
|
a3c736def2 | ||
|
58261235f0 | ||
|
da70877a1b | ||
|
5c468ca8a8 | ||
|
aedd6bb97d | ||
|
733d9cacb8 | ||
|
42f2805e48 | ||
|
0ffcb7c6fc | ||
|
27669bd11d | ||
|
6625f82940 | ||
|
d0866f0bb4 | ||
|
09eeb75130 | ||
|
0a99956f71 | ||
|
12ef6aefa8 | ||
|
e93aa81aa6 | ||
|
755eb0320e | ||
|
43ba5456b1 | ||
|
156d5ad6da | ||
|
c626a3d9fa | ||
|
b2e8bc1b20 | ||
|
771822ebb8 | ||
|
eb6a41ba0f | ||
|
7d2392691c |
@@ -17,24 +17,33 @@ tests = [
|
||||
# 87 - vflART1Nf 2013/07/24
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
|
||||
"tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
|
||||
# 86 - vfl_ymO4Z 2013/06/27
|
||||
# 86 - vflm_D8eE 2013/07/31
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
|
||||
">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre"),
|
||||
# 85 - vflSAFCP9 2013/07/19
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
||||
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
|
||||
# 84
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
|
||||
# 83 - vflcaqGO8 2013/07/11
|
||||
# 83 - vflTWC9KW 2013/08/01
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||
"urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"),
|
||||
"qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"),
|
||||
# 82
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
|
||||
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
|
||||
# 81
|
||||
# 81 - vflLC8JvQ 2013/07/25
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
|
||||
"urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."),
|
||||
"C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
|
||||
# 79 - vflLC8JvQ 2013/07/25 (sporadic)
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
|
||||
"Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
|
||||
]
|
||||
|
||||
tests_age_gate = [
|
||||
# 86 - vflqinMWD
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
|
||||
]
|
||||
|
||||
def find_matching(wrong, right):
|
||||
@@ -87,6 +96,8 @@ def genall(tests):
|
||||
|
||||
def main():
    """Print genall's output for the regular tests, then for the age-gate tests."""
    # The age-gate section is introduced by its own heading line.
    for heading, cases in ((None, tests), (u' Age gate:', tests_age_gate)):
        if heading is not None:
            print(heading)
        print(genall(cases))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -50,6 +50,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
|
||||
def test_no_duplicates(self):
|
||||
ies = gen_extractors()
|
||||
|
38
test/test_playlists.py
Normal file
38
test/test_playlists.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
import json
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
|
||||
from youtube_dl.utils import *
|
||||
|
||||
from helper import FakeYDL
|
||||
|
||||
class TestPlaylists(unittest.TestCase):
    """Checks that the playlist extractors return results of type 'playlist'."""

    def assertIsPlaylist(self, info):
        """Assert that the result dict carries '_type' == 'playlist'."""
        self.assertEqual(info['_type'], 'playlist')

    def test_dailymotion_playlist(self):
        # Extract a Dailymotion playlist and check its type, title and size.
        extractor = DailymotionPlaylistIE(FakeYDL())
        playlist = extractor.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
        self.assertIsPlaylist(playlist)
        self.assertEqual(playlist['title'], u'SPORT')
        self.assertTrue(len(playlist['entries']) > 20)

    def test_vimeo_channel(self):
        # Extract a Vimeo channel and check its type, title and size.
        extractor = VimeoChannelIE(FakeYDL())
        playlist = extractor.extract('http://vimeo.com/channels/tributes')
        self.assertIsPlaylist(playlist)
        self.assertEqual(playlist['title'], u'Vimeo Tributes')
        self.assertTrue(len(playlist['entries']) > 24)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -1,67 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import unittest
|
||||
import sys
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor.youtube import YoutubeIE
|
||||
from helper import FakeYDL
|
||||
|
||||
sig = YoutubeIE(FakeYDL())._decrypt_signature
|
||||
|
||||
class TestYoutubeSig(unittest.TestCase):
    """Checks the module-level `sig` callable against known input/output pairs.

    Each test is named after the length of the scrambled input it feeds in.
    """

    def test_92(self):
        scrambled = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
        expected = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
        self.assertEqual(sig(scrambled), expected)

    def test_90(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
        expected = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
        self.assertEqual(sig(scrambled), expected)

    def test_88(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
        expected = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
        self.assertEqual(sig(scrambled), expected)

    def test_87(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
        expected = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"
        self.assertEqual(sig(scrambled), expected)

    def test_86(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
        expected = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
        self.assertEqual(sig(scrambled), expected)

    def test_85(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
        expected = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
        self.assertEqual(sig(scrambled), expected)

    def test_84(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
        expected = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
        self.assertEqual(sig(scrambled), expected)

    def test_83(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
        expected = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"
        self.assertEqual(sig(scrambled), expected)

    def test_82(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
        expected = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
        self.assertEqual(sig(scrambled), expected)

    def test_81(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."
        expected = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."
        self.assertEqual(sig(scrambled), expected)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -329,6 +329,35 @@ class FileDownloader(object):
|
||||
self.report_error(u'mplayer exited with code %d' % retval)
|
||||
return False
|
||||
|
||||
def _download_m3u8_with_ffmpeg(self, filename, url):
    """Download an m3u8 stream by handing the URL to the ffmpeg executable.

    ffmpeg is asked to write an mp4 to the temporary name for *filename*;
    on success that file is renamed to *filename* and the progress hooks
    are called with a 'finished' status.

    Returns True when ffmpeg exits with code 0, and False when ffmpeg
    cannot be run or exits with a non-zero code (an error is reported
    in both cases).
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
    # Check for ffmpeg first
    try:
        # Open devnull in a context manager so the handle is closed after
        # the probe instead of being leaked.
        with open(os.path.devnull, 'w') as devnull:
            subprocess.call(['ffmpeg', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
    except (OSError, IOError):
        self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0])
        return False

    retval = subprocess.call(args)
    if retval == 0:
        fsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
        self.try_rename(tmpfilename, filename)
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })
        return True
    else:
        self.to_stderr(u"\n")
        self.report_error(u'ffmpeg exited with code %d' % retval)
        return False
|
||||
|
||||
|
||||
def _do_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
@@ -354,6 +383,10 @@ class FileDownloader(object):
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
return self._download_with_mplayer(filename, url)
|
||||
|
||||
# m3u8 manifests are downloaded with ffmpeg
|
||||
if determine_ext(url) == u'm3u8':
|
||||
return self._download_m3u8_with_ffmpeg(filename, url)
|
||||
|
||||
tmpfilename = self.temp_name(filename)
|
||||
stream = None
|
||||
|
||||
|
@@ -100,7 +100,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
self._nopostoverwrites = nopostoverwrites
|
||||
|
||||
def get_audio_codec(self, path):
|
||||
if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
|
||||
if not self._exes['ffprobe'] and not self._exes['avprobe']:
|
||||
raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
|
||||
try:
|
||||
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
|
||||
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
|
||||
@@ -208,7 +209,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
try:
|
||||
os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
|
||||
except:
|
||||
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
|
||||
self._downloader.report_warning(u'Cannot update utime of audio file')
|
||||
|
||||
information['filepath'] = new_path
|
||||
return self._nopostoverwrites,information
|
||||
|
@@ -264,7 +264,7 @@ class YoutubeDL(object):
|
||||
self.report_error(u'Erroneous output template')
|
||||
return None
|
||||
except ValueError as err:
|
||||
self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
|
||||
self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
return None
|
||||
|
||||
def _match_entry(self, info_dict):
|
||||
@@ -547,7 +547,7 @@ class YoutubeDL(object):
|
||||
try:
|
||||
success = self.fd._do_download(filename, info_dict)
|
||||
except (OSError, IOError) as err:
|
||||
raise UnavailableVideoError()
|
||||
raise UnavailableVideoError(err)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.report_error(u'unable to download video data: %s' % str(err))
|
||||
return
|
||||
@@ -594,7 +594,7 @@ class YoutubeDL(object):
|
||||
# No clear decision yet, let IE decide
|
||||
keep_video = keep_video_wish
|
||||
except PostProcessingError as e:
|
||||
self.to_stderr(u'ERROR: ' + e.msg)
|
||||
self.report_error(e.msg)
|
||||
if keep_video is False and not self.params.get('keepvideo', False):
|
||||
try:
|
||||
self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
|
||||
|
@@ -398,6 +398,8 @@ def _real_main(argv=None):
|
||||
batchurls = batchfd.readlines()
|
||||
batchurls = [x.strip() for x in batchurls]
|
||||
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
|
||||
if opts.verbose:
|
||||
sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
|
||||
except IOError:
|
||||
sys.exit(u'ERROR: batch file could not be read')
|
||||
all_urls = batchurls + args
|
||||
|
@@ -12,7 +12,7 @@ from .comedycentral import ComedyCentralIE
|
||||
from .condenast import CondeNastIE
|
||||
from .criterion import CriterionIE
|
||||
from .cspan import CSpanIE
|
||||
from .dailymotion import DailymotionIE
|
||||
from .dailymotion import DailymotionIE, DailymotionPlaylistIE
|
||||
from .depositfiles import DepositFilesIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
@@ -38,20 +38,25 @@ from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
from .kankan import KankanIE
|
||||
from .keek import KeekIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import LivestreamIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mtv import MTVIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .nba import NBAIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pornotube import PornotubeIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .ringtv import RingTVIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .sina import SinaIE
|
||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
||||
from .spiegel import SpiegelIE
|
||||
@@ -70,7 +75,8 @@ from .ustream import UstreamIE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veoh import VeohIE
|
||||
from .vevo import VevoIE
|
||||
from .vimeo import VimeoIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .vimeo import VimeoIE, VimeoChannelIE
|
||||
from .vine import VineIE
|
||||
from .c56 import C56IE
|
||||
from .wat import WatIE
|
||||
@@ -93,6 +99,8 @@ from .youtube import (
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeWatchLaterIE,
|
||||
YoutubeFavouritesIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
@@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor):
|
||||
"""
|
||||
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
||||
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
|
||||
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
||||
_LIVE_URL = r'index-[0-9]+\.html$'
|
||||
|
||||
IE_NAME = u'arte.tv'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
|
||||
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
|
||||
|
||||
# TODO implement Live Stream
|
||||
# from ..utils import compat_urllib_parse
|
||||
@@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor):
|
||||
lang = mobj.group('lang')
|
||||
return self._extract_video(url, id, lang)
|
||||
|
||||
mobj = re.match(self._LIVEWEB_URL, url)
|
||||
if mobj is not None:
|
||||
name = mobj.group('name')
|
||||
lang = mobj.group('lang')
|
||||
return self._extract_liveweb(url, name, lang)
|
||||
|
||||
if re.search(self._LIVE_URL, video_id) is not None:
|
||||
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
||||
# self.extractLiveStream(url)
|
||||
@@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor):
|
||||
|
||||
info_dict = {'id': player_info['VID'],
|
||||
'title': player_info['VTI'],
|
||||
'description': player_info['VDE'],
|
||||
'description': player_info.get('VDE'),
|
||||
'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
|
||||
'thumbnail': player_info['programImage'],
|
||||
'ext': 'flv',
|
||||
@@ -98,12 +105,14 @@ class ArteTvIE(InfoExtractor):
|
||||
l = 'F'
|
||||
elif lang == 'de':
|
||||
l = 'A'
|
||||
regexes = [r'VO?%s' % l, r'V%s-ST.' % l]
|
||||
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||
# Some formats may not be in the same language as the url
|
||||
formats = filter(_match_lang, formats)
|
||||
# We order the formats by quality
|
||||
formats = sorted(formats, key=lambda f: int(f['height']))
|
||||
# Prefer videos without subtitles in the same language
|
||||
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
|
||||
# Pick the best quality
|
||||
format_info = formats[-1]
|
||||
if format_info['mediaType'] == u'rtmp':
|
||||
@@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
}
|
||||
|
||||
def _extract_liveweb(self, url, name, lang):
    """Extract from http://liveweb.arte.tv/

    Downloads the page at *url*, reads the event id from it, fetches the
    event XML for that id and returns an info dict with 'id', 'title',
    'url', 'ext' and 'thumbnail'. The title is read from the
    'name<Lang>' element, where <Lang> is *lang* capitalized.
    """
    webpage = self._download_webpage(url, name)
    video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
    config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
                                        video_id, u'Downloading information')
    config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
    event_doc = config_doc.find('event')
    # Keep the <video> node so the SD fallback can reuse it; the fallback
    # previously referenced an undefined name and raised NameError.
    video_doc = event_doc.find('video')
    url_node = video_doc.find('urlHd')
    if url_node is None:
        url_node = video_doc.find('urlSd')

    return {'id': video_id,
            'title': event_doc.find('name%s' % lang.capitalize()).text,
            'url': url_node.text.replace('MP4', 'mp4'),
            'ext': 'flv',
            'thumbnail': self._og_search_thumbnail(webpage),
            }
|
||||
|
@@ -4,15 +4,16 @@ import xml.etree.ElementTree
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
determine_ext,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CollegeHumorIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||
u'file': u'6902724.mp4',
|
||||
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
|
||||
@@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor):
|
||||
u'title': u'Comic-Con Cosplay Catastrophe',
|
||||
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
|
||||
u'file': u'3505939.mp4',
|
||||
u'md5': u'c51ca16b82bb456a4397987791a835f5',
|
||||
u'info_dict': {
|
||||
u'title': u'Font Conference',
|
||||
u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -49,25 +59,29 @@ class CollegeHumorIE(InfoExtractor):
|
||||
info['description'] = videoNode.findall('./description')[0].text
|
||||
info['title'] = videoNode.findall('./caption')[0].text
|
||||
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
|
||||
manifest_url = videoNode.findall('./file')[0].text
|
||||
next_url = videoNode.findall('./file')[0].text
|
||||
except IndexError:
|
||||
raise ExtractorError(u'Invalid metadata XML file')
|
||||
|
||||
manifest_url += '?hdcore=2.10.3'
|
||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||
u'Downloading XML manifest',
|
||||
u'Unable to download video info XML')
|
||||
if next_url.endswith(u'manifest.f4m'):
|
||||
manifest_url = next_url + '?hdcore=2.10.3'
|
||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||
u'Downloading XML manifest',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
||||
node_id = media_node.attrib['url']
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError as err:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
||||
node_id = media_node.attrib['url']
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError as err:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||
info['ext'] = 'mp4'
|
||||
else:
|
||||
# Old-style direct links
|
||||
info['url'] = next_url
|
||||
info['ext'] = determine_ext(info['url'])
|
||||
|
||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||
|
||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||
info['ext'] = 'mp4'
|
||||
return [info]
|
||||
return info
|
||||
|
@@ -77,7 +77,13 @@ class InfoExtractor(object):
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
return re.match(cls._VALID_URL, url) is not None
|
||||
|
||||
# This does not use has/getattr intentionally - we want to know whether
|
||||
# we have cached the regexp for *this* class, whereas getattr would also
|
||||
# match the superclass
|
||||
if '_VALID_URL_RE' not in cls.__dict__:
|
||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||
return cls._VALID_URL_RE.match(url) is not None
|
||||
|
||||
@classmethod
|
||||
def working(cls):
|
||||
|
@@ -1,9 +1,12 @@
|
||||
import re
|
||||
import json
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
get_element_by_attribute,
|
||||
get_element_by_id,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -77,3 +80,31 @@ class DailymotionIE(InfoExtractor):
|
||||
'ext': video_extension,
|
||||
'thumbnail': info['thumbnail_url']
|
||||
}]
|
||||
|
||||
|
||||
class DailymotionPlaylistIE(InfoExtractor):
    """Extractor for Dailymotion playlist pages.

    Walks the numbered playlist pages, collects the video ids found on
    each, and returns a playlist result with one url_result per video.
    """
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    # When this pattern is absent from a page, paging stops.
    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'

    def _real_extract(self, url):
        playlist_id = re.match(self._VALID_URL, url).group('id')
        video_ids = []

        pagenum = 1
        while True:
            page_url = 'https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum)
            webpage = self._download_webpage(page_url, playlist_id,
                                             u'Downloading page %s' % pagenum)

            listing = get_element_by_attribute(u'class', u'video_list', webpage)
            video_ids += re.findall(r'data-id="(.+?)" data-ext-id', listing)

            has_next_page = re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL)
            if has_next_page is None:
                break
            pagenum += 1

        entries = []
        for video_id in video_ids:
            video_url = 'http://www.dailymotion.com/video/%s' % video_id
            entries.append(self.url_result(video_url, 'Dailymotion'))

        # The title is taken from the last page that was downloaded.
        playlist_title = get_element_by_id(u'playlist_name', webpage)
        return {'_type': 'playlist',
                'id': playlist_id,
                'title': playlist_title,
                'entries': entries,
                }
|
||||
|
@@ -8,17 +8,30 @@ class ExfmIE(InfoExtractor):
|
||||
IE_NAME = u'exfm'
|
||||
IE_DESC = u'ex.fm'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
|
||||
_SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
|
||||
_TEST = {
|
||||
u'url': u'http://ex.fm/song/1bgtzg',
|
||||
u'file': u'1bgtzg.mp3',
|
||||
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
|
||||
u'info_dict': {
|
||||
u"title": u"We Can't Stop",
|
||||
u"uploader": u"Miley Cyrus",
|
||||
u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37'
|
||||
}
|
||||
}
|
||||
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://ex.fm/song/1bgtzg',
|
||||
u'file': u'95223130.mp3',
|
||||
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
|
||||
u'info_dict': {
|
||||
u"title": u"We Can't Stop - Miley Cyrus",
|
||||
u"uploader": u"Miley Cyrus",
|
||||
u'upload_date': u'20130603',
|
||||
u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
|
||||
},
|
||||
u'note': u'Soundcloud song',
|
||||
},
|
||||
{
|
||||
u'url': u'http://ex.fm/song/wddt8',
|
||||
u'file': u'wddt8.mp3',
|
||||
u'md5': u'966bd70741ac5b8570d8e45bfaed3643',
|
||||
u'info_dict': {
|
||||
u'title': u'Safe and Sound',
|
||||
u'uploader': u'Capital Cities',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -26,11 +39,10 @@ class ExfmIE(InfoExtractor):
|
||||
info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
|
||||
webpage = self._download_webpage(info_url, song_id)
|
||||
info = json.loads(webpage)
|
||||
song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url'])
|
||||
if song_url is not None:
|
||||
song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28"
|
||||
else:
|
||||
song_url = info['song']['url']
|
||||
song_url = info['song']['url']
|
||||
if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
|
||||
self.to_screen('Soundcloud song detected')
|
||||
return self.url_result(song_url.replace('/stream',''), 'Soundcloud')
|
||||
return [{
|
||||
'id': song_id,
|
||||
'url': song_url,
|
||||
|
@@ -107,8 +107,13 @@ class GenericIE(InfoExtractor):
|
||||
return new_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
new_url = self._test_redirect(url)
|
||||
if new_url: return [self.url_result(new_url)]
|
||||
try:
|
||||
new_url = self._test_redirect(url)
|
||||
if new_url:
|
||||
return [self.url_result(new_url)]
|
||||
except compat_urllib_error.HTTPError:
|
||||
# This may be a stupid server that doesn't like HEAD, our UA, or so
|
||||
pass
|
||||
|
||||
video_id = url.split('/')[-1]
|
||||
try:
|
||||
@@ -144,6 +149,9 @@ class GenericIE(InfoExtractor):
|
||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
|
||||
if mobj is None:
|
||||
# HTML5 video
|
||||
mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
class InaIE(InfoExtractor):
|
||||
"""Information Extractor for Ina.fr"""
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
|
||||
_TEST = {
|
||||
u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||
u'file': u'I12055569.mp4',
|
||||
|
37
youtube_dl/extractor/kankan.py
Normal file
37
youtube_dl/extractor/kankan.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class KankanIE(InfoExtractor):
    """Extractor for kankan.com video pages whose URL ends in <digits>.shtml."""
    _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'

    _TEST = {
        u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
        u'file': u'48863.flv',
        u'md5': u'29aca1e47ae68fc28804aca89f29507e',
        u'info_dict': {
            u'title': u'Ready To Go',
        },
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # Both values are read from the video page itself.
        video_title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', page, u'video title')
        gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', page, u'gcid')

        # The gcid is sent to a second endpoint, whose response supplies the
        # host ("ip") and path of the video file.
        cdn_info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
        cdn_info = self._download_webpage(cdn_info_url, video_id,
                                          u'Downloading video url info')
        host = self._search_regex(r'ip:"(.+?)"', cdn_info, u'video url ip')
        path = self._search_regex(r'path:"(.+?)"', cdn_info, u'video url path')
        video_url = 'http://%s%s' % (host, path)

        info = {'id': video_id,
                'title': video_title,
                'url': video_url,
                'ext': determine_ext(video_url),
                }
        return info
|
@@ -4,10 +4,10 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class KeekIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
|
||||
IE_NAME = u'keek'
|
||||
_TEST = {
|
||||
u'url': u'http://www.keek.com/ytdl/keeks/NODfbab',
|
||||
u'url': u'https://www.keek.com/ytdl/keeks/NODfbab',
|
||||
u'file': u'NODfbab.mp4',
|
||||
u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
|
||||
u'info_dict': {
|
||||
|
64
youtube_dl/extractor/muzu.py
Normal file
64
youtube_dl/extractor/muzu.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class MuzuTVIE(InfoExtractor):
    """Information extractor for music videos hosted on muzu.tv.

    Metadata (title, thumbnail, description, uploader) is read from the
    site's oEmbed endpoint; the media URL is requested from the player API.
    """

    # The dots of the host name are escaped so that they only match a
    # literal '.' instead of any character.
    _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
    IE_NAME = u'muzu.tv'

    _TEST = {
        u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
        u'file': u'1981454.mp4',
        u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
        u'info_dict': {
            u'title': u'Cat Walk (Original Mix)',
            u'description': u'md5:90e868994de201b2570e4e5854e19420',
            u'uploader': u'MarcAshken featuring SOS',
        },
    }

    def _real_extract(self, url):
        """Return the info dictionary for the muzu.tv video at *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        info_data = compat_urllib_parse.urlencode({'format': 'json',
                                                   'url': url,
                                                   })
        video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
                                                 video_id, u'Downloading video info')
        info = json.loads(video_info_page)

        player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
                                                  video_id, u'Downloading player info')
        video_info = json.loads(player_info_page)['videos'][0]
        # Take the first quality, from best to worst, whose 'v<quality>'
        # entry in the player info is truthy.  When no entry is set the loop
        # runs to completion and the lowest quality ('360') is requested.
        for quality in ['1080', '720', '480', '360']:
            if video_info.get('v%s' % quality):
                break

        data = compat_urllib_parse.urlencode({'ai': video_id,
                                              # Even if each time you watch a video the hash changes,
                                              # it seems to work for different videos, and it will work
                                              # even if you use any non empty string as a hash
                                              'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
                                              'device': 'web',
                                              'qv': quality,
                                              })
        video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
                                                video_id, u'Downloading video url')
        video_url_info = json.loads(video_url_page)
        video_url = video_url_info['url']

        return {'id': video_id,
                'title': info['title'],
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': info['thumbnail_url'],
                'description': info['description'],
                'uploader': info['author_name'],
                }
|
@@ -2,11 +2,13 @@ import binascii
|
||||
import base64
|
||||
import hashlib
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_ord,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -16,7 +18,7 @@ from ..utils import (
|
||||
class MyVideoIE(InfoExtractor):
|
||||
"""Information Extractor for myvideo.de."""
|
||||
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
|
||||
IE_NAME = u'myvideo'
|
||||
_TEST = {
|
||||
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
|
||||
@@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor):
|
||||
'ext': video_ext,
|
||||
}]
|
||||
|
||||
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
|
||||
if mobj is not None:
|
||||
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
|
||||
response = self._download_webpage(request, video_id,
|
||||
u'Downloading video info')
|
||||
info = json.loads(base64.b64decode(response).decode('utf-8'))
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
|
||||
'play_path': info['filename'],
|
||||
'ext': 'flv',
|
||||
'thumbnail': info['thumbnail'][0]['url'],
|
||||
}
|
||||
|
||||
# try encxml
|
||||
mobj = re.search('var flashvars={(.+?)}', webpage)
|
||||
if mobj is None:
|
||||
|
52
youtube_dl/extractor/ooyala.py
Normal file
52
youtube_dl/extractor/ooyala.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unescapeHTML
|
||||
|
||||
class OoyalaIE(InfoExtractor):
    """Information extractor for videos embedded through the ooyala.com player."""

    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'

    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
        u'info_dict': {
            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
        },
    }

    def _extract_result(self, info, more_info):
        """Build one video info dictionary.

        *info* supplies the embed code, title and url; *more_info* supplies
        the description and the 'promo' entry that is used as thumbnail.
        """
        result = {
            'id': info['embedCode'],
            'ext': 'mp4',
            'title': unescapeHTML(info['title']),
            'url': info['url'],
            'description': unescapeHTML(more_info['description']),
            'thumbnail': more_info['promo'],
        }
        return result

    def _real_extract(self, url):
        """Return a single video, or a playlist when the player data has a 'lineup'."""
        embedCode = re.match(self._VALID_URL, url).group('id')
        player_page = self._download_webpage(
            'http://player.ooyala.com/player.js?embedCode=%s' % embedCode,
            embedCode)
        mobile_url = self._search_regex(
            r'mobile_player_url="(.+?)&device="',
            player_page, u'mobile player url')
        mobile_page = self._download_webpage(mobile_url, embedCode)

        # Both JSON blobs sit inside eval("...") calls with their double
        # quotes backslash-escaped; the escaping is removed before parsing.
        raw_info = self._search_regex(
            r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);',
            mobile_page, u'info')
        raw_info = raw_info.replace('\\"', '"')
        raw_more_info = self._search_regex(
            r'eval\("\(({.*?\\"promo\\".*?})\)"',
            mobile_page, u'more info')
        raw_more_info = raw_more_info.replace('\\"', '"')
        videos_info = json.loads(raw_info)
        videos_more_info = json.loads(raw_more_info)

        if not videos_more_info.get('lineup'):
            return self._extract_result(videos_info[0], videos_more_info)

        entries = [
            self._extract_result(video, extra)
            for video, extra in zip(videos_info, videos_more_info['lineup'])
        ]
        return {
            '_type': 'playlist',
            'id': embedCode,
            'title': unescapeHTML(videos_more_info['title']),
            'entries': entries,
        }
|
||||
|
49
youtube_dl/extractor/roxwel.py
Normal file
49
youtube_dl/extractor/roxwel.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate, determine_ext
|
||||
|
||||
|
||||
class RoxwelIE(InfoExtractor):
    """Information extractor for roxwel.com player pages."""

    _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'

    _TEST = {
        u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
        u'file': u'passionpittakeawalklive.flv',
        u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
        u'info_dict': {
            u'title': u'Take A Walk (live)',
            u'uploader': u'Passion Pit',
            u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
        },
        u'skip': u'Requires rtmpdump',
    }

    def _real_extract(self, url):
        """Return the info dictionary for the roxwel.com video at *url*."""
        filename = re.match(self._VALID_URL, url).group('filename')
        info_page = self._download_webpage(
            'http://www.roxwel.com/api/videos/%s' % filename,
            filename, u'Downloading video info')

        self.report_extraction(filename)
        info = json.loads(info_page)

        # Only the 'flv_<number>' media rates are considered; the highest
        # number is requested.
        flv_rates = []
        for rate in info['media_rates']:
            if rate.startswith('flv_'):
                flv_rates.append(int(rate.replace('flv_', '')))
        flv_rates.sort()
        best_rate = flv_rates[-1]

        url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
        rtmp_url = self._download_webpage(
            url_page_url, filename, u'Downloading video url')
        if determine_ext(rtmp_url) == 'f4v':
            # f4v streams get an 'mp4:' prefix in front of the file name
            rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)

        result = {
            'id': filename,
            'title': info['title'],
            'url': rtmp_url,
            'ext': 'flv',
            'description': info['description'],
            'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
            'uploader': info['artist'],
            'uploader_id': info['artistname'],
            'upload_date': unified_strdate(info['dbdate']),
        }
        return result
|
100
youtube_dl/extractor/rtlnow.py
Normal file
100
youtube_dl/extractor/rtlnow.py
Normal file
@@ -0,0 +1,100 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
class RTLnowIE(InfoExtractor):
    """Information Extractor for RTLnow, RTL2now and VOXnow"""
    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    _TESTS = [
        {
            u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
            u'file': u'90419.flv',
            u'info_dict': {
                u'upload_date': u'20070416',
                u'title': u'Ahornallee - Folge 1 - Der Einzug',
                u'description': u'Folge 1 - Der Einzug',
            },
            u'params': {
                u'skip_download': True,
            },
        },
        {
            u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
            u'file': u'69756.flv',
            u'info_dict': {
                u'upload_date': u'20120519',
                u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
                u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
                u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
            },
            u'params': {
                u'skip_download': True,
            },
        },
        {
            u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
            u'file': u'13883.flv',
            u'info_dict': {
                u'upload_date': u'20090627',
                u'title': u'Voxtours - Südafrika-Reporter II',
                u'description': u'Südafrika-Reporter II',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for *url*.

        Raises ExtractorError when the player data contains no media URL.
        """
        url_match = re.match(self._VALID_URL, url)

        # The scheme is optional in _VALID_URL, so it is always re-added.
        webpage_url = u'http://' + url_match.group('url')
        video_page_url = u'http://' + url_match.group('base_url')
        video_id = url_match.group(u'video_id')

        webpage = self._download_webpage(webpage_url, video_id)
        video_title = self._html_search_regex(
            r'<title>(?P<title>[^<]+)</title>', webpage, u'title')
        playerdata_url = self._html_search_regex(
            r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
            webpage, u'playerdata_url')

        playerdata = self._download_webpage(playerdata_url, video_id)

        # Description and upload date both come from the playerdata title.
        date_match = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
        if not date_match:
            video_description = None
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract description and upload date')
        else:
            video_description = date_match.group(u'description')
            year = date_match.group('upload_date_Y')
            if not year:
                # Two-digit years are prefixed with '20'
                year = u'20' + date_match.group('upload_date_y')
            video_upload_date = (
                year
                + date_match.group('upload_date_m')
                + date_match.group('upload_date_d'))

        # Thumbnail: not every video has a thumbnail
        thumb_match = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
        video_thumbnail = thumb_match.group(u'thumbnail') if thumb_match else None

        media_match = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
        if media_match is None:
            raise ExtractorError(u'Unable to extract media URL')

        entry = {
            'id': video_id,
            'url': media_match.group(u'url'),
            'play_path': u'mp4:' + media_match.group(u'play_path'),
            'page_url': video_page_url,
            'player_url': video_page_url + u'includes/vodplayer.swf',
            'ext': 'flv',
            'title': video_title,
            'description': video_description,
            'upload_date': video_upload_date,
            'thumbnail': video_thumbnail,
        }
        return [entry]
|
@@ -19,7 +19,11 @@ class SoundcloudIE(InfoExtractor):
|
||||
of the stream token and uid
|
||||
"""
|
||||
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$'
|
||||
_VALID_URL = r'''^(?:https?://)?
|
||||
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|
||||
)
|
||||
'''
|
||||
IE_NAME = u'soundcloud'
|
||||
_TEST = {
|
||||
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
||||
@@ -33,59 +37,65 @@ class SoundcloudIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
|
||||
|
||||
def report_resolve(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self.to_screen(u'%s: Resolving id' % video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
@classmethod
|
||||
def _resolv_url(cls, url):
|
||||
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
|
||||
|
||||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group(1)
|
||||
# extract simple title (uploader + slug of song title)
|
||||
slug_title = mobj.group(2)
|
||||
full_title = '%s/%s' % (uploader, slug_title)
|
||||
|
||||
self.report_resolve(full_title)
|
||||
|
||||
url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
|
||||
resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON')
|
||||
|
||||
info = json.loads(info_json)
|
||||
def _extract_info_dict(self, info, full_title=None):
|
||||
video_id = info['id']
|
||||
self.report_extraction(full_title)
|
||||
name = full_title or video_id
|
||||
self.report_extraction(name)
|
||||
|
||||
streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
stream_json = self._download_webpage(streams_url, full_title,
|
||||
u'Downloading stream definitions',
|
||||
u'unable to download stream definitions')
|
||||
|
||||
streams = json.loads(stream_json)
|
||||
mediaURL = streams['http_mp3_128_url']
|
||||
upload_date = unified_strdate(info['created_at'])
|
||||
|
||||
return [{
|
||||
thumbnail = info['artwork_url']
|
||||
if thumbnail is not None:
|
||||
thumbnail = thumbnail.replace('-large', '-t500x500')
|
||||
return {
|
||||
'id': info['id'],
|
||||
'url': mediaURL,
|
||||
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||
'uploader': info['user']['username'],
|
||||
'upload_date': upload_date,
|
||||
'upload_date': unified_strdate(info['created_at']),
|
||||
'title': info['title'],
|
||||
'ext': u'mp3',
|
||||
'description': info['description'],
|
||||
}]
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
class SoundcloudSetIE(InfoExtractor):
|
||||
"""Information extractor for soundcloud.com sets
|
||||
To access the media, the uid of the song and a stream token
|
||||
must be extracted from the page source and the script must make
|
||||
a request to media.soundcloud.com/crossdomain.xml. Then
|
||||
the media can be grabbed by requesting from an url composed
|
||||
of the stream token and uid
|
||||
"""
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
track_id = mobj.group('track_id')
|
||||
if track_id is not None:
|
||||
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
||||
full_title = track_id
|
||||
else:
|
||||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group(1)
|
||||
# extract simple title (uploader + slug of song title)
|
||||
slug_title = mobj.group(2)
|
||||
full_title = '%s/%s' % (uploader, slug_title)
|
||||
|
||||
self.report_resolve(full_title)
|
||||
|
||||
url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
|
||||
info_json_url = self._resolv_url(url)
|
||||
info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')
|
||||
|
||||
info = json.loads(info_json)
|
||||
return self._extract_info_dict(info, full_title)
|
||||
|
||||
class SoundcloudSetIE(SoundcloudIE):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
|
||||
IE_NAME = u'soundcloud:set'
|
||||
_TEST = {
|
||||
@@ -153,10 +163,6 @@ class SoundcloudSetIE(InfoExtractor):
|
||||
]
|
||||
}
|
||||
|
||||
def report_resolve(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self.to_screen(u'%s: Resolving id' % video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
@@ -171,7 +177,7 @@ class SoundcloudSetIE(InfoExtractor):
|
||||
self.report_resolve(full_title)
|
||||
|
||||
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||
resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
resolv_url = self._resolv_url(url)
|
||||
info_json = self._download_webpage(resolv_url, full_title)
|
||||
|
||||
videos = []
|
||||
@@ -182,23 +188,8 @@ class SoundcloudSetIE(InfoExtractor):
|
||||
return
|
||||
|
||||
self.report_extraction(full_title)
|
||||
for track in info['tracks']:
|
||||
video_id = track['id']
|
||||
|
||||
streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
stream_json = self._download_webpage(streams_url, video_id, u'Downloading track info JSON')
|
||||
|
||||
self.report_extraction(video_id)
|
||||
streams = json.loads(stream_json)
|
||||
mediaURL = streams['http_mp3_128_url']
|
||||
|
||||
videos.append({
|
||||
'id': video_id,
|
||||
'url': mediaURL,
|
||||
'uploader': track['user']['username'],
|
||||
'upload_date': unified_strdate(track['created_at']),
|
||||
'title': track['title'],
|
||||
'ext': u'mp3',
|
||||
'description': track['description'],
|
||||
})
|
||||
return videos
|
||||
return {'_type': 'playlist',
|
||||
'entries': [self._extract_info_dict(track) for track in info['tracks']],
|
||||
'id': info['id'],
|
||||
'title': info['title'],
|
||||
}
|
||||
|
@@ -33,7 +33,7 @@ class TeamcocoIE(InfoExtractor):
|
||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||
data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
||||
|
||||
video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
|
||||
video_url = self._html_search_regex(r'<file [^>]*type="high".*?>(.*?)</file>',
|
||||
data, u'video URL')
|
||||
|
||||
return [{
|
||||
|
@@ -6,20 +6,17 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
class TF1IE(InfoExtractor):
|
||||
"""
|
||||
TF1 uses the wat.tv player, currently it can only download videos with the
|
||||
html5 player enabled, it cannot download HD videos.
|
||||
"""
|
||||
_WORKING = False
|
||||
"""TF1 uses the wat.tv player."""
|
||||
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
|
||||
_TEST = {
|
||||
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||
u'file': u'10635995.mp4',
|
||||
u'md5': u'66789d3e91278d332f75e1feb7aea327',
|
||||
u'md5': u'2e378cc28b9957607d5e88f274e637d8',
|
||||
u'info_dict': {
|
||||
u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
|
||||
u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
|
||||
}
|
||||
},
|
||||
u'skip': u'Sometimes wat serves the whole file with the --test option',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
class VevoIE(InfoExtractor):
|
||||
"""
|
||||
Accecps urls from vevo.com or in the format 'vevo:{id}'
|
||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||
(currently used by MTVIE)
|
||||
"""
|
||||
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
|
||||
@@ -19,7 +19,7 @@ class VevoIE(InfoExtractor):
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20130624",
|
||||
u"uploader": u"Hurts",
|
||||
u"title": u"Somebody To Die For"
|
||||
u"title": u"Somebody to Die For"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,12 +35,12 @@ class VevoIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
video_info = json.loads(info_json)
|
||||
m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):(?P<url>.*?)"', links_webpage))
|
||||
m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage))
|
||||
if m_urls is None or len(m_urls) == 0:
|
||||
raise ExtractorError(u'Unable to extract video url')
|
||||
# They are sorted from worst to best quality
|
||||
m_url = m_urls[-1]
|
||||
video_url = base_url + m_url.group('url')
|
||||
video_url = base_url + '/' + m_url.group('url')
|
||||
ext = m_url.group('ext')
|
||||
|
||||
return {'url': video_url,
|
||||
|
49
youtube_dl/extractor/videofyme.py
Normal file
49
youtube_dl/extractor/videofyme.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
class VideofyMeIE(InfoExtractor):
    """Information extractor for videos hosted on videofy.me.

    All metadata is read from the XML configuration served by
    sunshine.videofy.me for the numeric video id.
    """

    # The dots of both host names are escaped so that they only match a
    # literal '.' instead of any character.
    _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
    IE_NAME = u'videofy.me'

    _TEST = {
        u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
        u'file': u'1100701.mp4',
        u'md5': u'2046dd5758541d630bfa93e741e2fd79',
        u'info_dict': {
            u'title': u'This is VideofyMe',
            u'description': None,
            u'uploader': u'VideofyMe',
            u'uploader_id': u'thisisvideofyme',
        },

    }

    def _real_extract(self, url):
        """Return the info dictionary for the videofy.me video at *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
                                            video_id)
        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        video = config.find('video')
        sources = video.find('sources')
        # Prefer the source with id 'HQ on' and fall back to 'HQ off'
        url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on')
        if url_node is None:
            url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off')
        video_url = url_node.find('url').text

        return {'id': video_id,
                'title': video.find('title').text,
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': video.find('thumb').text,
                'description': video.find('description').text,
                'uploader': config.find('blog/name').text,
                'uploader_id': video.find('identifier').text,
                # First run of digits in the <views> text, kept as a string
                'view_count': re.search(r'\d+', video.find('views').text).group(),
                }
|
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -171,3 +172,31 @@ class VimeoIE(InfoExtractor):
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
}]
|
||||
|
||||
|
||||
class VimeoChannelIE(InfoExtractor):
    """Information extractor for vimeo.com channels.

    The channel's video pages are walked one by one and every clip id found
    is returned as a playlist entry pointing at the regular Vimeo extractor.
    """

    IE_NAME = u'vimeo:channel'
    # NOTE: the host used to be written 'vimeo.\com'; '\c' is not a valid
    # regex escape (newer Python versions refuse to compile it) and the
    # unescaped '.' matched any character.
    _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)'
    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'

    def _real_extract(self, url):
        """Return a playlist dictionary with all videos of the channel at *url*."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        video_ids = []

        # Keep requesting pages until one has no rel="next" link.
        for pagenum in itertools.count(1):
            webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
                                             channel_id, u'Downloading page %s' % pagenum)
            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break

        entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
                   for video_id in video_ids]
        # The title is taken from the last downloaded page.  The id comes
        # straight from the URL, so it is escaped before being used inside
        # a regular expression.
        channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % re.escape(channel_id),
                                                webpage, u'channel title')
        return {'_type': 'playlist',
                'id': channel_id,
                'title': channel_title,
                'entries': entries,
                }
|
||||
|
@@ -12,17 +12,17 @@ from ..utils import (
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
|
||||
IE_NAME = 'wat.tv'
|
||||
_TEST = {
|
||||
u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
|
||||
u'file': u'10631273.mp4',
|
||||
u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a',
|
||||
u'md5': u'd8b2231e1e333acd12aad94b80937e19',
|
||||
u'info_dict': {
|
||||
u'title': u'World War Z - Philadelphia VOST',
|
||||
u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
|
||||
}
|
||||
},
|
||||
u'skip': u'Sometimes wat serves the whole file with the --test option',
|
||||
}
|
||||
|
||||
def download_video_info(self, real_id):
|
||||
@@ -59,20 +59,8 @@ class WatIE(InfoExtractor):
|
||||
|
||||
# Otherwise we can continue and extract just one part, we have to use
|
||||
# the short id for getting the video url
|
||||
player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id,
|
||||
'html5': '1'})
|
||||
player_info = self._download_webpage('http://www.wat.tv/player?' + player_data,
|
||||
real_id, u'Downloading player info')
|
||||
player = json.loads(player_info)['player']
|
||||
html5_player = self._html_search_regex(r'iframe src="(.*?)"', player,
|
||||
'html5 player')
|
||||
player_webpage = self._download_webpage(html5_player, real_id,
|
||||
u'Downloading player webpage')
|
||||
|
||||
video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage,
|
||||
'video url')
|
||||
info = {'id': real_id,
|
||||
'url': video_url,
|
||||
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
||||
'ext': 'mp4',
|
||||
'title': first_chapter['title'],
|
||||
'thumbnail': first_chapter['preview'],
|
||||
|
@@ -21,6 +21,13 @@ class WorldStarHipHopIE(InfoExtractor):
|
||||
|
||||
webpage_src = self._download_webpage(url, video_id)
|
||||
|
||||
m_vevo_id = re.search(r'videoId=(.*?)&?',
|
||||
webpage_src)
|
||||
|
||||
if m_vevo_id is not None:
|
||||
self.to_screen(u'Vevo video detected:')
|
||||
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
|
||||
|
||||
video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
|
||||
webpage_src, u'video URL')
|
||||
|
||||
|
@@ -23,8 +23,114 @@ from ..utils import (
|
||||
orderedSet,
|
||||
)
|
||||
|
||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
"""Provide base functions for Youtube extractors"""
|
||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
|
||||
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
# If True it will raise an error if no login info is provided
|
||||
_LOGIN_REQUIRED = False
|
||||
|
||||
class YoutubeIE(InfoExtractor):
|
||||
def report_lang(self):
    """Tell the user that the language is about to be set."""
    message = u'Setting language'
    self.to_screen(message)
|
||||
|
||||
def _set_language(self):
    """Request _LANG_URL so that the site language is set.

    Returns True when the request succeeded; on a network error a warning
    is reported and False is returned.
    """
    lang_request = compat_urllib_request.Request(self._LANG_URL)
    network_errors = (
        compat_urllib_error.URLError,
        compat_http_client.HTTPException,
        socket.error,
    )
    try:
        self.report_lang()
        # Only the request itself matters; the response body is discarded.
        compat_urllib_request.urlopen(lang_request).read()
    except network_errors as err:
        self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
        return False
    else:
        return True
|
||||
|
||||
def _login(self):
    """Log in with the credentials returned by _get_login_info().

    Returns True when the login form was submitted and the response does
    not show the login form again.  Returns False when no credentials are
    available, when the login page cannot be fetched or when logging in
    fails; a warning is reported for the last two cases.

    Raises ExtractorError when no credentials are available and
    _LOGIN_REQUIRED is set.
    """
    (username, password) = self._get_login_info()
    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED:
            raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        return False

    request = compat_urllib_request.Request(self._LOGIN_URL)
    try:
        login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
        return False

    # Hidden form fields that have to be sent back with the credentials;
    # either one stays None when the login page does not contain it.
    galx = None
    dsh = None
    match = re.search(r'<input.+?name="GALX".+?value="(.+?)"', login_page, re.DOTALL)
    if match:
        galx = match.group(1)
    match = re.search(r'<input.+?name="dsh".+?value="(.+?)"', login_page, re.DOTALL)
    if match:
        dsh = match.group(1)

    # Log in
    login_form_strs = {
        u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
        u'Email': username,
        u'GALX': galx,
        u'Passwd': password,
        u'PersistentCookie': u'yes',
        u'_utf8': u'霱',
        u'bgresponse': u'js_disabled',
        u'checkConnection': u'',
        u'checkedDomains': u'youtube',
        u'dnConn': u'',
        u'dsh': dsh,
        u'pstMsg': u'0',
        u'rmShown': u'1',
        u'secTok': u'',
        u'signIn': u'Sign in',
        u'timeStmp': u'',
        u'service': u'youtube',
        u'uilel': u'3',
        u'hl': u'en_US',
    }
    # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
    # chokes on unicode.  Fields whose value is None (e.g. GALX or dsh not
    # found on the login page) are left out instead of failing on
    # None.encode().
    login_form = dict((k.encode('utf-8'), v.encode('utf-8'))
                      for k, v in login_form_strs.items()
                      if v is not None)
    login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
    request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
    try:
        self.report_login()
        login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
        # Getting the login form back means the credentials were rejected
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
        return False
    return True
|
||||
|
||||
def _confirm_age(self):
    """Submit the age confirmation form to _AGE_URL.

    Returns True when the request succeeded.

    Raises ExtractorError when the request fails with a network error.
    """
    age_form = {
        'next_url': '/',
        'action_confirm': 'Confirm',
    }
    # The form is encoded to bytes the same way _login encodes its form:
    # Python 3's urllib only accepts bytes as POST data (assuming
    # compat_urllib_request maps to urllib.request there -- confirm).
    age_data = compat_urllib_parse.urlencode(age_form).encode('ascii')
    request = compat_urllib_request.Request(self._AGE_URL, age_data)
    try:
        self.report_age_confirmation()
        compat_urllib_request.urlopen(request).read().decode('utf-8')
    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
    return True
|
||||
|
||||
def _real_initialize(self):
|
||||
if self._downloader is None:
|
||||
return
|
||||
if not self._set_language():
|
||||
return
|
||||
if not self._login():
|
||||
return
|
||||
self._confirm_age()
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = u'YouTube.com'
|
||||
_VALID_URL = r"""^
|
||||
(
|
||||
@@ -35,7 +141,7 @@ class YoutubeIE(InfoExtractor):
|
||||
(?: # the various things that can precede the ID:
|
||||
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|
||||
|(?: # or the v= param in all its forms
|
||||
(?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||
(?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
|
||||
v=
|
||||
@@ -45,14 +151,16 @@ class YoutubeIE(InfoExtractor):
|
||||
([0-9A-Za-z_-]+) # here is it! the YouTube video ID
|
||||
(?(1).+)? # if we found the ID, everything can follow
|
||||
$"""
|
||||
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
|
||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
|
||||
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
# Listed in order of quality
|
||||
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
|
||||
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
|
||||
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
|
||||
'95', '94', '93', '92', '132', '151',
|
||||
'85', '84', '102', '83', '101', '82', '100',
|
||||
]
|
||||
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
|
||||
'95', '94', '93', '92', '132', '151',
|
||||
'85', '102', '84', '101', '83', '100', '82',
|
||||
]
|
||||
_video_extensions = {
|
||||
'13': '3gp',
|
||||
'17': 'mp4',
|
||||
@@ -64,6 +172,24 @@ class YoutubeIE(InfoExtractor):
|
||||
'44': 'webm',
|
||||
'45': 'webm',
|
||||
'46': 'webm',
|
||||
|
||||
# 3d videos
|
||||
'82': 'mp4',
|
||||
'83': 'mp4',
|
||||
'84': 'mp4',
|
||||
'85': 'mp4',
|
||||
'100': 'webm',
|
||||
'101': 'webm',
|
||||
'102': 'webm',
|
||||
|
||||
# videos that use m3u8
|
||||
'92': 'mp4',
|
||||
'93': 'mp4',
|
||||
'94': 'mp4',
|
||||
'95': 'mp4',
|
||||
'96': 'mp4',
|
||||
'132': 'mp4',
|
||||
'151': 'mp4',
|
||||
}
|
||||
_video_dimensions = {
|
||||
'5': '240x400',
|
||||
@@ -80,7 +206,22 @@ class YoutubeIE(InfoExtractor):
|
||||
'44': '480x854',
|
||||
'45': '720x1280',
|
||||
'46': '1080x1920',
|
||||
'82': '360p',
|
||||
'83': '480p',
|
||||
'84': '720p',
|
||||
'85': '1080p',
|
||||
'92': '240p',
|
||||
'93': '360p',
|
||||
'94': '480p',
|
||||
'95': '720p',
|
||||
'96': '1080p',
|
||||
'100': '360p',
|
||||
'101': '480p',
|
||||
'102': '720p',
|
||||
'132': '240p',
|
||||
'151': '72p',
|
||||
}
|
||||
_3d_itags = ['85', '84', '102', '83', '101', '82', '100']
|
||||
IE_NAME = u'youtube'
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -130,6 +271,21 @@ class YoutubeIE(InfoExtractor):
|
||||
u"uploader_id": u"justintimberlakeVEVO"
|
||||
}
|
||||
},
|
||||
{
|
||||
u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
|
||||
u'file': u'TGi3HqYrWHE.mp4',
|
||||
u'note': u'm3u8 video',
|
||||
u'info_dict': {
|
||||
u'title': u'Triathlon - Men - London 2012 Olympic Games',
|
||||
u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
|
||||
u'uploader': u'olympic',
|
||||
u'upload_date': u'20120807',
|
||||
u'uploader_id': u'olympic',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -139,10 +295,6 @@ class YoutubeIE(InfoExtractor):
|
||||
if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
|
||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||
|
||||
def report_lang(self):
|
||||
"""Report attempt to set language."""
|
||||
self.to_screen(u'Setting language')
|
||||
|
||||
def report_video_webpage_download(self, video_id):
|
||||
"""Report attempt to download video webpage."""
|
||||
self.to_screen(u'%s: Downloading video webpage' % video_id)
|
||||
@@ -183,26 +335,40 @@ class YoutubeIE(InfoExtractor):
|
||||
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
|
||||
elif len(s) == 90:
|
||||
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
|
||||
elif len(s) == 89:
|
||||
return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
|
||||
elif len(s) == 88:
|
||||
return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
|
||||
elif len(s) == 87:
|
||||
return s[4:23] + s[86] + s[24:85]
|
||||
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
|
||||
elif len(s) == 86:
|
||||
return s[2:63] + s[82] + s[64:82] + s[63]
|
||||
return s[5:20] + s[2] + s[21:]
|
||||
elif len(s) == 85:
|
||||
return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
|
||||
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
|
||||
elif len(s) == 84:
|
||||
return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
|
||||
return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
|
||||
elif len(s) == 83:
|
||||
return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:]
|
||||
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
|
||||
elif len(s) == 82:
|
||||
return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
|
||||
elif len(s) == 81:
|
||||
return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[2] + s[34:53] + s[24] + s[54:81]
|
||||
return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
|
||||
elif len(s) == 79:
|
||||
return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
|
||||
|
||||
else:
|
||||
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
|
||||
|
||||
def _decrypt_signature_age_gate(self, s):
|
||||
# The videos with age protection use another player, so the algorithms
|
||||
# can be different.
|
||||
if len(s) == 86:
|
||||
return s[2:63] + s[82] + s[64:82] + s[63]
|
||||
else:
|
||||
# Fallback to the other algortihms
|
||||
return self._decrypt_signature(s)
|
||||
|
||||
|
||||
def _get_available_subtitles(self, video_id):
|
||||
self.report_video_subtitles_download(video_id)
|
||||
request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
|
||||
@@ -304,92 +470,9 @@ class YoutubeIE(InfoExtractor):
|
||||
def _print_formats(self, formats):
|
||||
print('Available formats:')
|
||||
for x in formats:
|
||||
print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
|
||||
|
||||
def _real_initialize(self):
|
||||
if self._downloader is None:
|
||||
return
|
||||
|
||||
# Set language
|
||||
request = compat_urllib_request.Request(self._LANG_URL)
|
||||
try:
|
||||
self.report_lang()
|
||||
compat_urllib_request.urlopen(request).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
|
||||
# No authentication to be performed
|
||||
if username is None:
|
||||
return
|
||||
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL)
|
||||
try:
|
||||
login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
galx = None
|
||||
dsh = None
|
||||
match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
|
||||
if match:
|
||||
galx = match.group(1)
|
||||
|
||||
match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
|
||||
if match:
|
||||
dsh = match.group(1)
|
||||
|
||||
# Log in
|
||||
login_form_strs = {
|
||||
u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
|
||||
u'Email': username,
|
||||
u'GALX': galx,
|
||||
u'Passwd': password,
|
||||
u'PersistentCookie': u'yes',
|
||||
u'_utf8': u'霱',
|
||||
u'bgresponse': u'js_disabled',
|
||||
u'checkConnection': u'',
|
||||
u'checkedDomains': u'youtube',
|
||||
u'dnConn': u'',
|
||||
u'dsh': dsh,
|
||||
u'pstMsg': u'0',
|
||||
u'rmShown': u'1',
|
||||
u'secTok': u'',
|
||||
u'signIn': u'Sign in',
|
||||
u'timeStmp': u'',
|
||||
u'service': u'youtube',
|
||||
u'uilel': u'3',
|
||||
u'hl': u'en_US',
|
||||
}
|
||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
||||
# chokes on unicode
|
||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
|
||||
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
||||
try:
|
||||
self.report_login()
|
||||
login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
||||
self._downloader.report_warning(u'unable to log in: bad username or password')
|
||||
return
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
# Confirm age
|
||||
age_form = {
|
||||
'next_url': '/',
|
||||
'action_confirm': 'Confirm',
|
||||
}
|
||||
request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
|
||||
try:
|
||||
self.report_age_confirmation()
|
||||
compat_urllib_request.urlopen(request).read().decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
|
||||
print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
|
||||
self._video_dimensions.get(x, '???'),
|
||||
' (3D)' if x in self._3d_itags else ''))
|
||||
|
||||
def _extract_id(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
@@ -398,6 +481,57 @@ class YoutubeIE(InfoExtractor):
|
||||
video_id = mobj.group(2)
|
||||
return video_id
|
||||
|
||||
def _get_video_url_list(self, url_map):
|
||||
"""
|
||||
Transform a dictionary in the format {itag:url} to a list of (itag, url)
|
||||
with the requested formats.
|
||||
"""
|
||||
req_format = self._downloader.params.get('format', None)
|
||||
format_limit = self._downloader.params.get('format_limit', None)
|
||||
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
|
||||
if format_limit is not None and format_limit in available_formats:
|
||||
format_list = available_formats[available_formats.index(format_limit):]
|
||||
else:
|
||||
format_list = available_formats
|
||||
existing_formats = [x for x in format_list if x in url_map]
|
||||
if len(existing_formats) == 0:
|
||||
raise ExtractorError(u'no known formats available for video')
|
||||
if self._downloader.params.get('listformats', None):
|
||||
self._print_formats(existing_formats)
|
||||
return
|
||||
if req_format is None or req_format == 'best':
|
||||
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
|
||||
elif req_format == 'worst':
|
||||
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
|
||||
elif req_format in ('-1', 'all'):
|
||||
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
|
||||
else:
|
||||
# Specific formats. We pick the first in a slash-delimeted sequence.
|
||||
# For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
|
||||
req_formats = req_format.split('/')
|
||||
video_url_list = None
|
||||
for rf in req_formats:
|
||||
if rf in url_map:
|
||||
video_url_list = [(rf, url_map[rf])]
|
||||
break
|
||||
if video_url_list is None:
|
||||
raise ExtractorError(u'requested format not available')
|
||||
return video_url_list
|
||||
|
||||
def _extract_from_m3u8(self, manifest_url, video_id):
|
||||
url_map = {}
|
||||
def _get_urls(_manifest):
|
||||
lines = _manifest.split('\n')
|
||||
urls = filter(lambda l: l and not l.startswith('#'),
|
||||
lines)
|
||||
return urls
|
||||
manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
|
||||
formats_urls = _get_urls(manifest)
|
||||
for format_url in formats_urls:
|
||||
itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
|
||||
url_map[itag] = format_url
|
||||
return url_map
|
||||
|
||||
def _real_extract(self, url):
|
||||
if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
|
||||
self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
|
||||
@@ -552,7 +686,6 @@ class YoutubeIE(InfoExtractor):
|
||||
video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
|
||||
|
||||
# Decide which formats to download
|
||||
req_format = self._downloader.params.get('format', None)
|
||||
|
||||
try:
|
||||
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
|
||||
@@ -587,8 +720,8 @@ class YoutubeIE(InfoExtractor):
|
||||
s = url_data['s'][0]
|
||||
if age_gate:
|
||||
player_version = self._search_regex(r'ad3-(.+?)\.swf',
|
||||
video_info['ad3_module'][0], 'flash player',
|
||||
fatal=False)
|
||||
video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
|
||||
'flash player', fatal=False)
|
||||
player = 'flash player %s' % player_version
|
||||
else:
|
||||
player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
|
||||
@@ -596,41 +729,25 @@ class YoutubeIE(InfoExtractor):
|
||||
parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
|
||||
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
|
||||
(len(s), parts_sizes, url_data['itag'][0], player))
|
||||
signature = self._decrypt_signature(url_data['s'][0])
|
||||
encrypted_sig = url_data['s'][0]
|
||||
if age_gate:
|
||||
signature = self._decrypt_signature_age_gate(encrypted_sig)
|
||||
else:
|
||||
signature = self._decrypt_signature(encrypted_sig)
|
||||
url += '&signature=' + signature
|
||||
if 'ratebypass' not in url:
|
||||
url += '&ratebypass=yes'
|
||||
url_map[url_data['itag'][0]] = url
|
||||
|
||||
format_limit = self._downloader.params.get('format_limit', None)
|
||||
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
|
||||
if format_limit is not None and format_limit in available_formats:
|
||||
format_list = available_formats[available_formats.index(format_limit):]
|
||||
else:
|
||||
format_list = available_formats
|
||||
existing_formats = [x for x in format_list if x in url_map]
|
||||
if len(existing_formats) == 0:
|
||||
raise ExtractorError(u'no known formats available for video')
|
||||
if self._downloader.params.get('listformats', None):
|
||||
self._print_formats(existing_formats)
|
||||
video_url_list = self._get_video_url_list(url_map)
|
||||
if not video_url_list:
|
||||
return
|
||||
if req_format is None or req_format == 'best':
|
||||
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
|
||||
elif req_format == 'worst':
|
||||
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
|
||||
elif req_format in ('-1', 'all'):
|
||||
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
|
||||
else:
|
||||
# Specific formats. We pick the first in a slash-delimeted sequence.
|
||||
# For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
|
||||
req_formats = req_format.split('/')
|
||||
video_url_list = None
|
||||
for rf in req_formats:
|
||||
if rf in url_map:
|
||||
video_url_list = [(rf, url_map[rf])]
|
||||
break
|
||||
if video_url_list is None:
|
||||
raise ExtractorError(u'requested format not available')
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||
video_url_list = self._get_video_url_list(url_map)
|
||||
if not video_url_list:
|
||||
return
|
||||
|
||||
else:
|
||||
raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
|
||||
|
||||
@@ -639,8 +756,9 @@ class YoutubeIE(InfoExtractor):
|
||||
# Extension
|
||||
video_extension = self._video_extensions.get(format_param, 'flv')
|
||||
|
||||
video_format = '{0} - {1}'.format(format_param if format_param else video_extension,
|
||||
self._video_dimensions.get(format_param, '???'))
|
||||
video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
|
||||
self._video_dimensions.get(format_param, '???'),
|
||||
' (3D)' if format_param in self._3d_itags else '')
|
||||
|
||||
results.append({
|
||||
'id': video_id,
|
||||
@@ -670,10 +788,10 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
\? (?:.*?&)*? (?:p|a|list)=
|
||||
| p/
|
||||
)
|
||||
((?:PL|EC|UU)?[0-9A-Za-z-_]{10,})
|
||||
((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
|
||||
.*
|
||||
|
|
||||
((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
|
||||
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
|
||||
)"""
|
||||
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
|
||||
_MAX_RESULTS = 50
|
||||
@@ -692,11 +810,14 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
|
||||
# Download playlist videos from API
|
||||
playlist_id = mobj.group(1) or mobj.group(2)
|
||||
page_num = 1
|
||||
videos = []
|
||||
|
||||
while True:
|
||||
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, self._MAX_RESULTS * (page_num - 1) + 1)
|
||||
for page_num in itertools.count(1):
|
||||
start_index = self._MAX_RESULTS * (page_num - 1) + 1
|
||||
if start_index >= 1000:
|
||||
self._downloader.report_warning(u'Max number of results reached')
|
||||
break
|
||||
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
|
||||
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
||||
|
||||
try:
|
||||
@@ -716,10 +837,6 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
if 'media$group' in entry and 'media$player' in entry['media$group']:
|
||||
videos.append((index, entry['media$group']['media$player']['url']))
|
||||
|
||||
if len(response['feed']['entry']) < self._MAX_RESULTS:
|
||||
break
|
||||
page_num += 1
|
||||
|
||||
videos = [v[1] for v in sorted(videos)]
|
||||
|
||||
url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
|
||||
@@ -762,9 +879,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
|
||||
# Download any subsequent channel pages using the json-based channel_ajax query
|
||||
if self._MORE_PAGES_INDICATOR in page:
|
||||
while True:
|
||||
pagenum = pagenum + 1
|
||||
|
||||
for pagenum in itertools.count(1):
|
||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||
page = self._download_webpage(url, channel_id,
|
||||
u'Downloading page #%s' % pagenum)
|
||||
@@ -807,9 +922,8 @@ class YoutubeUserIE(InfoExtractor):
|
||||
# all of them.
|
||||
|
||||
video_ids = []
|
||||
pagenum = 0
|
||||
|
||||
while True:
|
||||
for pagenum in itertools.count(0):
|
||||
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
|
||||
|
||||
gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
|
||||
@@ -834,8 +948,6 @@ class YoutubeUserIE(InfoExtractor):
|
||||
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
|
||||
break
|
||||
|
||||
pagenum += 1
|
||||
|
||||
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
|
||||
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
|
||||
return [self.playlist_result(url_results, playlist_title = username)]
|
||||
@@ -898,33 +1010,30 @@ class YoutubeShowIE(InfoExtractor):
|
||||
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
||||
|
||||
|
||||
class YoutubeFeedsInfoExtractor(YoutubeIE):
|
||||
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
"""
|
||||
Base class for extractors that fetch info from
|
||||
http://www.youtube.com/feed_ajax
|
||||
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
||||
"""
|
||||
_LOGIN_REQUIRED = True
|
||||
_PAGING_STEP = 30
|
||||
|
||||
# Overwrite YoutubeIE properties we don't want
|
||||
_TESTS = []
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return re.match(cls._VALID_URL, url) is not None
|
||||
# use action_load_personal_feed instead of action_load_system_feed
|
||||
_PERSONAL_FEED = False
|
||||
|
||||
@property
|
||||
def _FEED_TEMPLATE(self):
|
||||
return 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=%s&paging=%%s' % self._FEED_NAME
|
||||
action = 'action_load_system_feed'
|
||||
if self._PERSONAL_FEED:
|
||||
action = 'action_load_personal_feed'
|
||||
return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
|
||||
|
||||
@property
|
||||
def IE_NAME(self):
|
||||
return u'youtube:%s' % self._FEED_NAME
|
||||
|
||||
def _real_initialize(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
|
||||
super(YoutubeFeedsInfoExtractor, self)._real_initialize()
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
feed_entries = []
|
||||
@@ -936,7 +1045,7 @@ class YoutubeFeedsInfoExtractor(YoutubeIE):
|
||||
u'Downloading page %s' % i)
|
||||
info = json.loads(info)
|
||||
feed_html = info['feed_html']
|
||||
m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html)
|
||||
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
||||
ids = orderedSet(m.group(1) for m in m_ids)
|
||||
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
|
||||
if info['paging'] is None:
|
||||
@@ -954,3 +1063,22 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
|
||||
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
|
||||
_FEED_NAME = 'recommended'
|
||||
_PLAYLIST_TITLE = u'Youtube Recommended videos'
|
||||
|
||||
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    # Extractor for the "watch later" feed; all behaviour comes from the
    # base class, only the feed-specific settings are defined here.

    # Matching and description
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'

    # Feed selection
    _FEED_NAME = 'watch_later'
    _PERSONAL_FEED = True
    # NOTE(review): presumably the paging increment used by the base class
    # when requesting successive feed pages -- confirm against its
    # _real_extract.
    _PAGING_STEP = 100

    _PLAYLIST_TITLE = u'Youtube Watch Later'
|
||||
|
||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Resolves the favourites page to its underlying playlist and hands
    # that playlist over to the 'YoutubePlaylist' extractor.
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'

    def _real_extract(self, url):
        # The favourites page is always fetched from the fixed URL below;
        # the url argument is not used.
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        # The playlist id is whatever follows the first 'list=' up to the
        # next '"' or '&'.
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
|
||||
|
@@ -207,7 +207,7 @@ if sys.version_info >= (2,7):
|
||||
def find_xpath_attr(node, xpath, key, val):
|
||||
""" Find the xpath xpath[@key=val] """
|
||||
assert re.match(r'^[a-zA-Z]+$', key)
|
||||
assert re.match(r'^[a-zA-Z@]*$', val)
|
||||
assert re.match(r'^[a-zA-Z@\s]*$', val)
|
||||
expr = xpath + u"[@%s='%s']" % (key, val)
|
||||
return node.find(expr)
|
||||
else:
|
||||
@@ -497,7 +497,7 @@ class ExtractorError(Exception):
|
||||
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
||||
expected = True
|
||||
if not expected:
|
||||
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
|
||||
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
|
||||
super(ExtractorError, self).__init__(msg)
|
||||
|
||||
self.traceback = tb
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.07.24.1'
|
||||
__version__ = '2013.08.21'
|
||||
|
Reference in New Issue
Block a user