Compare commits
344 Commits
2014.02.06
...
2014.03.11
Author | SHA1 | Date | |
---|---|---|---|
|
34ca5d9ba0 | ||
|
60cc4dc4b4 | ||
|
db95dc13a1 | ||
|
777ac90791 | ||
|
04f9bebbcb | ||
|
4ea3137e41 | ||
|
a0792b738e | ||
|
19a41fc613 | ||
|
3ee52157fb | ||
|
c4d197ee2d | ||
|
a33932cfe3 | ||
|
bcf89ce62c | ||
|
e3899d0e00 | ||
|
dcb00da49c | ||
|
aa51d20d19 | ||
|
ae7ed92057 | ||
|
e45b31d9bd | ||
|
5a25f39653 | ||
|
963d7ec412 | ||
|
e712d94adf | ||
|
6a72423955 | ||
|
4126826b10 | ||
|
b773ead7fd | ||
|
855e2750bc | ||
|
805ef3c60b | ||
|
fbc2dcb40b | ||
|
5375d7ad84 | ||
|
90f3476180 | ||
|
ee95c09333 | ||
|
75d06db9fc | ||
|
439a1fffcb | ||
|
9d9d70c462 | ||
|
b4a186b7be | ||
|
bdebf51c8f | ||
|
264b86f9b4 | ||
|
9e55e37a2e | ||
|
1471956573 | ||
|
27865b2169 | ||
|
6d07ce0162 | ||
|
edb7fc5435 | ||
|
31f77343f2 | ||
|
63ad031583 | ||
|
957688cee6 | ||
|
806d6c2e8c | ||
|
0ef68e04d9 | ||
|
a496524db2 | ||
|
935c7360cc | ||
|
340b046876 | ||
|
cc1db7f9b7 | ||
|
a4ff6c4762 | ||
|
1060425cbb | ||
|
e9c092f125 | ||
|
22ff5d2105 | ||
|
136db7881b | ||
|
dae313e725 | ||
|
b74fa8cd2c | ||
|
94eae04c94 | ||
|
16ff7ebc77 | ||
|
c361c505b0 | ||
|
d37c07c575 | ||
|
9d6105c9f0 | ||
|
8dec03ecba | ||
|
826547870b | ||
|
52d6a9a61d | ||
|
ad242b5fbc | ||
|
3524175625 | ||
|
7b9965ea93 | ||
|
0a5bce566f | ||
|
8012bd2424 | ||
|
f55a1f0a88 | ||
|
bacac173a9 | ||
|
ca1fee34f2 | ||
|
6dadaa9930 | ||
|
553f6e4633 | ||
|
652bee05f0 | ||
|
d63516e9cd | ||
|
e477dcf649 | ||
|
9d3f7781f3 | ||
|
c7095dada3 | ||
|
607dbbad76 | ||
|
17b75c0de1 | ||
|
ab24f4f3be | ||
|
e1a52d9e10 | ||
|
d0ff838433 | ||
|
b37b94501c | ||
|
cb3bb2cfef | ||
|
e2cc7983e9 | ||
|
c9ae7b9565 | ||
|
86fb4347f7 | ||
|
2fcec131f5 | ||
|
9f62eaf4ef | ||
|
f92259c026 | ||
|
0afef30b23 | ||
|
dcdfd1c711 | ||
|
2acc1f8f50 | ||
|
2c39b0c695 | ||
|
e77c5b4f63 | ||
|
409a16cb72 | ||
|
94d5e90b4f | ||
|
2d73b45805 | ||
|
271a2dbfa2 | ||
|
bf4adcac66 | ||
|
fb8b8fdd62 | ||
|
5a0b26252e | ||
|
7d78f0cc48 | ||
|
f00fc78674 | ||
|
392017874c | ||
|
c3cb92d1ab | ||
|
aa5590fa07 | ||
|
8cfb5bbf92 | ||
|
69bb54ebf9 | ||
|
ca97a56e4b | ||
|
fc26f3b4c2 | ||
|
f604c93c64 | ||
|
dc3727b65c | ||
|
aba3231de1 | ||
|
9193bab91d | ||
|
fbcf3e416d | ||
|
c0e5d85631 | ||
|
ca7fa3dcb3 | ||
|
4ccfba28d9 | ||
|
abb82f1ddc | ||
|
cda008cff1 | ||
|
1877a14049 | ||
|
546582ec3e | ||
|
4534485586 | ||
|
a9ab8855e4 | ||
|
8a44ef6868 | ||
|
0c7214c404 | ||
|
4cf9654693 | ||
|
50a138d95c | ||
|
1b86cc41cf | ||
|
91346358b0 | ||
|
f3783d4b77 | ||
|
89ef304bed | ||
|
83cebb8b7a | ||
|
9e68f9fdf1 | ||
|
2acea5c03d | ||
|
978177527e | ||
|
2648c436f3 | ||
|
33f1f2c455 | ||
|
995befe0e9 | ||
|
1bb92aff55 | ||
|
b8e1471d3a | ||
|
60daf7f0bb | ||
|
a83a3139d1 | ||
|
fdb7ca3b8d | ||
|
0d7caf5cdf | ||
|
a339d7ba91 | ||
|
7216de55d6 | ||
|
2437fbca64 | ||
|
7d75d06b78 | ||
|
13ef5648c4 | ||
|
5b2478e2ba | ||
|
8b286571c3 | ||
|
f3ac523794 | ||
|
020cf5ebfd | ||
|
54ab193970 | ||
|
8f563f32ab | ||
|
151bae3566 | ||
|
76df418cba | ||
|
d0a72674c6 | ||
|
1d430674c7 | ||
|
70cb73922b | ||
|
344400951c | ||
|
ea5a0be811 | ||
|
3c7fd0bdb2 | ||
|
6cadf8c858 | ||
|
27579b9e4c | ||
|
4d756a9cc0 | ||
|
3e668e05be | ||
|
60d3a2e0f8 | ||
|
cc3a3b6b47 | ||
|
eda1d49a62 | ||
|
62e609ab77 | ||
|
2bfe4ead4b | ||
|
b1c6c32f78 | ||
|
f6acbdecf4 | ||
|
f1c9dfcc01 | ||
|
ce78943ae1 | ||
|
d6f0d86649 | ||
|
5bb67dbfea | ||
|
47610c4d3e | ||
|
b732f3581f | ||
|
9e57ce716f | ||
|
cd7ee7aa44 | ||
|
3cfe791473 | ||
|
973f2532f5 | ||
|
bc3be21d59 | ||
|
0bf5cf9886 | ||
|
919052d094 | ||
|
a2dafe2887 | ||
|
92661c994b | ||
|
ffe8fe356a | ||
|
bc2f773b4f | ||
|
f919201ecc | ||
|
7ff5d5c2e2 | ||
|
9b77f951c7 | ||
|
a25f2f990a | ||
|
78b373975d | ||
|
2fcc873c4c | ||
|
23c2baadb3 | ||
|
521ee82334 | ||
|
1df96e59ce | ||
|
3e123c1e28 | ||
|
f38da66731 | ||
|
06aabfc422 | ||
|
1052d2bfec | ||
|
5e0b652344 | ||
|
0f8f097183 | ||
|
491ed3dda2 | ||
|
af284c6d1b | ||
|
41d3ec5fba | ||
|
0568c352f3 | ||
|
2e7b4cb714 | ||
|
9767726b66 | ||
|
9ddfd84e41 | ||
|
1cf563d84b | ||
|
7928024f57 | ||
|
3eb38acb43 | ||
|
f7300c5c90 | ||
|
3489b7d26c | ||
|
acd2bcc384 | ||
|
43e77ca455 | ||
|
da36297988 | ||
|
dbb94fb044 | ||
|
d68f0cdb23 | ||
|
eae16eb67b | ||
|
4fc946b546 | ||
|
280bc5dad6 | ||
|
f43770d8c9 | ||
|
98c4b8fa1b | ||
|
ccb079ee67 | ||
|
2ea237472c | ||
|
0d4b4865cc | ||
|
fe52f9f956 | ||
|
882907a818 | ||
|
572a89cc4e | ||
|
c377110539 | ||
|
a9c7198a0b | ||
|
f6f01ea17b | ||
|
f2d0fc6823 | ||
|
f7000f3a1b | ||
|
c7f0177fa7 | ||
|
09c4d50944 | ||
|
2eb5d315d4 | ||
|
ad5976b4d9 | ||
|
a0dfcdce5e | ||
|
96d1637082 | ||
|
960f317171 | ||
|
4412ca751d | ||
|
cbffec0c95 | ||
|
0cea52cc18 | ||
|
6d784e87f4 | ||
|
ae6cae78f1 | ||
|
0f99566c01 | ||
|
2db806b4aa | ||
|
3f32c0ba4c | ||
|
541cb26c0d | ||
|
5544e038ab | ||
|
9032dc28a6 | ||
|
03635e2a71 | ||
|
00cf938aa5 | ||
|
a5f707c495 | ||
|
1824b48169 | ||
|
07ad22b8af | ||
|
b53466e168 | ||
|
6a7a389679 | ||
|
4edff78531 | ||
|
99043c2ea5 | ||
|
e68abba910 | ||
|
3165dc4d9f | ||
|
66c43a53e4 | ||
|
463b334616 | ||
|
b71dbc57c4 | ||
|
72ca1d7f45 | ||
|
76e461f395 | ||
|
1074982e6e | ||
|
29b2aaf035 | ||
|
6f90d098c5 | ||
|
0715161450 | ||
|
896583517f | ||
|
713d31fac8 | ||
|
96cb10a5f5 | ||
|
c207c1044e | ||
|
79629ec717 | ||
|
008fda0f08 | ||
|
0ae6b01937 | ||
|
def630e523 | ||
|
c5ba203e23 | ||
|
2317e6b2b3 | ||
|
cb38928974 | ||
|
fa78f13302 | ||
|
18395217c4 | ||
|
34bd987811 | ||
|
af6ba6a1c4 | ||
|
85409a0c69 | ||
|
ebfe352b62 | ||
|
fde56d2f17 | ||
|
3501423dfe | ||
|
0de668af51 | ||
|
2a584ea90a | ||
|
0f6ed94a15 | ||
|
bcb891e82b | ||
|
ac6e4ca1ed | ||
|
2e20bba708 | ||
|
e70dc1d14b | ||
|
0793a7b3c7 | ||
|
026fcc0495 | ||
|
81c2f20b53 | ||
|
1afe753462 | ||
|
524c2c716a | ||
|
b542d4bbd7 | ||
|
cf1eb45153 | ||
|
a97bcd80ba | ||
|
17968e444c | ||
|
2e3fd9ec2f | ||
|
d6a283b025 | ||
|
9766538124 | ||
|
98dbee8681 | ||
|
e421491b3b | ||
|
6828d37c41 | ||
|
bf5f610099 | ||
|
8b7f73404a | ||
|
85cacb2f51 | ||
|
b3fa3917e2 | ||
|
082c6c867a | ||
|
03fcf1ab57 | ||
|
3b00dea5eb | ||
|
8bc6c8e3c0 | ||
|
79bc27b53a | ||
|
84dd703199 | ||
|
c6fdba23a6 | ||
|
b19fe521a9 | ||
|
c1e672d121 | ||
|
f4371f4784 | ||
|
d914d9d187 | ||
|
845d14d377 | ||
|
4a9540b6d2 | ||
|
9f31be7000 | ||
|
c0c4e66b29 | ||
|
cd8662de22 | ||
|
f2dffe55f8 | ||
|
46a073bfac |
28
README.md
28
README.md
@@ -20,7 +20,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
sure that you have sufficient permissions
|
||||
(run with sudo if needed)
|
||||
-i, --ignore-errors continue on download errors, for example to
|
||||
to skip unavailable videos in a playlist
|
||||
skip unavailable videos in a playlist
|
||||
--abort-on-error Abort downloading of further videos (in the
|
||||
playlist or the command line) if an error
|
||||
occurs
|
||||
@@ -124,8 +124,12 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
video id, %(playlist)s for the playlist the
|
||||
video is in, %(playlist_index)s for the
|
||||
position in the playlist and %% for a
|
||||
literal percent. Use - to output to stdout.
|
||||
Can also be used to download to a different
|
||||
literal percent. %(height)s and %(width)s
|
||||
for the height and width of the video
|
||||
format. %(resolution)s for a textual
|
||||
description of the resolution of the video
|
||||
format. Use - to output to stdout. Can also
|
||||
be used to download to a different
|
||||
directory, for example with -o '/my/downloa
|
||||
ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||
--autonumber-size NUMBER Specifies the number of digits in
|
||||
@@ -246,7 +250,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
|
||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
|
||||
|
||||
# OUTPUT TEMPLATE
|
||||
|
||||
@@ -281,12 +285,14 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb
|
||||
|
||||
Examples:
|
||||
|
||||
$ # Download only the videos uploaded in the last 6 months
|
||||
$ youtube-dl --dateafter now-6months
|
||||
$ # Download only the videos uploaded on January 1, 1970
|
||||
$ youtube-dl --date 19700101
|
||||
$ # will only download the videos uploaded in the 200x decade
|
||||
$ youtube-dl --dateafter 20000101 --datebefore 20091231
|
||||
# Download only the videos uploaded in the last 6 months
|
||||
$ youtube-dl --dateafter now-6months
|
||||
|
||||
# Download only the videos uploaded on January 1, 1970
|
||||
$ youtube-dl --date 19700101
|
||||
|
||||
# Download only the videos uploaded in the 200x decade
|
||||
$ youtube-dl --dateafter 20000101 --datebefore 20091231
|
||||
|
||||
# FAQ
|
||||
|
||||
@@ -355,7 +361,7 @@ If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
|
||||
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
|
||||
|
||||
# BUGS
|
||||
|
||||
|
@@ -14,9 +14,9 @@
|
||||
|
||||
set -e
|
||||
|
||||
skip_tests=false
|
||||
if [ "$1" = '--skip-test' ]; then
|
||||
skip_tests=true
|
||||
skip_tests=true
|
||||
if [ "$1" = '--run-tests' ]; then
|
||||
skip_tests=false
|
||||
shift
|
||||
fi
|
||||
|
||||
|
44
test/test_InfoExtractor.py
Normal file
44
test/test_InfoExtractor.py
Normal file
@@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
|
||||
|
||||
class TestIE(InfoExtractor):
|
||||
pass
|
||||
|
||||
|
||||
class TestInfoExtractor(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.ie = TestIE(FakeYDL())
|
||||
|
||||
def test_ie_key(self):
|
||||
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
|
||||
|
||||
def test_html_search_regex(self):
|
||||
html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
|
||||
search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
|
||||
self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')
|
||||
|
||||
def test_opengraph(self):
|
||||
ie = self.ie
|
||||
html = '''
|
||||
<meta name="og:title" content='Foo'/>
|
||||
<meta content="Some video's description " name="og:description"/>
|
||||
<meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
|
||||
'''
|
||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
@@ -13,6 +15,7 @@ from youtube_dl.extractor import (
|
||||
FacebookIE,
|
||||
gen_extractors,
|
||||
JustinTVIE,
|
||||
PBSIE,
|
||||
YoutubeIE,
|
||||
)
|
||||
|
||||
@@ -29,20 +32,20 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
|
||||
def test_youtube_playlist_matching(self):
|
||||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||
assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
||||
assertPlaylist(u'PL63F0C78739B09958')
|
||||
assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
|
||||
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
||||
assertPlaylist('PL63F0C78739B09958')
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||
# Top tracks
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||
|
||||
def test_youtube_matching(self):
|
||||
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
|
||||
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
||||
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
||||
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
||||
@@ -65,6 +68,13 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
def test_youtube_show_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
|
||||
|
||||
def test_youtube_truncated(self):
|
||||
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
|
||||
|
||||
def test_youtube_search_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||
|
||||
def test_justin_tv_channelid_matching(self):
|
||||
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
||||
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
||||
@@ -82,7 +92,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
||||
|
||||
def test_youtube_extract(self):
|
||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
|
||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
@@ -91,7 +101,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||
|
||||
def test_facebook_matching(self):
|
||||
self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||
|
||||
def test_no_duplicates(self):
|
||||
ies = gen_extractors()
|
||||
@@ -126,5 +136,9 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
|
||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
|
||||
|
||||
def test_pbs(self):
|
||||
# https://github.com/rg3/youtube-dl/issues/2350
|
||||
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -18,6 +18,7 @@ from test.helper import (
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
import youtube_dl.YoutubeDL
|
||||
@@ -72,9 +73,7 @@ def generator(test_case):
|
||||
if 'playlist' not in test_case:
|
||||
info_dict = test_case.get('info_dict', {})
|
||||
if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
|
||||
print_skipping('The output file cannot be know, the "file" '
|
||||
'key is missing or the info_dict is incomplete')
|
||||
return
|
||||
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
|
||||
if 'skip' in test_case:
|
||||
print_skipping(test_case['skip'])
|
||||
return
|
||||
@@ -137,12 +136,21 @@ def generator(test_case):
|
||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||
info_dict = json.load(infof)
|
||||
for (info_field, expected) in tc.get('info_dict', {}).items():
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
got = 'md5:' + md5(info_dict.get(info_field))
|
||||
else:
|
||||
if isinstance(expected, compat_str) and expected.startswith('re:'):
|
||||
got = info_dict.get(info_field)
|
||||
self.assertEqual(expected, got,
|
||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
match_str = expected[len('re:'):]
|
||||
match_rex = re.compile(match_str)
|
||||
|
||||
self.assertTrue(
|
||||
isinstance(got, compat_str) and match_rex.match(got),
|
||||
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||
else:
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
got = 'md5:' + md5(info_dict.get(info_field))
|
||||
else:
|
||||
got = info_dict.get(info_field)
|
||||
self.assertEqual(expected, got,
|
||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
|
||||
# If checkable fields are missing from the test case, print the info_dict
|
||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||
|
@@ -36,6 +36,7 @@ from youtube_dl.extractor import (
|
||||
RutubeChannelIE,
|
||||
GoogleSearchIE,
|
||||
GenericIE,
|
||||
TEDIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -55,10 +56,10 @@ class TestPlaylists(unittest.TestCase):
|
||||
def test_dailymotion_user(self):
|
||||
dl = FakeYDL()
|
||||
ie = DailymotionUserIE(dl)
|
||||
result = ie.extract('http://www.dailymotion.com/user/generation-quoi/')
|
||||
result = ie.extract('https://www.dailymotion.com/user/nqtv')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'Génération Quoi')
|
||||
self.assertTrue(len(result['entries']) >= 26)
|
||||
self.assertEqual(result['title'], 'Rémi Gaillard')
|
||||
self.assertTrue(len(result['entries']) >= 100)
|
||||
|
||||
def test_vimeo_channel(self):
|
||||
dl = FakeYDL()
|
||||
@@ -98,7 +99,7 @@ class TestPlaylists(unittest.TestCase):
|
||||
result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '5124905')
|
||||
self.assertTrue(len(result['entries']) >= 11)
|
||||
self.assertTrue(len(result['entries']) >= 6)
|
||||
|
||||
def test_soundcloud_set(self):
|
||||
dl = FakeYDL()
|
||||
@@ -170,12 +171,12 @@ class TestPlaylists(unittest.TestCase):
|
||||
def test_AcademicEarthCourse(self):
|
||||
dl = FakeYDL()
|
||||
ie = AcademicEarthCourseIE(dl)
|
||||
result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/')
|
||||
result = ie.extract('http://academicearth.org/playlists/laws-of-nature/')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'building-dynamic-websites')
|
||||
self.assertEqual(result['title'], 'Building Dynamic Websites')
|
||||
self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
|
||||
self.assertEqual(len(result['entries']), 10)
|
||||
self.assertEqual(result['id'], 'laws-of-nature')
|
||||
self.assertEqual(result['title'], 'Laws of Nature')
|
||||
self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
|
||||
self.assertEqual(len(result['entries']), 4)
|
||||
|
||||
def test_ivi_compilation(self):
|
||||
dl = FakeYDL()
|
||||
@@ -250,5 +251,23 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['title'], 'python language')
|
||||
self.assertTrue(len(result['entries']) == 15)
|
||||
|
||||
def test_generic_rss_feed(self):
|
||||
dl = FakeYDL()
|
||||
ie = GenericIE(dl)
|
||||
result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
|
||||
self.assertEqual(result['title'], 'Zero Punctuation')
|
||||
self.assertTrue(len(result['entries']) > 10)
|
||||
|
||||
def test_ted_playlist(self):
|
||||
dl = FakeYDL()
|
||||
ie = TEDIE(dl)
|
||||
result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '10')
|
||||
self.assertEqual(result['title'], 'Who are the hackers?')
|
||||
self.assertTrue(len(result['entries']) >= 6)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
# Various small unit tests
|
||||
import io
|
||||
import xml.etree.ElementTree
|
||||
|
||||
#from youtube_dl.utils import htmlentity_transform
|
||||
@@ -21,15 +22,18 @@ from youtube_dl.utils import (
|
||||
orderedSet,
|
||||
PagedList,
|
||||
parse_duration,
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
struct_unpack,
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
@@ -127,6 +131,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_strdate('8/7/2009'), '20090708')
|
||||
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = u'''<root>
|
||||
@@ -200,7 +205,16 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('1'), 1)
|
||||
self.assertEqual(parse_duration('1337:12'), 80232)
|
||||
self.assertEqual(parse_duration('9:12:43'), 33163)
|
||||
self.assertEqual(parse_duration('12:00'), 720)
|
||||
self.assertEqual(parse_duration('00:01:01'), 61)
|
||||
self.assertEqual(parse_duration('x:y'), None)
|
||||
self.assertEqual(parse_duration('3h11m53s'), 11513)
|
||||
self.assertEqual(parse_duration('62m45s'), 3765)
|
||||
self.assertEqual(parse_duration('6m59s'), 419)
|
||||
self.assertEqual(parse_duration('49s'), 49)
|
||||
self.assertEqual(parse_duration('0h0m0s'), 0)
|
||||
self.assertEqual(parse_duration('0m0s'), 0)
|
||||
self.assertEqual(parse_duration('0s'), 0)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
@@ -236,5 +250,21 @@ class TestUtil(unittest.TestCase):
|
||||
testPL(5, 2, (2, 99), [2, 3, 4])
|
||||
testPL(5, 2, (20, 99), [])
|
||||
|
||||
def test_struct_unpack(self):
|
||||
self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
|
||||
|
||||
def test_read_batch_urls(self):
|
||||
f = io.StringIO(u'''\xef\xbb\xbf foo
|
||||
bar\r
|
||||
baz
|
||||
# More after this line\r
|
||||
; or after this
|
||||
bam''')
|
||||
self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
|
||||
|
||||
def test_urlencode_postdata(self):
|
||||
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
|
||||
self.assertTrue(isinstance(data, bytes))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -16,6 +16,7 @@ from youtube_dl.extractor import (
|
||||
YoutubeChannelIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeTopListIE,
|
||||
YoutubeSearchURLIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,7 +31,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'ytdl test PL')
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
|
||||
|
||||
def test_youtube_playlist_noplaylist(self):
|
||||
@@ -39,7 +40,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertEqual(result['_type'], 'url')
|
||||
self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||
|
||||
def test_issue_673(self):
|
||||
dl = FakeYDL()
|
||||
@@ -59,7 +60,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||
|
||||
@@ -76,9 +77,9 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
# TODO find a > 100 (paginating?) videos course
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
entries = result['entries']
|
||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(len(entries), 25)
|
||||
self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
|
||||
def test_youtube_channel(self):
|
||||
dl = FakeYDL()
|
||||
@@ -118,6 +119,8 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||
|
||||
def test_youtube_toptracks(self):
|
||||
print('Skipping: The playlist page gives error 500')
|
||||
return
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
||||
@@ -131,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
entries = result['entries']
|
||||
self.assertTrue(len(entries) >= 5)
|
||||
|
||||
def test_youtube_search_url(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeSearchURLIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
|
||||
entries = result['entries']
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'youtube-dl test video')
|
||||
self.assertTrue(len(entries) >= 5)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -370,12 +370,15 @@ class YoutubeDL(object):
|
||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
'''
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
if self.params.get('logger') is not None:
|
||||
self.params['logger'].warning(message)
|
||||
else:
|
||||
_msg_header = 'WARNING:'
|
||||
warning_message = '%s %s' % (_msg_header, message)
|
||||
self.to_stderr(warning_message)
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
_msg_header = 'WARNING:'
|
||||
warning_message = '%s %s' % (_msg_header, message)
|
||||
self.to_stderr(warning_message)
|
||||
|
||||
def report_error(self, message, tb=None):
|
||||
'''
|
||||
@@ -409,6 +412,13 @@ class YoutubeDL(object):
|
||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||
if template_dict.get('playlist_index') is not None:
|
||||
template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
|
||||
if template_dict.get('resolution') is None:
|
||||
if template_dict.get('width') and template_dict.get('height'):
|
||||
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
||||
elif template_dict.get('height'):
|
||||
template_dict['resolution'] = '%sp' % template_dict['height']
|
||||
elif template_dict.get('width'):
|
||||
template_dict['resolution'] = '?x%d' % template_dict['width']
|
||||
|
||||
sanitize = lambda k, v: sanitize_filename(
|
||||
compat_str(v),
|
||||
@@ -675,6 +685,9 @@ class YoutubeDL(object):
|
||||
info_dict['playlist'] = None
|
||||
info_dict['playlist_index'] = None
|
||||
|
||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||
info_dict['display_id'] = info_dict['id']
|
||||
|
||||
# These extractors handle format selection themselves
|
||||
if info_dict['extractor'] in ['Youku']:
|
||||
if download:
|
||||
@@ -688,8 +701,11 @@ class YoutubeDL(object):
|
||||
else:
|
||||
formats = info_dict['formats']
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found!')
|
||||
|
||||
# We check that all the formats have the format and format_id fields
|
||||
for (i, format) in enumerate(formats):
|
||||
for i, format in enumerate(formats):
|
||||
if format.get('format_id') is None:
|
||||
format['format_id'] = compat_str(i)
|
||||
if format.get('format') is None:
|
||||
@@ -908,7 +924,7 @@ class YoutubeDL(object):
|
||||
self.to_screen('[%s] %s: Downloading thumbnail ...' %
|
||||
(info_dict['extractor'], info_dict['id']))
|
||||
try:
|
||||
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
|
||||
uf = self.urlopen(info_dict['thumbnail'])
|
||||
with open(thumb_filename, 'wb') as thumbf:
|
||||
shutil.copyfileobj(uf, thumbf)
|
||||
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
|
||||
@@ -1154,7 +1170,7 @@ class YoutubeDL(object):
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
return self._opener.open(req)
|
||||
return self._opener.open(req, timeout=self._socket_timeout)
|
||||
|
||||
def print_debug_header(self):
|
||||
if not self.params.get('verbose'):
|
||||
@@ -1185,7 +1201,7 @@ class YoutubeDL(object):
|
||||
|
||||
def _setup_opener(self):
|
||||
timeout_val = self.params.get('socket_timeout')
|
||||
timeout = 600 if timeout_val is None else float(timeout_val)
|
||||
self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
|
||||
|
||||
opts_cookiefile = self.params.get('cookiefile')
|
||||
opts_proxy = self.params.get('proxy')
|
||||
@@ -1223,7 +1239,3 @@ class YoutubeDL(object):
|
||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||
opener.addheaders = []
|
||||
self._opener = opener
|
||||
|
||||
# TODO remove this global modification
|
||||
compat_urllib_request.install_opener(opener)
|
||||
socket.setdefaulttimeout(timeout)
|
||||
|
@@ -41,12 +41,23 @@ __authors__ = (
|
||||
'Chris Gahan',
|
||||
'Saimadhav Heblikar',
|
||||
'Mike Col',
|
||||
'Oleg Prutz',
|
||||
'pulpe',
|
||||
'Andreas Schmitz',
|
||||
'Michael Kaiser',
|
||||
'Niklas Laxström',
|
||||
'David Triendl',
|
||||
'Anthony Weems',
|
||||
'David Wagner',
|
||||
'Juan C. Olivares',
|
||||
'Mattias Harrysson',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
||||
import codecs
|
||||
import getpass
|
||||
import io
|
||||
import locale
|
||||
import optparse
|
||||
import os
|
||||
@@ -65,6 +76,7 @@ from .utils import (
|
||||
get_cachedir,
|
||||
MaxDownloadsReached,
|
||||
preferredencoding,
|
||||
read_batch_urls,
|
||||
SameFileError,
|
||||
setproctitle,
|
||||
std_headers,
|
||||
@@ -203,7 +215,7 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option('-U', '--update',
|
||||
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
|
||||
general.add_option('-i', '--ignore-errors',
|
||||
action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
|
||||
action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
|
||||
general.add_option('--abort-on-error',
|
||||
action='store_false', dest='ignoreerrors',
|
||||
help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
|
||||
@@ -419,6 +431,8 @@ def parseOpts(overrideArguments=None):
|
||||
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
||||
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
|
||||
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
||||
'%(height)s and %(width)s for the width and height of the video format. '
|
||||
'%(resolution)s for a textual description of the resolution of the video format. '
|
||||
'Use - to output to stdout. Can also be used to download to a different directory, '
|
||||
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
||||
filesystem.add_option('--autonumber-size',
|
||||
@@ -546,21 +560,19 @@ def _real_main(argv=None):
|
||||
sys.exit(0)
|
||||
|
||||
# Batch file verification
|
||||
batchurls = []
|
||||
batch_urls = []
|
||||
if opts.batchfile is not None:
|
||||
try:
|
||||
if opts.batchfile == '-':
|
||||
batchfd = sys.stdin
|
||||
else:
|
||||
batchfd = open(opts.batchfile, 'r')
|
||||
batchurls = batchfd.readlines()
|
||||
batchurls = [x.strip() for x in batchurls]
|
||||
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
|
||||
batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
|
||||
batch_urls = read_batch_urls(batchfd)
|
||||
if opts.verbose:
|
||||
write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
|
||||
write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
|
||||
except IOError:
|
||||
sys.exit(u'ERROR: batch file could not be read')
|
||||
all_urls = batchurls + args
|
||||
all_urls = batch_urls + args
|
||||
all_urls = [url.strip() for url in all_urls]
|
||||
_enc = preferredencoding()
|
||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||
|
@@ -5,6 +5,7 @@ from .hls import HlsFD
|
||||
from .http import HttpFD
|
||||
from .mplayer import MplayerFD
|
||||
from .rtmp import RtmpFD
|
||||
from .f4m import F4mFD
|
||||
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -22,5 +23,7 @@ def get_suitable_downloader(info_dict):
|
||||
return HlsFD
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
return MplayerFD
|
||||
if determine_ext(url) == 'f4m':
|
||||
return F4mFD
|
||||
else:
|
||||
return HttpFD
|
||||
|
314
youtube_dl/downloader/f4m.py
Normal file
314
youtube_dl/downloader/f4m.py
Normal file
@@ -0,0 +1,314 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import io
|
||||
import itertools
|
||||
import os
|
||||
import time
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..utils import (
|
||||
struct_pack,
|
||||
struct_unpack,
|
||||
compat_urlparse,
|
||||
format_bytes,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
)
|
||||
|
||||
|
||||
class FlvReader(io.BytesIO):
    """
    Reader for Flv files
    The file format is documented in https://www.adobe.com/devnet/f4v.html

    The reader wraps an in-memory byte buffer; every read_* method consumes
    bytes from the current position. Integers are unpacked with the '!'
    (network / big-endian) struct prefix.
    """

    # Utility functions for reading numbers and strings
    def read_unsigned_long_long(self):
        """Consume 8 bytes and return them as an unsigned integer."""
        return struct_unpack('!Q', self.read(8))[0]

    def read_unsigned_int(self):
        """Consume 4 bytes and return them as an unsigned integer."""
        return struct_unpack('!I', self.read(4))[0]

    def read_unsigned_char(self):
        """Consume 1 byte and return it as an unsigned integer."""
        return struct_unpack('!B', self.read(1))[0]

    def read_string(self):
        """
        Consume a NUL-terminated byte string and return it without the
        terminator.

        If the buffer ends before a NUL byte is found, everything up to the
        end of the buffer is returned (read(1) yields b'' at EOF, which would
        otherwise never match the terminator and loop forever).
        """
        chunks = []
        while True:
            char = self.read(1)
            if not char or char == b'\x00':
                break
            chunks.append(char)
        return b''.join(chunks)

    def read_box_info(self):
        """
        Read a box and return the info as a tuple: (box_size, box_type, box_data)
        """
        real_size = size = self.read_unsigned_int()
        box_type = self.read(4)
        header_end = 8
        if size == 1:
            # A 32 bit size of 1 signals that the real size follows as a
            # 64 bit integer, which makes the header 16 bytes long
            real_size = self.read_unsigned_long_long()
            header_end = 16
        return real_size, box_type, self.read(real_size - header_end)

    def read_asrt(self):
        """
        Parse the payload of an 'asrt' box.

        Returns a dict whose 'segment_run' key holds a list of
        (first_segment, fragments_per_segment) tuples.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        quality_entry_count = self.read_unsigned_char()
        # QualityEntryCount
        for _ in range(quality_entry_count):
            self.read_string()

        segment_run_count = self.read_unsigned_int()
        segments = []
        for _ in range(segment_run_count):
            first_segment = self.read_unsigned_int()
            fragments_per_segment = self.read_unsigned_int()
            segments.append((first_segment, fragments_per_segment))

        return {
            'segment_run': segments,
        }

    def read_afrt(self):
        """
        Parse the payload of an 'afrt' box.

        Returns a dict whose 'fragments' key holds one dict per entry with
        the keys 'first', 'ts', 'duration' and 'discontinuity_indicator'
        (None unless the entry's duration is 0).
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        # time scale
        self.read_unsigned_int()

        quality_entry_count = self.read_unsigned_char()
        # QualitySegmentUrlModifiers
        for _ in range(quality_entry_count):
            self.read_string()

        fragments_count = self.read_unsigned_int()
        fragments = []
        for _ in range(fragments_count):
            first = self.read_unsigned_int()
            first_ts = self.read_unsigned_long_long()
            duration = self.read_unsigned_int()
            if duration == 0:
                # Only entries with a zero duration carry this extra byte
                discontinuity_indicator = self.read_unsigned_char()
            else:
                discontinuity_indicator = None
            fragments.append({
                'first': first,
                'ts': first_ts,
                'duration': duration,
                'discontinuity_indicator': discontinuity_indicator,
            })

        return {
            'fragments': fragments,
        }

    def read_abst(self):
        """
        Parse the payload of an 'abst' box.

        The leading fields are consumed and discarded; only the nested
        'asrt' and 'afrt' boxes are kept. Returns a dict with the keys
        'segments' (list of read_asrt results) and 'fragments' (list of
        read_afrt results).
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)

        self.read_unsigned_int()  # BootstrapinfoVersion
        # Profile,Live,Update,Reserved
        self.read(1)
        # time scale
        self.read_unsigned_int()
        # CurrentMediaTime
        self.read_unsigned_long_long()
        # SmpteTimeCodeOffset
        self.read_unsigned_long_long()

        self.read_string()  # MovieIdentifier
        server_count = self.read_unsigned_char()
        # ServerEntryTable
        for _ in range(server_count):
            self.read_string()
        quality_count = self.read_unsigned_char()
        # QualityEntryTable
        for _ in range(quality_count):
            self.read_string()
        # DrmData
        self.read_string()
        # MetaData
        self.read_string()

        segments_count = self.read_unsigned_char()
        segments = []
        for _ in range(segments_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'asrt'
            segment = FlvReader(box_data).read_asrt()
            segments.append(segment)
        fragments_run_count = self.read_unsigned_char()
        fragments = []
        for _ in range(fragments_run_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'afrt'
            fragments.append(FlvReader(box_data).read_afrt())

        return {
            'segments': segments,
            'fragments': fragments,
        }

    def read_bootstrap_info(self):
        """
        Read the top-level box, which must be of type 'abst', and return
        its parsed content (see read_abst).
        """
        total_size, box_type, box_data = self.read_box_info()
        assert box_type == b'abst'
        return FlvReader(box_data).read_abst()
|
||||
|
||||
|
||||
def read_bootstrap_info(bootstrap_bytes):
    """Parse raw bootstrap bytes and return what FlvReader.read_bootstrap_info yields."""
    reader = FlvReader(bootstrap_bytes)
    return reader.read_bootstrap_info()
|
||||
|
||||
|
||||
def build_fragments_list(boot_info):
    """ Return a list of (segment, fragment) for each fragment in the video """
    # Only the first segment run table and its first entry are looked at
    # (I've only found videos with one segment), so every fragment is
    # reported as belonging to segment 1.
    first_run_entry = boot_info['segments'][0]['segment_run'][0]
    fragment_total = first_run_entry[1]
    first_number = boot_info['fragments'][0]['fragments'][0]['first']
    # Fragment numbers are consecutive, starting at the first table entry
    return [(1, first_number + offset) for offset in range(fragment_total)]
|
||||
|
||||
|
||||
def write_flv_header(stream, metadata):
    """Writes the FLV header and the metadata to stream"""
    leading_chunks = (
        # FLV header
        b'FLV\x01',
        b'\x05',
        b'\x00\x00\x00\x09',
        # FLV File body
        b'\x00\x00\x00\x00',
        # FLVTAG
        # Script data
        b'\x12',
    )
    for chunk in leading_chunks:
        stream.write(chunk)

    # Size of the metadata with 3 bytes: pack it as 4 bytes and drop the
    # first one
    metadata_size = struct_pack('!L', len(metadata))[1:]
    trailing_chunks = (
        metadata_size,
        b'\x00\x00\x00\x00\x00\x00\x00',
        metadata,
        # Magic numbers extracted from the output files produced by AdobeHDS.php
        # (https://github.com/K-S-V/Scripts)
        b'\x00\x00\x01\x73',
    )
    for chunk in trailing_chunks:
        stream.write(chunk)
|
||||
|
||||
|
||||
def _add_ns(prop):
    """Return prop prefixed with the f4m 1.0 XML namespace in ElementTree's {uri}tag form."""
    qualified_name = '{http://ns.adobe.com/f4m/1.0}%s' % prop
    return qualified_name
|
||||
|
||||
|
||||
class HttpQuietDownloader(HttpFD):
    """HttpFD variant whose to_screen discards every message."""

    def to_screen(self, *args, **kargs):
        """Accept any arguments and print nothing."""
        return None
|
||||
|
||||
|
||||
class F4mFD(FileDownloader):
    """
    A downloader for f4m manifests or AdobeHDS.
    """

    def real_download(self, filename, info_dict):
        """
        Download the video described by the f4m manifest at info_dict['url'].

        The media entry with the highest bitrate is selected, each of its
        fragments is fetched into its own temporary file, and the content of
        every fragment's 'mdat' box is appended to the destination file after
        an FLV header.

        Returns True on success and False as soon as one fragment fails to
        download.
        """
        man_url = info_dict['url']
        self.to_screen('[download] Downloading f4m manifest')
        manifest = self.ydl.urlopen(man_url).read()
        self.report_destination(filename)
        http_dl = HttpQuietDownloader(self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'test': self.params.get('test', False),
            })

        doc = etree.fromstring(manifest)
        formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
        # Sort by bitrate only, the last entry is the best one
        formats = sorted(formats, key=lambda f: f[0])
        media = formats[-1][1]
        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
        bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
        boot_info = read_bootstrap_info(bootstrap)
        fragments_list = build_fragments_list(boot_info)
        if self.params.get('test', False):
            # We only download the first fragment
            fragments_list = fragments_list[:1]
        total_frags = len(fragments_list)

        tmpfilename = self.temp_name(filename)
        (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')

        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'downloaded_bytes': 0,
            'frag_counter': 0,
        }
        start = time.time()

        def frag_progress_hook(status):
            # The total size is extrapolated from the size of the fragment
            # that is currently being downloaded
            frag_total_bytes = status.get('total_bytes', 0)
            estimated_size = (state['downloaded_bytes'] +
                (total_frags - state['frag_counter']) * frag_total_bytes)
            if status['status'] == 'finished':
                state['downloaded_bytes'] += frag_total_bytes
                state['frag_counter'] += 1
                progress = self.calc_percent(state['frag_counter'], total_frags)
                byte_counter = state['downloaded_bytes']
            else:
                frag_downloaded_bytes = status['downloaded_bytes']
                byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                frag_progress = self.calc_percent(frag_downloaded_bytes,
                    frag_total_bytes)
                progress = self.calc_percent(state['frag_counter'], total_frags)
                progress += frag_progress / float(total_frags)

            eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
            self.report_progress(progress, format_bytes(estimated_size),
                status.get('speed'), eta)
        http_dl.add_progress_hook(frag_progress_hook)

        frags_filenames = []
        # The destination stream must be closed on every exit path: leaving
        # it open leaks the handle when a fragment fails and makes the rename
        # below fail on platforms that refuse to rename open files.
        try:
            write_flv_header(dest_stream, metadata)
            for (seg_i, frag_i) in fragments_list:
                name = 'Seg%d-Frag%d' % (seg_i, frag_i)
                url = base_url + name
                frag_filename = '%s-%s' % (tmpfilename, name)
                success = http_dl.download(frag_filename, {'url': url})
                if not success:
                    return False
                with open(frag_filename, 'rb') as down:
                    down_data = down.read()
                    reader = FlvReader(down_data)
                    # Skip boxes until the one holding the media data
                    while True:
                        _, box_type, box_data = reader.read_box_info()
                        if box_type == b'mdat':
                            dest_stream.write(box_data)
                            break
                frags_filenames.append(frag_filename)
        finally:
            dest_stream.close()

        self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)

        self.try_rename(tmpfilename, filename)
        for frag_file in frags_filenames:
            os.remove(frag_file)

        fsize = os.path.getsize(encodeFilename(filename))
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })

        return True
|
@@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
|
||||
while count <= retries:
|
||||
# Establish connection
|
||||
try:
|
||||
data = compat_urllib_request.urlopen(request)
|
||||
data = self.ydl.urlopen(request)
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||
@@ -59,7 +59,7 @@ class HttpFD(FileDownloader):
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
data = compat_urllib_request.urlopen(basic_request)
|
||||
data = self.ydl.urlopen(basic_request)
|
||||
content_length = data.info()['Content-Length']
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
@@ -85,6 +85,7 @@ class HttpFD(FileDownloader):
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
resume_len = 0
|
||||
open_mode = 'wb'
|
||||
break
|
||||
# Retry
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
@@ -22,7 +24,7 @@ class RtmpFD(FileDownloader):
|
||||
proc_stderr_closed = False
|
||||
while not proc_stderr_closed:
|
||||
# read line from stderr
|
||||
line = u''
|
||||
line = ''
|
||||
while True:
|
||||
char = proc.stderr.read(1)
|
||||
if not char:
|
||||
@@ -46,7 +48,7 @@ class RtmpFD(FileDownloader):
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
data_len_str = u'~' + format_bytes(data_len)
|
||||
data_len_str = '~' + format_bytes(data_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
@@ -76,19 +78,21 @@ class RtmpFD(FileDownloader):
|
||||
})
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
self.to_screen('')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen(u'[rtmpdump] '+line)
|
||||
self.to_screen('[rtmpdump] '+line)
|
||||
proc.wait()
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
self.to_screen('')
|
||||
return proc.returncode
|
||||
|
||||
url = info_dict['url']
|
||||
player_url = info_dict.get('player_url', None)
|
||||
page_url = info_dict.get('page_url', None)
|
||||
app = info_dict.get('app', None)
|
||||
play_path = info_dict.get('play_path', None)
|
||||
tc_url = info_dict.get('tc_url', None)
|
||||
flash_version = info_dict.get('flash_version', None)
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn', None)
|
||||
|
||||
@@ -100,7 +104,7 @@ class RtmpFD(FileDownloader):
|
||||
try:
|
||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||
self.report_error('RTMP download detected but "rtmpdump" could not be run')
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
@@ -111,17 +115,21 @@ class RtmpFD(FileDownloader):
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
basic_args += ['--pageUrl', page_url]
|
||||
if app is not None:
|
||||
basic_args += ['--app', app]
|
||||
if play_path is not None:
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', url]
|
||||
if test:
|
||||
basic_args += ['--stop', '1']
|
||||
if flash_version is not None:
|
||||
basic_args += ['--flashVer', flash_version]
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
if conn:
|
||||
basic_args += ['--conn', conn]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
# Windows subprocess module does not actually support Unicode
|
||||
@@ -144,26 +152,35 @@ class RtmpFD(FileDownloader):
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
except ImportError:
|
||||
shell_quote = repr
|
||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||
self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||
|
||||
RD_SUCCESS = 0
|
||||
RD_FAILED = 1
|
||||
RD_INCOMPLETE = 2
|
||||
RD_NO_CONNECT = 3
|
||||
|
||||
retval = run_rtmpdump(args)
|
||||
|
||||
while (retval == 2 or retval == 1) and not test:
|
||||
if retval == RD_NO_CONNECT:
|
||||
self.report_error('[rtmpdump] Could not connect to RTMP server.')
|
||||
return False
|
||||
|
||||
while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
||||
self.to_screen('[rtmpdump] %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == 1:
|
||||
if prevsize == cursize and retval == RD_FAILED:
|
||||
break
|
||||
# Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
|
||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
||||
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = 0
|
||||
if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
|
||||
self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = RD_SUCCESS
|
||||
break
|
||||
if retval == 0 or (test and retval == 2):
|
||||
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
||||
self.to_screen('[rtmpdump] %s bytes' % fsize)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
@@ -173,6 +190,6 @@ class RtmpFD(FileDownloader):
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'rtmpdump exited with code %d' % retval)
|
||||
self.to_stderr('\n')
|
||||
self.report_error('rtmpdump exited with code %d' % retval)
|
||||
return False
|
||||
|
@@ -1,5 +1,6 @@
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .anitube import AnitubeIE
|
||||
from .aparat import AparatIE
|
||||
from .appletrailers import AppleTrailersIE
|
||||
@@ -15,22 +16,30 @@ from .arte import (
|
||||
from .auengine import AUEngineIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .blinkx import BlinkxIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .br import BRIE
|
||||
from .breakcom import BreakIE
|
||||
from .brightcove import BrightcoveIE
|
||||
from .c56 import C56IE
|
||||
from .canal13cl import Canal13clIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .cbs import CBSIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .cmt import CMTIE
|
||||
from .cnn import CNNIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNBlogsIE,
|
||||
)
|
||||
from .collegehumor import CollegeHumorIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .condenast import CondeNastIE
|
||||
@@ -44,7 +53,6 @@ from .dailymotion import (
|
||||
DailymotionUserIE,
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .depositfiles import DepositFilesIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
@@ -62,11 +70,13 @@ from .extremetube import ExtremeTubeIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fktv import (
|
||||
FKTVIE,
|
||||
FKTVPosteckeIE,
|
||||
)
|
||||
from .flickr import FlickrIE
|
||||
from .fourtube import FourTubeIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
PluzzIE,
|
||||
@@ -81,10 +91,12 @@ from .funnyordie import FunnyOrDieIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .hark import HarkIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .huffpost import HuffPostIE
|
||||
@@ -103,6 +115,7 @@ from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
)
|
||||
from .jadorecettepub import JadoreCettePubIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
@@ -112,6 +125,7 @@ from .keezmovies import KeezMoviesIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .la7 import LA7IE
|
||||
from .lifenews import LifeNewsIE
|
||||
from .liveleak import LiveLeakIE
|
||||
@@ -122,11 +136,12 @@ from .lynda import (
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .malemotion import MalemotionIE
|
||||
from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mit import TechTVMITIE, MITIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mpora import MporaIE
|
||||
from .mofosex import MofosexIE
|
||||
@@ -141,7 +156,10 @@ from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import NBCNewsIE
|
||||
from .nbc import (
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
)
|
||||
from .ndr import NDRIE
|
||||
from .ndtv import NDTVIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
@@ -150,17 +168,19 @@ from .nhl import NHLIE, NHLVideocenterIE
|
||||
from .niconico import NiconicoIE
|
||||
from .ninegag import NineGagIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .novamov import NovamovIE
|
||||
from .novamov import NovaMovIE
|
||||
from .nowness import NownessIE
|
||||
from .nowvideo import NowVideoIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .orf import ORFIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .playvid import PlayvidIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import PornHubIE
|
||||
from .pornotube import PornotubeIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
@@ -176,6 +196,7 @@ from .rutube import (
|
||||
RutubeMovieIE,
|
||||
RutubePersonIE,
|
||||
)
|
||||
from .savefrom import SaveFromIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sina import SinaIE
|
||||
from .slashdot import SlashdotIE
|
||||
@@ -200,10 +221,13 @@ from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .statigram import StatigramIE
|
||||
from .steam import SteamIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .thisav import ThisAVIE
|
||||
@@ -211,10 +235,12 @@ from .tinypic import TinyPicIE
|
||||
from .toutv import TouTvIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trutube import TruTubeIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tudou import TudouIE
|
||||
from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
from .unistra import UnistraIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
@@ -222,8 +248,10 @@ from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vevo import VevoIE
|
||||
from .vgtrk import VGTRKIE
|
||||
from .vice import ViceIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
@@ -258,19 +286,20 @@ from .youku import YoukuIE
|
||||
from .youporn import YouPornIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeChannelIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeWatchLaterIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeHistoryIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTopListIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'AcademicEarth:Course'
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -14,12 +14,12 @@ class AcademicEarthCourseIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._html_search_regex(
|
||||
r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
|
||||
r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
|
||||
description = self._html_search_regex(
|
||||
r'<p class="excerpt">(.*?)</p>',
|
||||
r'<p class="excerpt"[^>]*?>(.*?)</p>',
|
||||
webpage, u'description', fatal=False)
|
||||
urls = re.findall(
|
||||
r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
|
||||
r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
|
||||
webpage)
|
||||
entries = [self.url_result(u) for u in urls]
|
||||
|
||||
|
69
youtube_dl/extractor/aftonbladet.py
Normal file
69
youtube_dl/extractor/aftonbladet.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AftonbladetIE(InfoExtractor):
    """Information extractor for tv.aftonbladet.se article videos.

    The article page carries a ``data-aptomaId`` attribute; that id is used
    to fetch a metadata JSON document, whose ``videoId`` is in turn used to
    fetch the list of available formats.
    """

    _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
    _TEST = {
        'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
        'info_dict': {
            'id': 'article36015',
            'ext': 'mp4',
            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
            'upload_date': '20140306',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the article video at ``url``.

        Raises KeyError if one of the expected keys is missing from the
        downloaded JSON documents.
        """
        mobj = re.search(self._VALID_URL, url)

        video_id = mobj.group('video_id')
        webpage = self._download_webpage(url, video_id)

        # find internal video meta data
        META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
        internal_meta_id = self._html_search_regex(
            r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
        internal_meta_url = META_URL % internal_meta_id
        internal_meta_json = self._download_json(
            internal_meta_url, video_id, 'Downloading video meta data')

        # find internal video formats
        FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
        internal_video_id = internal_meta_json['videoId']
        internal_formats_url = FORMATS_URL % internal_video_id
        internal_formats_json = self._download_json(
            internal_formats_url, video_id, 'Downloading video formats')

        formats = []
        for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
            # Only the first listed path of each format is used.
            p = fmt['paths'][0]
            formats.append({
                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
                'ext': 'mp4',
                'width': fmt['width'],
                'height': fmt['height'],
                'tbr': fmt['bitrate'],
                'protocol': 'http',
            })
        self._sort_formats(formats)

        # Convert in UTC: fromtimestamp() would use the local timezone of the
        # machine running the extractor, so the same video could yield a
        # different upload_date depending on where it is extracted.
        timestamp = datetime.datetime.utcfromtimestamp(internal_meta_json['timePublished'])
        upload_date = timestamp.strftime('%Y%m%d')

        return {
            'id': video_id,
            'title': internal_meta_json['title'],
            'formats': formats,
            'thumbnail': internal_meta_json['imageUrl'],
            'description': internal_meta_json['shortPreamble'],
            'upload_date': upload_date,
            'duration': internal_meta_json['duration'],
            'view_count': internal_meta_json['views'],
        }
|
@@ -72,18 +72,22 @@ class ArteTvIE(InfoExtractor):
|
||||
return self._extract_liveweb(url, name, lang)
|
||||
|
||||
if re.search(self._LIVE_URL, url) is not None:
|
||||
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
||||
raise ExtractorError('Arte live streams are not yet supported, sorry')
|
||||
# self.extractLiveStream(url)
|
||||
# return
|
||||
|
||||
raise ExtractorError('No video found')
|
||||
|
||||
def _extract_video(self, url, video_id, lang):
|
||||
"""Extract from videos.arte.tv"""
|
||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
|
||||
ref_xml_doc = self._download_xml(
|
||||
ref_xml_url, video_id, note='Downloading metadata')
|
||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||
config_xml_url = config_node.attrib['ref']
|
||||
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
||||
config_xml = self._download_webpage(
|
||||
config_xml_url, video_id, note='Downloading configuration')
|
||||
|
||||
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
|
||||
def _key(m):
|
||||
|
223
youtube_dl/extractor/bbccouk.py
Normal file
223
youtube_dl/extractor/bbccouk.py
Normal file
@@ -0,0 +1,223 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class BBCCoUkIE(SubtitlesInfoExtractor):
    """Information extractor for bbc.co.uk programme and iPlayer episode pages.

    Extraction downloads the iPlayer playlist XML for the id found in the
    URL, then a media selection XML for each programme item, and turns the
    listed media connections into formats and subtitles.
    """

    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'

    _TESTS = [
        {
            'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
            'info_dict': {
                'id': 'b039d07m',
                'ext': 'flv',
                'title': 'Kaleidoscope: Leonard Cohen',
                'description': 'md5:db4755d7a665ae72343779f7dacb402c',
                'duration': 1740,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
            'info_dict': {
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Man in Black: Series 3: The Printed Name',
                'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
                'duration': 1800,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Episode is no longer available on BBC iPlayer Radio',
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
            'info_dict': {
                # NOTE(review): same id as the previous test case; possibly a
                # copy-paste leftover -- confirm (this test is skipped).
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Voice UK: Series 3: Blind Auditions 5',
                'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
                'duration': 5100,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
        }
    ]

    def _extract_asx_playlist(self, connection, programme_id):
        """Download the ASX playlist a connection points to; return its ref hrefs."""
        asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
        return [ref.get('href') for ref in asx.findall('./Entry/ref')]

    def _extract_connection(self, connection, programme_id):
        """Return the list of format dicts described by one connection element.

        Only the 'http' and 'rtmp' protocols are handled; any other protocol
        yields an empty list.
        """
        formats = []
        protocol = connection.get('protocol')
        supplier = connection.get('supplier')
        if protocol == 'http':
            href = connection.get('href')
            # ASX playlist
            if supplier == 'asx':
                for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
                    formats.append({
                        'url': ref,
                        'format_id': 'ref%s_%s' % (i, supplier),
                    })
            # Direct link
            else:
                formats.append({
                    'url': href,
                    'format_id': supplier,
                })
        elif protocol == 'rtmp':
            application = connection.get('application', 'ondemand')
            auth_string = connection.get('authString')
            identifier = connection.get('identifier')
            server = connection.get('server')
            formats.append({
                'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
                'play_path': identifier,
                'app': '%s?%s' % (application, auth_string),
                'page_url': 'http://www.bbc.co.uk',
                'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
                'rtmp_live': False,
                'ext': 'flv',
                'format_id': supplier,
            })
        return formats

    def _extract_items(self, playlist):
        """Return the item elements of a playlist XML document."""
        return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')

    def _extract_medias(self, media_selection):
        """Return the media elements of a media selection XML document."""
        return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')

    def _extract_connections(self, media):
        """Return the connection elements of a media element."""
        return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')

    def _extract_video(self, media, programme_id):
        """Return the formats of a video media element, tagged with video metadata."""
        formats = []
        vbr = int(media.get('bitrate'))
        vcodec = media.get('encoding')
        service = media.get('service')
        width = int(media.get('width'))
        height = int(media.get('height'))
        file_size = int(media.get('media_file_size'))
        for connection in self._extract_connections(media):
            conn_formats = self._extract_connection(connection, programme_id)
            for fmt in conn_formats:
                fmt.update({
                    'format_id': '%s_%s' % (service, fmt['format_id']),
                    'width': width,
                    'height': height,
                    'vbr': vbr,
                    'vcodec': vcodec,
                    'filesize': file_size,
                })
            formats.extend(conn_formats)
        return formats

    def _extract_audio(self, media, programme_id):
        """Return the formats of an audio media element, tagged with audio metadata."""
        formats = []
        abr = int(media.get('bitrate'))
        acodec = media.get('encoding')
        service = media.get('service')
        for connection in self._extract_connections(media):
            conn_formats = self._extract_connection(connection, programme_id)
            for fmt in conn_formats:
                fmt.update({
                    'format_id': '%s_%s' % (service, fmt['format_id']),
                    'abr': abr,
                    'acodec': acodec,
                })
            formats.extend(conn_formats)
        return formats

    def _extract_captions(self, media, programme_id):
        """Download the captions of a media element.

        Returns a dict mapping the language of each captions document
        (default 'en') to the cue text assembled from its <p> elements.
        """
        subtitles = {}
        for connection in self._extract_connections(media):
            captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
            ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
            # Collect the cues and join once instead of growing a string in
            # the loop; the resulting text is unchanged.
            cues = [
                '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (
                    str(pos), p.get('begin'), p.get('end'),
                    p.text.strip() if p.text is not None else '')
                for pos, p in enumerate(ps)]
            subtitles[lang] = ''.join(cues)
        return subtitles

    def _real_extract(self, url):
        """Return the info dict for the programme at ``url``.

        Raises ExtractorError when the page reports the content as UK-only,
        when the playlist reports that no items are available, or when the
        playlist contains no programme item. Returns None after listing
        subtitles when the 'listsubtitles' parameter is set.
        """
        mobj = re.match(self._VALID_URL, url)
        group_id = mobj.group('id')

        webpage = self._download_webpage(url, group_id, 'Downloading video page')
        if re.search(r'id="emp-error" class="notinuk">', webpage):
            raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
                                 expected=True)

        playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
                                      'Downloading playlist XML')

        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
        if no_items is not None:
            reason = no_items.get('reason')
            if reason == 'preAvailability':
                msg = 'Episode %s is not yet available' % group_id
            elif reason == 'postAvailability':
                msg = 'Episode %s is no longer available' % group_id
            else:
                msg = 'Episode %s is not available: %s' % (group_id, reason)
            raise ExtractorError(msg, expected=True)

        formats = []
        subtitles = None
        programme_id = None

        for item in self._extract_items(playlist):
            kind = item.get('kind')
            if kind != 'programme' and kind != 'radioProgramme':
                continue
            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text

            programme_id = item.get('identifier')
            duration = int(item.get('duration'))

            media_selection = self._download_xml(
                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
                programme_id, 'Downloading media selection XML')

            for media in self._extract_medias(media_selection):
                kind = media.get('kind')
                if kind == 'audio':
                    formats.extend(self._extract_audio(media, programme_id))
                elif kind == 'video':
                    formats.extend(self._extract_video(media, programme_id))
                elif kind == 'captions':
                    subtitles = self._extract_captions(media, programme_id)

        # Without a programme item, programme_id/title/description/duration
        # are never assigned; fail with a clear error rather than a
        # NameError further down.
        if programme_id is None:
            raise ExtractorError('No programme found in playlist for %s' % group_id)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(programme_id, subtitles)
            return

        self._sort_formats(formats)

        return {
            'id': programme_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
|
@@ -24,5 +24,7 @@ class BloombergIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
ooyala_url = self._twitter_search_player(webpage)
|
||||
return self.url_result(ooyala_url, OoyalaIE.ie_key())
|
||||
embed_code = self._search_regex(
|
||||
r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
|
||||
'embed code')
|
||||
return OoyalaIE._build_url_result(embed_code)
|
||||
|
80
youtube_dl/extractor/br.py
Normal file
80
youtube_dl/extractor/br.py
Normal file
@@ -0,0 +1,80 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class BRIE(InfoExtractor):
    """Information extractor for the Bayerischer Rundfunk Mediathek (br.de).

    The video page references an XML document (``dataURL``) that describes
    the video, its assets (formats) and teaser image variants (thumbnails).
    """

    IE_DESC = "Bayerischer Rundfunk Mediathek"
    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
    _BASE_URL = "http://www.br.de"

    _TEST = {
        "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
        "md5": "c4f83cf0f023ba5875aba0bf46860df2",
        "info_dict": {
            "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
            "ext": "mp4",
            "title": "Feiern und Verzichten",
            "description": "Anselm Grün: Feiern und Verzichten",
            "uploader": "BR/Birgit Baier",
            "upload_date": "20140301"
        }
    }

    def _real_extract(self, url):
        """Return the info dict of the first video listed for the page at ``url``.

        Emits a warning when the XML lists more than one video and raises
        ExtractorError when it lists none.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
        page = self._download_webpage(url, display_id)
        xml_url = self._search_regex(
            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
        # Pass the display id rather than None so that the download is
        # reported under a meaningful identifier.
        xml = self._download_xml(self._BASE_URL + xml_url, display_id)

        videos = [{
            "id": xml_video.get("externalId"),
            "title": xml_video.find("title").text,
            "formats": self._extract_formats(xml_video.find("assets")),
            "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
            # Collapse a multi-line share title into a single line.
            "description": " ".join(xml_video.find("shareTitle").text.splitlines()),
            "uploader": xml_video.find("author").text,
            # Reverse the dot-separated date parts and concatenate them
            # (presumably DD.MM.YYYY -> YYYYMMDD; confirm against the feed).
            "upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
            "webpage_url": xml_video.find("permalink").text,
        } for xml_video in xml.findall("video")]

        if len(videos) > 1:
            self._downloader.report_warning(
                'found multiple videos; please '
                'report this with the video URL to http://yt-dl.org/bug')
        if not videos:
            raise ExtractorError('No video entries found')
        return videos[0]

    def _extract_formats(self, assets):
        """Build format dicts from the asset elements that have a downloadUrl."""
        formats = [{
            "url": asset.find("downloadUrl").text,
            "ext": asset.find("mediaType").text,
            "format_id": asset.get("type"),
            "width": int(asset.find("frameWidth").text),
            "height": int(asset.find("frameHeight").text),
            "tbr": int(asset.find("bitrateVideo").text),
            "abr": int(asset.find("bitrateAudio").text),
            "vcodec": asset.find("codecVideo").text,
            "container": asset.find("mediaType").text,
            "filesize": int(asset.find("size").text),
        } for asset in assets.findall("asset")
            if asset.find("downloadUrl") is not None]

        self._sort_formats(formats)
        return formats

    def _extract_thumbnails(self, variants):
        """Build thumbnail dicts from variant elements, largest area first."""
        thumbnails = [{
            "url": self._BASE_URL + variant.find("url").text,
            "width": int(variant.find("width").text),
            "height": int(variant.find("height").text),
        } for variant in variants.findall("variant")]
        thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
        return thumbnails
|
@@ -1,18 +1,20 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?break\.com/video/([^/]+)'
|
||||
_VALID_URL = r'http://(?:www\.)?break\.com/video/([^/]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.break.com/video/when-girls-act-like-guys-2468056',
|
||||
u'file': u'2468056.mp4',
|
||||
u'md5': u'a3513fb1547fba4fb6cfac1bffc6c46b',
|
||||
u'info_dict': {
|
||||
u"title": u"When Girls Act Like D-Bags"
|
||||
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
|
||||
'md5': 'a3513fb1547fba4fb6cfac1bffc6c46b',
|
||||
'info_dict': {
|
||||
'id': '2468056',
|
||||
'ext': 'mp4',
|
||||
'title': 'When Girls Act Like D-Bags',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,18 +23,17 @@ class BreakIE(InfoExtractor):
|
||||
video_id = mobj.group(1).split("-")[-1]
|
||||
embed_url = 'http://www.break.com/embed/%s' % video_id
|
||||
webpage = self._download_webpage(embed_url, video_id)
|
||||
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
|
||||
u'info json', flags=re.DOTALL)
|
||||
info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>',
|
||||
webpage, 'info json', flags=re.DOTALL)
|
||||
info = json.loads(info_json)
|
||||
video_url = info['videoUri']
|
||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
||||
if m_youtube is not None:
|
||||
return self.url_result(m_youtube.group(1), 'Youtube')
|
||||
final_url = video_url + '?' + info['AuthToken']
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'ext': determine_ext(final_url),
|
||||
'title': info['contentName'],
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'title': info['contentName'],
|
||||
'thumbnail': info['thumbUri'],
|
||||
}]
|
||||
}
|
||||
|
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
|
||||
ExtractorError,
|
||||
unsmuggle_url,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@@ -139,7 +140,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
|
||||
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
||||
if url_m:
|
||||
return [url_m.group(1)]
|
||||
return [unescapeHTML(url_m.group(1))]
|
||||
|
||||
matches = re.findall(
|
||||
r'''(?sx)<object
|
||||
|
48
youtube_dl/extractor/canal13cl.py
Normal file
48
youtube_dl/extractor/canal13cl.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Canal13clIE(InfoExtractor):
    """Information extractor for article videos on 13.cl."""

    _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
        'md5': '4cb1fa38adcad8fea88487a078831755',
        'info_dict': {
            'id': '1403022125',
            'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
            'ext': 'mp4',
            'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
            'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the article at ``url``.

        The last path component of the URL is used as display id; the video
        URL and thumbnail are read from the ``articuloVideo`` and
        ``articuloImagen`` assignments in the page.
        """
        display_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, display_id)

        title = self._html_search_meta(
            'twitter:title', page, 'title', fatal=True)
        description = self._html_search_meta(
            'twitter:description', page, 'description')
        video_url = self._html_search_regex(
            r'articuloVideo = \"(.*?)\"', page, 'url')
        # The numeric id is a run of seven or more digits inside the video
        # URL; fall back to the display id when there is none.
        numeric_id = self._search_regex(
            r'[^0-9]([0-9]{7,})[^0-9]', video_url, 'id', default=display_id)
        thumbnail = self._html_search_regex(
            r'articuloImagen = \"(.*?)\"', page, 'thumbnail')

        info = {
            'id': numeric_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'ext': 'mp4',
            'thumbnail': thumbnail,
        }
        return info
|
@@ -1,4 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -9,11 +11,12 @@ class Canalc2IE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
||||
u'file': u'12163.mp4',
|
||||
u'md5': u'060158428b650f896c542dfbb3d6487f',
|
||||
u'info_dict': {
|
||||
u'title': u'Terrasses du Numérique'
|
||||
'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
||||
'md5': '060158428b650f896c542dfbb3d6487f',
|
||||
'info_dict': {
|
||||
'id': '12163',
|
||||
'ext': 'mp4',
|
||||
'title': 'Terrasses du Numérique'
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,10 +31,11 @@ class Canalc2IE(InfoExtractor):
|
||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'class="evenement8">(.*?)</a>', webpage, u'title')
|
||||
|
||||
return {'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
}
|
||||
r'class="evenement8">(.*?)</a>', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
}
|
||||
|
126
youtube_dl/extractor/ceskatelevize.py
Normal file
126
youtube_dl/extractor/ceskatelevize.py
Normal file
@@ -0,0 +1,126 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
    """Information extractor for ceskatelevize.cz (iVysilani) videos.

    The page embeds a playlist type and episode id; these are POSTed to the
    get-playlist-url endpoint, which answers with the URL of an XML
    playlist listing the available streams.
    """

    _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'

    _TESTS = [
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
            'info_dict': {
                'id': '213512120230004',
                'ext': 'flv',
                'title': 'První republika: Španělská chřipka',
                'duration': 3107.4,
            },
            'params': {
                'skip_download': True,  # requires rtmpdump
            },
            'skip': 'Works only from Czech Republic.',
        },
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
            'info_dict': {
                'id': '20138143440',
                'ext': 'flv',
                'title': 'Tsatsiki, maminka a policajt',
                'duration': 6754.1,
            },
            'params': {
                'skip_download': True,  # requires rtmpdump
            },
            'skip': 'Works only from Czech Republic.',
        },
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
            'info_dict': {
                'id': '14716',
                'ext': 'flv',
                'title': 'První republika: Zpěvačka z Dupárny Bobina',
                'duration': 90,
            },
            'params': {
                'skip_download': True,  # requires rtmpdump
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the video at ``url``.

        Raises ExtractorError (expected) when the page states that the
        content is not available in the viewer's territory.
        """
        # Normalize '/porady/' URLs to their '/ivysilani/' form.
        url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
        episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')

        data = {
            'playlist[0][type]': typ,
            'playlist[0][id]': episode_id,
            'requestUrl': compat_urllib_parse_urlparse(url).path,
            'requestSource': 'iVysilani',
        }

        # POST bodies must be bytes on Python 3 (a str is rejected when the
        # request is opened); the urlencoded form is plain ASCII, so the
        # encoding step is lossless on Python 2 as well.
        req = compat_urllib_request.Request(
            'http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
            data=compat_urllib_parse.urlencode(data).encode('utf-8'))

        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        req.add_header('x-addr', '127.0.0.1')
        req.add_header('X-Requested-With', 'XMLHttpRequest')
        req.add_header('Referer', url)

        playlistpage = self._download_json(req, video_id)

        req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
        req.add_header('Referer', url)

        playlist = self._download_xml(req, video_id)

        formats = []
        # Stays None if every playlist item is an ad, instead of leaving the
        # name unbound for the return statement below.
        duration = None
        for i in playlist.find('smilRoot/body'):
            # Skip items whose id marks them as ads.
            if 'AD' in i.attrib['id']:
                continue
            base_url = i.attrib['base']
            parsedurl = compat_urllib_parse_urlparse(base_url)
            duration = i.attrib['duration']

            for video in i.findall('video'):
                if video.attrib['label'] == 'AD':
                    continue
                format_id = video.attrib['label']
                play_path = video.attrib['src']
                vbr = int(video.attrib['system-bitrate'])

                formats.append({
                    'format_id': format_id,
                    'url': base_url,
                    'vbr': vbr,
                    'play_path': play_path,
                    'app': parsedurl.path[1:] + '?' + parsedurl.query,
                    'rtmp_live': True,
                    'ext': 'flv',
                })

        self._sort_formats(formats)

        return {
            'id': episode_id,
            'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
            'duration': float(duration) if duration is not None else None,
            'formats': formats,
        }
|
@@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
|
||||
'''
|
||||
IE_DESC = 'Channel 9'
|
||||
IE_NAME = 'channel9'
|
||||
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||
'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
|
||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||
'info_dict': {
|
||||
'id': 'Events/TechEd/Australia/2013/KOS002',
|
||||
'ext': 'mp4',
|
||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||
'duration': 4576,
|
||||
@@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
|
||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||
'info_dict': {
|
||||
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'ext': 'mp4',
|
||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||
'duration': 1540,
|
||||
|
97
youtube_dl/extractor/chilloutzone.py
Normal file
97
youtube_dl/extractor/chilloutzone.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class ChilloutzoneIE(InfoExtractor):
    """Information extractor for chilloutzone.net video pages.

    The page stores its video description as base64-encoded JSON in the
    ``cozVidData`` JavaScript variable. Depending on that data the video is
    either delegated to the Youtube/Vimeo extractors or served from the
    ``mediaUrl`` it contains.
    """

    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
    _TESTS = [{
        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
        'md5': 'a76f3457e813ea0037e5244f509e66d1',
        'info_dict': {
            'id': 'enemene-meck-alle-katzen-weg',
            'ext': 'mp4',
            'title': 'Enemene Meck - Alle Katzen weg',
            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
        },
    }, {
        'note': 'Video hosted at YouTube',
        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
        'info_dict': {
            'id': '1YVQaAgHyRU',
            'ext': 'mp4',
            'title': '16 Photos Taken 1 Second Before Disaster',
            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
            'uploader': 'BuzzFeedVideo',
            'uploader_id': 'BuzzFeedVideo',
            'upload_date': '20131105',
        },
    }, {
        'note': 'Video hosted at Vimeo',
        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
        'info_dict': {
            'id': '85523671',
            'ext': 'mp4',
            'title': 'The Sunday Times - Icons',
            'description': 'md5:3e1c0dc6047498d6728dcdaad0891762',
            'uploader': 'Us',
            'uploader_id': 'usfilms',
            'upload_date': '20140131'
        },
    }]

    def _real_extract(self, url):
        """Return an info dict, or a url_result pointing at Youtube/Vimeo.

        Raises ExtractorError when the video is not delegated and the
        decoded data carries no media URL.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        encoded_info = self._html_search_regex(
            r'var cozVidData = "(.+?)";', page, 'video data')
        info = json.loads(base64.b64decode(encoded_info).decode("utf-8"))

        # Pull the individual fields out of the decoded data.
        media_url = info['mediaUrl']
        description = clean_html(info.get('description'))
        title = info['title']
        platform = info['nativePlatform']
        native_id = info['nativeVideoId']
        priority = info['sourcePriority']

        # Without a native platform, fall back to a youtube iframe embedded
        # in the page, if there is one.
        if platform is None:
            embedded_url = self._html_search_regex(
                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
                page, 'fallback video URL', default=None)
            if embedded_url is not None:
                return self.url_result(embedded_url, ie='Youtube')

        # When the native source has priority, hand over to the matching
        # extractor; otherwise the site's own media URL is used.
        prefer_native = priority == 'native'
        if prefer_native and platform == 'youtube':
            return self.url_result(native_id, ie='Youtube')
        if prefer_native and platform == 'vimeo':
            return self.url_result(
                'http://vimeo.com/' + native_id, ie='Vimeo')

        if not media_url:
            raise ExtractorError('No video found')

        result = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }
        return result
|
@@ -1,4 +1,5 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -8,73 +9,63 @@ from ..utils import (
|
||||
|
||||
|
||||
class CinemassacreIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?'
|
||||
_TESTS = [{
|
||||
u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
||||
u'file': u'19911.flv',
|
||||
u'info_dict': {
|
||||
u'upload_date': u'20121110',
|
||||
u'title': u'“Angry Video Game Nerd: The Movie” – Trailer',
|
||||
u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
|
||||
_VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
||||
'file': '19911.mp4',
|
||||
'md5': 'fde81fbafaee331785f58cd6c0d46190',
|
||||
'info_dict': {
|
||||
'upload_date': '20121110',
|
||||
'title': '“Angry Video Game Nerd: The Movie” – Trailer',
|
||||
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
|
||||
},
|
||||
},
|
||||
u'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||
u'file': u'521be8ef82b16.flv',
|
||||
u'info_dict': {
|
||||
u'upload_date': u'20131002',
|
||||
u'title': u'The Mummy’s Hand (1940)',
|
||||
},
|
||||
u'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
},
|
||||
}]
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||
'file': '521be8ef82b16.mp4',
|
||||
'md5': 'd72f10cd39eac4215048f62ab477a511',
|
||||
'info_dict': {
|
||||
'upload_date': '20131002',
|
||||
'title': 'The Mummy’s Hand (1940)',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
webpage_url = u'http://' + mobj.group('url')
|
||||
webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
|
||||
webpage = self._download_webpage(url, None) # Don't know video id yet
|
||||
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
|
||||
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
|
||||
if not mobj:
|
||||
raise ExtractorError(u'Can\'t extract embed url and video id')
|
||||
playerdata_url = mobj.group(u'embed_url')
|
||||
video_id = mobj.group(u'video_id')
|
||||
raise ExtractorError('Can\'t extract embed url and video id')
|
||||
playerdata_url = mobj.group('embed_url')
|
||||
video_id = mobj.group('video_id')
|
||||
|
||||
video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
|
||||
webpage, u'title')
|
||||
webpage, 'title')
|
||||
video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
|
||||
webpage, u'description', flags=re.DOTALL, fatal=False)
|
||||
webpage, 'description', flags=re.DOTALL, fatal=False)
|
||||
if len(video_description) == 0:
|
||||
video_description = None
|
||||
|
||||
playerdata = self._download_webpage(playerdata_url, video_id)
|
||||
url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url')
|
||||
|
||||
sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file')
|
||||
hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file')
|
||||
video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False)
|
||||
sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file')
|
||||
hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file')
|
||||
video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': url,
|
||||
'play_path': 'mp4:' + sd_file,
|
||||
'rtmp_live': True, # workaround
|
||||
'ext': 'flv',
|
||||
'url': sd_url,
|
||||
'ext': 'mp4',
|
||||
'format': 'sd',
|
||||
'format_id': 'sd',
|
||||
},
|
||||
{
|
||||
'url': url,
|
||||
'play_path': 'mp4:' + hd_file,
|
||||
'rtmp_live': True, # workaround
|
||||
'ext': 'flv',
|
||||
'url': hd_url,
|
||||
'ext': 'mp4',
|
||||
'format': 'hd',
|
||||
'format_id': 'hd',
|
||||
},
|
||||
|
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
@@ -98,3 +99,28 @@ class CNNIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
|
||||
class CNNBlogsIE(InfoExtractor):
    """Information extractor for posts on *.blogs.cnn.com.

    The post page is only used to find the URL stored in its ``data-url``
    attribute; that URL is then delegated to CNNIE.
    """

    _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
    _TEST = {
        'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
        'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
        'info_dict': {
            'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
            'ext': 'mp4',
            'title': 'Criminalizing journalism?',
            'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
            'upload_date': '20140209',
        },
        'add_ie': ['CNN'],
    }

    def _real_extract(self, url):
        """Return a 'url' type result pointing CNNIE at the embedded video."""
        # The basename of the post URL serves as the id shown while downloading.
        page_name = url_basename(url)
        page = self._download_webpage(url, page_name)
        target_url = self._html_search_regex(
            r'data-url="(.+?)"', page, 'cnn url')

        result = {'_type': 'url'}
        result['url'] = target_url
        result['ie_key'] = CNNIE.ie_key()
        return result
|
||||
|
@@ -35,15 +35,15 @@ class CollegeHumorIE(InfoExtractor):
|
||||
},
|
||||
# embedded youtube video
|
||||
{
|
||||
'url': 'http://www.collegehumor.com/embed/6950457',
|
||||
'url': 'http://www.collegehumor.com/embed/6950306',
|
||||
'info_dict': {
|
||||
'id': 'W5gMp3ZjYg4',
|
||||
'id': 'Z-bao9fg6Yc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
||||
'uploader': 'Funnyplox TV',
|
||||
'uploader_id': 'funnyploxtv',
|
||||
'description': 'md5:11812366244110c3523968aa74f02521',
|
||||
'upload_date': '20140128',
|
||||
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
|
||||
'uploader': 'Mark Dice',
|
||||
'uploader_id': 'MarkDice',
|
||||
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
|
||||
'upload_date': '20140127',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@@ -88,6 +88,10 @@ class InfoExtractor(object):
|
||||
|
||||
The following fields are optional:
|
||||
|
||||
display_id An alternative identifier for the video, not necessarily
|
||||
unique, but available before title. Typically, id is
|
||||
something like "4234987", title "Dancing naked mole rats",
|
||||
and display_id "dancing-naked-mole-rats"
|
||||
thumbnails: A list of dictionaries (with the entries "resolution" and
|
||||
"url") for the varying thumbnails
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
@@ -114,9 +118,6 @@ class InfoExtractor(object):
|
||||
_real_extract() methods and define a _VALID_URL regexp.
|
||||
Probably, they should also be added to the list of extractors.
|
||||
|
||||
_real_extract() must return a *list* of information dictionaries as
|
||||
described above.
|
||||
|
||||
Finally, the _WORKING attribute should be set to False for broken IEs
|
||||
in order to warn the users and skip the tests.
|
||||
"""
|
||||
@@ -271,8 +272,11 @@ class InfoExtractor(object):
|
||||
|
||||
def _download_json(self, url_or_request, video_id,
|
||||
note=u'Downloading JSON metadata',
|
||||
errnote=u'Unable to download JSON metadata'):
|
||||
errnote=u'Unable to download JSON metadata',
|
||||
transform_source=None):
|
||||
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||
if transform_source:
|
||||
json_string = transform_source(json_string)
|
||||
try:
|
||||
return json.loads(json_string)
|
||||
except ValueError as ve:
|
||||
@@ -429,14 +433,14 @@ class InfoExtractor(object):
|
||||
if secure: regexes = self._og_regexes('video:secure_url') + regexes
|
||||
return self._html_search_regex(regexes, html, name, **kargs)
|
||||
|
||||
def _html_search_meta(self, name, html, display_name=None):
|
||||
def _html_search_meta(self, name, html, display_name=None, fatal=False):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
return self._html_search_regex(
|
||||
r'''(?ix)<meta
|
||||
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
|
||||
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
|
||||
html, display_name, fatal=False)
|
||||
html, display_name, fatal=fatal)
|
||||
|
||||
def _dc_search_uploader(self, html):
    """Look up the ``dc.creator`` meta tag in *html*, reported as 'uploader'."""
    meta_name = 'dc.creator'
    return self._html_search_meta(meta_name, html, 'uploader')
|
||||
|
@@ -1,7 +1,11 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re, base64, zlib
|
||||
import re
|
||||
import json
|
||||
import base64
|
||||
import zlib
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
@@ -19,13 +23,15 @@ from ..aes import (
|
||||
inc,
|
||||
)
|
||||
|
||||
|
||||
class CrunchyrollIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TEST = {
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
'file': '645513.flv',
|
||||
#'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
|
||||
'info_dict': {
|
||||
'id': '645513',
|
||||
'ext': 'flv',
|
||||
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
||||
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
|
||||
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
||||
@@ -36,7 +42,7 @@ class CrunchyrollIE(InfoExtractor):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
}
|
||||
|
||||
_FORMAT_IDS = {
|
||||
'360': ('60', '106'),
|
||||
@@ -68,7 +74,7 @@ class CrunchyrollIE(InfoExtractor):
|
||||
shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
|
||||
# Extend 160 Bit hash to 256 Bit
|
||||
return shaHash + [0] * 12
|
||||
|
||||
|
||||
key = obfuscate_key(id)
|
||||
class Counter:
|
||||
__value = iv
|
||||
@@ -80,9 +86,8 @@ class CrunchyrollIE(InfoExtractor):
|
||||
return zlib.decompress(decrypted_data)
|
||||
|
||||
def _convert_subtitles_to_srt(self, subtitles):
|
||||
i=1
|
||||
output = ''
|
||||
for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
|
||||
for i, (start, end, text) in enumerate(re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles), 1):
|
||||
start = start.replace('.', ',')
|
||||
end = end.replace('.', ',')
|
||||
text = clean_html(text)
|
||||
@@ -90,7 +95,6 @@ class CrunchyrollIE(InfoExtractor):
|
||||
if not text:
|
||||
continue
|
||||
output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
|
||||
i+=1
|
||||
return output
|
||||
|
||||
def _real_extract(self,url):
|
||||
@@ -108,6 +112,12 @@ class CrunchyrollIE(InfoExtractor):
|
||||
if note_m:
|
||||
raise ExtractorError(note_m)
|
||||
|
||||
mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
|
||||
if mobj:
|
||||
msg = json.loads(mobj.group('msg'))
|
||||
if msg.get('type') == 'error':
|
||||
raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
|
||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
|
||||
@@ -123,7 +133,7 @@ class CrunchyrollIE(InfoExtractor):
|
||||
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
|
||||
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
|
||||
|
||||
|
||||
stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
|
||||
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
|
||||
|
||||
@@ -161,7 +171,7 @@ class CrunchyrollIE(InfoExtractor):
|
||||
data = base64.b64decode(data)
|
||||
|
||||
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
||||
lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||
if not lang_code:
|
||||
continue
|
||||
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
|
||||
|
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
get_element_by_id,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
int_or_none,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -124,7 +125,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if video_url is not None:
|
||||
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
|
||||
if m_size is not None:
|
||||
width, height = m_size.group(1), m_size.group(2)
|
||||
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
|
||||
else:
|
||||
width, height = None, None
|
||||
formats.append({
|
||||
|
@@ -1,60 +0,0 @@
|
||||
import re
|
||||
import os
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        """Resolve a depositfiles.com file page to its direct download URL.

        Returns a one-element list holding the info dict.  Raises
        ExtractorError when the page cannot be fetched or when no download
        form is found (using the site's own "Attention ..." notice as the
        message when one is present).
        """
        # The file id is the last path component; always query the English page.
        file_id = url.split('/')[-1]
        url = 'http://depositfiles.com/en/files/' + file_id

        # POSTing gateway_result=1 mimics pressing the 'Free download' button.
        post_fields = {'gateway_result' : '1'}
        request = compat_urllib_request.Request(
            url, compat_urllib_parse.urlencode(post_fields))
        try:
            self.report_download_webpage(file_id)
            webpage = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # The real file URL is the action of the fileshare download form.
        form_match = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if form_match is None or form_match.group(1) is None:
            # No form: try to report the reason the site gives.
            notice_match = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if notice_match is None or notice_match.group(1) is None:
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)
            restriction_message = re.sub('\s+', ' ', notice_match.group(1)).strip()
            raise ExtractorError(u'%s' % restriction_message)

        file_url = form_match.group(1)
        _, dotted_extension = os.path.splitext(file_url)
        file_extension = dotted_extension[1:]

        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')

        # NOTE(review): the .decode('utf-8') calls presumably expect byte
        # strings (Python 2 str) -- confirm before relying on this under
        # Python 3.
        info = {
            'id': file_id.decode('utf-8'),
            'url': file_url.decode('utf-8'),
            'uploader': None,
            'upload_date': None,
            'title': file_title,
            'ext': file_extension.decode('utf-8'),
        }
        return [info]
|
@@ -1,41 +1,42 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class DotsubIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?dotsub\.com/view/([^/]+)'
|
||||
_VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||
u'file': u'aed3b8b2-1889-4df5-ae63-ad85f5572f27.flv',
|
||||
u'md5': u'0914d4d69605090f623b7ac329fea66e',
|
||||
u'info_dict': {
|
||||
u"title": u"Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary",
|
||||
u"uploader": u"4v4l0n42",
|
||||
u'description': u'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
|
||||
u'thumbnail': u'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
|
||||
u'upload_date': u'20101213',
|
||||
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||
'md5': '0914d4d69605090f623b7ac329fea66e',
|
||||
'info_dict': {
|
||||
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||
'ext': 'flv',
|
||||
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
|
||||
'uploader': '4v4l0n42',
|
||||
'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
|
||||
'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
|
||||
'upload_date': '20101213',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
info_url = "https://dotsub.com/api/media/%s/metadata" %(video_id)
|
||||
webpage = self._download_webpage(info_url, video_id)
|
||||
info = json.loads(webpage)
|
||||
video_id = mobj.group('id')
|
||||
info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
|
||||
info = self._download_json(info_url, video_id)
|
||||
date = time.gmtime(info['dateCreated']/1000) # The timestamp is in miliseconds
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': info['mediaURI'],
|
||||
'ext': 'flv',
|
||||
'title': info['title'],
|
||||
'thumbnail': info['screenshotURI'],
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': info['mediaURI'],
|
||||
'ext': 'flv',
|
||||
'title': info['title'],
|
||||
'thumbnail': info['screenshotURI'],
|
||||
'description': info['description'],
|
||||
'uploader': info['user'],
|
||||
'view_count': info['numberOfViews'],
|
||||
'upload_date': u'%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
|
||||
}]
|
||||
'uploader': info['user'],
|
||||
'view_count': info['numberOfViews'],
|
||||
'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
|
||||
}
|
||||
|
@@ -10,11 +10,12 @@ from .common import InfoExtractor
|
||||
class DropboxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
|
||||
_TEST = {
|
||||
'url': 'https://www.dropbox.com/s/mcnzehi9wo55th4/20131219_085616.mp4',
|
||||
'file': 'mcnzehi9wo55th4.mp4',
|
||||
'md5': 'f6d65b1b326e82fd7ab7720bea3dacae',
|
||||
'url': 'https://www.dropbox.com/s/0qr9sai2veej4f8/THE_DOCTOR_GAMES.mp4',
|
||||
'md5': '8ae17c51172fb7f93bdd6a214cc8c896',
|
||||
'info_dict': {
|
||||
'title': '20131219_085616'
|
||||
'id': '0qr9sai2veej4f8',
|
||||
'ext': 'mp4',
|
||||
'title': 'THE_DOCTOR_GAMES'
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1,9 +1,9 @@
|
||||
import json
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
@@ -11,70 +11,68 @@ from ..utils import (
|
||||
|
||||
|
||||
class EscapistIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
|
||||
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
|
||||
_TEST = {
|
||||
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
|
||||
u'md5': u'ab3a706c681efca53f0a35f1415cf0d1',
|
||||
u'info_dict': {
|
||||
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
||||
u"uploader": u"the-escapist-presents",
|
||||
u"title": u"Breaking Down Baldur's Gate"
|
||||
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
||||
'info_dict': {
|
||||
'id': '6618',
|
||||
'ext': 'mp4',
|
||||
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
||||
'uploader': 'the-escapist-presents',
|
||||
'title': "Breaking Down Baldur's Gate",
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
showName = mobj.group('showname')
|
||||
videoId = mobj.group('episode')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
self.report_extraction(videoId)
|
||||
webpage = self._download_webpage(url, videoId)
|
||||
self.report_extraction(video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
videoDesc = self._html_search_regex(
|
||||
r'<meta name="description" content="([^"]*)"',
|
||||
webpage, u'description', fatal=False)
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
playerUrl = self._og_search_video_url(webpage, name=u'player URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<meta name="title" content="([^"]*)"',
|
||||
webpage, u'title').split(' : ')[-1]
|
||||
webpage, 'title').split(' : ')[-1]
|
||||
|
||||
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config URL')
|
||||
configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
|
||||
configUrl = compat_urllib_parse.unquote(configUrl)
|
||||
|
||||
formats = []
|
||||
|
||||
def _add_format(name, cfgurl):
|
||||
configJSON = self._download_webpage(
|
||||
cfgurl, videoId,
|
||||
u'Downloading ' + name + ' configuration',
|
||||
u'Unable to download ' + name + ' configuration')
|
||||
def _add_format(name, cfgurl, quality):
|
||||
config = self._download_json(
|
||||
cfgurl, video_id,
|
||||
'Downloading ' + name + ' configuration',
|
||||
'Unable to download ' + name + ' configuration',
|
||||
transform_source=lambda s: s.replace("'", '"'))
|
||||
|
||||
# Technically, it's JavaScript, not JSON
|
||||
configJSON = configJSON.replace("'", '"')
|
||||
|
||||
try:
|
||||
config = json.loads(configJSON)
|
||||
except (ValueError,) as err:
|
||||
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
|
||||
playlist = config['playlist']
|
||||
formats.append({
|
||||
'url': playlist[1]['url'],
|
||||
'format_id': name,
|
||||
'quality': quality,
|
||||
})
|
||||
|
||||
_add_format(u'normal', configUrl)
|
||||
_add_format('normal', configUrl, quality=0)
|
||||
hq_url = (configUrl +
|
||||
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
|
||||
try:
|
||||
_add_format(u'hq', hq_url)
|
||||
_add_format('hq', hq_url, quality=1)
|
||||
except ExtractorError:
|
||||
pass # That's fine, we'll just use normal quality
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': videoId,
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'uploader': showName,
|
||||
'title': title,
|
||||
|
@@ -1,56 +1,58 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ExfmIE(InfoExtractor):
|
||||
IE_NAME = u'exfm'
|
||||
IE_DESC = u'ex.fm'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
|
||||
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
|
||||
IE_NAME = 'exfm'
|
||||
IE_DESC = 'ex.fm'
|
||||
_VALID_URL = r'http://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
|
||||
_SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://ex.fm/song/eh359',
|
||||
u'file': u'44216187.mp3',
|
||||
u'md5': u'e45513df5631e6d760970b14cc0c11e7',
|
||||
u'info_dict': {
|
||||
u"title": u"Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive",
|
||||
u"uploader": u"deadjournalist",
|
||||
u'upload_date': u'20120424',
|
||||
u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
|
||||
'url': 'http://ex.fm/song/eh359',
|
||||
'md5': 'e45513df5631e6d760970b14cc0c11e7',
|
||||
'info_dict': {
|
||||
'id': '44216187',
|
||||
'ext': 'mp3',
|
||||
'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive',
|
||||
'uploader': 'deadjournalist',
|
||||
'upload_date': '20120424',
|
||||
'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
|
||||
},
|
||||
u'note': u'Soundcloud song',
|
||||
u'skip': u'The site is down too often',
|
||||
'note': 'Soundcloud song',
|
||||
'skip': 'The site is down too often',
|
||||
},
|
||||
{
|
||||
u'url': u'http://ex.fm/song/wddt8',
|
||||
u'file': u'wddt8.mp3',
|
||||
u'md5': u'966bd70741ac5b8570d8e45bfaed3643',
|
||||
u'info_dict': {
|
||||
u'title': u'Safe and Sound',
|
||||
u'uploader': u'Capital Cities',
|
||||
'url': 'http://ex.fm/song/wddt8',
|
||||
'md5': '966bd70741ac5b8570d8e45bfaed3643',
|
||||
'info_dict': {
|
||||
'id': 'wddt8',
|
||||
'ext': 'mp3',
|
||||
'title': 'Safe and Sound',
|
||||
'uploader': 'Capital Cities',
|
||||
},
|
||||
u'skip': u'The site is down too often',
|
||||
'skip': 'The site is down too often',
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
song_id = mobj.group(1)
|
||||
info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
|
||||
webpage = self._download_webpage(info_url, song_id)
|
||||
info = json.loads(webpage)
|
||||
song_url = info['song']['url']
|
||||
song_id = mobj.group('id')
|
||||
info_url = "http://ex.fm/api/v3/song/%s" % song_id
|
||||
info = self._download_json(info_url, song_id)['song']
|
||||
song_url = info['url']
|
||||
if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
|
||||
self.to_screen('Soundcloud song detected')
|
||||
return self.url_result(song_url.replace('/stream',''), 'Soundcloud')
|
||||
return [{
|
||||
'id': song_id,
|
||||
'url': song_url,
|
||||
'ext': 'mp3',
|
||||
'title': info['song']['title'],
|
||||
'thumbnail': info['song']['image']['large'],
|
||||
'uploader': info['song']['artist'],
|
||||
'view_count': info['song']['loved_count'],
|
||||
}]
|
||||
return self.url_result(song_url.replace('/stream', ''), 'Soundcloud')
|
||||
return {
|
||||
'id': song_id,
|
||||
'url': song_url,
|
||||
'ext': 'mp3',
|
||||
'title': info['title'],
|
||||
'thumbnail': info['image']['large'],
|
||||
'uploader': info['artist'],
|
||||
'view_count': info['loved_count'],
|
||||
}
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
@@ -9,16 +11,15 @@ from ..utils import (
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
urlencode_postdata,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class FacebookIE(InfoExtractor):
|
||||
"""Information Extractor for Facebook"""
|
||||
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://)?(?:\w+\.)?facebook\.com/
|
||||
https?://(?:\w+\.)?facebook\.com/
|
||||
(?:[^#?]*\#!/)?
|
||||
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
|
||||
(?:v|video_id)=(?P<id>[0-9]+)
|
||||
@@ -26,21 +27,18 @@ class FacebookIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
||||
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||
_NETRC_MACHINE = 'facebook'
|
||||
IE_NAME = u'facebook'
|
||||
IE_NAME = 'facebook'
|
||||
_TEST = {
|
||||
u'url': u'https://www.facebook.com/photo.php?v=120708114770723',
|
||||
u'file': u'120708114770723.mp4',
|
||||
u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
|
||||
u'info_dict': {
|
||||
u"duration": 279,
|
||||
u"title": u"PEOPLE ARE AWESOME 2013"
|
||||
'url': 'https://www.facebook.com/photo.php?v=120708114770723',
|
||||
'md5': '48975a41ccc4b7a581abd68651c1a5a8',
|
||||
'info_dict': {
|
||||
'id': '120708114770723',
|
||||
'ext': 'mp4',
|
||||
'duration': 279,
|
||||
'title': 'PEOPLE ARE AWESOME 2013',
|
||||
}
|
||||
}
|
||||
|
||||
def report_login(self):
|
||||
"""Report attempt to log in."""
|
||||
self.to_screen(u'Logging in')
|
||||
|
||||
def _login(self):
|
||||
(useremail, password) = self._get_login_info()
|
||||
if useremail is None:
|
||||
@@ -48,11 +46,13 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
||||
login_page_req.add_header('Cookie', 'locale=en_US')
|
||||
self.report_login()
|
||||
login_page = self._download_webpage(login_page_req, None, note=False,
|
||||
errnote=u'Unable to download login page')
|
||||
lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd')
|
||||
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd')
|
||||
login_page = self._download_webpage(login_page_req, None,
|
||||
note='Downloading login page',
|
||||
errnote='Unable to download login page')
|
||||
lsd = self._search_regex(
|
||||
r'<input type="hidden" name="lsd" value="([^"]*)"',
|
||||
login_page, 'lsd')
|
||||
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')
|
||||
|
||||
login_form = {
|
||||
'email': useremail,
|
||||
@@ -65,27 +65,29 @@ class FacebookIE(InfoExtractor):
|
||||
'timezone': '-60',
|
||||
'trynum': '1',
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
try:
|
||||
login_results = compat_urllib_request.urlopen(request).read()
|
||||
login_results = self._download_webpage(request, None,
|
||||
note='Logging in', errnote='unable to fetch login page')
|
||||
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
||||
self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
||||
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
||||
return
|
||||
|
||||
check_form = {
|
||||
'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'),
|
||||
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'),
|
||||
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
|
||||
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
|
||||
'name_action_selected': 'dont_save',
|
||||
'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'),
|
||||
'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
|
||||
}
|
||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
|
||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
check_response = compat_urllib_request.urlopen(check_req).read()
|
||||
check_response = self._download_webpage(check_req, None,
|
||||
note='Confirming login')
|
||||
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
||||
self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.')
|
||||
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
|
||||
self._downloader.report_warning('unable to log in: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -93,8 +95,6 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
|
||||
@@ -107,10 +107,10 @@ class FacebookIE(InfoExtractor):
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
if m_msg is not None:
|
||||
raise ExtractorError(
|
||||
u'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError(u'Cannot parse data')
|
||||
raise ExtractorError('Cannot parse data')
|
||||
data = dict(json.loads(m.group(1)))
|
||||
params_raw = compat_urllib_parse.unquote(data['params'])
|
||||
params = json.loads(params_raw)
|
||||
@@ -119,19 +119,15 @@ class FacebookIE(InfoExtractor):
|
||||
if not video_url:
|
||||
video_url = video_data['sd_src']
|
||||
if not video_url:
|
||||
raise ExtractorError(u'Cannot find video URL')
|
||||
video_duration = int(video_data['video_duration'])
|
||||
thumbnail = video_data['thumbnail_src']
|
||||
raise ExtractorError('Cannot find video URL')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title')
|
||||
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'duration': video_duration,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int(video_data['video_duration']),
|
||||
'thumbnail': video_data['thumbnail_src'],
|
||||
}
|
||||
return [info]
|
||||
|
60
youtube_dl/extractor/firsttv.py
Normal file
60
youtube_dl/extractor/firsttv.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class FirstTVIE(InfoExtractor):
    """Information extractor for the 1tv.ru video archive.

    Scrapes the archive page for the JW Player ``file`` entry plus the
    title, description, Open Graph thumbnail/duration and the
    like/dislike counters shown on the page.
    """

    IE_NAME = 'firsttv'
    IE_DESC = 'Видеоархив - Первый канал'
    _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.1tv.ru/videoarchive/73390',
        'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
        'info_dict': {
            'id': '73390',
            'ext': 'mp4',
            'title': 'Олимпийские канатные дороги',
            'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
            'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
            'duration': 149,
        },
        'skip': 'Only works from Russia',
    }

    def _real_extract(self, url):
        """Return the info dict for the archive page at *url*.

        The video URL and the title are mandatory (their lookups use the
        default, fatal behaviour); description, duration and the vote
        counters are looked up with ``fatal=False`` and may be ``None``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id, 'Downloading page')

        video_url = self._html_search_regex(
            r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')

        title = self._html_search_regex(
            r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
        # The filler inside the empty <div> may be served as the "&nbsp;"
        # entity, a raw U+00A0 or ordinary whitespace; accept all of them
        # instead of insisting on one literal character.
        description = self._html_search_regex(
            r'<div class="descr">\s*<div>(?:&nbsp;|[\s\xa0])*</div>\s*<p>([^<]*)</p></div>',
            webpage, 'description', fatal=False)

        thumbnail = self._og_search_thumbnail(webpage)
        duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)

        like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
            webpage, 'like count', fatal=False)
        dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
            webpage, 'dislike count', fatal=False)

        return {
            'id': video_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            # The optional fields arrive as strings (or None); normalise.
            'duration': int_or_none(duration),
            'like_count': int_or_none(like_count),
            'dislike_count': int_or_none(dislike_count),
        }
|
95
youtube_dl/extractor/fourtube.py
Normal file
95
youtube_dl/extractor/fourtube.py
Normal file
@@ -0,0 +1,95 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
parse_duration,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class FourTubeIE(InfoExtractor):
    """Information extractor for 4tube.com video pages.

    Reads the inline ``playerConfigPlaylist`` JavaScript object for the
    media id, available qualities, title and thumbnail, then asks the
    token service for one download URL per quality.
    """

    IE_NAME = '4tube'
    _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
        'md5': '6516c8ac63b03de06bc8eac14362db4f',
        'info_dict': {
            'id': '209733',
            'ext': 'mp4',
            'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
            'uploader': 'WCP Club',
            'uploader_id': 'wcp-club',
            'upload_date': '20131031',
            'duration': 583,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (with a ``formats`` list) for *url*.

        Uploader, upload date, view count and duration are optional: each
        stays ``None`` when the corresponding markup is not found.
        """
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        webpage_url = 'http://www.4tube.com/videos/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        playlist_json = self._html_search_regex(r'var playerConfigPlaylist\s+=\s+([^;]+)', webpage, 'Playlist')
        media_id = self._search_regex(r'idMedia:\s*(\d+)', playlist_json, 'Media Id')
        sources = self._search_regex(r'sources:\s*\[([^\]]*)\]', playlist_json, 'Sources').split(',')
        title = self._search_regex(r'title:\s*"([^"]*)', playlist_json, 'Title')
        thumbnail_url = self._search_regex(r'image:\s*"([^"]*)', playlist_json, 'Thumbnail', fatal=False)

        # The uploader lookup is non-fatal, so it can yield None; only
        # post-process the snippet when it was actually found.
        uploader_str = self._search_regex(r'<span>Uploaded by</span>(.*?)<span>', webpage, 'uploader', fatal=False)
        uploader = None
        uploader_id = None
        if uploader_str is not None:
            uploader_mobj = re.search(
                r'<a href="/sites/(?P<id>[^"]+)"><strong>(?P<name>[^<]+)</strong></a>', uploader_str)
            if uploader_mobj:
                uploader = uploader_mobj.group('name')
                uploader_id = uploader_mobj.group('id')
            else:
                # No site link: fall back to the plain text of the snippet.
                uploader = clean_html(uploader_str)

        upload_date = None
        view_count = None
        duration = None
        # The meta description carries "Published Date", "Views" and
        # "Length" fields, which are picked out individually below.
        description = self._html_search_meta('description', webpage, 'description')
        if description:
            upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date',
                fatal=False)
            if upload_date:
                upload_date = unified_strdate(upload_date)
            view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False)
            if view_count:
                view_count = str_to_int(view_count)
            duration = parse_duration(self._search_regex(r'Length: (\d+m\d+s)', description, 'duration', fatal=False))

        # One POST to the token service returns the URLs for all qualities.
        token_url = "http://tkn.4tube.com/{0}/desktop/{1}".format(media_id, "+".join(sources))
        headers = {
            b'Content-Type': b'application/x-www-form-urlencoded',
            b'Origin': b'http://www.4tube.com',
        }
        token_req = compat_urllib_request.Request(token_url, b'{}', headers)
        tokens = self._download_json(token_req, video_id)

        # Each entry of ``sources`` is used as a numeric quality label.
        formats = [{
            'url': tokens[source]['token'],
            'format_id': source + 'p',
            'resolution': source + 'p',
            'quality': int(source),
        } for source in sources]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail_url,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'view_count': view_count,
            'duration': duration,
            'age_limit': 18,
            'webpage_url': webpage_url,
        }
|
@@ -184,6 +184,7 @@ class GenerationQuoiIE(InfoExtractor):
|
||||
# It uses Dailymotion
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only available from France',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,18 +1,21 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class FreesoundIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.freesound.org/people/miklovan/sounds/194503/',
|
||||
u'file': u'194503.mp3',
|
||||
u'md5': u'12280ceb42c81f19a515c745eae07650',
|
||||
u'info_dict': {
|
||||
u"title": u"gulls in the city.wav",
|
||||
u"uploader" : u"miklovan",
|
||||
u'description': u'the sounds of seagulls in the city',
|
||||
'url': 'http://www.freesound.org/people/miklovan/sounds/194503/',
|
||||
'md5': '12280ceb42c81f19a515c745eae07650',
|
||||
'info_dict': {
|
||||
'id': '194503',
|
||||
'ext': 'mp3',
|
||||
'title': 'gulls in the city.wav',
|
||||
'uploader': 'miklovan',
|
||||
'description': 'the sounds of seagulls in the city',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,17 +23,17 @@ class FreesoundIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
music_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, music_id)
|
||||
title = self._html_search_regex(r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',
|
||||
webpage, 'music title', flags=re.DOTALL)
|
||||
music_url = self._og_search_property('audio', webpage, 'music url')
|
||||
description = self._html_search_regex(r'<div id="sound_description">(.*?)</div>',
|
||||
webpage, 'description', fatal=False, flags=re.DOTALL)
|
||||
title = self._html_search_regex(
|
||||
r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',
|
||||
webpage, 'music title', flags=re.DOTALL)
|
||||
description = self._html_search_regex(
|
||||
r'<div id="sound_description">(.*?)</div>', webpage, 'description',
|
||||
fatal=False, flags=re.DOTALL)
|
||||
|
||||
return [{
|
||||
'id': music_id,
|
||||
'title': title,
|
||||
'url': music_url,
|
||||
return {
|
||||
'id': music_id,
|
||||
'title': title,
|
||||
'url': self._og_search_property('audio', webpage, 'music url'),
|
||||
'uploader': self._og_search_property('audio:artist', webpage, 'music uploader'),
|
||||
'ext': determine_ext(music_url),
|
||||
'description': description,
|
||||
}]
|
||||
}
|
||||
|
@@ -1,12 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FunnyOrDieIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
|
||||
_VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
|
||||
_TEST = {
|
||||
'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
|
||||
'file': '0732f586d7.mp4',
|
||||
@@ -30,10 +31,20 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
[r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
|
||||
webpage, 'video URL', flags=re.DOTALL)
|
||||
|
||||
if mobj.group('type') == 'embed':
|
||||
post_json = self._search_regex(
|
||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
||||
post = json.loads(post_json)['attachment']
|
||||
title = post['name']
|
||||
description = post.get('description')
|
||||
else:
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -6,13 +8,14 @@ from .common import InfoExtractor
|
||||
class GamekingsIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
||||
_TEST = {
|
||||
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
|
||||
u'file': u'20130811.mp4',
|
||||
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||
# MD5 is flaky, seems to change regularly
|
||||
#u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||
u'info_dict': {
|
||||
u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
|
||||
u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
|
||||
'id': '20130811',
|
||||
'ext': 'mp4',
|
||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
||||
'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -7,10 +7,11 @@ class GametrailersIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
|
||||
_TEST = {
|
||||
'url': 'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
|
||||
'file': '70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
|
||||
'md5': '4c8e67681a0ea7ec241e8c09b3ea8cf7',
|
||||
'info_dict': {
|
||||
'title': 'Mirror\'s Edge 2|E3 2013: Debut Trailer',
|
||||
'id': '70e9a5d7-cf25-4a10-9104-6f3e7342ae0d',
|
||||
'ext': 'mp4',
|
||||
'title': 'E3 2013: Debut Trailer',
|
||||
'description': 'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
|
||||
},
|
||||
}
|
||||
|
134
youtube_dl/extractor/gdcvault.py
Normal file
134
youtube_dl/extractor/gdcvault.py
Normal file
@@ -0,0 +1,134 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
    """Information extractor for gdcvault.com talk recordings.

    The play page embeds a player iframe whose URL points at an XML
    description of the talk; that XML lists either HTTP MP4 renditions
    or a pair of RTMP streams (slides and speaker).
    """

    _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
    _TESTS = [
        {
            'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
            'md5': '7ce8388f544c88b7ac11c7ab1b593704',
            'info_dict': {
                'id': '1019721',
                'ext': 'mp4',
                'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
            }
        },
        {
            'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
            'info_dict': {
                'id': '1015683',
                'ext': 'flv',
                'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
            },
            'params': {
                'skip_download': True,  # Requires rtmpdump
            }
        },
    ]

    def _parse_mp4(self, xml_description):
        """Return the MP4 formats listed in *xml_description*.

        Returns ``None`` when the document has no ``metadata/mp4video``
        element, which the caller treats as "use the FLV streams".
        """
        video_formats = []
        mp4_video = xml_description.find('./metadata/mp4video')
        if mp4_video is None:
            return None

        # Only the scheme-and-host prefix of the mp4video URL is reused;
        # each rendition supplies its own path.
        mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text)
        video_root = mobj.group('root')
        renditions = xml_description.findall('./metadata/MBRVideos/MBRVideo')
        for rendition in renditions:
            mobj = re.match(r'mp4\:(?P<path>.*)', rendition.find('streamName').text)
            url = video_root + mobj.group('path')
            vbr = rendition.find('bitrate').text
            video_formats.append({
                'url': url,
                'vbr': int(vbr),
            })
        return video_formats

    def _parse_flv(self, xml_description):
        """Return the two RTMP formats (slides, speaker) from the XML."""
        video_formats = []
        akami_url = xml_description.find('./metadata/akamaiHost').text
        slide_video_path = xml_description.find('./metadata/slideVideo').text
        video_formats.append({
            'url': 'rtmp://' + akami_url + '/' + slide_video_path,
            'format_note': 'slide deck video',
            'quality': -2,
            'preference': -2,
            'format_id': 'slides',
        })
        speaker_video_path = xml_description.find('./metadata/speakerVideo').text
        video_formats.append({
            'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
            'format_note': 'speaker video',
            'quality': -1,
            'preference': -1,
            'format_id': 'speaker',
        })
        return video_formats

    def _login(self, webpage_url, video_id):
        """Log in, fetch *webpage_url* while authenticated, then log out.

        Returns the authenticated page, or ``None`` (after a warning)
        when no username/password is configured.
        """
        (username, password) = self._get_login_info()
        if username is None or password is None:
            self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
            return None

        mobj = re.match(r'(?P<root_url>https?://.*?/).*', webpage_url)
        login_url = mobj.group('root_url') + 'api/login.php'
        logout_url = mobj.group('root_url') + 'logout'

        login_form = {
            'email': username,
            'password': password,
        }

        # urlencode() yields text, but a POST body has to be bytes on
        # Python 3; its output is pure ASCII, so encoding is lossless.
        post_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(login_url, post_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        self._download_webpage(request, video_id, 'Logging in')
        start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page')
        self._download_webpage(logout_url, video_id, 'Logging out')

        return start_page

    def _real_extract(self, url):
        """Return the info dict (id, title, formats) for *url*.

        Raises ExtractorError when the page needs a login and none could
        be performed.
        """
        # Imported locally so this block does not depend on the module's
        # import list already exposing ExtractorError.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        webpage_url = 'http://www.gdcvault.com/play/' + video_id
        start_page = self._download_webpage(webpage_url, video_id)

        # NOTE(review): the trailing positional ``None, False`` look like
        # default=None, fatal=False - confirm against _html_search_regex.
        xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False)

        if xml_root is None:
            # Probably need to authenticate
            start_page = self._login(webpage_url, video_id)
            if start_page is None:
                # Without a page there is nothing left to parse; fail
                # cleanly instead of crashing on None further down.
                raise ExtractorError('Could not login.', expected=True)
            # Grab the url from the authenticated page
            xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')

        xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
        if xml_name is None:
            # Fallback to the older format
            xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')

        xml_description_url = xml_root + 'xml/' + xml_name
        xml_description = self._download_xml(xml_description_url, video_id)

        video_title = xml_description.find('./metadata/title').text
        video_formats = self._parse_mp4(xml_description)
        if video_formats is None:
            video_formats = self._parse_flv(xml_description)

        return {
            'id': video_id,
            'title': video_title,
            'formats': video_formats,
        }
|
@@ -12,9 +12,11 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
parse_xml,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@@ -81,10 +83,10 @@ class GenericIE(InfoExtractor):
|
||||
# Direct link to a video
|
||||
{
|
||||
'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
|
||||
'file': 'trailer.mp4',
|
||||
'md5': '67d406c2bcb6af27fa886f31aa934bbe',
|
||||
'info_dict': {
|
||||
'id': 'trailer',
|
||||
'ext': 'mp4',
|
||||
'title': 'trailer',
|
||||
'upload_date': '20100513',
|
||||
}
|
||||
@@ -92,7 +94,6 @@ class GenericIE(InfoExtractor):
|
||||
# ooyala video
|
||||
{
|
||||
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
||||
'file': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ.mp4',
|
||||
'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
|
||||
'info_dict': {
|
||||
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
|
||||
@@ -100,6 +101,50 @@ class GenericIE(InfoExtractor):
|
||||
'title': '2cc213299525360.mov', # that's what we get
|
||||
},
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||
'info_dict': {
|
||||
'id': 'cmQHVoWB5FY',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20130224',
|
||||
'uploader_id': 'TheVerge',
|
||||
'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
|
||||
'uploader': 'The Verge',
|
||||
'title': 'First Firefox OS phones side-by-side',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': False,
|
||||
}
|
||||
},
|
||||
# embed.ly video
|
||||
{
|
||||
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
|
||||
'info_dict': {
|
||||
'id': '9ODmcdjQcHQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
|
||||
'upload_date': '20140225',
|
||||
'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
|
||||
'uploader': 'Tested',
|
||||
'uploader_id': 'testedcom',
|
||||
},
|
||||
# No need to test YoutubeIE here
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# funnyordie embed
|
||||
{
|
||||
'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
|
||||
'md5': '7cf780be104d40fea7bae52eed4a470e',
|
||||
'info_dict': {
|
||||
'id': '18e820ec3f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
|
||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
@@ -159,6 +204,25 @@ class GenericIE(InfoExtractor):
|
||||
raise ExtractorError('Invalid URL protocol')
|
||||
return response
|
||||
|
||||
def _extract_rss(self, url, video_id, doc):
|
||||
playlist_title = doc.find('./channel/title').text
|
||||
playlist_desc_el = doc.find('./channel/description')
|
||||
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
||||
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'url': e.find('link').text,
|
||||
'title': e.find('title').text,
|
||||
} for e in doc.findall('./channel/item')]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': url,
|
||||
'title': playlist_title,
|
||||
'description': playlist_desc,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
if not parsed_url.scheme:
|
||||
@@ -175,7 +239,7 @@ class GenericIE(InfoExtractor):
|
||||
else:
|
||||
assert ':' in default_search
|
||||
return self.url_result(default_search + url)
|
||||
video_id = os.path.splitext(url.split('/')[-1])[0]
|
||||
video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
|
||||
|
||||
self.to_screen('%s: Requesting header' % video_id)
|
||||
|
||||
@@ -219,6 +283,14 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed?
|
||||
try:
|
||||
doc = parse_xml(webpage)
|
||||
if doc.tag == 'rss':
|
||||
return self._extract_rss(url, video_id, doc)
|
||||
except compat_xml_parse_error:
|
||||
pass
|
||||
|
||||
# it's tempting to parse this further, but you would
|
||||
# have to take into account all the variations like
|
||||
# Video Title - Site Name
|
||||
@@ -334,11 +406,17 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group(1), 'Mpora')
|
||||
|
||||
# Look for embedded Novamov player
|
||||
# Look for embedded NovaMov player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Novamov')
|
||||
return self.url_result(mobj.group('url'), 'NovaMov')
|
||||
|
||||
# Look for embedded NowVideo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'NowVideo')
|
||||
|
||||
# Look for embedded Facebook player
|
||||
mobj = re.search(
|
||||
@@ -346,12 +424,33 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Facebook')
|
||||
|
||||
# Look for embedded VK player
|
||||
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'VK')
|
||||
|
||||
# Look for embedded Huffington Post player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'HuffPost')
|
||||
|
||||
# Look for embed.ly
|
||||
mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
|
||||
|
||||
# Look for funnyordie embed
|
||||
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
|
||||
for eurl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
@@ -376,6 +475,18 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is None:
|
||||
# HTML5 video
|
||||
mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
|
||||
if mobj is None:
|
||||
mobj = re.search(
|
||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
|
||||
webpage)
|
||||
if mobj:
|
||||
new_url = mobj.group(1)
|
||||
self.report_following_redirect(new_url)
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': new_url,
|
||||
}
|
||||
if mobj is None:
|
||||
raise ExtractorError('Unsupported URL: %s' % url)
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
@@ -10,32 +11,28 @@ from ..utils import (
|
||||
|
||||
|
||||
class GooglePlusIE(InfoExtractor):
|
||||
IE_DESC = u'Google Plus'
|
||||
_VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
|
||||
IE_NAME = u'plus.google'
|
||||
IE_DESC = 'Google Plus'
|
||||
_VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
|
||||
IE_NAME = 'plus.google'
|
||||
_TEST = {
|
||||
u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
|
||||
u"file": u"ZButuJc6CtH.flv",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20120613",
|
||||
u"uploader": u"井上ヨシマサ",
|
||||
u"title": u"嘆きの天使 降臨"
|
||||
'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
|
||||
'info_dict': {
|
||||
'id': 'ZButuJc6CtH',
|
||||
'ext': 'flv',
|
||||
'upload_date': '20120613',
|
||||
'uploader': '井上ヨシマサ',
|
||||
'title': '嘆きの天使 降臨',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract id from URL
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
post_url = mobj.group(0)
|
||||
video_id = mobj.group(1)
|
||||
|
||||
video_extension = 'flv'
|
||||
video_id = mobj.group('id')
|
||||
|
||||
# Step 1, Retrieve post webpage to extract further information
|
||||
webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading entry webpage')
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
@@ -43,7 +40,7 @@ class GooglePlusIE(InfoExtractor):
|
||||
upload_date = self._html_search_regex(
|
||||
r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
|
||||
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
|
||||
webpage, u'upload date', fatal=False, flags=re.VERBOSE)
|
||||
webpage, 'upload date', fatal=False, flags=re.VERBOSE)
|
||||
if upload_date:
|
||||
# Convert timestring to a format suitable for filename
|
||||
upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
|
||||
@@ -51,28 +48,27 @@ class GooglePlusIE(InfoExtractor):
|
||||
|
||||
# Extract uploader
|
||||
uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
|
||||
webpage, u'uploader', fatal=False)
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
# Extract title
|
||||
# Get the first line for title
|
||||
video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
|
||||
webpage, 'title', default=u'NA')
|
||||
webpage, 'title', default='NA')
|
||||
|
||||
# Step 2, Simulate clicking the image box to launch video
|
||||
DOMAIN = 'https://plus.google.com/'
|
||||
video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
|
||||
webpage, u'video page URL')
|
||||
webpage, 'video page URL')
|
||||
if not video_page.startswith(DOMAIN):
|
||||
video_page = DOMAIN + video_page
|
||||
|
||||
webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
|
||||
webpage = self._download_webpage(video_page, video_id, 'Downloading video page')
|
||||
|
||||
# Extract video links on video page
|
||||
"""Extract video links of all sizes"""
|
||||
# Extract video links all sizes
|
||||
pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
|
||||
mobj = re.findall(pattern, webpage)
|
||||
if len(mobj) == 0:
|
||||
raise ExtractorError(u'Unable to extract video links')
|
||||
raise ExtractorError('Unable to extract video links')
|
||||
|
||||
# Sort in resolution
|
||||
links = sorted(mobj)
|
||||
@@ -87,12 +83,11 @@ class GooglePlusIE(InfoExtractor):
|
||||
except AttributeError: # Python 3
|
||||
video_url = bytes(video_url, 'ascii').decode('unicode-escape')
|
||||
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'title': video_title,
|
||||
'ext': video_extension,
|
||||
}]
|
||||
'upload_date': upload_date,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
}
|
||||
|
62
youtube_dl/extractor/helsinki.py
Normal file
62
youtube_dl/extractor/helsinki.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HelsinkiIE(InfoExtractor):
    """Information extractor for video.helsinki.fi archive player pages."""

    IE_DESC = 'helsinki.fi'
    _VALID_URL = r'https?://video\.helsinki\.fi/Arkisto/flash\.php\?id=(?P<id>\d+)'
    _TEST = {
        'url': 'http://video.helsinki.fi/Arkisto/flash.php?id=20258',
        'info_dict': {
            'id': '20258',
            'ext': 'mp4',
            'title': 'Tietotekniikkafoorumi-iltapäivä',
            'description': 'md5:f5c904224d43c133225130fe156a5ee0',
        },
        'params': {
            'skip_download': True,  # RTMP
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the archive page at *url*.

        The numeric id is taken from the URL, the page is downloaded, and
        up to two RTMP formats (sd and hd) are collected from the
        ``file=`` / ``hd.file=`` parameters found in the page. Title,
        description and thumbnail come from the page's OpenGraph data.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # (page pattern, format_note, quality) -- probed in this order,
        # so the sd entry is always appended before the hd one.
        variants = (
            (r'file=((\w+):[^&]+)', 'sd', 0),
            (r'hd\.file=((\w+):[^&]+)', 'hd', 1),
        )

        formats = []
        for pattern, note, quality in variants:
            found = re.search(pattern, webpage)
            if not found:
                continue
            # Group 1 is the whole "<ext>:<path>" value, group 2 only the
            # part before the colon.
            formats.append({
                'ext': found.group(2),
                'play_path': found.group(1),
                'url': 'rtmp://flashvideo.it.helsinki.fi/vod/',
                'player_url': 'http://video.helsinki.fi/player.swf',
                'format_note': note,
                'quality': quality,
            })

        self._sort_formats(formats)

        # Looked up in the same order as before: title, description,
        # thumbnail.
        title = self._og_search_title(webpage).replace('Video: ', '')
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
@@ -1,17 +1,20 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HowcastIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||
u'file': u'390161.mp4',
|
||||
u'md5': u'8b743df908c42f60cf6496586c7f12c3',
|
||||
u'info_dict': {
|
||||
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
|
||||
u"title": u"How to Tie a Square Knot Properly"
|
||||
'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||
'md5': '8b743df908c42f60cf6496586c7f12c3',
|
||||
'info_dict': {
|
||||
'id': '390161',
|
||||
'ext': 'mp4',
|
||||
'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
|
||||
'title': 'How to Tie a Square Knot Properly',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,22 +27,15 @@ class HowcastIE(InfoExtractor):
|
||||
self.report_extraction(video_id)
|
||||
|
||||
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
|
||||
webpage, u'video URL')
|
||||
|
||||
video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
|
||||
webpage, u'title')
|
||||
webpage, 'video URL')
|
||||
|
||||
video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
|
||||
webpage, u'description', fatal=False)
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
|
||||
webpage, u'thumbnail', fatal=False)
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': video_title,
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': video_description,
|
||||
'thumbnail': thumbnail,
|
||||
}]
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
@@ -1,35 +1,39 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?instagram\.com/p/(.*?)/'
|
||||
_VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/'
|
||||
_TEST = {
|
||||
u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
u'file': u'aye83DjauH.mp4',
|
||||
u'md5': u'0d2da106a9d2631273e192b372806516',
|
||||
u'info_dict': {
|
||||
u"uploader_id": u"naomipq",
|
||||
u"title": u"Video by naomipq",
|
||||
u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||
'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
'md5': '0d2da106a9d2631273e192b372806516',
|
||||
'info_dict': {
|
||||
'id': 'aye83DjauH',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'naomipq',
|
||||
'title': 'Video by naomipq',
|
||||
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
||||
webpage, u'uploader id', fatal=False)
|
||||
desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
|
||||
fatal=False)
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': self._og_search_video_url(webpage, secure=False),
|
||||
'ext': 'mp4',
|
||||
'title': u'Video by %s' % uploader_id,
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': self._og_search_video_url(webpage, secure=False),
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by %s' % uploader_id,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader_id' : uploader_id,
|
||||
'uploader_id': uploader_id,
|
||||
'description': desc,
|
||||
}]
|
||||
}
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import compat_urllib_request
|
||||
|
||||
|
||||
class IPrimaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/[^?#]+/(?P<id>[^?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://play.iprima.cz/particka/particka-92',
|
||||
@@ -22,20 +22,32 @@ class IPrimaIE(InfoExtractor):
|
||||
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
},
|
||||
]
|
||||
}, {
|
||||
'url': 'http://play.iprima.cz/particka/tchibo-particka-jarni-moda',
|
||||
'info_dict': {
|
||||
'id': '9718337',
|
||||
'ext': 'flv',
|
||||
'title': 'Tchibo Partička - Jarní móda',
|
||||
'description': 'md5:589f8f59f414220621ff8882eb3ce7be',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
|
||||
floor(random()*1073741824),
|
||||
floor(random()*1073741824))
|
||||
player_url = (
|
||||
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
|
||||
(floor(random()*1073741824), floor(random()*1073741824))
|
||||
)
|
||||
|
||||
req = compat_urllib_request.Request(player_url)
|
||||
req.add_header('Referer', url)
|
||||
@@ -44,18 +56,20 @@ class IPrimaIE(InfoExtractor):
|
||||
base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])
|
||||
|
||||
zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
|
||||
|
||||
if zoneGEO != '0':
|
||||
base_url = base_url.replace('token', 'token_'+zoneGEO)
|
||||
base_url = base_url.replace('token', 'token_' + zoneGEO)
|
||||
|
||||
formats = []
|
||||
for format_id in ['lq', 'hq', 'hd']:
|
||||
filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename')
|
||||
filename = self._html_search_regex(
|
||||
r'"%s_id":(.+?),' % format_id, webpage, 'filename')
|
||||
|
||||
if filename == 'null':
|
||||
continue
|
||||
|
||||
real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')
|
||||
real_id = self._search_regex(
|
||||
r'Prima-(?:[0-9]{10}|WEB)-([0-9]+)[-_]',
|
||||
filename, 'real video id')
|
||||
|
||||
if format_id == 'lq':
|
||||
quality = 0
|
||||
@@ -63,13 +77,13 @@ class IPrimaIE(InfoExtractor):
|
||||
quality = 1
|
||||
elif format_id == 'hd':
|
||||
quality = 2
|
||||
filename = 'hq/'+filename
|
||||
filename = 'hq/' + filename
|
||||
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'quality': quality,
|
||||
'play_path': 'mp4:'+filename.replace('"', '')[:-4],
|
||||
'play_path': 'mp4:' + filename.replace('"', '')[:-4],
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
@@ -14,15 +14,16 @@ from ..utils import (
|
||||
class IviIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru'
|
||||
IE_NAME = 'ivi'
|
||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
# Single movie
|
||||
{
|
||||
'url': 'http://www.ivi.ru/watch/53141',
|
||||
'file': '53141.mp4',
|
||||
'md5': '6ff5be2254e796ed346251d117196cf4',
|
||||
'info_dict': {
|
||||
'id': '53141',
|
||||
'ext': 'mp4',
|
||||
'title': 'Иван Васильевич меняет профессию',
|
||||
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
|
||||
'duration': 5498,
|
||||
@@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
|
||||
# Serial's serie
|
||||
{
|
||||
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
||||
'file': '74791.mp4',
|
||||
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
||||
'info_dict': {
|
||||
'id': '74791',
|
||||
'ext': 'mp4',
|
||||
'title': 'Дежурный ангел - 1 серия',
|
||||
'duration': 2490,
|
||||
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
||||
@@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
|
||||
class IviCompilationIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru compilations'
|
||||
IE_NAME = 'ivi:compilation'
|
||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
||||
|
||||
def _extract_entries(self, html, compilation_id):
|
||||
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
|
||||
|
48
youtube_dl/extractor/jadorecettepub.py
Normal file
48
youtube_dl/extractor/jadorecettepub.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class JadoreCettePubIE(InfoExtractor):
    """Information extractor for jadorecettepub.com post pages.

    The post's own title and description are kept, while the actual media
    is delegated to the video URL found inside the post.
    """

    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'

    _TEST = {
        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
        'md5': '401286a06067c70b44076044b66515de',
        'info_dict': {
            'id': 'jLMja3tr7a4',
            'ext': 'mp4',
            'title': 'La pire utilisation de Star Wars',
            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon. Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
        },
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the post at *url*.

        The slug from the URL is only used as a display id while
        downloading; the returned ``id`` is whatever
        ``YoutubeIE.extract_id`` derives from the linked video URL.
        """
        display_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, display_id)

        post_title = self._html_search_regex(
            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
            page, 'title')
        # The description is optional (fatal=False), unlike the title and
        # the video URL.
        post_description = self._html_search_regex(
            r'(?s)<div id="fb-root">(.*?)<script>', page, 'description',
            fatal=False)
        target_url = self._search_regex(
            r'\[/postlink\](.*)endofvid', page, 'video URL')

        info = {'_type': 'url_transparent', 'url': target_url}
        info['id'] = YoutubeIE.extract_id(target_url)
        info['title'] = post_title
        info['description'] = post_description
        return info
|
||||
|
@@ -1,5 +1,7 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -10,12 +12,13 @@ class JeuxVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
u'file': u'5182.mp4',
|
||||
u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',
|
||||
u'info_dict': {
|
||||
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
||||
'info_dict': {
|
||||
'id': '5182',
|
||||
'ext': 'mp4',
|
||||
'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -25,14 +28,14 @@ class JeuxVideoIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, title)
|
||||
xml_link = self._html_search_regex(
|
||||
r'<param name="flashvars" value="config=(.*?)" />',
|
||||
webpage, u'config URL')
|
||||
webpage, 'config URL')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||
xml_link, u'video ID')
|
||||
xml_link, 'video ID')
|
||||
|
||||
config = self._download_xml(
|
||||
xml_link, title, u'Downloading XML config')
|
||||
xml_link, title, 'Downloading XML config')
|
||||
info_json = config.find('format.json').text
|
||||
info = json.loads(info_json)['versions'][0]
|
||||
|
||||
|
@@ -1,56 +1,61 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
RegexNotFoundError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class JukeboxIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
|
||||
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
|
||||
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
|
||||
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
|
||||
_IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'
|
||||
_TEST = {
|
||||
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
|
||||
'md5': '5dc6477e74b1e37042ac5acedd8413e5',
|
||||
'info_dict': {
|
||||
'id': 'r303r',
|
||||
'ext': 'flv',
|
||||
'title': 'Kosheen-En Vivo Pride',
|
||||
'uploader': 'Kosheen',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
|
||||
html = self._download_webpage(url, video_id)
|
||||
|
||||
mobj = re.search(self._IFRAME, html)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Cannot extract iframe url')
|
||||
iframe_url = unescapeHTML(mobj.group('iframe'))
|
||||
iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))
|
||||
|
||||
iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
|
||||
mobj = re.search(r'class="jkb_waiting"', iframe_html)
|
||||
if mobj is not None:
|
||||
raise ExtractorError(u'Video is not available(in your country?)!')
|
||||
if re.search(r'class="jkb_waiting"', iframe_html) is not None:
|
||||
raise ExtractorError('Video is not available(in your country?)!')
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
mobj = re.search(self._VIDEO_URL, iframe_html)
|
||||
if mobj is None:
|
||||
mobj = re.search(self._IS_YOUTUBE, iframe_html)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Cannot extract video url')
|
||||
youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/')
|
||||
self.to_screen(u'Youtube video detected')
|
||||
return self.url_result(youtube_url,ie='Youtube')
|
||||
video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/')
|
||||
video_ext = unescapeHTML(mobj.group('video_ext'))
|
||||
try:
|
||||
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
|
||||
iframe_html, 'video url')
|
||||
video_url = unescapeHTML(video_url).replace('\/', '/')
|
||||
except RegexNotFoundError:
|
||||
youtube_url = self._search_regex(
|
||||
r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"',
|
||||
iframe_html, 'youtube url')
|
||||
youtube_url = unescapeHTML(youtube_url).replace('\/', '/')
|
||||
self.to_screen('Youtube video detected')
|
||||
return self.url_result(youtube_url, ie='Youtube')
|
||||
|
||||
mobj = re.search(self._TITLE, html)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Cannot extract title')
|
||||
title = unescapeHTML(mobj.group('title'))
|
||||
artist = unescapeHTML(mobj.group('artist'))
|
||||
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
|
||||
html, 'title')
|
||||
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
|
||||
html, 'artist')
|
||||
|
||||
return [{'id': video_id,
|
||||
'url': video_url,
|
||||
'title': artist + '-' + title,
|
||||
'ext': video_ext
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': artist + '-' + title,
|
||||
'uploader': artist,
|
||||
}
|
||||
|
66
youtube_dl/extractor/kontrtube.py
Normal file
66
youtube_dl/extractor/kontrtube.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class KontrTubeIE(InfoExtractor):
    """Information extractor for kontrtube.ru video pages."""

    IE_NAME = 'kontrtube'
    IE_DESC = 'KontrTube.ru - Труба зовёт'
    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'

    _TEST = {
        'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
        'md5': '975a991a4926c9a85f383a736a2e6b80',
        'info_dict': {
            'id': '2678',
            'ext': 'mp4',
            'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
            'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
            'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
            'duration': 270,
        }
    }

    def _real_extract(self, url):
        """Scrape the video page at *url* and return its info dict.

        The video URL and title are mandatory. Thumbnail, duration, view
        count and comment count are best-effort and are reported as None
        when the corresponding markup is not found on the page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id, 'Downloading page')

        video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
        thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
        title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
            'video title')
        description = self._html_search_meta('description', webpage, 'video description')

        # Duration is shown on the page as "<minutes>м:<seconds>с".
        mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
            webpage)
        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None

        view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
            'view count', fatal=False)
        view_count = int(view_count) if view_count is not None else None

        comment_count = None
        comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
            fatal=False)
        # The lookup is non-fatal, so comment_str can be None when the
        # comment markup is absent. Guard before calling str methods on it;
        # otherwise an optional field would crash the whole extraction.
        if comment_str is not None:
            if comment_str.startswith('комментариев нет'):
                # The page text explicitly says there are no comments.
                comment_count = 0
            else:
                mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
                if mobj:
                    comment_count = int(mobj.group('total'))

        return {
            'id': video_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
        }
|
@@ -4,19 +4,24 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class LifeNewsIE(InfoExtractor):
|
||||
IE_NAME = 'lifenews'
|
||||
IE_DESC = 'LIFE | NEWS'
|
||||
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
|
||||
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://lifenews.ru/news/126342',
|
||||
'file': '126342.mp4',
|
||||
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
|
||||
'info_dict': {
|
||||
'id': '126342',
|
||||
'ext': 'mp4',
|
||||
'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
|
||||
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
|
||||
'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
|
||||
@@ -28,13 +33,11 @@ class LifeNewsIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
|
||||
webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
|
||||
videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
|
||||
if not videos:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
|
||||
@@ -44,20 +47,28 @@ class LifeNewsIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class=\'views\'>(\d+)</div>', webpage, 'view count')
|
||||
r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False)
|
||||
comment_count = self._html_search_regex(
|
||||
r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count')
|
||||
r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')
|
||||
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False)
|
||||
if upload_date is not None:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'upload_date': unified_strdate(upload_date),
|
||||
}
|
||||
def make_entry(video_id, media, video_number=None):
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': media[1],
|
||||
'thumbnail': media[0],
|
||||
'title': title if video_number is None else '%s-video%s' % (title, video_number),
|
||||
'description': description,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
if len(videos) == 1:
|
||||
return make_entry(video_id, videos[0])
|
||||
else:
|
||||
return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)]
|
@@ -4,15 +4,17 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||
'file': '757_1364311680.mp4',
|
||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
||||
'info_dict': {
|
||||
'id': '757_1364311680',
|
||||
'ext': 'mp4',
|
||||
'description': 'extremely bad day for this guy..!',
|
||||
'uploader': 'ljfriel2',
|
||||
'title': 'Most unlucky car accident'
|
||||
@@ -20,25 +22,62 @@ class LiveLeakIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||
'file': 'f93_1390833151.mp4',
|
||||
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
||||
'info_dict': {
|
||||
'id': 'f93_1390833151',
|
||||
'ext': 'mp4',
|
||||
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
|
||||
'uploader': 'ARD_Stinkt',
|
||||
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
|
||||
'md5': '42c6d97d54f1db107958760788c5f48f',
|
||||
'info_dict': {
|
||||
'id': '4f7_1392687779',
|
||||
'ext': 'mp4',
|
||||
'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
|
||||
'uploader': 'CapObveus',
|
||||
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('video_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
|
||||
video_description = self._og_search_description(webpage)
|
||||
video_uploader = self._html_search_regex(
|
||||
r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
|
||||
age_limit = int_or_none(self._search_regex(
|
||||
r'you confirm that you are ([0-9]+) years and over.',
|
||||
webpage, 'age limit', default=None))
|
||||
|
||||
sources_raw = self._search_regex(
|
||||
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
|
||||
if sources_raw is None:
|
||||
sources_raw = '[{ %s}]' % (
|
||||
self._search_regex(r'(file: ".*?"),', webpage, 'video URL'))
|
||||
alt_source = self._search_regex(
|
||||
r'(file: ".*?"),', webpage, 'video URL', default=None)
|
||||
if alt_source:
|
||||
sources_raw = '[{ %s}]' % alt_source
|
||||
else:
|
||||
# Maybe an embed?
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"',
|
||||
webpage, 'embed URL')
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'uploader': video_uploader,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
|
||||
sources = json.loads(sources_json)
|
||||
@@ -49,15 +88,11 @@ class LiveLeakIE(InfoExtractor):
|
||||
} for s in sources]
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
|
||||
video_description = self._og_search_description(webpage)
|
||||
video_uploader = self._html_search_regex(
|
||||
r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'uploader': video_uploader,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
@@ -8,7 +8,9 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
ExtractorError
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
@@ -19,16 +21,17 @@ class LyndaIE(SubtitlesInfoExtractor):
|
||||
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
||||
_NETRC_MACHINE = 'lynda'
|
||||
|
||||
_SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account'
|
||||
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
|
||||
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
||||
|
||||
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||
'file': '114408.mp4',
|
||||
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
|
||||
'info_dict': {
|
||||
'id': '114408',
|
||||
'ext': 'mp4',
|
||||
'title': 'Using the exercise files',
|
||||
'duration': 68
|
||||
}
|
||||
@@ -41,27 +44,44 @@ class LyndaIE(SubtitlesInfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
|
||||
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
|
||||
'Downloading video JSON')
|
||||
video_json = json.loads(page)
|
||||
|
||||
if 'Status' in video_json:
|
||||
raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
|
||||
|
||||
if video_json['HasAccess'] is False:
|
||||
raise ExtractorError('Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
|
||||
raise ExtractorError(
|
||||
'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
|
||||
|
||||
video_id = video_json['ID']
|
||||
video_id = compat_str(video_json['ID'])
|
||||
duration = video_json['DurationInSeconds']
|
||||
title = video_json['Title']
|
||||
|
||||
formats = [{'url': fmt['Url'],
|
||||
formats = []
|
||||
|
||||
fmts = video_json.get('Formats')
|
||||
if fmts:
|
||||
formats.extend([
|
||||
{
|
||||
'url': fmt['Url'],
|
||||
'ext': fmt['Extension'],
|
||||
'width': fmt['Width'],
|
||||
'height': fmt['Height'],
|
||||
'filesize': fmt['FileSize'],
|
||||
'format_id': str(fmt['Resolution'])
|
||||
} for fmt in video_json['Formats']]
|
||||
} for fmt in fmts])
|
||||
|
||||
prioritized_streams = video_json.get('PrioritizedStreams')
|
||||
if prioritized_streams:
|
||||
formats.extend([
|
||||
{
|
||||
'url': video_url,
|
||||
'width': int_or_none(format_id),
|
||||
'format_id': format_id,
|
||||
} for format_id, video_url in prioritized_streams['0'].items()
|
||||
])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -91,7 +111,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
||||
'stayPut': 'false'
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
|
||||
login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
|
||||
|
||||
# Not (yet) logged in
|
||||
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
|
||||
@@ -116,7 +136,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
||||
'stayPut': 'false',
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
|
||||
login_page = self._download_webpage(request, None, note='Confirming log in and log out from another device')
|
||||
login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
|
||||
|
||||
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
|
||||
raise ExtractorError('Unable to log in')
|
||||
@@ -150,7 +170,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
||||
sub = self._download_webpage(url, None, note=False)
|
||||
sub = self._download_webpage(url, None, False)
|
||||
sub_json = json.loads(sub)
|
||||
return {'en': url} if len(sub_json) > 0 else {}
|
||||
|
||||
@@ -179,6 +199,9 @@ class LyndaCourseIE(InfoExtractor):
|
||||
videos = []
|
||||
(username, _) = self._get_login_info()
|
||||
|
||||
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
|
||||
# by single video API anymore
|
||||
|
||||
for chapter in course_json['Chapters']:
|
||||
for video in chapter['Videos']:
|
||||
if username is None and video['HasAccess'] is False:
|
||||
|
66
youtube_dl/extractor/mailru.py
Normal file
66
youtube_dl/extractor/mailru.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MailRuIE(InfoExtractor):
    """Information extractor for videos on my.mail.ru.

    The video is identified by the path found after ``#video=`` in the page
    URL; that path is used to query the videoapi.my.mail.ru JSON endpoint,
    from which all metadata and format URLs are read.
    """

    IE_NAME = 'mailru'
    IE_DESC = 'Видео@Mail.Ru'
    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)'

    _TEST = {
        'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
        'md5': 'dea205f03120046894db4ebb6159879a',
        'info_dict': {
            'id': '46301138',
            'ext': 'mp4',
            'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
            'upload_date': '20140224',
            'uploader': 'sonypicturesrus',
            'uploader_id': 'sonypicturesrus@mail.ru',
            'duration': 184,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        The returned ``id`` is the ``contentId`` reported by the API, not the
        path captured from the URL (the path is only used for the API call
        and for progress messages).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        video_data = self._download_json(
            'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id,
            video_id, 'Downloading video JSON')

        author = video_data['author']
        uploader = author['name']
        uploader_id = author['id']

        movie = video_data['movie']
        content_id = str(movie['contentId'])
        title = movie['title']
        thumbnail = movie['poster']
        duration = movie['duration']

        # Convert in UTC: fromtimestamp() would use the local timezone of the
        # machine running the extractor, so the resulting date could differ
        # by one day from host to host.
        upload_date = datetime.datetime.utcfromtimestamp(
            video_data['timestamp']).strftime('%Y%m%d')
        view_count = video_data['views_count']

        formats = [
            {
                'url': video['url'],
                'format_id': video['name'],
            } for video in video_data['videos']
        ]

        return {
            'id': content_id,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
|
@@ -166,6 +166,7 @@ class MetacafeIE(InfoExtractor):
|
||||
|
||||
video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
video_uploader = self._html_search_regex(
|
||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||
webpage, u'uploader nickname', fatal=False)
|
||||
@@ -183,6 +184,7 @@ class MetacafeIE(InfoExtractor):
|
||||
'uploader': video_uploader,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'thumbnail':thumbnail,
|
||||
'ext': video_ext,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
@@ -1,24 +1,30 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
get_element_by_id,
|
||||
)
|
||||
|
||||
|
||||
class TechTVMITIE(InfoExtractor):
|
||||
IE_NAME = u'techtv.mit.edu'
|
||||
IE_NAME = 'techtv.mit.edu'
|
||||
_VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
|
||||
u'file': u'25418.mp4',
|
||||
u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
|
||||
u'info_dict': {
|
||||
u'title': u'MIT DNA Learning Center Set',
|
||||
u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
|
||||
'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
|
||||
'md5': '1f8cb3e170d41fd74add04d3c9330e5f',
|
||||
'info_dict': {
|
||||
'id': '25418',
|
||||
'ext': 'mp4',
|
||||
'title': 'MIT DNA Learning Center Set',
|
||||
'description': 'md5:82313335e8a8a3f243351ba55bc1b474',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -27,12 +33,12 @@ class TechTVMITIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
raw_page = self._download_webpage(
|
||||
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
|
||||
clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)
|
||||
clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
|
||||
|
||||
base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
|
||||
raw_page, u'base url')
|
||||
formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
|
||||
u'video formats')
|
||||
base_url = self._search_regex(
|
||||
r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url')
|
||||
formats_json = self._search_regex(
|
||||
r'bitrates: (\[.+?\])', raw_page, 'video formats')
|
||||
formats_mit = json.loads(formats_json)
|
||||
formats = [
|
||||
{
|
||||
@@ -48,28 +54,31 @@ class TechTVMITIE(InfoExtractor):
|
||||
|
||||
title = get_element_by_id('edit-title', clean_page)
|
||||
description = clean_html(get_element_by_id('edit-description', clean_page))
|
||||
thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
|
||||
raw_page, u'thumbnail', flags=re.DOTALL)
|
||||
thumbnail = self._search_regex(
|
||||
r'playlist:.*?url: \'(.+?)\'',
|
||||
raw_page, 'thumbnail', flags=re.DOTALL)
|
||||
|
||||
return {'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class MITIE(TechTVMITIE):
|
||||
IE_NAME = u'video.mit.edu'
|
||||
IE_NAME = 'video.mit.edu'
|
||||
_VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
|
||||
u'file': u'21783.mp4',
|
||||
u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
|
||||
u'info_dict': {
|
||||
u'title': u'The Government is Profiling You',
|
||||
u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
|
||||
'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
|
||||
'md5': '7db01d5ccc1895fc5010e9c9e13648da',
|
||||
'info_dict': {
|
||||
'id': '21783',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Government is Profiling You',
|
||||
'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -77,7 +86,73 @@ class MITIE(TechTVMITIE):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
|
||||
embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage,
|
||||
u'embed url')
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe .*?src="(.+?)"', webpage, 'embed url')
|
||||
return self.url_result(embed_url, ie='TechTVMIT')
|
||||
|
||||
|
||||
class OCWMITIE(InfoExtractor):
    """Information extractor for MIT OpenCourseWare course pages.

    The page embeds a YouTube video through a JavaScript call to either
    ``ocw_embed_chapter_media(...)`` or ``ocw_embed_media(...)``. The YouTube
    URL and the captions file are taken from the arguments of that call and
    the result is handed over to the YouTube extractor as ``url_transparent``.
    """

    IE_NAME = 'ocw.mit.edu'
    _VALID_URL = r'^http://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
    _BASE_URL = 'http://ocw.mit.edu/'

    _TESTS = [
        {
            'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
            'info_dict': {
                'id': 'EObHWIEKGjA',
                'ext': 'mp4',
                'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
                'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
                #'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
            }
        },
        {
            'url': 'http://ocw.mit.edu/courses/mathematics/18-01sc-single-variable-calculus-fall-2010/1.-differentiation/part-a-definition-and-basic-rules/session-1-introduction-to-derivatives/',
            'info_dict': {
                'id': '7K1sB05pE0A',
                'ext': 'mp4',
                'title': 'Session 1: Introduction to Derivatives',
                'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
                #'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
            }
        }
    ]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the embedded video.

        Raises ExtractorError when neither embed call is found in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        topic = mobj.group('topic')

        webpage = self._download_webpage(url, topic)
        title = self._html_search_meta('WT.cg_s', webpage)
        description = self._html_search_meta('Description', webpage)

        # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file)
        embed_chapter_media = re.search(r'ocw_embed_chapter_media\((.+?)\)', webpage)
        if embed_chapter_media:
            # Strip the quotes, then split the argument list on commas
            metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
            metadata = re.split(r', ?', metadata)
            yt = metadata[1]
            subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
        else:
            # search for call to ocw_embed_media(container_id, media_url, provider, page_url, image_url, captions_file)
            embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
            if embed_media:
                metadata = re.sub(r'[\'"]', '', embed_media.group(1))
                metadata = re.split(r', ?', metadata)
                yt = metadata[1]
                subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
            else:
                raise ExtractorError('Unable to find embedded YouTube video.')
        video_id = YoutubeIE.extract_id(yt)

        # NOTE: a stray 'url_transparent' literal used to sit right before the
        # 'subtitles' key; adjacent string literals are concatenated, so the
        # dict ended up with a 'url_transparentsubtitles' key and no
        # 'subtitles' entry at all.
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'description': description,
            'url': yt,
            'subtitles': subs,
            'ie_key': 'Youtube',
        }
|
||||
|
@@ -5,18 +5,20 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class MixcloudIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
|
||||
IE_NAME = 'mixcloud'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
|
||||
'file': 'dholbach-cryptkeeper.mp3',
|
||||
'info_dict': {
|
||||
'id': 'dholbach-cryptkeeper',
|
||||
'ext': 'mp3',
|
||||
'title': 'Cryptkeeper',
|
||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||
'uploader': 'Daniel Holbach',
|
||||
@@ -45,7 +47,7 @@ class MixcloudIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader = mobj.group(1)
|
||||
cloudcast_name = mobj.group(2)
|
||||
track_id = '-'.join((uploader, cloudcast_name))
|
||||
track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))
|
||||
|
||||
webpage = self._download_webpage(url, track_id)
|
||||
|
||||
|
@@ -5,9 +5,12 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
HEADRequest,
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
RegexNotFoundError,
|
||||
)
|
||||
@@ -18,6 +21,7 @@ def _media_xml_tag(tag):
|
||||
|
||||
|
||||
class MTVServicesInfoExtractor(InfoExtractor):
|
||||
_MOBILE_TEMPLATE = None
|
||||
@staticmethod
|
||||
def _id_from_uri(uri):
|
||||
return uri.split(':')[-1]
|
||||
@@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
else:
|
||||
return thumb_node.attrib['url']
|
||||
|
||||
def _extract_video_formats(self, mdoc):
|
||||
if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
|
||||
raise ExtractorError('This video is not available from your country.', expected=True)
|
||||
def _extract_mobile_video_formats(self, mtvn_id):
    """Build the format list from the mobile version of the page.

    Downloads the page given by ``_MOBILE_TEMPLATE`` for *mtvn_id*, follows
    the "metrics" link found in it with a HEAD request and rewrites the
    resolved URL. Returns a list holding a single mp4 format dict.
    """
    mobile_page_url = self._MOBILE_TEMPLATE % mtvn_id
    page_request = compat_urllib_request.Request(mobile_page_url)
    # Otherwise we get a webpage that would execute some javascript
    page_request.add_header('Youtubedl-user-agent', 'curl/7')
    mobile_page = self._download_webpage(
        page_request, mtvn_id, 'Downloading mobile page')

    metrics_url = unescapeHTML(self._search_regex(
        r'<a href="(http://metrics.+?)"', mobile_page, 'url'))
    head_response = self._request_webpage(
        HEADRequest(metrics_url), mtvn_id, 'Resolving url')

    # Transform the url to get the best quality:
    best_url = re.sub(
        r'.+pxE=mp4',
        'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4',
        head_response.geturl(), 1)
    return [{'url': best_url, 'ext': 'mp4'}]
|
||||
|
||||
def _extract_video_formats(self, mdoc, mtvn_id):
|
||||
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
|
||||
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
|
||||
self.to_screen('The normal version is not available from your '
|
||||
'country, trying with the mobile version')
|
||||
return self._extract_mobile_video_formats(mtvn_id)
|
||||
raise ExtractorError('This video is not available from your country.',
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
for rendition in mdoc.findall('.//rendition'):
|
||||
@@ -82,21 +106,28 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
title_el = find_xpath_attr(
|
||||
itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||
'scheme', 'urn:mtvn:video_title')
|
||||
if title_el is None:
|
||||
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
|
||||
if title_el is None:
|
||||
title_el = itemdoc.find('.//title')
|
||||
if title_el.text is None:
|
||||
title_el = None
|
||||
if title_el is None:
|
||||
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
|
||||
|
||||
title = title_el.text
|
||||
if title is None:
|
||||
raise ExtractorError('Could not find video title')
|
||||
title = title.strip()
|
||||
|
||||
# This is a short id that's used in the webpage urls
|
||||
mtvn_id = None
|
||||
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||
'scheme', 'urn:mtvn:id')
|
||||
if mtvn_id_node is not None:
|
||||
mtvn_id = mtvn_id_node.text
|
||||
|
||||
return {
|
||||
'title': title,
|
||||
'formats': self._extract_video_formats(mediagen_doc),
|
||||
'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
|
||||
'id': video_id,
|
||||
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
||||
'description': description,
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import binascii
|
||||
import base64
|
||||
import hashlib
|
||||
@@ -14,18 +16,16 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
|
||||
class MyVideoIE(InfoExtractor):
|
||||
"""Information Extractor for myvideo.de."""
|
||||
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
|
||||
IE_NAME = u'myvideo'
|
||||
_VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
|
||||
IE_NAME = 'myvideo'
|
||||
_TEST = {
|
||||
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
|
||||
u'file': u'8229274.flv',
|
||||
u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
|
||||
u'info_dict': {
|
||||
u"title": u"bowling-fail-or-win"
|
||||
'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
|
||||
'md5': '2d2753e8130479ba2cb7e0a37002053e',
|
||||
'info_dict': {
|
||||
'id': '8229274',
|
||||
'ext': 'flv',
|
||||
'title': 'bowling-fail-or-win',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,10 +53,7 @@ class MyVideoIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self,url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'invalid URL: %s' % url)
|
||||
|
||||
video_id = mobj.group(1)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
GK = (
|
||||
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
|
||||
@@ -74,37 +71,33 @@ class MyVideoIE(InfoExtractor):
|
||||
video_url = mobj.group(1) + '.flv'
|
||||
|
||||
video_title = self._html_search_regex('<title>([^<]+)</title>',
|
||||
webpage, u'title')
|
||||
webpage, 'title')
|
||||
|
||||
video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': video_ext,
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
}
|
||||
|
||||
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
|
||||
if mobj is not None:
|
||||
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
|
||||
response = self._download_webpage(request, video_id,
|
||||
u'Downloading video info')
|
||||
'Downloading video info')
|
||||
info = json.loads(base64.b64decode(response).decode('utf-8'))
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
|
||||
'play_path': info['filename'],
|
||||
'ext': 'flv',
|
||||
'thumbnail': info['thumbnail'][0]['url'],
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
|
||||
'play_path': info['filename'],
|
||||
'ext': 'flv',
|
||||
'thumbnail': info['thumbnail'][0]['url'],
|
||||
}
|
||||
|
||||
# try encxml
|
||||
mobj = re.search('var flashvars={(.+?)}', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract video')
|
||||
raise ExtractorError('Unable to extract video')
|
||||
|
||||
params = {}
|
||||
encxml = ''
|
||||
@@ -118,7 +111,7 @@ class MyVideoIE(InfoExtractor):
|
||||
params['domain'] = 'www.myvideo.de'
|
||||
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
|
||||
if 'flash_playertype=MTV' in xmldata_url:
|
||||
self._downloader.report_warning(u'avoiding MTV player')
|
||||
self._downloader.report_warning('avoiding MTV player')
|
||||
xmldata_url = (
|
||||
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
|
||||
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
|
||||
@@ -144,7 +137,7 @@ class MyVideoIE(InfoExtractor):
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1))
|
||||
if 'myvideo2flash' in video_url:
|
||||
self.report_warning(
|
||||
u'Rewriting URL to use unencrypted rtmp:// ...',
|
||||
'Rewriting URL to use unencrypted rtmp:// ...',
|
||||
video_id)
|
||||
video_url = video_url.replace('rtmpe://', 'rtmp://')
|
||||
|
||||
@@ -152,39 +145,31 @@ class MyVideoIE(InfoExtractor):
|
||||
# extract non rtmp videos
|
||||
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'unable to extract url')
|
||||
raise ExtractorError('unable to extract url')
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
|
||||
|
||||
video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
|
||||
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
|
||||
video_file = compat_urllib_parse.unquote(video_file)
|
||||
|
||||
if not video_file.endswith('f4m'):
|
||||
ppath, prefix = video_file.split('.')
|
||||
video_playpath = '%s:%s' % (prefix, ppath)
|
||||
video_hls_playlist = ''
|
||||
else:
|
||||
video_playpath = ''
|
||||
video_hls_playlist = (
|
||||
video_file
|
||||
).replace('.f4m', '.m3u8')
|
||||
|
||||
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
|
||||
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
|
||||
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
|
||||
|
||||
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
||||
webpage, u'title')
|
||||
webpage, 'title')
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'tc_url': video_url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': u'flv',
|
||||
'play_path': video_playpath,
|
||||
'video_file': video_file,
|
||||
'video_hls_playlist': video_hls_playlist,
|
||||
'player_url': video_swfobj,
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'tc_url': video_url,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'play_path': video_playpath,
|
||||
'player_url': video_swfobj,
|
||||
}
|
||||
|
||||
|
@@ -1,19 +1,46 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import find_xpath_attr, compat_str
|
||||
|
||||
|
||||
class NBCIE(InfoExtractor):
    """Information extractor for video pages on www.nbc.com.

    The page only carries a player element whose ``data-mpx-url`` attribute
    holds the real media URL; extraction is delegated to whichever extractor
    handles that URL.
    """

    _VALID_URL = r'http://www\.nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)'

    _TEST = {
        'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
        'info_dict': {
            'id': 'u1RInQZRN7QJ',
            'ext': 'flv',
            'title': 'I Am a Firefighter',
            'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
        },
    }

    def _real_extract(self, url):
        """Return a URL result for the player URL embedded in the page."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)
        player_url = self._search_regex(
            'class="video-player video-player-full" data-mpx-url="(.*?)"',
            page, 'theplatform url')
        # Protocol-relative URLs need an explicit scheme before delegation
        if player_url.startswith('//'):
            player_url = 'http:' + player_url
        return self.url_result(player_url)
|
||||
|
||||
|
||||
class NBCNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
|
||||
u'file': u'52753292.flv',
|
||||
u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
|
||||
u'info_dict': {
|
||||
u'title': u'Crew emerges after four-month Mars food study',
|
||||
u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
|
||||
'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
|
||||
'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
|
||||
'info_dict': {
|
||||
'id': '52753292',
|
||||
'ext': 'flv',
|
||||
'title': 'Crew emerges after four-month Mars food study',
|
||||
'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -23,10 +50,11 @@ class NBCNewsIE(InfoExtractor):
|
||||
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||
info = all_info.find('video')
|
||||
|
||||
return {'id': video_id,
|
||||
'title': info.find('headline').text,
|
||||
'ext': 'flv',
|
||||
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
|
||||
'description': compat_str(info.find('caption').text),
|
||||
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('headline').text,
|
||||
'ext': 'flv',
|
||||
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
|
||||
'description': compat_str(info.find('caption').text),
|
||||
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
||||
}
|
||||
|
@@ -13,28 +13,28 @@ class NDRIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
|
||||
|
||||
_TESTS = [
|
||||
# video
|
||||
{
|
||||
'url': 'http://www.ndr.de/fernsehen/sendungen/hallo_niedersachsen/media/hallonds19925.html',
|
||||
'md5': '20eba151ff165f386643dad9c1da08f7',
|
||||
'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html',
|
||||
'md5': 'e7a6079ca39d3568f4996cb858dd6708',
|
||||
'note': 'Video file',
|
||||
'info_dict': {
|
||||
'id': '19925',
|
||||
'id': '7959',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hallo Niedersachsen ',
|
||||
'description': 'Bei Hallo Niedersachsen um 19:30 Uhr erfahren Sie alles, was am Tag in Niedersachsen los war.',
|
||||
'duration': 1722,
|
||||
'title': 'Markt - die ganze Sendung',
|
||||
'description': 'md5:af9179cf07f67c5c12dc6d9997e05725',
|
||||
'duration': 2655,
|
||||
},
|
||||
},
|
||||
# audio
|
||||
{
|
||||
'url': 'http://www.ndr.de/903/audio191719.html',
|
||||
'md5': '41ed601768534dd18a9ae34d84798129',
|
||||
'url': 'http://www.ndr.de/info/audio51535.html',
|
||||
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||
'note': 'Audio file',
|
||||
'info_dict': {
|
||||
'id': '191719',
|
||||
'id': '51535',
|
||||
'ext': 'mp3',
|
||||
'title': '"Es war schockierend"',
|
||||
'description': 'md5:ed7ff8364793545021a6355b97e95f10',
|
||||
'duration': 112,
|
||||
'title': 'La Valette entgeht der Hinrichtung',
|
||||
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
||||
'duration': 884,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
@@ -49,20 +49,38 @@ class NFBIE(InfoExtractor):
|
||||
|
||||
config = self._download_xml(request, video_id, 'Downloading player config XML')
|
||||
|
||||
thumbnail = config.find("./player/stream/media[@type='posterImage']/assets/asset[@quality='high']/default/url").text
|
||||
video = config.find("./player/stream/media[@type='video']")
|
||||
duration = int(video.get('duration'))
|
||||
title = video.find('title').text
|
||||
description = video.find('description').text
|
||||
title = None
|
||||
description = None
|
||||
thumbnail = None
|
||||
duration = None
|
||||
formats = []
|
||||
|
||||
# It seems assets always go from lower to better quality, so no need to sort
|
||||
formats = [{
|
||||
'url': x.find('default/streamerURI').text + '/',
|
||||
'play_path': x.find('default/url').text,
|
||||
'rtmp_live': False,
|
||||
'ext': 'mp4',
|
||||
'format_id': x.get('quality'),
|
||||
} for x in video.findall('assets/asset')]
|
||||
def extract_thumbnail(media):
|
||||
thumbnails = {}
|
||||
for asset in media.findall('assets/asset'):
|
||||
thumbnails[asset.get('quality')] = asset.find('default/url').text
|
||||
if not thumbnails:
|
||||
return None
|
||||
if 'high' in thumbnails:
|
||||
return thumbnails['high']
|
||||
return list(thumbnails.values())[0]
|
||||
|
||||
for media in config.findall('./player/stream/media'):
|
||||
if media.get('type') == 'posterImage':
|
||||
thumbnail = extract_thumbnail(media)
|
||||
elif media.get('type') == 'video':
|
||||
duration = int(media.get('duration'))
|
||||
title = media.find('title').text
|
||||
description = media.find('description').text
|
||||
# It seems assets always go from lower to better quality, so no need to sort
|
||||
formats = [{
|
||||
'url': x.find('default/streamerURI').text,
|
||||
'app': x.find('default/streamerURI').text.split('/', 3)[3],
|
||||
'play_path': x.find('default/url').text,
|
||||
'rtmp_live': False,
|
||||
'ext': 'mp4',
|
||||
'format_id': x.get('quality'),
|
||||
} for x in media.findall('assets/asset')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,61 +1,51 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class NormalbootsIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
|
||||
_VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
|
||||
_TEST = {
|
||||
u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
|
||||
u'file': u'home-alone-games-jontron.mp4',
|
||||
u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
|
||||
u'info_dict': {
|
||||
u'title': u'Home Alone Games - JonTron - NormalBoots',
|
||||
u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
|
||||
u'uploader': u'JonTron',
|
||||
u'upload_date': u'20140125',
|
||||
'url': 'http://normalboots.com/video/home-alone-games-jontron/',
|
||||
'md5': '8bf6de238915dd501105b44ef5f1e0f6',
|
||||
'info_dict': {
|
||||
'id': 'home-alone-games-jontron',
|
||||
'ext': 'mp4',
|
||||
'title': 'Home Alone Games - JonTron - NormalBoots',
|
||||
'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
|
||||
'uploader': 'JonTron',
|
||||
'upload_date': '20140125',
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
}
|
||||
|
||||
if url[:4] != 'http':
|
||||
url = 'http://' + url
|
||||
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_title = self._og_search_title(webpage)
|
||||
video_description = self._og_search_description(webpage)
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
||||
webpage, 'uploader')
|
||||
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
||||
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
||||
webpage, 'date')
|
||||
video_upload_date = unified_strdate(raw_upload_date)
|
||||
video_upload_date = unified_strdate(raw_upload_date)
|
||||
|
||||
|
||||
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
|
||||
player_page = self._download_webpage(player_url, video_id)
|
||||
video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
|
||||
|
||||
info['url'] = video_url
|
||||
info['title'] = video_title
|
||||
info['description'] = video_description
|
||||
info['thumbnail'] = video_thumbnail
|
||||
info['uploader'] = video_uploader
|
||||
info['upload_date'] = video_upload_date
|
||||
|
||||
return info
|
||||
video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
}
|
||||
|
@@ -9,14 +9,25 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class NovamovIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:(?:www\.)?novamov\.com/video/|(?:(?:embed|www)\.)novamov\.com/embed\.php\?v=)(?P<videoid>[a-z\d]{13})'
|
||||
class NovaMovIE(InfoExtractor):
|
||||
IE_NAME = 'novamov'
|
||||
IE_DESC = 'NovaMov'
|
||||
|
||||
_VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
|
||||
|
||||
_HOST = 'www.novamov.com'
|
||||
|
||||
_FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
|
||||
_FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";'
|
||||
_TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
|
||||
_DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.novamov.com/video/4rurhn9x446jj',
|
||||
'file': '4rurhn9x446jj.flv',
|
||||
'md5': '7205f346a52bbeba427603ba10d4b935',
|
||||
'info_dict': {
|
||||
'id': '4rurhn9x446jj',
|
||||
'ext': 'flv',
|
||||
'title': 'search engine optimization',
|
||||
'description': 'search engine optimization is used to rank the web page in the google search engine'
|
||||
},
|
||||
@@ -27,31 +38,26 @@ class NovamovIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
page = self._download_webpage('http://www.novamov.com/video/%s' % video_id,
|
||||
video_id, 'Downloading video page')
|
||||
page = self._download_webpage(
|
||||
'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
|
||||
|
||||
if re.search(r'This file no longer exists on our servers!</h2>', page) is not None:
|
||||
if re.search(self._FILE_DELETED_REGEX, page) is not None:
|
||||
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
filekey = self._search_regex(
|
||||
r'flashvars\.filekey="(?P<filekey>[^"]+)";', page, 'filekey')
|
||||
filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>',
|
||||
page, 'title', fatal=False)
|
||||
title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>',
|
||||
page, 'description', fatal=False)
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
|
||||
|
||||
api_response = self._download_webpage(
|
||||
'http://www.novamov.com/api/player.api.php?key=%s&file=%s' % (filekey, video_id),
|
||||
video_id, 'Downloading video api response')
|
||||
'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
|
||||
'Downloading video api response')
|
||||
|
||||
response = compat_urlparse.parse_qs(api_response)
|
||||
|
||||
if 'error_msg' in response:
|
||||
raise ExtractorError('novamov returned error: %s' % response['error_msg'][0], expected=True)
|
||||
raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True)
|
||||
|
||||
video_url = response['url'][0]
|
||||
|
||||
@@ -60,4 +66,4 @@ class NovamovIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description
|
||||
}
|
||||
}
|
@@ -1,46 +1,28 @@
|
||||
import re
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urlparse
|
||||
from .novamov import NovaMovIE
|
||||
|
||||
|
||||
class NowVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.(?:ch|sx)/video/(?P<id>\w+)'
|
||||
class NowVideoIE(NovaMovIE):
|
||||
IE_NAME = 'nowvideo'
|
||||
IE_DESC = 'NowVideo'
|
||||
|
||||
_VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'}
|
||||
|
||||
_HOST = 'www.nowvideo.ch'
|
||||
|
||||
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
|
||||
_FILEKEY_REGEX = r'var fkzd="([^"]+)";'
|
||||
_TITLE_REGEX = r'<h4>([^<]+)</h4>'
|
||||
_DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
|
||||
u'file': u'0mw0yow7b6dxa.flv',
|
||||
u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817',
|
||||
u'info_dict': {
|
||||
u"title": u"youtubedl test video _BaW_jenozKc.mp4"
|
||||
'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
|
||||
'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
|
||||
'info_dict': {
|
||||
'id': '0mw0yow7b6dxa',
|
||||
'ext': 'flv',
|
||||
'title': 'youtubedl test video _BaW_jenozKc.mp4',
|
||||
'description': 'Description',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
webpage_url = 'http://www.nowvideo.ch/video/' + video_id
|
||||
embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
u'Downloading embed page')
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<h4>(.*)</h4>',
|
||||
webpage, u'video title')
|
||||
|
||||
video_key = self._search_regex(r'var fkzd="(.*)";',
|
||||
embed_page, u'video key')
|
||||
|
||||
api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
|
||||
api_response = self._download_webpage(api_call, video_id,
|
||||
u'Downloading API page')
|
||||
video_url = compat_urlparse.parse_qs(api_response)[u'url'][0]
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': video_title,
|
||||
}]
|
||||
}
|
@@ -8,6 +8,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -35,7 +36,15 @@ class ORFIE(InfoExtractor):
|
||||
data_json = self._search_regex(
|
||||
r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
|
||||
all_data = json.loads(data_json)
|
||||
sdata = all_data[0]['values']['segments']
|
||||
|
||||
def get_segments(all_data):
|
||||
for data in all_data:
|
||||
if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM':
|
||||
return data['values']['segments']
|
||||
|
||||
sdata = get_segments(all_data)
|
||||
if not sdata:
|
||||
raise ExtractorError('Unable to extract segments')
|
||||
|
||||
def quality_to_int(s):
|
||||
m = re.search('([0-9]+)', s)
|
||||
|
@@ -9,7 +9,7 @@ class PBSIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
# Direct video URL
|
||||
video\.pbs\.org/video/(?P<id>[0-9]+)/? |
|
||||
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
|
||||
# Article with embedded player
|
||||
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
||||
# Player
|
||||
|
@@ -1,76 +1,43 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
class PhotobucketIE(InfoExtractor):
|
||||
"""Information extractor for photobucket.com."""
|
||||
|
||||
# TODO: the original _VALID_URL was:
|
||||
# r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
|
||||
# Check if it's necessary to keep the old extracion process
|
||||
_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
|
||||
IE_NAME = u'photobucket'
|
||||
_VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
|
||||
_TEST = {
|
||||
u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
|
||||
u'file': u'zpsc0c3b9fa.mp4',
|
||||
u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20130504",
|
||||
u"uploader": u"rachaneronas",
|
||||
u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!"
|
||||
'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
|
||||
'file': 'zpsc0c3b9fa.mp4',
|
||||
'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
|
||||
'info_dict': {
|
||||
'upload_date': '20130504',
|
||||
'uploader': 'rachaneronas',
|
||||
'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract id from URL
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_extension = mobj.group('ext')
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Extract URL, uploader, and title from webpage
|
||||
self.report_extraction(video_id)
|
||||
# We try first by looking the javascript code:
|
||||
mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage)
|
||||
if mobj is not None:
|
||||
info = json.loads(mobj.group('json'))
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': info[u'downloadUrl'],
|
||||
'uploader': info[u'username'],
|
||||
'upload_date': datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'),
|
||||
'title': info[u'title'],
|
||||
'ext': video_extension,
|
||||
'thumbnail': info[u'thumbUrl'],
|
||||
}]
|
||||
|
||||
# We try looking in other parts of the webpage
|
||||
video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
|
||||
webpage, u'video URL')
|
||||
|
||||
mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract title')
|
||||
video_title = mobj.group(1).decode('utf-8')
|
||||
video_uploader = mobj.group(2).decode('utf-8')
|
||||
|
||||
return [{
|
||||
'id': video_id.decode('utf-8'),
|
||||
'url': video_url.decode('utf-8'),
|
||||
'uploader': video_uploader,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': video_extension.decode('utf-8'),
|
||||
}]
|
||||
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
|
||||
webpage, 'info json')
|
||||
info = json.loads(info_json)
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': info['downloadUrl'],
|
||||
'uploader': info['username'],
|
||||
'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'),
|
||||
'title': info['title'],
|
||||
'ext': video_extension,
|
||||
'thumbnail': info['thumbUrl'],
|
||||
}
|
||||
|
80
youtube_dl/extractor/playvid.py
Normal file
80
youtube_dl/extractor/playvid.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class PlayvidIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
|
||||
_TEST = {
|
||||
'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
|
||||
'md5': '44930f8afa616efdf9482daf4fe53e1e',
|
||||
'info_dict': {
|
||||
'id': 'agbDDi7WZTV',
|
||||
'ext': 'mp4',
|
||||
'title': 'Michelle Lewin in Miami Beach',
|
||||
'duration': 240,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = None
|
||||
duration = None
|
||||
video_thumbnail = None
|
||||
formats = []
|
||||
|
||||
# most of the information is stored in the flashvars
|
||||
flashvars = self._html_search_regex(
|
||||
r'flashvars="(.+?)"', webpage, 'flashvars')
|
||||
|
||||
infos = compat_urllib_parse.unquote(flashvars).split(r'&')
|
||||
for info in infos:
|
||||
videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
|
||||
if videovars_match:
|
||||
key = videovars_match.group(1)
|
||||
val = videovars_match.group(2)
|
||||
|
||||
if key == 'title':
|
||||
video_title = compat_urllib_parse.unquote_plus(val)
|
||||
if key == 'duration':
|
||||
try:
|
||||
duration = int(val)
|
||||
except ValueError:
|
||||
pass
|
||||
if key == 'big_thumb':
|
||||
video_thumbnail = val
|
||||
|
||||
videourl_match = re.match(
|
||||
r'^video_urls\]\[(?P<resolution>[0-9]+)p', key)
|
||||
if videourl_match:
|
||||
height = int(videourl_match.group('resolution'))
|
||||
formats.append({
|
||||
'height': height,
|
||||
'url': val,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
# Extract title - should be in the flashvars; if not, look elsewhere
|
||||
if video_title is None:
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(.*?)</title', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'duration': duration,
|
||||
'description': None,
|
||||
'age_limit': 18
|
||||
}
|
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class PodomaticIE(InfoExtractor):
|
||||
@@ -9,14 +12,14 @@ class PodomaticIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
|
||||
|
||||
_TEST = {
|
||||
u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
|
||||
u"file": u"2009-01-02T16_03_35-08_00.mp3",
|
||||
u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
|
||||
u"info_dict": {
|
||||
u"uploader": u"Science Teaching Tips",
|
||||
u"uploader_id": u"scienceteachingtips",
|
||||
u"title": u"64. When the Moon Hits Your Eye",
|
||||
u"duration": 446,
|
||||
"url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
|
||||
"file": "2009-01-02T16_03_35-08_00.mp3",
|
||||
"md5": "84bb855fcf3429e6bf72460e1eed782d",
|
||||
"info_dict": {
|
||||
"uploader": "Science Teaching Tips",
|
||||
"uploader_id": "scienceteachingtips",
|
||||
"title": "64. When the Moon Hits Your Eye",
|
||||
"duration": 446,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,7 +39,7 @@ class PodomaticIE(InfoExtractor):
|
||||
uploader = data['podcast']
|
||||
title = data['title']
|
||||
thumbnail = data['imageLocation']
|
||||
duration = int(data['length'] / 1000.0)
|
||||
duration = int_or_none(data.get('length'), 1000)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -44,7 +44,7 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||
if webpage.find('"encrypted":true') != -1:
|
||||
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ')
|
||||
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
|
||||
|
||||
formats = []
|
||||
|
297
youtube_dl/extractor/prosiebensat1.py
Normal file
297
youtube_dl/extractor/prosiebensat1.py
Normal file
@@ -0,0 +1,297 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from hashlib import sha1
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
RegexNotFoundError,
|
||||
)
|
||||
|
||||
|
||||
class ProSiebenSat1IE(InfoExtractor):
|
||||
IE_NAME = 'prosiebensat1'
|
||||
IE_DESC = 'ProSiebenSat.1 Digital'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|ran|the-voice-of-germany)\.de|fem\.com)/(?P<id>.+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '2104602',
|
||||
'ext': 'mp4',
|
||||
'title': 'Staffel 2, Episode 18 - Jahresrückblick',
|
||||
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
|
||||
'upload_date': '20131231',
|
||||
'duration': 5845.04,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
|
||||
'info_dict': {
|
||||
'id': '2570327',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lady-Umstyling für Audrina',
|
||||
'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
|
||||
'upload_date': '20131014',
|
||||
'duration': 606.76,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Seems to be broken',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '2429369',
|
||||
'ext': 'mp4',
|
||||
'title': 'Countdown für die Autowerkstatt',
|
||||
'description': 'md5:809fc051a457b5d8666013bc40698817',
|
||||
'upload_date': '20140223',
|
||||
'duration': 2595.04,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
|
||||
'info_dict': {
|
||||
'id': '2904997',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sexy laufen in Ugg Boots',
|
||||
'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
|
||||
'upload_date': '20140122',
|
||||
'duration': 245.32,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
|
||||
'info_dict': {
|
||||
'id': '2906572',
|
||||
'ext': 'mp4',
|
||||
'title': 'Im Interview: Kai Wiesinger',
|
||||
'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
|
||||
'upload_date': '20140225',
|
||||
'duration': 522.56,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '2992323',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
|
||||
'description': 'md5:2669cde3febe9bce13904f701e774eb6',
|
||||
'upload_date': '20140225',
|
||||
'duration': 2410.44,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '3004256',
|
||||
'ext': 'mp4',
|
||||
'title': 'Schalke: Tönnies möchte Raul zurück',
|
||||
'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
|
||||
'upload_date': '20140226',
|
||||
'duration': 228.96,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
|
||||
'info_dict': {
|
||||
'id': '2572814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Andreas Kümmert: Rocket Man',
|
||||
'description': 'md5:6ddb02b0781c6adf778afea606652e38',
|
||||
'upload_date': '20131017',
|
||||
'duration': 469.88,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
|
||||
'info_dict': {
|
||||
'id': '2156342',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kurztrips zum Valentinstag',
|
||||
'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528',
|
||||
'upload_date': '20130206',
|
||||
'duration': 307.24,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_CLIPID_REGEXES = [
|
||||
r'"clip_id"\s*:\s+"(\d+)"',
|
||||
r'clipid: "(\d+)"',
|
||||
]
|
||||
_TITLE_REGEXES = [
|
||||
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
|
||||
r'<header class="clearfix">\s*<h3>(.+?)</h3>',
|
||||
r'<!-- start video -->\s*<h1>(.+?)</h1>',
|
||||
r'<div class="ep-femvideos-pi4-video-txt">\s*<h2>(.+?)</h2>',
|
||||
]
|
||||
_DESCRIPTION_REGEXES = [
|
||||
r'<p itemprop="description">\s*(.+?)</p>',
|
||||
r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
|
||||
r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
|
||||
r'<p>(.+?)</p>\s*<div class="ep-femvideos-pi4-video-footer">',
|
||||
]
|
||||
_UPLOAD_DATE_REGEXES = [
|
||||
r'<meta property="og:published_time" content="(.+?)">',
|
||||
r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
|
||||
r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
|
||||
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
|
||||
r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
def extract(patterns, name, page, fatal=False):
|
||||
for pattern in patterns:
|
||||
mobj = re.search(pattern, page)
|
||||
if mobj:
|
||||
return clean_html(mobj.group(1))
|
||||
if fatal:
|
||||
raise RegexNotFoundError(u'Unable to extract %s' % name)
|
||||
return None
|
||||
|
||||
clip_id = extract(self._CLIPID_REGEXES, 'clip id', page, fatal=True)
|
||||
|
||||
access_token = 'testclient'
|
||||
client_name = 'kolibri-1.2.5'
|
||||
client_location = url
|
||||
|
||||
videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({
|
||||
'access_token': access_token,
|
||||
'client_location': client_location,
|
||||
'client_name': client_name,
|
||||
'ids': clip_id,
|
||||
})
|
||||
|
||||
videos = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')
|
||||
|
||||
duration = float(videos[0]['duration'])
|
||||
source_ids = [source['id'] for source in videos[0]['sources']]
|
||||
source_ids_str = ','.join(map(str, source_ids))
|
||||
|
||||
g = '01!8d8F_)r9]4s[qeuXfP%'
|
||||
|
||||
client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name])
|
||||
.encode('utf-8')).hexdigest()
|
||||
|
||||
sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse.urlencode({
|
||||
'access_token': access_token,
|
||||
'client_id': client_id,
|
||||
'client_location': client_location,
|
||||
'client_name': client_name,
|
||||
}))
|
||||
|
||||
sources = self._download_json(sources_api_url, clip_id, 'Downloading sources JSON')
|
||||
server_id = sources['server_id']
|
||||
|
||||
client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id,
|
||||
client_location, source_ids_str, g, client_name])
|
||||
.encode('utf-8')).hexdigest()
|
||||
|
||||
url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse.urlencode({
|
||||
'access_token': access_token,
|
||||
'client_id': client_id,
|
||||
'client_location': client_location,
|
||||
'client_name': client_name,
|
||||
'server_id': server_id,
|
||||
'source_ids': source_ids_str,
|
||||
}))
|
||||
|
||||
urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')
|
||||
|
||||
title = extract(self._TITLE_REGEXES, 'title', page, fatal=True)
|
||||
description = extract(self._DESCRIPTION_REGEXES, 'description', page)
|
||||
thumbnail = self._og_search_thumbnail(page)
|
||||
|
||||
upload_date = extract(self._UPLOAD_DATE_REGEXES, 'upload date', page)
|
||||
if upload_date:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
formats = []
|
||||
|
||||
urls_sources = urls['sources']
|
||||
if isinstance(urls_sources, dict):
|
||||
urls_sources = urls_sources.values()
|
||||
|
||||
def fix_bitrate(bitrate):
|
||||
return bitrate / 1000 if bitrate % 1000 == 0 else bitrate
|
||||
|
||||
for source in urls_sources:
|
||||
protocol = source['protocol']
|
||||
if protocol == 'rtmp' or protocol == 'rtmpe':
|
||||
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', source['url'])
|
||||
if not mobj:
|
||||
continue
|
||||
formats.append({
|
||||
'url': mobj.group('url'),
|
||||
'app': mobj.group('app'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
|
||||
'page_url': 'http://www.prosieben.de',
|
||||
'vbr': fix_bitrate(source['bitrate']),
|
||||
'ext': 'mp4',
|
||||
'format_id': '%s_%s' % (source['cdn'], source['bitrate']),
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': source['url'],
|
||||
'vbr': fix_bitrate(source['bitrate']),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': clip_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -1,148 +1,165 @@
|
||||
# encoding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class RTLnowIE(InfoExtractor):
|
||||
"""Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
|
||||
_VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
|
||||
_TESTS = [{
|
||||
'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
|
||||
'file': '90419.flv',
|
||||
'info_dict': {
|
||||
'upload_date': '20070416',
|
||||
'title': 'Ahornallee - Folge 1 - Der Einzug',
|
||||
'description': 'Folge 1 - Der Einzug',
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://)?
|
||||
(?P<url>
|
||||
(?P<domain>
|
||||
rtl-now\.rtl\.de|
|
||||
rtl2now\.rtl2\.de|
|
||||
(?:www\.)?voxnow\.de|
|
||||
(?:www\.)?rtlnitronow\.de|
|
||||
(?:www\.)?superrtlnow\.de|
|
||||
(?:www\.)?n-tvnow\.de)
|
||||
/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?
|
||||
(?:container_id|film_id)=(?P<video_id>[0-9]+)&
|
||||
player=1(?:&season=[0-9]+)?(?:&.*)?
|
||||
)'''
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
|
||||
'info_dict': {
|
||||
'id': '90419',
|
||||
'ext': 'flv',
|
||||
'title': 'Ahornallee - Folge 1 - Der Einzug',
|
||||
'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de',
|
||||
'upload_date': '20070416',
|
||||
'duration': 1685,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only works from Germany',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
{
|
||||
'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
|
||||
'info_dict': {
|
||||
'id': '69756',
|
||||
'ext': 'flv',
|
||||
'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
|
||||
'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0',
|
||||
'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
|
||||
'upload_date': '20120519',
|
||||
'duration': 1245,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only works from Germany',
|
||||
},
|
||||
'skip': 'Only works from Germany',
|
||||
},
|
||||
{
|
||||
'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
|
||||
'file': '69756.flv',
|
||||
'info_dict': {
|
||||
'upload_date': '20120519',
|
||||
'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
|
||||
'description': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
|
||||
'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
|
||||
{
|
||||
'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
|
||||
'info_dict': {
|
||||
'id': '13883',
|
||||
'ext': 'flv',
|
||||
'title': 'Voxtours - Südafrika-Reporter II',
|
||||
'description': 'md5:de7f8d56be6fd4fed10f10f57786db00',
|
||||
'upload_date': '20090627',
|
||||
'duration': 1800,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
{
|
||||
'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
|
||||
'info_dict': {
|
||||
'id': '99205',
|
||||
'ext': 'flv',
|
||||
'title': 'Medicopter 117 - Angst!',
|
||||
'description': 'md5:895b1df01639b5f61a04fc305a5cb94d',
|
||||
'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
|
||||
'upload_date': '20080928',
|
||||
'duration': 2691,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
'skip': 'Only works from Germany',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
|
||||
'file': '13883.flv',
|
||||
'info_dict': {
|
||||
'upload_date': '20090627',
|
||||
'title': 'Voxtours - Südafrika-Reporter II',
|
||||
'description': 'Südafrika-Reporter II',
|
||||
{
|
||||
'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
|
||||
'info_dict': {
|
||||
'id': '153819',
|
||||
'ext': 'flv',
|
||||
'title': 'Deluxe - Alles was Spaß macht - Thema u.a.: Luxushotel für Vierbeiner',
|
||||
'description': 'md5:c3705e1bb32e1a5b2bcd634fc065c631',
|
||||
'thumbnail': 'http://autoimg.static-fra.de/ntvnow/383157/1500x1500/image2.jpg',
|
||||
'upload_date': '20140221',
|
||||
'duration': 2429,
|
||||
},
|
||||
'skip': 'Only works from Germany',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
|
||||
'file': '99205.flv',
|
||||
'info_dict': {
|
||||
'upload_date': '20080928',
|
||||
'title': 'Medicopter 117 - Angst!',
|
||||
'description': 'Angst!',
|
||||
'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
|
||||
'file': '124903.flv',
|
||||
'info_dict': {
|
||||
'upload_date': '20130101',
|
||||
'title': 'Top Gear vom 01.01.2013',
|
||||
'description': 'Episode 1',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only works from Germany',
|
||||
}]
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
webpage_url = 'http://' + mobj.group('url')
|
||||
video_page_url = 'http://' + mobj.group('domain') + '/'
|
||||
video_page_url = 'http://%s/' % mobj.group('domain')
|
||||
video_id = mobj.group('video_id')
|
||||
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
webpage = self._download_webpage('http://' + mobj.group('url'), video_id)
|
||||
|
||||
note_m = re.search(r'''(?sx)
|
||||
<div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
|
||||
<div[ ]id="playerteaser">''', webpage)
|
||||
if note_m:
|
||||
msg = clean_html(note_m.group(1))
|
||||
raise ExtractorError(msg)
|
||||
mobj = re.search(r'(?s)<div style="margin-left: 20px; font-size: 13px;">(.*?)<div id="playerteaser">', webpage)
|
||||
if mobj:
|
||||
raise ExtractorError(clean_html(mobj.group(1)), expected=True)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
|
||||
upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date'))
|
||||
|
||||
mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage)
|
||||
duration = int(mobj.group('seconds')) if mobj else None
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>',
|
||||
webpage, 'title')
|
||||
playerdata_url = self._html_search_regex(
|
||||
r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
|
||||
webpage, 'playerdata_url')
|
||||
r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url')
|
||||
|
||||
playerdata = self._download_webpage(playerdata_url, video_id)
|
||||
mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata)
|
||||
if mobj:
|
||||
video_description = mobj.group('description')
|
||||
if mobj.group('upload_date_Y'):
|
||||
video_upload_date = mobj.group('upload_date_Y')
|
||||
elif mobj.group('upload_date_y'):
|
||||
video_upload_date = '20' + mobj.group('upload_date_y')
|
||||
playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
|
||||
|
||||
videoinfo = playerdata.find('./playlist/videoinfo')
|
||||
|
||||
formats = []
|
||||
for filename in videoinfo.findall('filename'):
|
||||
mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
|
||||
if mobj:
|
||||
fmt = {
|
||||
'url': mobj.group('url'),
|
||||
'play_path': 'mp4:' + mobj.group('play_path'),
|
||||
'page_url': video_page_url,
|
||||
'player_url': video_page_url + 'includes/vodplayer.swf',
|
||||
}
|
||||
else:
|
||||
video_upload_date = None
|
||||
if video_upload_date:
|
||||
video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d')
|
||||
else:
|
||||
video_description = None
|
||||
video_upload_date = None
|
||||
self._downloader.report_warning('Unable to extract description and upload date')
|
||||
|
||||
# Thumbnail: not every video has an thumbnail
|
||||
mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
|
||||
if mobj:
|
||||
video_thumbnail = mobj.group('thumbnail')
|
||||
else:
|
||||
video_thumbnail = None
|
||||
|
||||
mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Unable to extract media URL')
|
||||
video_url = mobj.group('url')
|
||||
video_play_path = 'mp4:' + mobj.group('play_path')
|
||||
video_player_url = video_page_url + 'includes/vodplayer.swf'
|
||||
fmt = {
|
||||
'url': filename.text,
|
||||
}
|
||||
fmt.update({
|
||||
'width': int_or_none(filename.get('width')),
|
||||
'height': int_or_none(filename.get('height')),
|
||||
'vbr': int_or_none(filename.get('bitrate')),
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'play_path': video_play_path,
|
||||
'page_url': video_page_url,
|
||||
'player_url': video_player_url,
|
||||
'ext': 'flv',
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'upload_date': video_upload_date,
|
||||
'thumbnail': video_thumbnail,
|
||||
}
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
37
youtube_dl/extractor/savefrom.py
Normal file
37
youtube_dl/extractor/savefrom.py
Normal file
@@ -0,0 +1,37 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SaveFromIE(InfoExtractor):
    """Unwrap savefrom.net links and hand off the URL embedded in them."""

    IE_NAME = 'savefrom.net'
    _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'

    _TEST = {
        'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
        'info_dict': {
            'id': 'UlVRAPW2WJY',
            'ext': 'mp4',
            'title': 'About Team Radical MMA | MMA Fighting',
            'upload_date': '20120816',
            'uploader': 'Howcast',
            'uploader_id': 'Howcast',
            'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
        },
        'params': {
            'skip_download': True
        }
    }

    def _real_extract(self, url):
        """Return a ``url``-type result pointing at the wrapped target URL."""
        # Everything after "#url=" is the address of the page to extract.
        wrapped_url = re.match(self._VALID_URL, url).group('url')

        # Provisional id: the text after the last "/" of the full URL, minus
        # whatever os.path.splitext considers to be its extension.
        last_segment = url.rsplit('/', 1)[-1]
        provisional_id, _ = os.path.splitext(last_segment)

        return {
            '_type': 'url',
            'id': provisional_id,
            'url': wrapped_url,
        }
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
@@ -12,11 +14,12 @@ class SlideshareIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
||||
u'file': u'25665706.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'Managing Scale and Complexity',
|
||||
u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
|
||||
'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
||||
'info_dict': {
|
||||
'id': '25665706',
|
||||
'ext': 'mp4',
|
||||
'title': 'Managing Scale and Complexity',
|
||||
'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -26,15 +29,17 @@ class SlideshareIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
slideshare_obj = self._search_regex(
|
||||
r'var slideshare_object = ({.*?}); var user_info =',
|
||||
webpage, u'slideshare object')
|
||||
webpage, 'slideshare object')
|
||||
info = json.loads(slideshare_obj)
|
||||
if info['slideshow']['type'] != u'video':
|
||||
raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
||||
if info['slideshow']['type'] != 'video':
|
||||
raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
||||
|
||||
doc = info['doc']
|
||||
bucket = info['jsplayer']['video_bucket']
|
||||
ext = info['jsplayer']['video_extension']
|
||||
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
||||
description = self._html_search_regex(
|
||||
r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
@@ -43,5 +48,5 @@ class SlideshareIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
'url': video_url,
|
||||
'thumbnail': info['slideshow']['pin_image_url'],
|
||||
'description': self._og_search_description(webpage),
|
||||
'description': description,
|
||||
}
|
||||
|
@@ -20,6 +20,7 @@ class SmotriIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com'
|
||||
IE_NAME = 'smotri'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
|
||||
_NETRC_MACHINE = 'smotri'
|
||||
|
||||
_TESTS = [
|
||||
# real video id 2610366
|
||||
|
@@ -17,6 +17,7 @@ class SohuIE(InfoExtractor):
|
||||
u'info_dict': {
|
||||
u'title': u'MV:Far East Movement《The Illest》',
|
||||
},
|
||||
u'skip': u'Only available from China',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
'id': '47127627',
|
||||
'ext': 'mp3',
|
||||
'title': 'Goldrushed',
|
||||
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
||||
'uploader': 'The Royal Concept',
|
||||
'upload_date': '20120521',
|
||||
},
|
||||
@@ -217,7 +218,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
return self._extract_info_dict(info, full_title, secret_token=token)
|
||||
|
||||
class SoundcloudSetIE(SoundcloudIE):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
|
||||
IE_NAME = 'soundcloud:set'
|
||||
# it's in tests/test_playlists.py
|
||||
_TESTS = []
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -8,14 +10,14 @@ from ..utils import RegexNotFoundError, ExtractorError
|
||||
class SpaceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
|
||||
_TEST = {
|
||||
u'add_ie': ['Brightcove'],
|
||||
u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
|
||||
u'info_dict': {
|
||||
u'id': u'2780937028001',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
|
||||
u'description': u'md5:db81cf7f3122f95ed234b631a6ea1e61',
|
||||
u'uploader': u'TechMedia Networks',
|
||||
'add_ie': ['Brightcove'],
|
||||
'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
|
||||
'info_dict': {
|
||||
'id': '2780937028001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
|
||||
'description': 'md5:db81cf7f3122f95ed234b631a6ea1e61',
|
||||
'uploader': 'TechMedia Networks',
|
||||
},
|
||||
}
|
||||
|
||||
|
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -8,23 +7,27 @@ from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
int_or_none,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
)
|
||||
from ..aes import aes_decrypt_text
|
||||
|
||||
|
||||
class SpankwireIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
|
||||
_TEST = {
|
||||
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
|
||||
'file': '103545.mp4',
|
||||
'md5': '1b3f55e345500552dbc252a3e9c1af43',
|
||||
'md5': '8bbfde12b101204b39e4b9fe7eb67095',
|
||||
'info_dict': {
|
||||
"uploader": "oreusz",
|
||||
"title": "Buckcherry`s X Rated Music Video Crazy Bitch",
|
||||
"description": "Crazy Bitch X rated music video.",
|
||||
"age_limit": 18,
|
||||
'id': '103545',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
|
||||
'description': 'Crazy Bitch X rated music video.',
|
||||
'uploader': 'oreusz',
|
||||
'uploader_id': '124697',
|
||||
'upload_date': '20070508',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,13 +40,26 @@ class SpankwireIE(InfoExtractor):
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
|
||||
video_uploader = self._html_search_regex(
|
||||
r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||
title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False)
|
||||
upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False)
|
||||
if upload_date:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False)
|
||||
if view_count:
|
||||
view_count = str_to_int(view_count)
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False))
|
||||
|
||||
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
|
||||
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
||||
@@ -53,16 +69,13 @@ class SpankwireIE(InfoExtractor):
|
||||
formats = []
|
||||
for video_url in video_urls:
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[4].split('_')[:2]
|
||||
resolution, bitrate_str = format
|
||||
format = "-".join(format)
|
||||
height = int(resolution.rstrip('P'))
|
||||
tbr = int(bitrate_str.rstrip('K'))
|
||||
|
||||
height = int(resolution.rstrip('Pp'))
|
||||
tbr = int(bitrate_str.rstrip('Kk'))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': extension,
|
||||
'resolution': resolution,
|
||||
'format': format,
|
||||
'tbr': tbr,
|
||||
@@ -75,10 +88,14 @@ class SpankwireIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'upload_date': upload_date,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
@@ -1,10 +1,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
|
||||
class SpikeIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(www\.spike\.com/(video-clips|episodes)/.+|
|
||||
m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
|
||||
'md5': '1a9265f32b0c375793d6c4ce45255256',
|
||||
@@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor):
|
||||
}
|
||||
|
||||
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
|
||||
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
|
||||
|
||||
def _real_extract(self, url):
    """Map mobile URLs to a www.spike.com video-clips URL, then defer to the parent class.

    URLs that do not carry a ``mobile_id`` group are passed through
    unchanged.
    """
    mobile_id = re.search(self._VALID_URL, url).group('mobile_id')
    if mobile_id is None:
        target_url = url
    else:
        # The mobile site only exposes the clip id; rebuild the desktop URL.
        target_url = 'http://www.spike.com/video-clips/%s' % mobile_id
    return super(SpikeIE, self)._real_extract(target_url)
|
||||
|
67
youtube_dl/extractor/streamcz.py
Normal file
67
youtube_dl/extractor/streamcz.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class StreamCZIE(InfoExtractor):
    """Information extractor for episode pages on stream.cz."""

    _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)'

    _TEST = {
        'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
        'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',
        'info_dict': {
            'id': '765767',
            'ext': 'mp4',
            'title': 'Peklo na talíři: Éčka pro děti',
            'description': 'md5:49ace0df986e95e331d0fe239d421519',
            'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
            'duration': 256,
        },
    }

    # Ranking of the quality labels this extractor knows about; a higher
    # number means a better format.  Labels missing from this table are
    # ranked below all known ones (see _real_extract).
    _QUALITY_PREFERENCE = {
        '240p': 0,
        '360p': 1,
        '480p': 2,
        '720p': 3,
    }

    def _real_extract(self, url):
        """Extract formats and metadata from the episode JSON embedded in the page.

        The page is expected to contain a ``Stream.Data.Episode(...)`` call
        whose argument is a JSON object; a missing call is reported by
        ``_html_search_regex``.  Returns the info dict for the episode.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        webpage = self._download_webpage(url, video_id)

        data = self._html_search_regex(
            r'Stream\.Data\.Episode\((.+?)\);', webpage, 'stream data')
        episode = json.loads(data)

        formats = []
        for video in episode['instances']:
            for video_format in video['instances']:
                format_id = video_format['quality']
                formats.append({
                    'format_id': '%s-%s' % (video_format['type'].split('/')[1], format_id),
                    'url': video_format['source'],
                    # An unknown label used to leave ``quality`` unassigned
                    # (UnboundLocalError) or carrying the previous format's
                    # value; rank it lowest instead.
                    'quality': self._QUALITY_PREFERENCE.get(format_id, -1),
                })

        self._sort_formats(formats)

        # Only a protocol-relative URL needs a scheme.  A blanket
        # replace('//', 'http://') would also rewrite the "//" of a URL that
        # already has a scheme, or any "//" later in the path.
        thumbnail = episode['episode_image_original_url']
        if thumbnail.startswith('//'):
            thumbnail = 'http:' + thumbnail

        return {
            'id': str(episode['id']),
            'title': self._og_search_title(webpage),
            'thumbnail': thumbnail,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'duration': int_or_none(episode['duration']),
            'view_count': int_or_none(episode['stats_total']),
        }
|
27
youtube_dl/extractor/syfy.py
Normal file
27
youtube_dl/extractor/syfy.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SyfyIE(InfoExtractor):
    """Resolve syfy.com video pages to the video URL found in the page."""

    _VALID_URL = r'https?://www\.syfy\.com/videos/.+?vid:(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
        'md5': 'e07de1d52c7278adbb9b9b1c93a66849',
        'info_dict': {
            'id': 'NmqMrGnXvmO1',
            'ext': 'flv',
            'title': 'George Lucas has Advice for his Daughter',
            'description': 'Listen to what insights George Lucas give his daughter Amanda.',
        },
        'add_ie': ['ThePlatform'],
    }

    def _real_extract(self, url):
        """Download the page and wrap the URL from ``_og_search_video_url`` in a url result."""
        # The numeric id from the page URL is only used to label the download.
        page_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, page_id)
        embedded_video_url = self._og_search_video_url(page)
        return self.url_result(embedded_video_url)
|
@@ -6,115 +6,111 @@ import re
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
RegexNotFoundError,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class TEDIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL=r'''http://www\.ted\.com/
|
||||
(
|
||||
((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
|
||||
|
|
||||
((?P<type_talk>talks)) # We have a simple talk
|
||||
)
|
||||
(/lang/(.*?))? # The url may contain the language
|
||||
/(?P<name>\w+) # Here goes the name and then ".html"
|
||||
'''
|
||||
_VALID_URL = r'''(?x)http://www\.ted\.com/
|
||||
(
|
||||
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
||||
|
|
||||
((?P<type_talk>talks)) # We have a simple talk
|
||||
)
|
||||
(/lang/(.*?))? # The url may contain the language
|
||||
/(?P<name>\w+) # Here goes the name and then ".html"
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||
'file': '102.mp4',
|
||||
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
|
||||
'info_dict': {
|
||||
"description": "md5:c6fa72e6eedbd938c9caf6b2702f5922",
|
||||
"title": "Dan Dennett: The illusion of consciousness"
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
'title': 'The illusion of consciousness',
|
||||
'description': ('Philosopher Dan Dennett makes a compelling '
|
||||
'argument that not only don\'t we understand our own '
|
||||
'consciousness, but that half the time our brains are '
|
||||
'actively fooling us.'),
|
||||
'uploader': 'Dan Dennett',
|
||||
}
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||
_FORMATS_PREFERENCE = {
|
||||
'low': 1,
|
||||
'medium': 2,
|
||||
'high': 3,
|
||||
}
|
||||
|
||||
def _extract_info(self, webpage):
|
||||
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
|
||||
webpage, 'info json')
|
||||
return json.loads(info_json)
|
||||
|
||||
def _real_extract(self, url):
|
||||
m=re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
m = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
name = m.group('name')
|
||||
if m.group('type_talk'):
|
||||
return self._talk_info(url)
|
||||
else :
|
||||
playlist_id=m.group('playlist_id')
|
||||
name=m.group('name')
|
||||
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
|
||||
return [self._playlist_videos_info(url,name,playlist_id)]
|
||||
return self._talk_info(url, name)
|
||||
else:
|
||||
return self._playlist_videos_info(url, name)
|
||||
|
||||
|
||||
def _playlist_videos_info(self, url, name, playlist_id):
|
||||
def _playlist_videos_info(self, url, name):
|
||||
'''Returns the videos of the playlist'''
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, 'Downloading playlist webpage')
|
||||
matches = re.finditer(
|
||||
r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
|
||||
webpage)
|
||||
|
||||
playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
|
||||
webpage, 'playlist title')
|
||||
webpage = self._download_webpage(url, name,
|
||||
'Downloading playlist webpage')
|
||||
info = self._extract_info(webpage)
|
||||
playlist_info = info['playlist']
|
||||
|
||||
playlist_entries = [
|
||||
self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
|
||||
for m in matches
|
||||
self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
||||
for talk in info['talks']
|
||||
]
|
||||
return self.playlist_result(
|
||||
playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
|
||||
playlist_entries,
|
||||
playlist_id=compat_str(playlist_info['id']),
|
||||
playlist_title=playlist_info['title'])
|
||||
|
||||
def _talk_info(self, url, video_id=0):
|
||||
"""Return the video for the talk in the url"""
|
||||
m = re.match(self._VALID_URL, url,re.VERBOSE)
|
||||
video_name = m.group('name')
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
|
||||
def _talk_info(self, url, video_name):
|
||||
webpage = self._download_webpage(url, video_name)
|
||||
self.report_extraction(video_name)
|
||||
# If the url includes the language we get the title translated
|
||||
title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
|
||||
webpage, 'title')
|
||||
json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
|
||||
webpage, 'json data')
|
||||
info = json.loads(json_data)
|
||||
desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
|
||||
webpage, 'description', flags = re.DOTALL)
|
||||
|
||||
thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
|
||||
webpage, 'thumbnail')
|
||||
|
||||
talk_info = self._extract_info(webpage)['talks'][0]
|
||||
|
||||
formats = [{
|
||||
'ext': 'mp4',
|
||||
'url': stream['file'],
|
||||
'format': stream['id']
|
||||
} for stream in info['htmlStreams']]
|
||||
|
||||
video_id = info['id']
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'format': format_id,
|
||||
'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
|
||||
} for (format_id, format_url) in talk_info['nativeDownloads'].items()]
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_id = compat_str(talk_info['id'])
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||
video_subtitles = self.extract_subtitles(video_id, talk_info)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, webpage)
|
||||
self._list_available_subtitles(video_id, talk_info)
|
||||
return
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'description': desc,
|
||||
'title': talk_info['title'],
|
||||
'uploader': talk_info['speaker'],
|
||||
'thumbnail': talk_info['thumb'],
|
||||
'description': self._og_search_description(webpage),
|
||||
'subtitles': video_subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
|
||||
languages = re.findall(r'(?:<option value=")(\S+)"', options)
|
||||
if languages:
|
||||
sub_lang_list = {}
|
||||
for l in languages:
|
||||
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
|
||||
sub_lang_list[l] = url
|
||||
return sub_lang_list
|
||||
except RegexNotFoundError:
|
||||
def _get_available_subtitles(self, video_id, talk_info):
|
||||
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
|
||||
if languages:
|
||||
sub_lang_list = {}
|
||||
for l in languages:
|
||||
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
|
||||
sub_lang_list[l] = url
|
||||
return sub_lang_list
|
||||
else:
|
||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||
return {}
|
||||
return {}
|
||||
|
68
youtube_dl/extractor/testurl.py
Normal file
68
youtube_dl/extractor/testurl.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class TestURLIE(InfoExtractor):
    """Resolve pseudo-URLs such as ``test:yout.*be_1`` to a test case URL.

    The part after ``test:`` (or ``testurl:``) is used as a case-insensitive
    regular expression that is searched for in every extractor's IE_NAME.
    An optional ``_<number>`` suffix selects which of that extractor's test
    cases to use (default: the first one).
    """

    IE_DESC = False  # Do not list
    _VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$'

    def _real_extract(self, url):
        """Return a 'url' result pointing at the selected test case's URL.

        Raises ExtractorError (expected=True) when no extractor matches, when
        several match and none is an exact name match, or when the requested
        test case index does not exist.
        """
        # Function-level import, kept exactly where the original has it
        from ..extractor import gen_extractors

        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        extractor_id = match.group('extractor')
        all_extractors = gen_extractors()

        pattern = re.compile(extractor_id, flags=re.IGNORECASE)
        candidates = [
            ie for ie in all_extractors if pattern.search(ie.IE_NAME)]

        if not candidates:
            raise ExtractorError(
                'No extractors matching %r found' % extractor_id,
                expected=True)

        if len(candidates) == 1:
            extractor = candidates[0]
        else:
            # Several names match the pattern; only an exact
            # (case-insensitive) name match makes the choice unambiguous.
            wanted_name = extractor_id.lower()
            extractor = next(
                (ie for ie in candidates
                 if ie.IE_NAME.lower() == wanted_name),
                None)
            if extractor is None:
                raise ExtractorError(
                    ('Found multiple matching extractors: %s' %
                        ' '.join(ie.IE_NAME for ie in candidates)),
                    expected=True)

        num_str = match.group('num')
        index = int(num_str) if num_str else 0

        # A single _TEST (if any) comes first, followed by everything in _TESTS
        single_test = getattr(extractor, '_TEST', None)
        testcases = [single_test] if single_test else []
        testcases.extend(getattr(extractor, '_TESTS', []))

        # index is never negative: the URL pattern only admits digits
        if index >= len(testcases):
            raise ExtractorError(
                ('Test case %d not found, got only %d tests' %
                    (index, len(testcases))),
                expected=True)
        test_url = testcases[index]['url']

        self.to_screen('Test URL: %s' % test_url)

        return {
            '_type': 'url',
            'url': test_url,
            'id': video_id,
        }
|
@@ -11,7 +11,10 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
|
||||
|
||||
|
||||
class ThePlatformIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
|
||||
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|
||||
|theplatform:)(?P<id>[^/\?&]+)'''
|
||||
|
||||
_TEST = {
|
||||
# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
|
||||
@@ -29,9 +32,7 @@ class ThePlatformIE(InfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
def _get_info(self, video_id):
|
||||
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
||||
'format=smil&mbr=true'.format(video_id))
|
||||
def _get_info(self, video_id, smil_url):
|
||||
meta = self._download_xml(smil_url, video_id)
|
||||
|
||||
try:
|
||||
@@ -50,26 +51,39 @@ class ThePlatformIE(InfoExtractor):
|
||||
|
||||
head = meta.find(_x('smil:head'))
|
||||
body = meta.find(_x('smil:body'))
|
||||
base_url = head.find(_x('smil:meta')).attrib['base']
|
||||
switch = body.find(_x('smil:switch'))
|
||||
formats = []
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
width = int(attr['width'])
|
||||
height = int(attr['height'])
|
||||
vbr = int(attr['system-bitrate']) // 1000
|
||||
format_id = '%dx%d_%dk' % (width, height, vbr)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'play_path': 'mp4:' + attr['src'],
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
f4m_node = body.find(_x('smil:seq/smil:video'))
|
||||
if f4m_node is not None:
|
||||
f4m_url = f4m_node.attrib['src']
|
||||
if 'manifest.f4m?' not in f4m_url:
|
||||
f4m_url += '?'
|
||||
# the parameters are from syfy.com, other sites may use others,
|
||||
# they also work for nbc.com
|
||||
f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
|
||||
formats = [{
|
||||
'ext': 'flv',
|
||||
'url': f4m_url,
|
||||
}]
|
||||
else:
|
||||
base_url = head.find(_x('smil:meta')).attrib['base']
|
||||
switch = body.find(_x('smil:switch'))
|
||||
formats = []
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
width = int(attr['width'])
|
||||
height = int(attr['height'])
|
||||
vbr = int(attr['system-bitrate']) // 1000
|
||||
format_id = '%dx%d_%dk' % (width, height, vbr)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'play_path': 'mp4:' + attr['src'],
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -83,4 +97,14 @@ class ThePlatformIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
return self._get_info(video_id)
|
||||
if mobj.group('config'):
|
||||
config_url = url+ '&form=json'
|
||||
config_url = config_url.replace('swf/', 'config/')
|
||||
config_url = config_url.replace('onsite/', 'onsite/config/')
|
||||
config_json = self._download_webpage(config_url, video_id, u'Downloading config')
|
||||
config = json.loads(config_json)
|
||||
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
||||
else:
|
||||
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
||||
'format=smil&mbr=true'.format(video_id))
|
||||
return self._get_info(video_id, smil_url)
|
||||
|
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from youtube_dl.utils import ExtractorError
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class TinyPicIE(InfoExtractor):
|
||||
|
44
youtube_dl/extractor/trutube.py
Normal file
44
youtube_dl/extractor/trutube.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TruTubeIE(InfoExtractor):
    """Information extractor for video pages on trutube.tv."""

    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
    _TEST = {
        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
        'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
        'info_dict': {
            'id': '14880',
            'ext': 'flv',
            'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
            # Raw string: '\.' is not a valid escape sequence in a normal
            # string literal; the runtime value is unchanged.
            'thumbnail': r're:^http:.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Download the video page and build the info dict.

        The numeric id comes from the URL, the title from the page's
        OpenGraph data, the thumbnail from the ``splash_img`` JavaScript
        variable (optional, non-fatal) and the formats from every
        ``<key>_video_file`` JavaScript variable found in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        video_title = self._og_search_title(webpage).strip()
        thumbnail = self._search_regex(
            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)

        all_formats = re.finditer(
            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
        # Formats are numbered in page order: the first match gets quality 0,
        # later ones get -1, -2, ...
        formats = [{
            'format_id': m.group('key'),
            'quality': -i,
            'url': m.group('url'),
        } for i, m in enumerate(all_formats)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'thumbnail': thumbnail,
        }
|
@@ -11,7 +11,7 @@ from ..aes import (
|
||||
)
|
||||
|
||||
class Tube8IE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
|
||||
_TEST = {
|
||||
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||
u'file': u'229795.mp4',
|
||||
|
84
youtube_dl/extractor/tvigle.py
Normal file
84
youtube_dl/extractor/tvigle.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TvigleIE(InfoExtractor):
    """Information extractor for tvigle.ru category pages.

    The video id is taken from the ``video=`` / ``v=`` query parameter and
    all metadata is read from the site's ``single.php`` XML document.
    """

    IE_NAME = 'tvigle'
    IE_DESC = 'Интернет-телевидение Tvigle.ru'
    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
            'md5': '09afba4616666249f087efc6dcf83cb3',
            'info_dict': {
                'id': '503081',
                'ext': 'flv',
                'title': 'Брат 2 ',
                'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
                'upload_date': '20110919',
            },
        },
        {
            'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
            'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
            'info_dict': {
                'id': '676433',
                'ext': 'flv',
                'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
                'description': 'md5:027f7dc872948f14c96d19b4178428a4',
                'upload_date': '20121218',
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the video XML for the id in *url* and build the info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')

        xml_root = self._download_xml(
            'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id, video_id, 'Downloading video XML')

        video = xml_root.find('./video')

        title = video.get('name')
        description = video.get('anons')
        if description:
            description = clean_html(description)
        # NOTE(review): 'img' is read from the XML root while every other
        # attribute comes from <video> - confirm this is intentional.
        thumbnail = xml_root.get('img')
        upload_date = unified_strdate(video.get('date'))
        like_count = int_or_none(video.get('vtp'))

        # (XML attribute holding the URL, human readable note); the position
        # in this sequence is used as the format's 'quality' value.
        known_formats = (
            ('low_file', 'SQ'),
            ('file', 'HQ'),
            ('hd', 'HD 720'),
        )
        formats = []
        for quality, (format_id, format_note) in enumerate(known_formats):
            video_url = video.get(format_id)
            if video_url:
                formats.append({
                    'url': video_url,
                    'format_id': format_id,
                    'format_note': format_note,
                    'quality': quality,
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'like_count': like_count,
            'age_limit': 18,
            'formats': formats,
        }
|
@@ -4,6 +4,7 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_request
|
||||
|
||||
|
||||
class VeohIE(InfoExtractor):
|
||||
@@ -24,6 +25,13 @@ class VeohIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
age_limit = 0
|
||||
if 'class="adultwarning-container"' in webpage:
|
||||
self.report_age_confirmation()
|
||||
age_limit = 18
|
||||
request = compat_urllib_request.Request(url)
|
||||
request.add_header('Cookie', 'confirmedAdult=true')
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
|
||||
m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
|
||||
if m_youtube is not None:
|
||||
@@ -44,4 +52,5 @@ class VeohIE(InfoExtractor):
|
||||
'thumbnail': info.get('highResImage') or info.get('medResImage'),
|
||||
'description': info['description'],
|
||||
'view_count': info['views'],
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user