Compare commits

..

236 Commits

Author SHA1 Message Date
Philipp Hagemeister
8863d0de91 release 2013.12.16 2013-12-16 04:45:32 +01:00
Philipp Hagemeister
7b6fefc9d4 Apply --no-overwrites for --write-* files as well (Fixes #1980) 2013-12-16 04:39:13 +01:00
Philipp Hagemeister
525ef9227f Add --get-duration (Fixes #859) 2013-12-16 04:15:10 +01:00
Philipp Hagemeister
c0ba0f4859 Document duration field 2013-12-16 04:09:43 +01:00
Philipp Hagemeister
b466b7029d [youtube] Make duration an integer or None 2013-12-16 04:09:05 +01:00
Philipp Hagemeister
fa3ae234e0 [cbs] Add extractor (Fixes #1977) 2013-12-16 03:53:43 +01:00
Philipp Hagemeister
48462108f3 [theplatform] Fix geographic restriction check 2013-12-16 03:43:45 +01:00
Philipp Hagemeister
f8b56e95b8 [theplatform] Detect geoblocked content 2013-12-16 03:34:46 +01:00
Philipp Hagemeister
5fe18bdbde Add --min-views / --max-views (Fixes #1979) 2013-12-16 03:09:49 +01:00
Jaime Marquínez Ferrándiz
dca02c80bc Fix detection of the extension if the 'extractaudio' is given and improve the error message (#1969)
Using 'foo.mp4' shouldn't raise an error.
If 'foo' is given suggest using 'foo.%(ext)s' for the template
2013-12-15 11:42:38 +01:00
Jaime Marquínez Ferrándiz
9ee859b683 [daylimotion] Add support for urls from the mobile site (fixes #1953)
It uses the 'touch' subdomain and adds a '#' before 'video'
2013-12-14 14:20:12 +01:00
Jaime Marquínez Ferrándiz
5d574e143f [ign] Update one of test video's title 2013-12-13 17:04:40 +01:00
Philipp Hagemeister
2a203a6cda Merge pull request #1956 from dstftw/master
Fix typo in month name
2013-12-13 07:41:34 -08:00
dst
dadb8184e4 Fix typo in month name 2013-12-13 22:27:37 +07:00
Jaime Marquínez Ferrándiz
7a563df90a [daum] Recognize mobile urls (#1952) 2013-12-12 13:05:38 +01:00
Jaime Marquínez Ferrándiz
24b173fa5c [naver] Recognize mobile urls (fixes #1951) 2013-12-12 13:04:02 +01:00
Philipp Hagemeister
f2c36ee43e release 2013.12.11.2 2013-12-11 09:22:25 +01:00
Philipp Hagemeister
00381b4ccb [pornhub] Fix URL regexp 2013-12-11 09:22:08 +01:00
Philipp Hagemeister
fca1ef19c1 release 2013.12.11.1 2013-12-11 08:54:54 +01:00
Philipp Hagemeister
357ddadbf5 Fix thumbnail filename determination (Fixes #1945) 2013-12-11 08:54:48 +01:00
Philipp Hagemeister
08d03235f9 release 2013.12.11 2013-12-11 08:45:51 +01:00
Jaime Marquínez Ferrándiz
1825836235 Use _download_xml in more extractors 2013-12-10 21:03:53 +01:00
Jaime Marquínez Ferrándiz
a0088bdf93 [vimeo] Fix unused argument of the _real_extract method 2013-12-10 20:43:16 +01:00
Jaime Marquínez Ferrándiz
48ad51b243 [vimeo] Fix the extraction for some 'player' or 'pro' videos
The variable the config dict is assigned to can change, now we try to detect it or fallback to a, b or c
2013-12-10 20:28:12 +01:00
Jaime Marquínez Ferrándiz
5458b4cefb [dailymotion] Fix view count extraction and make it non fatal (fixes #1940) 2013-12-10 19:47:00 +01:00
Jaime Marquínez Ferrándiz
7c86cd5ab1 [dailymotion] Fix uploader extraction
Now it looks directly in the info dictionary
2013-12-10 19:44:16 +01:00
Jaime Marquínez Ferrándiz
cbfc470228 [mixcloud] Try to get the m4a url if the mp3 url fails to download (fixes #1939) 2013-12-10 13:42:41 +01:00
Jaime Marquínez Ferrándiz
f67ca84d4a [soundcloud] Fix the extension for 'downloadable' songs
In this case the 'original_format' field must be used.
2013-12-10 13:04:21 +01:00
Philipp Hagemeister
e2b38da931 [mtv] Fixup incorrectly encoded XML documents 2013-12-10 12:45:22 +01:00
Philipp Hagemeister
a30a60d8eb release 2013.12.10 2013-12-10 11:54:59 +01:00
Philipp Hagemeister
5a3ea17c94 [zdf] Correct order of unknown formats (#1936) 2013-12-10 11:52:10 +01:00
Philipp Hagemeister
475700acfe [soundcloud] Do not mistake original_format for ext (Fixes #1934) 2013-12-10 11:45:13 +01:00
Philipp Hagemeister
45598aab08 [YoutubeDL] Simplify filename preparation 2013-12-10 11:23:35 +01:00
Jaime Marquínez Ferrándiz
26e6393134 Set 'NA' as the default value for missing fields in the output template (fixes #1931)
Remove the `except KeyError` clause, it won't get raised anymore
2013-12-09 22:00:42 +01:00
Philipp Hagemeister
49929a20a7 release 2013.12.09.4 2013-12-09 20:05:27 +01:00
Philipp Hagemeister
f8bd0194a7 Remove superfluous spaces 2013-12-09 20:05:10 +01:00
Jaime Marquínez Ferrándiz
77526143e7 [brightcove] Use the original url (usually the player) as the default referer (fixes #1929) 2013-12-09 20:01:43 +01:00
Philipp Hagemeister
4ff50ef846 [soundcloud] Do not match sets (Fixes #1930) 2013-12-09 19:57:00 +01:00
Philipp Hagemeister
caefb1de87 [ndtv] Add extractor (Fixes #1924) 2013-12-09 19:44:33 +01:00
Philipp Hagemeister
1e1f84dac9 release 2013.12.09.3 2013-12-09 18:56:17 +01:00
Philipp Hagemeister
1d87e3a1c6 [rtlnow] Allow double slashes after domain name (Fixes #1928) 2013-12-09 18:56:05 +01:00
Philipp Hagemeister
df8ae1e3a2 release 2013.12.09.2 2013-12-09 18:31:31 +01:00
Philipp Hagemeister
f7d8d4a116 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-09 18:29:12 +01:00
Philipp Hagemeister
1c088fa89d Improve --bidi-workaround support 2013-12-09 18:29:07 +01:00
Jaime Marquínez Ferrándiz
de2dd4c502 [soundcloud] add support for private links (fixes #1927) 2013-12-09 17:08:58 +01:00
Philipp Hagemeister
395293a889 [--load-info] Always read file as UTF-8
This allows editing the file (and not escaping non-ASCII characters) and reloading it in.
2013-12-09 04:59:51 +01:00
Philipp Hagemeister
db4da14027 Merge remote-tracking branch 'jaimeMF/load-info' 2013-12-09 04:55:02 +01:00
Philipp Hagemeister
2101830c0d Remove unused imports 2013-12-09 04:53:23 +01:00
Philipp Hagemeister
977887469c Lower number of expected entries in top list 2013-12-09 04:50:48 +01:00
Philipp Hagemeister
ffa8f0df0a Merge remote-tracking branch 'jaimeMF/yt-toplists' 2013-12-09 04:49:32 +01:00
Philipp Hagemeister
693b8b2d31 Merge remote-tracking branch 'dstftw/smotri.com-broadcast'
Conflicts:
	youtube_dl/FileDownloader.py
	youtube_dl/extractor/smotri.py
2013-12-09 04:42:35 +01:00
Philipp Hagemeister
a0d96c9843 Add filename to --dump-json output (Fixes #1908) 2013-12-09 04:31:18 +01:00
Philipp Hagemeister
2a18bc9a4b Add some bug reporting hints 2013-12-09 04:20:14 +01:00
Philipp Hagemeister
eaa1a7bde3 release 2013.12.09.1 2013-12-09 04:09:06 +01:00
Philipp Hagemeister
0783b09b92 Add a workaround for terminals without bidi support (Fixes #1912) 2013-12-09 04:08:51 +01:00
Philipp Hagemeister
ffe62508e4 release 2013.12.09 2013-12-09 03:03:01 +01:00
Philipp Hagemeister
ac79fa02b8 Restore Python 2.6.<6 compatibility (Fixes #1860) 2013-12-09 03:02:54 +01:00
Philipp Hagemeister
7cc3570e53 Add fatal=False parameter to _download_* functions.
This allows us to simplify the calls in the youtube extractor even further.
2013-12-09 01:49:03 +01:00
Jaime Marquínez Ferrándiz
baa7b1978b Remove the calls to 'compat_urllib_request.urlopen' in a few extractors 2013-12-08 22:24:55 +01:00
Jaime Marquínez Ferrándiz
ac5118bcb9 [arte.tv:ddc] Add fields to the test and skip download (rtmp) 2013-12-08 16:35:29 +01:00
Jaime Marquínez Ferrándiz
5adb818947 Merge remote-tracking branch 'spjoe/master' (closes PR #1921) 2013-12-08 16:33:34 +01:00
Camillo Dell'mour
52defb0c9b made ddc.arte.tv test working 2013-12-08 16:22:31 +01:00
Camillo Dell'mour
56a8ab7d60 added arte.tv extractor support for subdomain ddc - Mit offenen Karten(german) Le Dessous des Cartes(france) 2013-12-08 14:43:15 +01:00
Philipp Hagemeister
22686b91f0 release 2013.12.08.1 2013-12-08 07:32:25 +01:00
Philipp Hagemeister
31812a9e0e [youtube:channel] Fix automated channel detection 2013-12-08 07:30:42 +01:00
Philipp Hagemeister
11bf848191 [wimp] simplify 2013-12-08 07:22:19 +01:00
Philipp Hagemeister
d4df5ed14c release 2013.12.08 2013-12-08 06:54:52 +01:00
Philipp Hagemeister
303b479e0a Automatically load SSL certs on Windows 2013-12-08 06:54:39 +01:00
Philipp Hagemeister
4c52160646 [FileDownloader] Fix progress report on Windows (Fixes #1918) 2013-12-08 06:53:46 +01:00
Philipp Hagemeister
a213880aaf Simplify status reporting (#1918) 2013-12-08 05:49:35 +01:00
Jaime Marquínez Ferrándiz
42d3bf844a Merge pull request #1919 from rzhxeo/xhamster
[XHamsterIE] Fix HD video detection
2013-12-07 14:35:17 -08:00
rzhxeo
b860967ce4 [XHamsterIE] Fix md5 in second test 2013-12-07 22:17:13 +01:00
rzhxeo
8ca6b8fba1 [XHamsterIE] Fix HD video detection 2013-12-07 21:39:32 +01:00
Jaime Marquínez Ferrándiz
c4d9e6731a [pyvideo] add support for videos that don't come from Youtube 2013-12-07 11:19:59 +01:00
Jaime Marquínez Ferrándiz
0d9ec5d963 [pyvideo] Cleanup and fix test 2013-12-07 11:00:56 +01:00
Jaime Marquínez Ferrándiz
870fc4e578 Merge remote-tracking branch 'gekitsuu/master' (closes PR #1913) 2013-12-07 10:50:06 +01:00
Adam Glenn
f623530d6e removing bad VALID_URL 2013-12-06 21:12:10 -08:00
Adam Glenn
ca9e02dc00 Adding pyvideo support 2013-12-06 21:11:01 -08:00
Jaime Marquínez Ferrándiz
fb30ec22fd [vimeo] Add an extractor for groups 2013-12-06 22:01:41 +01:00
Jaime Marquínez Ferrándiz
5cc14c2fd7 [vimeo] Add an extractor for albums (closes #1911) 2013-12-06 21:48:44 +01:00
Jaime Marquínez Ferrándiz
d349cd2240 [imdb] Fix extraction
The paths to each format's page may have leading whitespace.
The height and the duration can't be extracted.
2013-12-06 20:26:55 +01:00
Jaime Marquínez Ferrándiz
0b6a9f639f [vevo] Update test video's duration 2013-12-06 20:14:29 +01:00
Jaime Marquínez Ferrándiz
715c8e7bdb [youtube:playlist] Recognize mix ids for direct use (fixes #1295) 2013-12-06 19:52:41 +01:00
Jaime Marquínez Ferrándiz
7d4afc557f [youtube:playlist] Support mix ids longer than 13 (#1295) 2013-12-06 19:48:54 +01:00
Jaime Marquínez Ferrándiz
563e405411 [dailymotion] Fix view count regex
In some languages they can be in the format '123,456' instead of '123.456'
2013-12-06 13:41:07 +01:00
Jaime Marquínez Ferrándiz
f53c966a73 [dailymotion] Extract view count (#1895) 2013-12-06 13:36:36 +01:00
Jaime Marquínez Ferrándiz
336c3a69bd [youtube] Extract like and dislike count (#1895) 2013-12-06 13:22:27 +01:00
Jaime Marquínez Ferrándiz
4e76179476 [vimeo] Extract views count, likes count and comments count (#1895) 2013-12-06 13:03:08 +01:00
Philipp Hagemeister
ef4fd84857 [wistia] Add extractor 2013-12-06 09:15:04 +01:00
Philipp Hagemeister
72135030d1 Merge remote-tracking branch 'origin/master' 2013-12-05 22:30:04 +01:00
Jaime Marquínez Ferrándiz
3514813d5b [francetv] Add support for urls in the format http://www.france3.fr/emissions/{program}/diffusions/{date} (fixes #1898) 2013-12-05 21:49:30 +01:00
Jaime Marquínez Ferrándiz
9e60602084 [francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)
Rename the France2IE extractor to FranceTVIE
2013-12-05 21:48:41 +01:00
Philipp Hagemeister
19e3dfc9f8 [9gag] Like/dislike count (#1895) 2013-12-05 18:29:07 +01:00
Philipp Hagemeister
a1ef7e85d6 Remove unused imports 2013-12-05 14:31:54 +01:00
Philipp Hagemeister
ef2fac6f4a Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-05 14:29:14 +01:00
Philipp Hagemeister
7fc3fa0545 [9gag] Add extractor 2013-12-05 14:29:08 +01:00
Jaime Marquínez Ferrándiz
673d1273ff [vevo] Support '/watch/{id}' urls 2013-12-05 12:41:58 +01:00
Jaime Marquínez Ferrándiz
b9a2c53833 [metacafe] Add support for cbs videos (fixes #1838)
They use theplatform.com
2013-12-04 23:43:50 +01:00
Jaime Marquínez Ferrándiz
e9bf7479d2 Add an extractor for theplatform.com 2013-12-04 23:41:22 +01:00
Jaime Marquínez Ferrándiz
bfb9f7bc4c [hotnewhiphop] Update test's title 2013-12-04 20:36:26 +01:00
Jaime Marquínez Ferrándiz
6a656a843a Update description value for the write_info_json test (required after 27dcce1904) 2013-12-04 20:35:00 +01:00
Philipp Hagemeister
29030c0a4c Merge remote-tracking branch 'dstftw/correct-valid-urls' 2013-12-04 19:56:05 +01:00
dst
c0ade33e16 Correct some extractor _VALID_URL regexes 2013-12-04 20:34:47 +07:00
Philipp Hagemeister
671c0f151d release 2013.12.04 2013-12-04 14:19:07 +01:00
Philipp Hagemeister
27dcce1904 [youtube] Resolve URLs in comments 2013-12-04 14:18:49 +01:00
dst
8aff7b9bc4 [smotri] Fix broadcast ticket regex 2013-12-04 12:36:12 +07:00
dst
55f6597c67 [smotri] Add an extractor for live rtmp broadcasts 2013-12-04 08:41:09 +07:00
Jaime Marquínez Ferrándiz
d494389821 Option '--load-info': if the download fails, try extracting the info with the 'webpage_url' field of the info dict
The video url may have expired.
2013-12-03 20:16:52 +01:00
Jaime Marquínez Ferrándiz
1dcc4c0cad Add --load-info option (#972)
It just calls the 'YoutubeDL.process_ie_result' with the dictionary from the json file
2013-12-03 20:15:20 +01:00
Jaime Marquínez Ferrándiz
84db81815a Move common code for extractors based in MTV services to a new base class
Removes the duplication of the thumbnail extraction code (only MTVIE needs to override it)
2013-12-03 14:58:24 +01:00
Jaime Marquínez Ferrándiz
fb7abb31af Remove the compatibility code used before the new format system was implemented 2013-12-03 14:31:20 +01:00
Philipp Hagemeister
ce93879a9b [daum] Fix real video ID extraction 2013-12-03 14:16:58 +01:00
Philipp Hagemeister
938384c587 [redtube] Fix search for title 2013-12-03 14:08:16 +01:00
Philipp Hagemeister
e9d8e302aa [xhamster] Change test checksum 2013-12-03 14:06:16 +01:00
Jaime Marquínez Ferrándiz
cb7fb54600 Change the ie_name of YoutubeSearchDateIE
It produced a duplicate entry when listing the extractors with '--list-extractors' and generates noise in the commit log when generating the supported sites webpage (like in 09f355f73b)
2013-12-03 13:55:25 +01:00
Philipp Hagemeister
cf6758d204 Document disabling proxy (#1882) 2013-12-03 13:33:07 +01:00
Philipp Hagemeister
731e3dde29 release 2013.12.03 2013-12-03 13:13:09 +01:00
Philipp Hagemeister
a0eaa341e1 [configuration] Undo code breakage 2013-12-03 13:11:20 +01:00
Philipp Hagemeister
fb27c2295e Correct configuration file locations 2013-12-03 13:09:48 +01:00
Philipp Hagemeister
1b753cb334 Add Windows configuration file locations (#1881) 2013-12-03 13:04:02 +01:00
Philipp Hagemeister
36a826a50d Clarify --download-archive help (#1757) 2013-12-03 11:54:52 +01:00
Philipp Hagemeister
8796857429 Credit @dstftw for smotri IE 2013-12-02 17:43:22 +01:00
Philipp Hagemeister
aaebed13a8 [smotri] Simplify 2013-12-02 17:08:17 +01:00
Philipp Hagemeister
25939ffe56 Merge branch 'smotri.com' of https://github.com/dstftw/youtube-dl 2013-12-02 15:56:35 +01:00
dst
5270d8cb13 Added extractors for smotri.com 2013-12-02 20:10:19 +07:00
Philipp Hagemeister
0037e02921 release 2013.12.02 2013-12-02 13:37:26 +01:00
Philipp Hagemeister
6ad14cab59 Add --socket-timeout option 2013-12-02 13:37:05 +01:00
Philipp Hagemeister
a9be0cc736 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-02 13:36:20 +01:00
Jaime Marquínez Ferrándiz
55a10eab48 [vimeo] Add an extractor for users (closes #1871) 2013-12-01 22:36:18 +01:00
Philipp Hagemeister
e344693b65 Make socket timeout configurable, and bump default to 10 minutes (#1862) 2013-12-01 11:42:02 +01:00
Philipp Hagemeister
355e4fd07e [generic] Find embedded dailymotion videos (Fixes #1848) 2013-12-01 01:21:33 +01:00
Philipp Hagemeister
5e09d6abbd [clipfish] Skip test on travis 2013-12-01 01:16:20 +01:00
Jaime Marquínez Ferrándiz
0a688bc0b2 [youtube] Add support for downloading top lists (fixes #1868)
It needs to know the channel and the title of the list, because the ids change every time you browse the channels and are attached to a 'VISITOR_INFO1_LIVE' cookie.
2013-11-30 14:56:51 +01:00
Philipp Hagemeister
b138de72f2 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-30 00:42:56 +01:00
Philipp Hagemeister
06dcbb71d8 Clarify help of --write-pages (#1853) 2013-11-30 00:42:43 +01:00
Jaime Marquínez Ferrándiz
c5171c454b [yahoo] Force use of the http protocol for downloading the videos. 2013-11-29 22:06:17 +01:00
Philipp Hagemeister
323ec6ae56 Clarify --download-archive help 2013-11-29 15:57:43 +01:00
Jaime Marquínez Ferrándiz
befd88b786 [yahoo] Add an extractor for yahoo news (closes #1849) 2013-11-29 15:25:43 +01:00
Philipp Hagemeister
a3fb4675fb Do not mutate default arguments
In this case, it looks rather harmless (since the conditions for --restrict-filenames should not change while a process is running), but just to be sure.
This also simplifies the interface for callers, who can just pass in the idiomatic None for "I don't care, whatever is the default".
2013-11-29 15:25:11 +01:00
Philipp Hagemeister
5f077efcb1 Merge pull request #1850 from nikai3d/master
fix typo in help
2013-11-29 01:48:14 -08:00
Nicolas Kaiser
9986238ba9 fix typo in help 2013-11-29 09:48:38 +01:00
Nicolas Kaiser
e1f900d6a4 fix typo in README.md 2013-11-29 09:44:05 +01:00
Jaime Marquínez Ferrándiz
acf37ca151 [imdb] Fix the resolution values (fixes #1847)
We were using the size of the player, it was the same for all the formats
2013-11-29 07:56:14 +01:00
Philipp Hagemeister
17769d5a6c release 2013.11.29 2013-11-29 03:34:26 +01:00
Philipp Hagemeister
677c18092d [podomatic] Add extractor 2013-11-29 03:33:25 +01:00
Jaime Marquínez Ferrándiz
3862402ff3 Add an extractor for Clipsyndicate (closes #1744) 2013-11-28 14:38:10 +01:00
Jaime Marquínez Ferrándiz
b03d0d064c [imdb] Fix extraction in python 2.6
Using a regular expression because the html cannot be parsed.
2013-11-28 13:49:00 +01:00
Jaime Marquínez Ferrándiz
d8d6148628 Add an extractor for Internet Movie Database trailers (closes #1832) 2013-11-28 13:32:49 +01:00
Philipp Hagemeister
2be54167d0 release 2013.11.28.1 2013-11-28 06:17:56 +01:00
Philipp Hagemeister
4e0084d92e [youtube/subtitles] Change MD5 of vtt subtitle in test 2013-11-28 06:14:17 +01:00
Philipp Hagemeister
fc9e1cc697 [clipfish] Use FIFA trailer as testcase (#1842) 2013-11-28 06:10:37 +01:00
Philipp Hagemeister
f8f60d2793 [clipfish] Fix imports (#1842) 2013-11-28 05:54:46 +01:00
Philipp Hagemeister
ea07dbb8b1 release 2013.11.28 2013-11-28 05:48:32 +01:00
Philipp Hagemeister
2a275ab007 [zdf] Use _download_xml 2013-11-28 05:47:50 +01:00
Philipp Hagemeister
a2e6db365c [zdf] add a pseudo-testcase and fix URL matching 2013-11-28 05:47:20 +01:00
Philipp Hagemeister
9d93e7da6c Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-28 04:37:02 +01:00
Jaime Marquínez Ferrándiz
0e44d8381a [youtube:feeds] Use the 'paging' value from the downloaded json information (fixes #1845) 2013-11-28 00:33:27 +01:00
Jaime Marquínez Ferrándiz
35907e23ec [yahoo] Fix video extraction and use the new format system exclusively 2013-11-27 21:24:55 +01:00
Jaime Marquínez Ferrándiz
76d1700b28 [youtube:playlist] Fix the extraction of the title for some mixes (#1844)
Like https://www.youtube.com/watch?v=g8jDB5xOiuE&list=RDIh2gxLqR7HM
2013-11-27 20:01:51 +01:00
Philipp Hagemeister
dcca796ce4 [clipfish] Effect a better error message (#1842) 2013-11-27 18:33:51 +01:00
Filippo Valsorda
4b19e38954 [videopremium] support new .me domain 2013-11-27 02:54:51 +01:00
Jaime Marquínez Ferrándiz
5f09bbff4d [bash-completion] Complete the ':ythistory' keyword 2013-11-27 00:42:59 +01:00
Jaime Marquínez Ferrándiz
c1f9c59d11 [bash-completion] Complete filenames or directories if the previous option requires it 2013-11-27 00:41:30 +01:00
Jaime Marquínez Ferrándiz
652cdaa269 [youtube:playlist] Add support for YouTube mixes (fixes #1839) 2013-11-26 21:35:03 +01:00
Jaime Marquínez Ferrándiz
e26f871228 Use the new '_download_xml' helper in more extractors 2013-11-26 19:17:25 +01:00
Jaime Marquínez Ferrándiz
6e47b51eef [youtube:playlist] Remove the link with index 0
It's not the first video of the playlist, it appears in the 'Play all' button (see the test course for an example)
2013-11-26 19:09:14 +01:00
Jaime Marquínez Ferrándiz
4a98cdbf3b YoutubeDL: set the 'params' property before any message/warning/error is sent (fixes #1840)
If it sets the 'restrictfilenames' param, it will first report a warning. It will try to get the logger from the 'params' property, which would be set at that moment to None, raising the error 'AttributeError: 'NoneType' object has no attribute 'get''
2013-11-26 18:54:14 +01:00
Philipp Hagemeister
c5ed4e8f7e release 2013.11.26 2013-11-26 10:41:35 +01:00
Jaime Marquínez Ferrándiz
c2e52508cc Include the proxy in the parameters for YoutubeDL (fixes #1831) 2013-11-26 08:03:11 +01:00
Philipp Hagemeister
d8ec4959c8 Merge pull request #1830 from jaimeMF/download-archive
Use the 'extractor_key' field for the download archive file
2013-11-25 14:14:25 -08:00
Jaime Marquínez Ferrándiz
d31209a144 Use the 'extractor_key' field for the download archive file
It has the same value as the ie_key.
2013-11-25 22:57:15 +01:00
Jaime Marquínez Ferrándiz
529a2e2cc3 Fix typo in the documentation of the 'download_archive' param 2013-11-25 22:52:09 +01:00
Philipp Hagemeister
781a7d0546 release 2013.11.25.3 2013-11-25 22:36:18 +01:00
Philipp Hagemeister
fb04e40396 [soundcloud] Support for listing of audio-only files 2013-11-25 22:34:56 +01:00
Philipp Hagemeister
d9b011f201 Fix rtmpdump with non-ASCII filenames on Windows on 2.x
Reported in #1798
2013-11-25 22:31:38 +01:00
Philipp Hagemeister
b0b9eaa196 Merge pull request #1829 from jaimeMF/ydl-empty-params
Allow to initialize a YoutubeDL object without parameters
2013-11-25 13:19:59 -08:00
Philipp Hagemeister
8b134b1062 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-25 22:16:07 +01:00
Philipp Hagemeister
0c75c3fa7a Do not warn about fixed output template if --max-downloads is 1
Fixes #1828
2013-11-25 22:15:33 +01:00
Jaime Marquínez Ferrándiz
a3927cf7ee Allow to initialize a YoutubeDL object without parameters
Having to pass the 'outtmpl' parameter feels really strange when you just want to extract the info of a video.
2013-11-25 22:03:39 +01:00
Jaime Marquínez Ferrándiz
1a62c18f65 [bambuser] Skip the download in the test
It doesn't respect the 'Range' header.
2013-11-25 22:03:20 +01:00
Philipp Hagemeister
2a15e7063b [soundcloud] Prefer HTTP over RTMP (#1798) 2013-11-25 20:30:41 +01:00
Philipp Hagemeister
d46cc192d7 Reduce socket timeout 2013-11-25 19:11:01 +01:00
Philipp Hagemeister
bb2bebdbe1 release 2013.11.25.2 2013-11-25 15:47:14 +01:00
Philipp Hagemeister
5db07df634 Fix --download-archive (Fixes #1826) 2013-11-25 15:46:54 +01:00
Philipp Hagemeister
ea36cbac5e Merge remote-tracking branch 'rbrito/swap-dimensions' 2013-11-25 06:19:15 +01:00
Philipp Hagemeister
d0d2b49ab7 [FileDownloader] use moved format_bytes method 2013-11-25 06:17:41 +01:00
Philipp Hagemeister
31cb6d8fef Merge remote-tracking branch 'rzhxeo/rtmpdump' 2013-11-25 06:16:18 +01:00
Philipp Hagemeister
daa0dd2973 release 2013.11.25.1 2013-11-25 06:06:39 +01:00
Philipp Hagemeister
de79c46c8f [viki] Fix subtitle extraction 2013-11-25 06:06:18 +01:00
Philipp Hagemeister
94ccb6fa2e [viki] Fix subtitles extraction 2013-11-25 05:58:04 +01:00
Philipp Hagemeister
07e4035879 [viki] Fix uploader extraction 2013-11-25 05:57:55 +01:00
Philipp Hagemeister
d0efb9ec9a [tests] Remove global_setup function 2013-11-25 03:47:32 +01:00
Philipp Hagemeister
ac05067d3d release 2013.11.25 2013-11-25 03:37:49 +01:00
Philipp Hagemeister
113577e155 [generic] Improve detection
Allow download of http://goo.gl/7X5tOk
Fixes #1818
2013-11-25 03:35:53 +01:00
Philipp Hagemeister
79d09f47c2 Merge branch 'opener-to-ydl' 2013-11-25 03:30:37 +01:00
Philipp Hagemeister
c059bdd432 Remove quality_name field and improve zdf extractor 2013-11-25 03:28:55 +01:00
Philipp Hagemeister
02dbf93f0e [zdf/common] Use API in ZDF extractor.
This also comes with a lot of extra format fields
Fixes #1518
2013-11-25 03:13:22 +01:00
Philipp Hagemeister
1fb2bcbbf7 [viki] Make uploader field optional (#1813) 2013-11-25 02:02:34 +01:00
Jaime Marquínez Ferrándiz
16e055849e Update the keywords tests for the rename of the old ComedyCentralIE 2013-11-24 22:13:20 +01:00
Jaime Marquínez Ferrándiz
66cfab4226 [comedycentral] Add support for comedycentral.com videos (closes #1824)
It's a subclass of MTVIE

The extractor for colbertnation.com and thedailyshow.com is called now ComedyCentralShowsIE
2013-11-24 21:18:35 +01:00
Philipp Hagemeister
6d88bc37a3 [viki] Skip travis test
Also provide a better error message for geoblocked videos.
2013-11-24 15:28:50 +01:00
Philipp Hagemeister
b7553b2554 [vik] Clarify output 2013-11-24 15:20:16 +01:00
Philipp Hagemeister
e03db0a077 Merge branch 'master' into opener-to-ydl 2013-11-24 15:18:44 +01:00
Philipp Hagemeister
a1ee09e815 Document proxy 2013-11-24 15:03:25 +01:00
Jaime Marquínez Ferrándiz
267ed0c5d3 [collegehumor] Encode the xml before calling xml.etree.ElementTree.fromstring (fixes #1822)
Uses a new helper method in InfoExtractor: _download_xml
2013-11-24 14:59:19 +01:00
Jaime Marquínez Ferrándiz
f459d17018 [youtube] Add an extractor for downloading the watch history (closes #1821) 2013-11-24 14:33:50 +01:00
Jaime Marquínez Ferrándiz
dc65dcbb6d [mixcloud] The description field may be missing (fixes #1819) 2013-11-24 11:28:44 +01:00
Jaime Marquínez Ferrándiz
d214fdb8fe [brightcove] Don't use 'or' with the xml nodes, use the 'value' attribute instead 2013-11-24 11:02:34 +01:00
Philipp Hagemeister
138df537ff release 2013.11.24.1 2013-11-24 07:51:56 +01:00
Philipp Hagemeister
0c7c19d6bc [clipfish] Add extractor (Fixes #1760) 2013-11-24 07:51:44 +01:00
Philipp Hagemeister
eaaafc59c2 release 2013.11.24 2013-11-24 07:30:34 +01:00
Philipp Hagemeister
382ed50e0e [viki] Add extractor (fixes #1813) 2013-11-24 07:30:05 +01:00
Philipp Hagemeister
66ec019240 [youtube] do not use variable name twice 2013-11-24 06:54:26 +01:00
Philipp Hagemeister
bd49928f7a [niconico] Clarify download 2013-11-24 06:53:50 +01:00
Philipp Hagemeister
23e6d50d73 [bandcamp] Remove unused variable 2013-11-24 06:52:53 +01:00
Philipp Hagemeister
2e767313e4 [update] fix error 2013-11-24 06:52:21 +01:00
Philipp Hagemeister
38b2db6a66 Credit @takuya0301 for niconico 2013-11-24 06:39:49 +01:00
Philipp Hagemeister
13ebea791f [niconico] Simplify and make work with old Python versions
The website requires SSLv3, otherwise it just times out during SSL negotiation.
2013-11-24 06:39:10 +01:00
Philipp Hagemeister
4c9c57428f Merge remote-tracking branch 'takuya0301/niconico' 2013-11-24 06:09:11 +01:00
Philipp Hagemeister
8bf9319e9c Simplify logger code(#1811) 2013-11-24 06:08:11 +01:00
Philipp Hagemeister
4914120727 Merge remote-tracking branch 'iTaybb/master' 2013-11-24 06:07:12 +01:00
Jaime Marquínez Ferrándiz
36de0a0e1a [brightcove] Set the 'videoPlayer' value to the 'videoId' if it's missing in the parameters (fixes #1815) 2013-11-23 23:27:15 +01:00
Philipp Hagemeister
e5c146d586 [streamcloud] skip test on travis 2013-11-23 15:57:42 +01:00
Takuya Tsuchida
52ad14aeb0 Add support for niconico 2013-11-23 18:19:44 +09:00
Itay Brandes
43afe28588 Log to an external logger (fixes #1810)
Sadly applications using youtube-dl's python sources can't directly
access it's log stream. It's pretty much limited to stdout and stderr
only.

It should log to logging.Logger instance passed to YoutubeDL's params
dictionary.
2013-11-23 10:22:18 +02:00
Philipp Hagemeister
a87b0615aa release 2013.11.22.2 2013-11-22 23:08:15 +01:00
Philipp Hagemeister
d7386f6276 [update] Check if version from repository is newer before updating
Closes #1704
2013-11-22 23:05:58 +01:00
Philipp Hagemeister
081640940e Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-22 22:46:57 +01:00
Philipp Hagemeister
7012b23c94 Match --download-archive during playlist processing (Fixes #1745) 2013-11-22 22:46:46 +01:00
Jaime Marquínez Ferrándiz
d3b30148ed [bambuser:channel] Update test 2013-11-22 21:26:31 +01:00
Jaime Marquínez Ferrándiz
9f79463803 [howcast] update test's checksum 2013-11-22 21:25:12 +01:00
Jaime Marquínez Ferrándiz
d35dc6d3b5 [bandcamp] move the album test to the album extractor and return a single track instead of a playlist 2013-11-22 21:19:31 +01:00
Philipp Hagemeister
dca0872056 Move the opener to the YoutubeDL object.
This is the first step towards being able to just import youtube_dl and start using it.
Apart from removing global state, this would fix problems like #1805.
2013-11-22 19:57:52 +01:00
rzhxeo
2b35c9ef74 Merge branch 'master' into rtmpdump
Conflicts:
	youtube_dl/FileDownloader.py

Merge
2013-11-18 00:27:06 +01:00
rzhxeo
4894fe8c5b Report download progress of rtmpdump 2013-11-09 11:14:40 +01:00
Rogério Brito
d5a9bb4ea9 extractor: youtube: Swap video dimensions to match standard practice.
While working on this, I thought about simplifying things like changing
480x854 to 480p, and that seemed like a good option, until I realized that
people (me included) usually link the concept of some number followed by a p
with the video being 16:9.

So, we would be losing some information and, as we all know,
[explicit is better than implicit][*].

[*]: http://www.python.org/dev/peps/pep-0020/

This closes #1446.

Signed-off-by: Rogério Brito <rbrito@ime.usp.br>
2013-10-19 14:04:44 -03:00
117 changed files with 3048 additions and 1012 deletions

View File

@@ -30,13 +30,16 @@ which means you can modify it, redistribute it or use it however you like.
--list-extractors List all supported extractors and the URLs they
would handle
--extractor-descriptions Output descriptions of all supported extractors
--proxy URL Use the specified HTTP/HTTPS proxy
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in an
empty string (--proxy "") for direct connection
--no-check-certificate Suppress HTTPS certificate validation.
--cache-dir DIR Location in the filesystem where youtube-dl can
store downloaded information permanently. By
default $XDG_CACHE_HOME/youtube-dl or ~/.cache
/youtube-dl .
--no-cache-dir Disable filesystem caching
--bidi-workaround Work around terminals that lack bidirectional
text support. Requires fribidi executable in PATH
## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)
@@ -53,10 +56,15 @@ which means you can modify it, redistribute it or use it however you like.
--date DATE download only videos uploaded in this date
--datebefore DATE download only videos uploaded before this date
--dateafter DATE download only videos uploaded after this date
--min-views COUNT Do not download any videos with less than COUNT
views
--max-views COUNT Do not download any videos with more than COUNT
views
--no-playlist download only the currently playing video
--age-limit YEARS download only videos suitable for the given age
--download-archive FILE Download only videos not present in the archive
file. Record all downloaded videos in it.
--download-archive FILE Download only videos not listed in the archive
file. Record the IDs of all downloaded videos in
it.
## Download Options:
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g.
@@ -96,6 +104,8 @@ which means you can modify it, redistribute it or use it however you like.
--restrict-filenames Restrict filenames to only ASCII characters, and
avoid "&" and spaces in filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
--load-info FILE json file containing the video information
(created with the "--write-json" option
-w, --no-overwrites do not overwrite files
-c, --continue force resume of partially downloaded files. By
default, youtube-dl will resume downloads if
@@ -121,6 +131,7 @@ which means you can modify it, redistribute it or use it however you like.
--get-id simulate, quiet but print id
--get-thumbnail simulate, quiet but print thumbnail URL
--get-description simulate, quiet but print video description
--get-duration simulate, quiet but print video length
--get-filename simulate, quiet but print output filename
--get-format simulate, quiet but print output format
-j, --dump-json simulate, quiet but print JSON information
@@ -130,11 +141,11 @@ which means you can modify it, redistribute it or use it however you like.
-v, --verbose print various debugging information
--dump-intermediate-pages print downloaded pages to debug problems(very
verbose)
--write-pages Write downloaded pages to files in the current
directory
--write-pages Write downloaded intermediary pages to files in
the current directory to debug problems
## Video Format Options:
-f, --format FORMAT video format code, specifiy the order of
-f, --format FORMAT video format code, specify the order of
preference using slashes: "-f 22/17/18". "-f mp4"
and "-f flv" are also supported
--all-formats download all available video formats
@@ -182,7 +193,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
# OUTPUT TEMPLATE
@@ -272,14 +283,54 @@ This README file was originally written by Daniel Bolton (<https://github.com/db
# BUGS
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
Please include:
* Your exact command line, like `youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem.
* If possible re-run the command with `--verbose`, and include the full output, it is really helpful to us.
* The output of `youtube-dl --version`
* The output of `python --version`
* The name and version of your Operating System ("Ubuntu 11.04 x64" or "Windows 7 x64" is usually enough).
Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
For discussions, join us in the irc channel #youtube-dl on freenode.
When you submit a request, please re-read it once to avoid a couple of mistakes (you can and should use this as a checklist):
### Is the description of the issue itself sufficient?
We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
- What the problem is
- How it could be fixed
- How your proposed solution would look like
If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
Site support requests must contain an example URL. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
### Are you using the latest version?
Before reporting any issue, type youtube-dl -U. This should report that you're up-to-date. Ábout 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
### Is the issue already documented?
Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or at https://github.com/rg3/youtube-dl/search?type=Issues . If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
### Why are existing options not enough?
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
### Is there enough context in your bug report?
People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: One simple, and one impossible (or extremely complicated one).
We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
### Does the issue involve one problem, and one problem only?
Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
### Is anyone going to need the feature?
Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.

View File

@@ -1,10 +1,21 @@
__youtube_dl()
{
local cur prev opts
local cur prev opts fileopts diropts keywords
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
opts="{{flags}}"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
fileopts="-a|--batch-file|--download-archive|--cookies"
diropts="--cache-dir"
if [[ ${prev} =~ ${fileopts} ]]; then
COMPREPLY=( $(compgen -f -- ${cur}) )
return 0
elif [[ ${prev} =~ ${diropts} ]]; then
COMPREPLY=( $(compgen -d -- ${cur}) )
return 0
fi
if [[ ${cur} =~ : ]]; then
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )

View File

@@ -12,10 +12,6 @@ from youtube_dl import YoutubeDL
from youtube_dl.utils import preferredencoding
def global_setup():
youtube_dl._setup_opener(timeout=10)
def get_params(override=None):
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"parameters.json")

View File

@@ -39,5 +39,6 @@
"writeinfojson": true,
"writesubtitles": false,
"allsubtitles": false,
"listssubtitles": false
"listssubtitles": false,
"socket_timeout": 20
}

View File

@@ -7,6 +7,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from youtube_dl import YoutubeDL
class YDL(FakeYDL):
@@ -140,6 +141,20 @@ class TestFormatSelection(unittest.TestCase):
self.assertEqual(test_dict['extractor'], 'Foo')
self.assertEqual(test_dict['playlist'], 'funny videos')
def test_prepare_filename(self):
info = {
u'id': u'1234',
u'ext': u'mp4',
u'width': None,
}
def fname(templ):
ydl = YoutubeDL({'outtmpl': templ})
return ydl.prepare_filename(info)
self.assertEqual(fname(u'%(id)s.%(ext)s'), u'1234.mp4')
self.assertEqual(fname(u'%(id)s-%(width)s.%(ext)s'), u'1234-NA.mp4')
# Replace missing fields with 'NA'
self.assertEqual(fname(u'%(uploader_date)s-%(id)s.%(ext)s'), u'NA-1234.mp4')
if __name__ == '__main__':
unittest.main()

View File

@@ -6,8 +6,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import global_setup, try_rm
global_setup()
from test.helper import try_rm
from youtube_dl import YoutubeDL

View File

@@ -100,11 +100,19 @@ class TestAllURLsMatching(unittest.TestCase):
def test_keywords(self):
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
self.assertMatch(':thedailyshow', ['ComedyCentral'])
self.assertMatch(':tds', ['ComedyCentral'])
self.assertMatch(':colbertreport', ['ComedyCentral'])
self.assertMatch(':cr', ['ComedyCentral'])
self.assertMatch(':ythistory', ['youtube:history'])
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
self.assertMatch(':tds', ['ComedyCentralShows'])
self.assertMatch(':colbertreport', ['ComedyCentralShows'])
self.assertMatch(':cr', ['ComedyCentralShows'])
def test_vimeo_matching(self):
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
# https://github.com/rg3/youtube-dl/issues/1930
def test_soundcloud_not_matching_sets(self):
self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
if __name__ == '__main__':
unittest.main()

View File

@@ -9,12 +9,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import (
get_params,
get_testcases,
global_setup,
try_rm,
md5,
report_warning
)
global_setup()
import hashlib

View File

@@ -8,21 +8,25 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup
global_setup()
from test.helper import FakeYDL
from youtube_dl.extractor import (
DailymotionPlaylistIE,
DailymotionUserIE,
VimeoChannelIE,
VimeoUserIE,
VimeoAlbumIE,
VimeoGroupsIE,
UstreamChannelIE,
SoundcloudSetIE,
SoundcloudUserIE,
LivestreamIE,
NHLVideocenterIE,
BambuserChannelIE,
BandcampAlbumIE
BandcampAlbumIE,
SmotriCommunityIE,
SmotriUserIE
)
@@ -55,6 +59,30 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Vimeo Tributes')
self.assertTrue(len(result['entries']) > 24)
def test_vimeo_user(self):
dl = FakeYDL()
ie = VimeoUserIE(dl)
result = ie.extract('http://vimeo.com/nkistudio/videos')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Nki')
self.assertTrue(len(result['entries']) > 65)
def test_vimeo_album(self):
dl = FakeYDL()
ie = VimeoAlbumIE(dl)
result = ie.extract('http://vimeo.com/album/2632481')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Staff Favorites: November 2013')
self.assertTrue(len(result['entries']) > 12)
def test_vimeo_groups(self):
dl = FakeYDL()
ie = VimeoGroupsIE(dl)
result = ie.extract('http://vimeo.com/groups/rolexawards')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Rolex Awards for Enterprise')
self.assertTrue(len(result['entries']) > 72)
def test_ustream_channel(self):
dl = FakeYDL()
ie = UstreamChannelIE(dl)
@@ -102,7 +130,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'pixelversity')
self.assertTrue(len(result['entries']) >= 66)
self.assertTrue(len(result['entries']) >= 60)
def test_bandcamp_album(self):
dl = FakeYDL()
@@ -111,6 +139,24 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Nightmare Night EP')
self.assertTrue(len(result['entries']) >= 4)
def test_smotri_community(self):
dl = FakeYDL()
ie = SmotriCommunityIE(dl)
result = ie.extract('http://smotri.com/community/video/kommuna')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'kommuna')
self.assertEqual(result['title'], u'КПРФ')
self.assertTrue(len(result['entries']) >= 4)
def test_smotri_user(self):
dl = FakeYDL()
ie = SmotriUserIE(dl)
result = ie.extract('http://smotri.com/user/inspector')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'inspector')
self.assertEqual(result['title'], u'Inspector')
self.assertTrue(len(result['entries']) >= 9)
if __name__ == '__main__':
unittest.main()

View File

@@ -6,8 +6,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5
global_setup()
from test.helper import FakeYDL, md5
from youtube_dl.extractor import (
@@ -73,7 +72,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'vtt'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
def test_youtube_list_subtitles(self):
self.DL.expect_warning(u'Video doesn\'t have automatic captions')

View File

@@ -26,6 +26,7 @@ from youtube_dl.utils import (
unsmuggle_url,
shell_quote,
encodeFilename,
str_to_int,
)
if sys.version_info < (3, 0):
@@ -176,6 +177,10 @@ class TestUtil(unittest.TestCase):
args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
if __name__ == '__main__':
unittest.main()

View File

@@ -7,8 +7,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, global_setup, try_rm
global_setup()
from test.helper import get_params, try_rm
import io

View File

@@ -7,8 +7,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, global_setup
global_setup()
from test.helper import get_params
import io
@@ -34,6 +33,7 @@ TEST_ID = 'BaW_jenozKc'
INFO_JSON_FILE = TEST_ID + '.info.json'
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
test URL: https://github.com/rg3/youtube-dl/issues/1892
This is a test video for youtube-dl.

View File

@@ -6,8 +6,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup
global_setup()
from test.helper import FakeYDL
from youtube_dl.extractor import (
@@ -16,6 +15,7 @@ from youtube_dl.extractor import (
YoutubeIE,
YoutubeChannelIE,
YoutubeShowIE,
YoutubeTopListIE,
)
@@ -84,16 +84,16 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL()
ie = YoutubeChannelIE(dl)
#test paginated channel
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
self.assertTrue(len(result['entries']) > 90)
#test autogenerated channel
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
self.assertTrue(len(result['entries']) >= 18)
def test_youtube_user(self):
dl = FakeYDL()
ie = YoutubeUserIE(dl)
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
self.assertTrue(len(result['entries']) >= 320)
def test_youtube_safe_search(self):
@@ -108,5 +108,21 @@ class TestYoutubeLists(unittest.TestCase):
result = ie.extract('http://www.youtube.com/show/airdisasters')
self.assertTrue(len(result) >= 3)
def test_youtube_mix(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
entries = result['entries']
self.assertTrue(len(entries) >= 20)
original_video = entries[0]
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
def test_youtube_toplist(self):
dl = FakeYDL()
ie = YoutubeTopListIE(dl)
result = ie.extract('yttoplist:music:Top Tracks')
entries = result['entries']
self.assertTrue(len(entries) >= 5)
if __name__ == '__main__':
unittest.main()

View File

@@ -6,9 +6,6 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import global_setup
global_setup()
import io
import re

View File

@@ -1,4 +1,3 @@
import math
import os
import re
import subprocess
@@ -11,6 +10,7 @@ from .utils import (
ContentTooShortError,
determine_ext,
encodeFilename,
format_bytes,
sanitize_open,
timeconvert,
)
@@ -53,20 +53,6 @@ class FileDownloader(object):
self._progress_hooks = []
self.params = params
@staticmethod
def format_bytes(bytes):
if bytes is None:
return 'N/A'
if type(bytes) is str:
bytes = float(bytes)
if bytes == 0.0:
exponent = 0
else:
exponent = int(math.log(bytes, 1024.0))
suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
converted = float(bytes) / float(1024 ** exponent)
return '%.2f%s' % (converted, suffix)
@staticmethod
def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60)
@@ -117,7 +103,7 @@ class FileDownloader(object):
def format_speed(speed):
if speed is None:
return '%10s' % '---b/s'
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
return '%10s' % ('%s/s' % format_bytes(speed))
@staticmethod
def best_block_size(elapsed_time, bytes):
@@ -218,11 +204,27 @@ class FileDownloader(object):
"""Report destination filename."""
self.to_screen(u'[download] Destination: ' + filename)
def _report_progress_status(self, msg, is_last_line=False):
fullmsg = u'[download] ' + msg
if self.params.get('progress_with_newline', False):
self.to_screen(fullmsg)
else:
if os.name == 'nt':
prev_len = getattr(self, '_report_progress_prev_line_length',
0)
if prev_len > len(fullmsg):
fullmsg += u' ' * (prev_len - len(fullmsg))
self._report_progress_prev_line_length = len(fullmsg)
clear_line = u'\r'
else:
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
self.to_console_title(u'youtube-dl ' + msg)
def report_progress(self, percent, data_len_str, speed, eta):
"""Report download progress."""
if self.params.get('noprogress', False):
return
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
if eta is not None:
eta_str = self.format_eta(eta)
else:
@@ -232,14 +234,29 @@ class FileDownloader(object):
else:
percent_str = 'Unknown %'
speed_str = self.format_speed(speed)
if self.params.get('progress_with_newline', False):
self.to_screen(u'[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str))
msg = (u'%s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str))
self._report_progress_status(msg)
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
if self.params.get('noprogress', False):
return
downloaded_str = format_bytes(downloaded_data_len)
speed_str = self.format_speed(speed)
elapsed_str = FileDownloader.format_seconds(elapsed)
msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
self._report_progress_status(msg)
def report_finish(self, data_len_str, tot_time):
"""Report download finished."""
if self.params.get('noprogress', False):
self.to_screen(u'[download] Download completed')
else:
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
self._report_progress_status(
(u'100%% of %s in %s' %
(data_len_str, self.format_seconds(tot_time))),
is_last_line=True)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
@@ -260,16 +277,78 @@ class FileDownloader(object):
"""Report it was impossible to resume download."""
self.to_screen(u'[download] Unable to resume')
def report_finish(self, data_len_str, tot_time):
"""Report download finished."""
if self.params.get('noprogress', False):
self.to_screen(u'[download] Download completed')
else:
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
(clear_line, data_len_str, self.format_seconds(tot_time)))
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live, conn):
def run_rtmpdump(args):
start = time.time()
resume_percent = None
resume_downloaded_data_len = None
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
while not proc_stderr_closed:
# read line from stderr
line = u''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
data_len_str = u'~' + format_bytes(data_len)
self.report_progress(percent, data_len_str, speed, eta)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'speed': speed,
})
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen(u'')
cursor_in_new_line = True
self.to_screen(u'[rtmpdump] '+line)
proc.wait()
if not cursor_in_new_line:
self.to_screen(u'')
return proc.returncode
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
test = self.params.get('test', False)
@@ -280,12 +359,11 @@ class FileDownloader(object):
except (OSError, IOError):
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
return False
verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
@@ -298,31 +376,51 @@ class FileDownloader(object):
basic_args += ['--stop', '1']
if live:
basic_args += ['--live']
if conn:
basic_args += ['--conn', conn]
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if sys.platform == 'win32' and sys.version_info < (3, 0):
# Windows subprocess module does not actually support Unicode
# on Python 2.x
# See http://stackoverflow.com/a/9951851/35070
subprocess_encoding = sys.getfilesystemencoding()
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
else:
subprocess_encoding = None
if self.params.get('verbose', False):
if subprocess_encoding:
str_args = [
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
for a in args]
else:
str_args = args
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, args))
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
except ImportError:
shell_quote = repr
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
retval = subprocess.call(args)
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
retval = run_rtmpdump(args)
while (retval == 2 or retval == 1) and not test:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == 1:
break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
if prevsize == cursize and retval == 2 and cursize > 1024:
self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = 0
break
if retval == 0 or (test and retval == 2):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
@@ -421,7 +519,8 @@ class FileDownloader(object):
info_dict.get('page_url', None),
info_dict.get('play_path', None),
info_dict.get('tc_url', None),
info_dict.get('rtmp_live', False))
info_dict.get('rtmp_live', False),
info_dict.get('rtmp_conn', None))
# Attempt to download using mplayer
if url.startswith('mms') or url.startswith('rtsp'):
@@ -525,7 +624,7 @@ class FileDownloader(object):
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = self.format_bytes(data_len)
data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()

View File

@@ -3,12 +3,15 @@
from __future__ import absolute_import
import collections
import errno
import io
import json
import os
import platform
import re
import shutil
import subprocess
import socket
import sys
import time
@@ -18,8 +21,8 @@ if os.name == 'nt':
import ctypes
from .utils import (
compat_cookiejar,
compat_http_client,
compat_print,
compat_str,
compat_urllib_error,
compat_urllib_request,
@@ -30,9 +33,14 @@ from .utils import (
DownloadError,
encodeFilename,
ExtractorError,
format_bytes,
formatSeconds,
get_term_width,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
PostProcessingError,
platform_name,
preferredencoding,
SameFileError,
sanitize_filename,
@@ -41,9 +49,11 @@ from .utils import (
UnavailableVideoError,
write_json_file,
write_string,
YoutubeDLHandler,
)
from .extractor import get_info_extractor, gen_extractors
from .FileDownloader import FileDownloader
from .version import __version__
class YoutubeDL(object):
@@ -85,6 +95,7 @@ class YoutubeDL(object):
forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description.
forcefilename: Force printing final filename.
forceduration: Force printing duration.
forcejson: Force printing info_dict as JSON.
simulate: Do not download the video files.
format: Video format code.
@@ -97,6 +108,7 @@ class YoutubeDL(object):
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance.
logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
@@ -117,9 +129,24 @@ class YoutubeDL(object):
noplaylist: Download single video instead of a playlist if in doubt.
age_limit: An integer representing the user's age in years.
Unsuitable videos for the given age are skipped.
downloadarchive: File name of a file where all downloads are recorded.
min_views: An integer representing the minimum view count the video
must have in order to not be skipped.
Videos without view count information are always
downloaded. None for no limit.
max_views: An integer representing the maximum view count.
Videos that are more popular than that are not
downloaded.
Videos without view count information are always
downloaded. None for no limit.
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
proxy: URL of the proxy server to use
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fridibi
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@@ -134,7 +161,7 @@ class YoutubeDL(object):
_num_downloads = None
_screen_file = None
def __init__(self, params):
def __init__(self, params=None):
"""Create a FileDownloader object with the given options."""
self._ies = []
self._ies_instances = {}
@@ -143,6 +170,29 @@ class YoutubeDL(object):
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self._err_file = sys.stderr
self.params = {} if params is None else params
if params.get('bidi_workaround', False):
try:
import pty
master, slave = pty.openpty()
width = get_term_width()
if width is None:
width_args = []
else:
width_args = ['-w', str(width)]
self._fribidi = subprocess.Popen(
['fribidi', '-c', 'UTF-8'] + width_args,
stdin=subprocess.PIPE,
stdout=slave,
stderr=self._err_file)
self._fribidi_channel = os.fdopen(master, 'rb')
except OSError as ose:
if ose.errno == 2:
self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
else:
raise
if (sys.version_info >= (3,) and sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
@@ -152,14 +202,15 @@ class YoutubeDL(object):
u'Assuming --restrict-filenames since file system encoding '
u'cannot encode all charactes. '
u'Set the LC_ALL environment variable to fix this.')
params['restrictfilenames'] = True
self.params['restrictfilenames'] = True
self.params = params
self.fd = FileDownloader(self, self.params)
if '%(stitle)s' in self.params['outtmpl']:
if '%(stitle)s' in self.params.get('outtmpl', ''):
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
self._setup_opener()
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
@@ -190,20 +241,42 @@ class YoutubeDL(object):
self._pps.append(pp)
pp.set_downloader(self)
def _bidi_workaround(self, message):
if not hasattr(self, '_fribidi_channel'):
return message
assert type(message) == type(u'')
line_count = message.count(u'\n') + 1
self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
self._fribidi.stdin.flush()
res = u''.join(self._fribidi_channel.readline().decode('utf-8')
for _ in range(line_count))
return res[:-len(u'\n')]
def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode."""
if not self.params.get('quiet', False):
return self.to_stdout(message, skip_eol, check_quiet=True)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
"""Print message to stdout if not in quiet mode."""
if self.params.get('logger'):
self.params['logger'].debug(message)
elif not check_quiet or not self.params.get('quiet', False):
message = self._bidi_workaround(message)
terminator = [u'\n', u''][skip_eol]
output = message + terminator
write_string(output, self._screen_file)
def to_stderr(self, message):
"""Print message to stderr."""
assert type(message) == type(u'')
output = message + u'\n'
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output.encode(preferredencoding())
sys.stderr.write(output)
if self.params.get('logger'):
self.params['logger'].error(message)
else:
message = self._bidi_workaround(message)
output = message + u'\n'
write_string(output, self._err_file)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
@@ -235,10 +308,9 @@ class YoutubeDL(object):
def __exit__(self, *args):
self.restore_console_title()
def fixed_template(self):
"""Checks if the output template is fixed."""
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
if self.params.get('cookiefile') is not None:
self.cookiejar.save()
def trouble(self, message=None, tb=None):
"""Determine action to take when a download problem appears.
@@ -275,7 +347,7 @@ class YoutubeDL(object):
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
if sys.stderr.isatty() and os.name != 'nt':
if self._err_file.isatty() and os.name != 'nt':
_msg_header = u'\033[0;33mWARNING:\033[0m'
else:
_msg_header = u'WARNING:'
@@ -287,29 +359,13 @@ class YoutubeDL(object):
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
if sys.stderr.isatty() and os.name != 'nt':
if self._err_file.isatty() and os.name != 'nt':
_msg_header = u'\033[0;31mERROR:\033[0m'
else:
_msg_header = u'ERROR:'
error_message = u'%s %s' % (_msg_header, message)
self.trouble(error_message, tb)
def report_writedescription(self, descfn):
""" Report that the description file is being written """
self.to_screen(u'[info] Writing video description to: ' + descfn)
def report_writesubtitles(self, sub_filename):
""" Report that the subtitles file is being written """
self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
def report_writeinfojson(self, infofn):
""" Report that the metadata file has been written """
self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
def report_writeannotations(self, annofn):
""" Report that the annotations file has been written. """
self.to_screen(u'[info] Writing video annotations to: ' + annofn)
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
@@ -336,18 +392,17 @@ class YoutubeDL(object):
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
sanitize = lambda k, v: sanitize_filename(
u'NA' if v is None else compat_str(v),
compat_str(v),
restricted=self.params.get('restrictfilenames'),
is_id=(k == u'id'))
template_dict = dict((k, sanitize(k, v))
for k, v in template_dict.items())
for k, v in template_dict.items()
if v is not None)
template_dict = collections.defaultdict(lambda: u'NA', template_dict)
tmpl = os.path.expanduser(self.params['outtmpl'])
filename = tmpl % template_dict
return filename
except KeyError as err:
self.report_error(u'Erroneous output template')
return None
except ValueError as err:
self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
return None
@@ -355,27 +410,37 @@ class YoutubeDL(object):
def _match_entry(self, info_dict):
""" Returns None iff the file should be downloaded """
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle:
if not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
video_title = info_dict.get('title', info_dict.get('id', u'video'))
if 'title' in info_dict:
# This can happen when we're just evaluating the playlist
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle:
if not re.search(matchtitle, title, re.IGNORECASE):
return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
date = info_dict.get('upload_date', None)
if date is not None:
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
view_count = info_dict.get('view_count', None)
if view_count is not None:
min_views = self.params.get('min_views')
if min_views is not None and view_count < min_views:
return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
max_views = self.params.get('max_views')
if max_views is not None and view_count > max_views:
return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
age_limit = self.params.get('age_limit')
if age_limit is not None:
if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict):
return (u'%(title)s has already been recorded in archive'
% info_dict)
return u'%s has already been recorded in archive' % video_title
return None
@staticmethod
@@ -384,7 +449,8 @@ class YoutubeDL(object):
for key, value in extra_info.items():
info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
def extract_info(self, url, download=True, ie_key=None, extra_info={},
process=True):
'''
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
@@ -420,7 +486,10 @@ class YoutubeDL(object):
'webpage_url': url,
'extractor_key': ie.ie_key(),
})
return self.process_ie_result(ie_result, download, extra_info)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
break
@@ -453,8 +522,33 @@ class YoutubeDL(object):
download,
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'url_transparent':
# Use the information from the embedding page
info = self.extract_info(
ie_result['url'], ie_key=ie_result.get('ie_key'),
extra_info=extra_info, download=False, process=False)
def make_result(embedded_info):
new_result = ie_result.copy()
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
'entries', 'urlhandle', 'ie_key', 'duration',
'subtitles', 'annotations', 'format',
'thumbnail', 'thumbnails'):
if f in new_result:
del new_result[f]
if f in embedded_info:
new_result[f] = embedded_info[f]
return new_result
new_result = make_result(info)
assert new_result.get('_type') != 'url_transparent'
if new_result.get('_type') == 'compat_list':
new_result['entries'] = [
make_result(e) for e in new_result['entries']]
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
elif result_type == 'playlist':
self.add_extra_info(ie_result, extra_info)
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -484,6 +578,12 @@ class YoutubeDL(object):
'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'],
}
reason = self._match_entry(entry)
if reason is not None:
self.to_screen(u'[download] ' + reason)
continue
entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
@@ -639,22 +739,25 @@ class YoutubeDL(object):
# Forced printings
if self.params.get('forcetitle', False):
compat_print(info_dict['fulltitle'])
self.to_stdout(info_dict['fulltitle'])
if self.params.get('forceid', False):
compat_print(info_dict['id'])
self.to_stdout(info_dict['id'])
if self.params.get('forceurl', False):
# For RTMP URLs, also include the playpath
compat_print(info_dict['url'] + info_dict.get('play_path', u''))
self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
compat_print(info_dict['thumbnail'])
self.to_stdout(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
compat_print(info_dict['description'])
self.to_stdout(info_dict['description'])
if self.params.get('forcefilename', False) and filename is not None:
compat_print(filename)
self.to_stdout(filename)
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
self.to_stdout(formatSeconds(info_dict['duration']))
if self.params.get('forceformat', False):
compat_print(info_dict['format'])
self.to_stdout(info_dict['format'])
if self.params.get('forcejson', False):
compat_print(json.dumps(info_dict))
info_dict['_filename'] = filename
self.to_stdout(json.dumps(info_dict))
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
@@ -672,28 +775,34 @@ class YoutubeDL(object):
return
if self.params.get('writedescription', False):
try:
descfn = filename + u'.description'
self.report_writedescription(descfn)
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(info_dict['description'])
except (KeyError, TypeError):
self.report_warning(u'There\'s no description to write.')
except (OSError, IOError):
self.report_error(u'Cannot write description file ' + descfn)
return
descfn = filename + u'.description'
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
self.to_screen(u'[info] Video description is already present')
else:
try:
self.to_screen(u'[info] Writing video description to: ' + descfn)
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(info_dict['description'])
except (KeyError, TypeError):
self.report_warning(u'There\'s no description to write.')
except (OSError, IOError):
self.report_error(u'Cannot write description file ' + descfn)
return
if self.params.get('writeannotations', False):
try:
annofn = filename + u'.annotations.xml'
self.report_writeannotations(annofn)
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations'])
except (KeyError, TypeError):
self.report_warning(u'There are no annotations to write.')
except (OSError, IOError):
self.report_error(u'Cannot write annotations file: ' + annofn)
return
annofn = filename + u'.annotations.xml'
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
self.to_screen(u'[info] Video annotations are already present')
else:
try:
self.to_screen(u'[info] Writing video annotations to: ' + annofn)
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations'])
except (KeyError, TypeError):
self.report_warning(u'There are no annotations to write.')
except (OSError, IOError):
self.report_error(u'Cannot write annotations file: ' + annofn)
return
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
@@ -709,38 +818,48 @@ class YoutubeDL(object):
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
self.report_writesubtitles(sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
else:
self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
except (OSError, IOError):
self.report_error(u'Cannot write subtitles file ' + descfn)
return
if self.params.get('writeinfojson', False):
infofn = os.path.splitext(filename)[0] + u'.info.json'
self.report_writeinfojson(infofn)
try:
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
write_json_file(json_info_dict, encodeFilename(infofn))
except (OSError, IOError):
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
return
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
self.to_screen(u'[info] Video description metadata is already present')
else:
self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
try:
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
write_json_file(json_info_dict, encodeFilename(infofn))
except (OSError, IOError):
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
return
if self.params.get('writethumbnail', False):
if info_dict.get('thumbnail') is not None:
thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
(info_dict['extractor'], info_dict['id']))
try:
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
(info_dict['extractor'], info_dict['id'], thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning(u'Unable to download thumbnail "%s": %s' %
(info_dict['thumbnail'], compat_str(err)))
thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
self.to_screen(u'[%s] %s: Thumbnail is already present' %
(info_dict['extractor'], info_dict['id']))
else:
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
(info_dict['extractor'], info_dict['id']))
try:
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
(info_dict['extractor'], info_dict['id'], thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning(u'Unable to download thumbnail "%s": %s' %
(info_dict['thumbnail'], compat_str(err)))
if not self.params.get('skip_download', False):
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
@@ -768,13 +887,15 @@ class YoutubeDL(object):
def download(self, url_list):
"""Download a given list of URLs."""
if len(url_list) > 1 and self.fixed_template():
if (len(url_list) > 1 and
'%' not in self.params['outtmpl']
and self.params.get('max_downloads') != 1):
raise SameFileError(self.params['outtmpl'])
for url in url_list:
try:
#It also downloads the videos
videos = self.extract_info(url)
self.extract_info(url)
except UnavailableVideoError:
self.report_error(u'unable to download video')
except MaxDownloadsReached:
@@ -783,6 +904,20 @@ class YoutubeDL(object):
return self._download_retcode
def download_with_info_file(self, info_filename):
with io.open(info_filename, 'r', encoding='utf-8') as f:
info = json.load(f)
try:
self.process_ie_result(info, download=True)
except DownloadError:
webpage_url = info.get('webpage_url')
if webpage_url is not None:
self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
return self.download([webpage_url])
else:
raise
return self._download_retcode
def post_process(self, filename, ie_info):
"""Run all the postprocessors on the given file."""
info = dict(ie_info)
@@ -806,11 +941,26 @@ class YoutubeDL(object):
except (IOError, OSError):
self.report_warning(u'Unable to remove downloaded video file')
def _make_archive_id(self, info_dict):
# Future-proof against any change in case
# and backwards compatibility with prior versions
extractor = info_dict.get('extractor_key')
if extractor is None:
if 'id' in info_dict:
extractor = info_dict.get('ie_key') # key in a playlist
if extractor is None:
return None # Incomplete video information
return extractor.lower() + u' ' + info_dict['id']
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
if fn is None:
return False
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
vid_id = self._make_archive_id(info_dict)
if vid_id is None:
return False # Incomplete video information
try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
@@ -825,12 +975,15 @@ class YoutubeDL(object):
fn = self.params.get('download_archive')
if fn is None:
return
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
vid_id = self._make_archive_id(info_dict)
assert vid_id
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + u'\n')
@staticmethod
def format_resolution(format, default='unknown'):
if format.get('vcodec') == 'none':
return 'audio only'
if format.get('_resolution') is not None:
return format['_resolution']
if format.get('height') is not None:
@@ -844,10 +997,11 @@ class YoutubeDL(object):
def list_formats(self, info_dict):
def format_note(fdict):
if fdict.get('format_note') is not None:
return fdict['format_note']
res = u''
if fdict.get('vcodec') is not None:
if fdict.get('format_note') is not None:
res += fdict['format_note'] + u' '
if (fdict.get('vcodec') is not None and
fdict.get('vcodec') != 'none'):
res += u'%-5s' % fdict['vcodec']
elif fdict.get('vbr') is not None:
res += u'video'
@@ -863,25 +1017,103 @@ class YoutubeDL(object):
res += 'audio'
if fdict.get('abr') is not None:
res += u'@%3dk' % fdict['abr']
if fdict.get('filesize') is not None:
if res:
res += u', '
res += format_bytes(fdict['filesize'])
return res
def line(format):
return (u'%-20s%-10s%-12s%s' % (
def line(format, idlen=20):
return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
format['format_id'],
format['ext'],
self.format_resolution(format),
format_note(format),
)
)
))
formats = info_dict.get('formats', [info_dict])
formats_s = list(map(line, formats))
idlen = max(len(u'format code'),
max(len(f['format_id']) for f in formats))
formats_s = [line(f, idlen) for f in formats]
if len(formats) > 1:
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
header_line = line({
'format_id': u'format code', 'ext': u'extension',
'_resolution': u'resolution', 'format_note': u'note'})
'_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
""" Start an HTTP download """
return self._opener.open(req)
def print_debug_header(self):
if not self.params.get('verbose'):
return
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
try:
sys.exc_clear()
except:
pass
write_string(u'[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + u'\n')
proxy_map = {}
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self):
timeout_val = self.params.get('socket_timeout')
timeout = 600 if timeout_val is None else float(timeout_val)
opts_cookiefile = self.params.get('cookiefile')
opts_proxy = self.params.get('proxy')
if opts_cookiefile is None:
self.cookiejar = compat_cookiejar.CookieJar()
else:
self.cookiejar = compat_cookiejar.MozillaCookieJar(
opts_cookiefile)
if os.access(opts_cookiefile, os.R_OK):
self.cookiejar.load()
cookie_processor = compat_urllib_request.HTTPCookieProcessor(
self.cookiejar)
if opts_proxy is not None:
if opts_proxy == '':
proxies = {}
else:
proxies = {'http': opts_proxy, 'https': opts_proxy}
else:
proxies = compat_urllib_request.getproxies()
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
https_handler = make_HTTPS_handler(
self.params.get('nocheckcertificate', False))
opener = compat_urllib_request.build_opener(
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
opener.addheaders = []
self._opener = opener
# TODO remove this global modification
compat_urllib_request.install_opener(opener)
socket.setdefaulttimeout(timeout)

View File

@@ -35,50 +35,42 @@ __authors__ = (
'Jelle van der Waa',
'Marcin Cieślak',
'Anton Larionov',
'Takuya Tsuchida',
'Sergey M.',
)
__license__ = 'Public Domain'
import codecs
import collections
import getpass
import optparse
import os
import random
import re
import shlex
import socket
import subprocess
import sys
import traceback
import platform
from .utils import (
compat_cookiejar,
compat_print,
compat_str,
compat_urllib_request,
DateRange,
decodeOption,
determine_ext,
get_term_width,
DownloadError,
get_cachedir,
make_HTTPS_handler,
MaxDownloadsReached,
platform_name,
preferredencoding,
SameFileError,
std_headers,
write_string,
YoutubeDLHandler,
)
from .update import update_self
from .version import __version__
from .FileDownloader import (
FileDownloader,
)
from .extractor import gen_extractors
from .version import __version__
from .YoutubeDL import YoutubeDL
from .PostProcessor import (
FFmpegMetadataPP,
@@ -89,11 +81,11 @@ from .PostProcessor import (
def parseOpts(overrideArguments=None):
def _readOptions(filename_bytes):
def _readOptions(filename_bytes, default=[]):
try:
optionf = open(filename_bytes)
except IOError:
return [] # silently skip if file is not present
return default # silently skip if file is not present
try:
res = []
for l in optionf:
@@ -121,19 +113,6 @@ def parseOpts(overrideArguments=None):
def _comma_separated_values_options_callback(option, opt_str, value, parser):
setattr(parser.values, option.dest, value.split(','))
def _find_term_columns():
columns = os.environ.get('COLUMNS', None)
if columns:
return int(columns)
try:
sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out,err = sp.communicate()
return int(out.split()[1])
except:
pass
return None
def _hide_login_info(opts):
opts = list(opts)
for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
@@ -148,7 +127,7 @@ def parseOpts(overrideArguments=None):
max_help_position = 80
# No need to wrap help messages if we're on a wide console
columns = _find_term_columns()
columns = get_term_width()
if columns: max_width = columns
fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
@@ -199,7 +178,9 @@ def parseOpts(overrideArguments=None):
general.add_option('--extractor-descriptions',
action='store_true', dest='list_extractor_descriptions',
help='Output descriptions of all supported extractors', default=False)
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
general.add_option(
'--proxy', dest='proxy', default=None, metavar='URL',
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
@@ -207,6 +188,12 @@ def parseOpts(overrideArguments=None):
general.add_option(
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
help='Disable filesystem caching')
general.add_option(
'--socket-timeout', dest='socket_timeout',
type=float, default=None, help=optparse.SUPPRESS_HELP)
general.add_option(
'--bidi-workaround', dest='bidi_workaround', action='store_true',
help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
selection.add_option('--playlist-start',
@@ -215,19 +202,29 @@ def parseOpts(overrideArguments=None):
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
selection.add_option('--max-downloads', metavar='NUMBER',
dest='max_downloads', type=int, default=None,
help='Abort after downloading NUMBER files')
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
selection.add_option(
'--min-views', metavar='COUNT', dest='min_views',
default=None, type=int,
help="Do not download any videos with less than COUNT views",)
selection.add_option(
'--max-views', metavar='COUNT', dest='max_views',
default=None, type=int,
help="Do not download any videos with more than COUNT views",)
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
help='download only videos suitable for the given age',
default=None, type=int)
selection.add_option('--download-archive', metavar='FILE',
dest='download_archive',
help='Download only videos not present in the archive file. Record all downloaded videos in it.')
help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
authentication.add_option('-u', '--username',
@@ -242,7 +239,7 @@ def parseOpts(overrideArguments=None):
video_format.add_option('-f', '--format',
action='store', dest='format', metavar='FORMAT', default='best',
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
video_format.add_option('--all-formats',
action='store_const', dest='format', help='download all available video formats', const='all')
video_format.add_option('--prefer-free-formats',
@@ -301,6 +298,9 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--get-description',
action='store_true', dest='getdescription',
help='simulate, quiet but print video description', default=False)
verbosity.add_option('--get-duration',
action='store_true', dest='getduration',
help='simulate, quiet but print video length', default=False)
verbosity.add_option('--get-filename',
action='store_true', dest='getfilename',
help='simulate, quiet but print output filename', default=False)
@@ -324,7 +324,7 @@ def parseOpts(overrideArguments=None):
help='print downloaded pages to debug problems(very verbose)')
verbosity.add_option('--write-pages',
action='store_true', dest='write_pages', default=False,
help='Write downloaded pages to files in the current directory')
help='Write downloaded intermediary pages to files in the current directory to debug problems')
verbosity.add_option('--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP)
@@ -361,6 +361,9 @@ def parseOpts(overrideArguments=None):
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('--load-info',
dest='load_info_filename', metavar='FILE',
help='json file containing the video information (created with the "--write-json" option')
filesystem.add_option('-w', '--no-overwrites',
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
filesystem.add_option('-c', '--continue',
@@ -422,6 +425,8 @@ def parseOpts(overrideArguments=None):
if opts.verbose:
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
systemConf = _readOptions('/etc/youtube-dl.conf')
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
@@ -431,8 +436,31 @@ def parseOpts(overrideArguments=None):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
systemConf = _readOptions('/etc/youtube-dl.conf')
userConf = _readOptions(userConfFile)
userConf = _readOptions(userConfFile, None)
if userConf is None:
appdata_dir = os.environ.get('appdata')
if appdata_dir:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
default=None)
if userConf is None:
userConf = []
commandLineConf = sys.argv[1:]
argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv)
@@ -451,19 +479,6 @@ def _real_main(argv=None):
parser, opts, args = parseOpts(argv)
# Open appropriate CookieJar
if opts.cookiefile is None:
jar = compat_cookiejar.CookieJar()
else:
try:
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
if os.access(opts.cookiefile, os.R_OK):
jar.load()
except (IOError, OSError) as err:
if opts.verbose:
traceback.print_exc()
write_string(u'ERROR: unable to open cookie file\n')
sys.exit(101)
# Set user agent
if opts.user_agent is not None:
std_headers['User-Agent'] = opts.user_agent
@@ -495,8 +510,6 @@ def _real_main(argv=None):
all_urls = batchurls + args
all_urls = [url.strip() for url in all_urls]
opener = _setup_opener(jar=jar, opts=opts)
extractors = gen_extractors()
if opts.list_extractors:
@@ -551,7 +564,7 @@ def _real_main(argv=None):
if opts.retries is not None:
try:
opts.retries = int(opts.retries)
except (TypeError, ValueError) as err:
except (TypeError, ValueError):
parser.error(u'invalid retry count specified')
if opts.buffersize is not None:
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
@@ -562,13 +575,13 @@ def _real_main(argv=None):
opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0:
raise ValueError(u'Playlist start must be positive')
except (TypeError, ValueError) as err:
except (TypeError, ValueError):
parser.error(u'invalid playlist start number specified')
try:
opts.playlistend = int(opts.playlistend)
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
raise ValueError(u'Playlist end must be greater than playlist start')
except (TypeError, ValueError) as err:
except (TypeError, ValueError):
parser.error(u'invalid playlist end number specified')
if opts.extractaudio:
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
@@ -602,27 +615,30 @@ def _real_main(argv=None):
or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(title)s-%(id)s.%(ext)s')
if '%(ext)s' not in outtmpl and opts.extractaudio:
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
parser.error(u'Cannot download a video and extract audio into the same'
u' file! Use "%%(ext)s" instead of %r' %
determine_ext(outtmpl, u''))
u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
u' template'.format(outtmpl))
any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson
ydl_opts = {
'usenetrc': opts.usenetrc,
'username': opts.username,
'password': opts.password,
'videopassword': opts.videopassword,
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
'quiet': (opts.quiet or any_printing),
'forceurl': opts.geturl,
'forcetitle': opts.gettitle,
'forceid': opts.getid,
'forcethumbnail': opts.getthumbnail,
'forcedescription': opts.getdescription,
'forceduration': opts.getduration,
'forcefilename': opts.getfilename,
'forceformat': opts.getformat,
'forcejson': opts.dumpjson,
'simulate': opts.simulate,
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
'skip_download': (opts.skip_download or opts.simulate or any_printing),
'format': opts.format,
'format_limit': opts.format_limit,
'listformats': opts.listformats,
@@ -666,39 +682,22 @@ def _real_main(argv=None):
'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize,
'min_views': opts.min_views,
'max_views': opts.max_views,
'daterange': date,
'cachedir': opts.cachedir,
'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit,
'download_archive': opts.download_archive,
'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate,
'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout,
'bidi_workaround': opts.bidi_workaround,
}
with YoutubeDL(ydl_opts) as ydl:
if opts.verbose:
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
try:
sys.exc_clear()
except:
pass
write_string(u'[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + u'\n')
proxy_map = {}
for handler in opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
ydl.print_debug_header()
ydl.add_default_info_extractors()
# PostProcessors
@@ -717,58 +716,24 @@ def _real_main(argv=None):
update_self(ydl.to_screen, opts.verbose)
# Maybe do nothing
if len(all_urls) < 1:
if (len(all_urls) < 1) and (opts.load_info_filename is None):
if not opts.update_self:
parser.error(u'you must provide at least one URL')
else:
sys.exit()
try:
retcode = ydl.download(all_urls)
if opts.load_info_filename is not None:
retcode = ydl.download_with_info_file(opts.load_info_filename)
else:
retcode = ydl.download(all_urls)
except MaxDownloadsReached:
ydl.to_screen(u'--max-download limit reached, aborting.')
retcode = 101
# Dump cookie jar if requested
if opts.cookiefile is not None:
try:
jar.save()
except (IOError, OSError):
sys.exit(u'ERROR: unable to save cookie jar')
sys.exit(retcode)
def _setup_opener(jar=None, opts=None, timeout=300):
if opts is None:
FakeOptions = collections.namedtuple(
'FakeOptions', ['proxy', 'no_check_certificate'])
opts = FakeOptions(proxy=None, no_check_certificate=False)
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
if opts.proxy is not None:
if opts.proxy == '':
proxies = {}
else:
proxies = {'http': opts.proxy, 'https': opts.proxy}
else:
proxies = compat_urllib_request.getproxies()
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
https_handler = make_HTTPS_handler(opts)
opener = compat_urllib_request.build_opener(
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
opener.addheaders = []
compat_urllib_request.install_opener(opener)
socket.setdefaulttimeout(timeout)
return opener
def main(argv=None):
try:
_real_main(argv)

View File

@@ -8,6 +8,7 @@ from .arte import (
ArteTVPlus7IE,
ArteTVCreativeIE,
ArteTVFutureIE,
ArteTVDDCIE,
)
from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
@@ -19,10 +20,13 @@ from .brightcove import BrightcoveIE
from .c56 import C56IE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .clipsyndicate import ClipsyndicateIE
from .cnn import CNNIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
@@ -54,7 +58,7 @@ from .flickr import FlickrIE
from .francetv import (
PluzzIE,
FranceTvInfoIE,
France2IE,
FranceTVIE,
GenerationQuoiIE
)
from .freesound import FreesoundIE
@@ -70,6 +74,7 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE
from .ign import IGNIE, OneUPIE
from .imdb import ImdbIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
@@ -96,15 +101,20 @@ from .myvideo import MyVideoIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import NBCNewsIE
from .ndtv import NDTVIE
from .newgrounds import NewgroundsIE
from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
from .ninegag import NineGagIE
from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .podomatic import PodomaticIE
from .pornhub import PornHubIE
from .pornotube import PornotubeIE
from .pyvideo import PyvideoIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE
@@ -116,6 +126,12 @@ from .rutube import RutubeIE
from .sina import SinaIE
from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
from .smotri import (
SmotriIE,
SmotriCommunityIE,
SmotriUserIE,
SmotriBroadcastIE,
)
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
from .southparkstudios import (
@@ -134,6 +150,7 @@ from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tf1 import TF1IE
from .theplatform import ThePlatformIE
from .thisav import ThisAVIE
from .toutv import TouTvIE
from .traileraddict import TrailerAddictIE
@@ -154,19 +171,31 @@ from .viddler import ViddlerIE
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vimeo import (
VimeoIE,
VimeoChannelIE,
VimeoUserIE,
VimeoAlbumIE,
VimeoGroupsIE,
)
from .vine import VineIE
from .viki import VikiIE
from .vk import VKIE
from .wat import WatIE
from .websurg import WeBSurgIE
from .weibo import WeiboIE
from .wimp import WimpIE
from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE
from .xhamster import XHamsterIE
from .xnxx import XNXXIE
from .xvideos import XVideosIE
from .xtube import XTubeIE
from .yahoo import YahooIE, YahooSearchIE
from .yahoo import (
YahooIE,
YahooNewsIE,
YahooSearchIE,
)
from .youjizz import YouJizzIE
from .youku import YoukuIE
from .youporn import YouPornIE
@@ -183,6 +212,8 @@ from .youtube import (
YoutubeTruncatedURLIE,
YoutubeWatchLaterIE,
YoutubeFavouritesIE,
YoutubeHistoryIE,
YoutubeTopListIE,
)
from .zdf import ZDFIE

View File

@@ -13,7 +13,7 @@ from ..utils import (
class AddAnimeIE(InfoExtractor):
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
IE_NAME = u'AddAnime'
_TEST = {
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -28,9 +27,8 @@ class AnitubeIE(InfoExtractor):
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
webpage, u'key')
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
key)
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
video_title = config_xml.find('title').text

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
import json
from .common import InfoExtractor
@@ -10,7 +9,7 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = {
u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
u"playlist": [
@@ -65,18 +64,18 @@ class AppleTrailersIE(InfoExtractor):
uploader_id = mobj.group('company')
playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
playlist_snippet = self._download_webpage(playlist_url, movie)
playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet)
playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned)
# The ' in the onClick attributes are not escaped, it couldn't be parsed
# with xml.etree.ElementTree.fromstring
# like: http://trailers.apple.com/trailers/wb/gravity/
def _clean_json(m):
return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned)
playlist_html = u'<html>' + playlist_cleaned + u'</html>'
def fix_html(s):
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
# The ' in the onClick attributes are not escaped, it couldn't be parsed
# like: http://trailers.apple.com/trailers/wb/gravity/
def _clean_json(m):
return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
s = re.sub(self._JSON_RE, _clean_json, s)
s = u'<html>' + s + u'</html>'
return s
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
doc = xml.etree.ElementTree.fromstring(playlist_html)
playlist = []
for li in doc.findall('./div/ul/li'):
on_click = li.find('.//a').attrib['onClick']
@@ -113,7 +112,7 @@ class AppleTrailersIE(InfoExtractor):
})
formats = sorted(formats, key=lambda f: (f['height'], f['width']))
info = {
playlist.append({
'_type': 'video',
'id': video_id,
'title': title,
@@ -124,12 +123,7 @@ class AppleTrailersIE(InfoExtractor):
'upload_date': upload_date,
'uploader_id': uploader_id,
'user_agent': 'QuickTime compatible (youtube-dl)',
}
# TODO: Remove when #980 has been merged
info['url'] = formats[-1]['url']
info['ext'] = formats[-1]['ext']
playlist.append(info)
})
return {
'_type': 'playlist',

View File

@@ -11,7 +11,7 @@ from ..utils import (
class ArchiveOrgIE(InfoExtractor):
IE_NAME = 'archive.org'
IE_DESC = 'archive.org videos'
_VALID_URL = r'(?:https?://)?(?:www\.)?archive.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
_VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
_TEST = {
u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
@@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor):
for f in formats:
f['ext'] = determine_ext(f['url'])
info = {
return {
'_type': 'video',
'id': video_id,
'title': title,
@@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor):
'description': description,
'uploader': uploader,
'upload_date': upload_date,
'thumbnail': data.get('misc', {}).get('image'),
}
thumbnail = data.get('misc', {}).get('image')
if thumbnail:
info['thumbnail'] = thumbnail
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -1,7 +1,6 @@
# encoding: utf-8
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -11,6 +10,7 @@ from ..utils import (
determine_ext,
get_element_by_id,
compat_str,
get_element_by_attribute,
)
# There are different sources of video in arte.tv, the extraction process
@@ -18,8 +18,8 @@ from ..utils import (
# add tests.
class ArteTvIE(InfoExtractor):
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
_LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv'
@@ -78,8 +78,7 @@ class ArteTvIE(InfoExtractor):
"""Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
config_xml_url = config_node.attrib['ref']
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
@@ -109,9 +108,8 @@ class ArteTvIE(InfoExtractor):
"""Extract form http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
video_id, u'Downloading information')
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd')
if url_node is None:
@@ -145,7 +143,9 @@ class ArteTVPlus7IE(InfoExtractor):
def _extract_from_webpage(self, webpage, video_id, lang):
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
return self._extract_from_json_url(json_url, video_id, lang)
def _extract_from_json_url(self, json_url, video_id, lang):
json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
self.report_extraction(video_id)
info = json.loads(json_info)
@@ -260,3 +260,35 @@ class ArteTVFutureIE(ArteTVPlus7IE):
webpage = self._download_webpage(url, anchor_id)
row = get_element_by_id(anchor_id, webpage)
return self._extract_from_webpage(row, anchor_id, lang)
class ArteTVDDCIE(ArteTVPlus7IE):
IE_NAME = u'arte.tv:ddc'
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
_TEST = {
u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
u'file': u'049881-009_PLUS7-D.flv',
u'info_dict': {
u'title': u'Mit offenen Karten',
u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6',
u'upload_date': u'20131207',
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
def _real_extract(self, url):
video_id, lang = self._extract_url_info(url)
if lang == 'folge':
lang = 'de'
elif lang == 'emission':
lang = 'fr'
webpage = self._download_webpage(url, video_id)
scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage)
script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url')
javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
return self._extract_from_json_url(json_url, video_id, lang)

View File

@@ -16,7 +16,7 @@ class AUEngineIE(InfoExtractor):
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
}
}
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -25,6 +25,11 @@ class BambuserIE(InfoExtractor):
u'uploader': u'pixelversity',
u'uploader_id': u'344706',
},
u'params': {
# It doesn't respect the 'Range' header, it would download the whole video
# caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
u'skip_download': True,
},
}
def _real_extract(self, url):
@@ -49,7 +54,7 @@ class BambuserIE(InfoExtractor):
class BambuserChannelIE(InfoExtractor):
IE_NAME = u'bambuser:channel'
_VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
# The maximum number we can get with each request
_STEP = 50

View File

@@ -20,28 +20,6 @@ class BandcampIE(InfoExtractor):
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
},
u'skip': u'There is a limit of 200 free downloads / month for the test song'
}, {
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
u'playlist': [
{
u'file': u'1353101989.mp3',
u'md5': u'39bc1eded3476e927c724321ddf116cf',
u'info_dict': {
u'title': u'Intro',
}
},
{
u'file': u'38097443.mp3',
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
u'info_dict': {
u'title': u'Kero One - Keep It Alive (Blazo remix)',
}
},
],
u'params': {
u'playlistend': 2
},
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
}]
def _real_extract(self, url):
@@ -56,20 +34,17 @@ class BandcampIE(InfoExtractor):
json_code = m_trackinfo.group(1)
data = json.loads(json_code)
entries = []
for d in data:
formats = [{
'format_id': 'format_id',
'url': format_url,
'ext': format_id.partition('-')[0]
} for format_id, format_url in sorted(d['file'].items())]
entries.append({
return {
'id': compat_str(d['id']),
'title': d['title'],
'formats': formats,
})
return self.playlist_result(entries, title, title)
}
else:
raise ExtractorError(u'No free songs found')
@@ -112,6 +87,30 @@ class BandcampAlbumIE(InfoExtractor):
IE_NAME = u'Bandcamp:album'
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
_TEST = {
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
u'playlist': [
{
u'file': u'1353101989.mp3',
u'md5': u'39bc1eded3476e927c724321ddf116cf',
u'info_dict': {
u'title': u'Intro',
}
},
{
u'file': u'38097443.mp3',
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
u'info_dict': {
u'title': u'Kero One - Keep It Alive (Blazo remix)',
}
},
],
u'params': {
u'playlistend': 2
},
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')

View File

@@ -51,8 +51,7 @@ class BlipTVIE(InfoExtractor):
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
urlp = compat_urllib_parse_urlparse(url)
if urlp.path.startswith('/play/'):
request = compat_urllib_request.Request(url)
response = compat_urllib_request.urlopen(request)
response = self._request_webpage(url, None, False)
redirecturl = response.geturl()
rurlp = compat_urllib_parse_urlparse(redirecturl)
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
@@ -69,25 +68,23 @@ class BlipTVIE(InfoExtractor):
request.add_header('User-Agent', 'iTunes/10.6.1')
self.report_extraction(mobj.group(1))
info = None
try:
urlh = compat_urllib_request.urlopen(request)
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
basename = url.split('/')[-1]
title,ext = os.path.splitext(basename)
title = title.decode('UTF-8')
ext = ext.replace('.', '')
self.report_direct_download(title)
info = {
'id': title,
'url': url,
'uploader': None,
'upload_date': None,
'title': title,
'ext': ext,
'urlhandle': urlh
}
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
urlh = self._request_webpage(request, None, False,
u'unable to download video info webpage')
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
basename = url.split('/')[-1]
title,ext = os.path.splitext(basename)
title = title.decode('UTF-8')
ext = ext.replace('.', '')
self.report_direct_download(title)
info = {
'id': title,
'url': url,
'uploader': None,
'upload_date': None,
'title': title,
'ext': ext,
'urlhandle': urlh
}
if info is None: # Regular URL
try:
json_code_bytes = urlh.read()

View File

@@ -4,7 +4,7 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html'
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
_TEST = {
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',

View File

@@ -55,6 +55,18 @@ class BrightcoveIE(InfoExtractor):
u'uploader': u'Mashable',
},
},
{
# test that the default referer works
# from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
u'url': u'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
u'info_dict': {
u'id': u'2878862109001',
u'ext': u'mp4',
u'title': u'Lost in Motion II',
u'description': u'md5:363109c02998fee92ec02211bd8000df',
u'uploader': u'National Ballet of Canada',
},
},
]
@classmethod
@@ -75,16 +87,22 @@ class BrightcoveIE(InfoExtractor):
params = {'flashID': object_doc.attrib['id'],
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
}
playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
def find_param(name):
node = find_xpath_attr(object_doc, './param', 'name', name)
if node is not None:
return node.attrib['value']
return None
playerKey = find_param('playerKey')
# Not all pages define this value
if playerKey is not None:
params['playerKey'] = playerKey.attrib['value']
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
params['playerKey'] = playerKey
# The three fields hold the id of the video
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
if videoPlayer is not None:
params['@videoPlayer'] = videoPlayer.attrib['value']
linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
params['@videoPlayer'] = videoPlayer
linkBase = find_param('linkBaseURL')
if linkBase is not None:
params['linkBaseURL'] = linkBase.attrib['value']
params['linkBaseURL'] = linkBase
data = compat_urllib_parse.urlencode(params)
return cls._FEDERATED_URL_TEMPLATE % data
@@ -112,17 +130,21 @@ class BrightcoveIE(InfoExtractor):
videoPlayer = query.get('@videoPlayer')
if videoPlayer:
return self._get_video_info(videoPlayer[0], query_str, query)
return self._get_video_info(videoPlayer[0], query_str, query,
# We set the original url as the default 'Referer' header
referer=url)
else:
player_key = query['playerKey']
return self._get_playlist_info(player_key[0])
def _get_video_info(self, video_id, query_str, query):
def _get_video_info(self, video_id, query_str, query, referer=None):
request_url = self._FEDERATED_URL_TEMPLATE % query_str
req = compat_urllib_request.Request(request_url)
linkBase = query.get('linkBaseURL')
if linkBase is not None:
req.add_header('Referer', linkBase[0])
referer = linkBase[0]
if referer is not None:
req.add_header('Referer', referer)
webpage = self._download_webpage(req, video_id)
self.report_extraction(video_id)

View File

@@ -1,6 +1,5 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import unified_strdate
@@ -31,11 +30,10 @@ class CanalplusIE(InfoExtractor):
webpage = self._download_webpage(url, mobj.group('path'))
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
info_url = self._VIDEO_INFO_TEMPLATE % video_id
info_page = self._download_webpage(info_url,video_id,
doc = self._download_xml(info_url,video_id,
u'Downloading video info')
self.report_extraction(video_id)
doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
video_info = [video for video in doc if video.find('ID').text == video_id][0]
infos = video_info.find('INFOS')
media = video_info.find('MEDIA')

View File

@@ -0,0 +1,30 @@
import re
from .common import InfoExtractor
class CBSIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'
_TEST = {
u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
u'file': u'4JUVEwq3wUT7.flv',
u'info_dict': {
u'title': u'Connect Chat feat. Garth Brooks',
u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
u'duration': 1495,
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
real_id = self._search_regex(
r"video\.settings\.pid\s*=\s*'([^']+)';",
webpage, u'real video ID')
return self.url_result(u'theplatform:%s' % real_id)

View File

@@ -0,0 +1,58 @@
import re
import time
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import ExtractorError
class ClipfishIE(InfoExtractor):
IE_NAME = u'clipfish'
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
_TEST = {
u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
u'file': u'3966754.mp4',
u'md5': u'2521cd644e862936cf2e698206e47385',
u'info_dict': {
u'title': u'FIFA 14 - E3 2013 Trailer',
u'duration': 82,
},
u'skip': 'Blocked in the US'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
(video_id, int(time.time())))
doc = self._download_xml(
info_url, video_id, note=u'Downloading info page')
title = doc.find('title').text
video_url = doc.find('filename').text
if video_url is None:
xml_bytes = xml.etree.ElementTree.tostring(doc)
raise ExtractorError(u'Cannot find video URL in document %r' %
xml_bytes)
thumbnail = doc.find('imageurl').text
duration_str = doc.find('duration').text
m = re.match(
r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
duration_str)
if m:
duration = (
(int(m.group('hours')) * 60 * 60) +
(int(m.group('minutes')) * 60) +
(int(m.group('seconds')))
)
else:
duration = None
return {
'id': video_id,
'title': title,
'url': video_url,
'thumbnail': thumbnail,
'duration': duration,
}

View File

@@ -0,0 +1,50 @@
import re
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
fix_xml_all_ampersand,
)
class ClipsyndicateIE(InfoExtractor):
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
u'info_dict': {
u'id': u'4629301',
u'ext': u'mp4',
u'title': u'Brick Briscoe',
u'duration': 612,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
js_player = self._download_webpage(
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
video_id, u'Downlaoding player')
# it includes a required token
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
pdoc = self._download_xml(
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
video_id, u'Downloading video info',
transform_source=fix_xml_all_ampersand)
track_doc = pdoc.find('trackList/track')
def find_param(name):
node = find_xpath_attr(track_doc, './/param', 'name', name)
if node is not None:
return node.attrib['value']
return {
'id': video_id,
'title': find_param('title'),
'url': track_doc.find('location').text,
'thumbnail': find_param('thumbnail'),
'duration': int(find_param('duration')),
}

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import determine_ext
@@ -33,8 +32,7 @@ class CNNIE(InfoExtractor):
path = mobj.group('path')
page_title = mobj.group('title')
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
info_xml = self._download_webpage(info_url, page_title)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
info = self._download_xml(info_url, page_title)
formats = []
for f in info.findall('files/file'):

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
self.report_extraction(video_id)
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
metaXml = self._download_webpage(xmlUrl, video_id,
mdoc = self._download_xml(xmlUrl, video_id,
u'Downloading info XML',
u'Unable to download video info XML')
mdoc = xml.etree.ElementTree.fromstring(metaXml)
try:
videoNode = mdoc.findall('./video')[0]
youtubeIdNode = videoNode.find('./youtubeID')
@@ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor):
if next_url.endswith(u'manifest.f4m'):
manifest_url = next_url + '?hdcore=2.10.3'
manifestXml = self._download_webpage(manifest_url, video_id,
adoc = self._download_xml(manifest_url, video_id,
u'Downloading XML manifest',
u'Unable to download video info XML')
adoc = xml.etree.ElementTree.fromstring(manifestXml)
try:
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
except IndexError:

View File

@@ -1,7 +1,7 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from .mtv import MTVServicesInfoExtractor
from ..utils import (
compat_str,
compat_urllib_parse,
@@ -11,7 +11,31 @@ from ..utils import (
)
class ComedyCentralIE(InfoExtractor):
class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
_TEST = {
u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
u'md5': u'4167875aae411f903b751a21f357f1ee',
u'info_dict': {
u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
u'ext': u'mp4',
u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"',
webpage, u'mgid')
return self._get_videos_info(mgid)
class ComedyCentralShowsIE(InfoExtractor):
IE_DESC = u'The Daily Show / Colbert Report'
# urls can be abbreviations like :thedailyshow or :colbert
# urls for episodes like:
@@ -127,13 +151,12 @@ class ComedyCentralIE(InfoExtractor):
uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
indexXml = self._download_webpage(indexUrl, epTitle,
idoc = self._download_xml(indexUrl, epTitle,
u'Downloading show index',
u'unable to download episode index')
results = []
idoc = xml.etree.ElementTree.fromstring(indexXml)
itemEls = idoc.findall('.//item')
for partNum,itemEl in enumerate(itemEls):
mediaId = itemEl.findall('./guid')[0].text
@@ -144,10 +167,9 @@ class ComedyCentralIE(InfoExtractor):
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId}))
configXml = self._download_webpage(configUrl, epTitle,
cdoc = self._download_xml(configUrl, epTitle,
u'Downloading configuration for %s' % shortMediaId)
cdoc = xml.etree.ElementTree.fromstring(configXml)
turls = []
for rendition in cdoc.findall('.//rendition'):
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
@@ -169,7 +191,7 @@ class ComedyCentralIE(InfoExtractor):
})
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
info = {
results.append({
'id': shortMediaId,
'formats': formats,
'uploader': showId,
@@ -177,11 +199,6 @@ class ComedyCentralIE(InfoExtractor):
'title': effTitle,
'thumbnail': None,
'description': compat_str(officialTitle),
}
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
results.append(info)
})
return results

View File

@@ -4,11 +4,11 @@ import re
import socket
import sys
import netrc
import xml.etree.ElementTree
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_request,
compat_str,
clean_html,
@@ -19,6 +19,7 @@ from ..utils import (
unescapeHTML,
)
class InfoExtractor(object):
"""Information Extractor class.
@@ -33,8 +34,8 @@ class InfoExtractor(object):
The dictionaries must include the following fields:
id: Video identifier.
url: Final video URL.
title: Video title, unescaped.
url: Final video URL.
ext: Video filename extension.
Instead of url and ext, formats can also specified.
@@ -53,7 +54,11 @@ class InfoExtractor(object):
player_url: SWF Player URL (used for rtmpdump).
subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}.
duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
comment_count: Number of comments on the video
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
age_limit: Age restriction for the video, as an integer (years)
@@ -75,6 +80,7 @@ class InfoExtractor(object):
* acodec Name of the audio codec in use
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
* filesize The number of bytes, if known in advance
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
@@ -149,27 +155,38 @@ class InfoExtractor(object):
def IE_NAME(self):
return type(self).__name__[:-2]
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns the response handle """
if note is None:
self.report_download_webpage(video_id)
elif note is not False:
self.to_screen(u'%s: %s' % (video_id, note))
if video_id is None:
self.to_screen(u'%s' % (note,))
else:
self.to_screen(u'%s: %s' % (video_id, note))
try:
return compat_urllib_request.urlopen(url_or_request)
return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
errmsg = u'%s: %s' % (errnote, compat_str(err))
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else:
self._downloader.report_warning(errmsg)
return False
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns a tuple (page content as string, URL handle) """
# Strip hashes from the URL (#1038)
if isinstance(url_or_request, (compat_str, str)):
url_or_request = url_or_request.partition('#')[0]
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
if urlh is False:
assert not fatal
return False
content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
@@ -204,9 +221,23 @@ class InfoExtractor(object):
content = webpage_bytes.decode(encoding, 'replace')
return (content, urlh)
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns the data of the page as a string """
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
if res is False:
return res
else:
content, _ = res
return content
def _download_xml(self, url_or_request, video_id,
note=u'Downloading XML', errnote=u'Unable to download XML',
transform_source=None):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
if transform_source:
xml_string = transform_source(xml_string)
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'"""
@@ -229,12 +260,14 @@ class InfoExtractor(object):
self.to_screen(u'Logging in')
#Methods for following #608
def url_result(self, url, ie=None):
def url_result(self, url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'url': url,
'ie_key': ie}
if video_id is not None:
video_info['id'] = video_id
return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""
@@ -354,7 +387,8 @@ class InfoExtractor(object):
if display_name is None:
display_name = name
return self._html_search_regex(
r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
r'''(?ix)<meta
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=False)

View File

@@ -6,7 +6,7 @@ from ..utils import (
)
class CSpanIE(InfoExtractor):
_VALID_URL = r'http://www.c-spanvideo.org/program/(.*)'
_VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
_TEST = {
u'url': u'http://www.c-spanvideo.org/program/HolderonV',
u'file': u'315139.flv',

View File

@@ -11,6 +11,7 @@ from ..utils import (
get_element_by_attribute,
get_element_by_id,
orderedSet,
str_to_int,
ExtractorError,
)
@@ -27,7 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
IE_NAME = u'dailymotion'
_FORMATS = [
@@ -80,7 +81,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
# Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1).split('_')[0].split('?')[0]
video_id = mobj.group('id')
url = 'http://www.dailymotion.com/video/%s' % video_id
@@ -100,10 +101,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
self.to_screen(u'Vevo video detected: %s' % vevo_id)
return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
# Looking for official user
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
webpage, 'video uploader', fatal=False)
age_limit = self._rta_search(webpage)
video_upload_date = None
@@ -146,15 +143,21 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
self._list_available_subtitles(video_id, webpage)
return
view_count = self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
if view_count is not None:
view_count = str_to_int(view_count)
return {
'id': video_id,
'formats': formats,
'uploader': video_uploader,
'uploader': info['owner_screenname'],
'upload_date': video_upload_date,
'title': self._og_search_title(webpage),
'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url'],
'age_limit': age_limit,
'view_count': view_count,
}
def _get_available_subtitles(self, video_id, webpage):

View File

@@ -1,6 +1,5 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -10,7 +9,7 @@ from ..utils import (
class DaumIE(InfoExtractor):
_VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
IE_NAME = u'daum.net'
_TEST = {
@@ -29,17 +28,16 @@ class DaumIE(InfoExtractor):
video_id = mobj.group(1)
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
webpage = self._download_webpage(canonical_url, video_id)
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
full_id = self._search_regex(
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
webpage, u'full id')
query = compat_urllib_parse.urlencode({'vid': full_id})
info_xml = self._download_webpage(
info = self._download_xml(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
u'Downloading video info')
urls_xml = self._download_webpage(
urls = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
self.to_screen(u'%s: Getting video urls' % video_id)
formats = []
@@ -49,10 +47,9 @@ class DaumIE(InfoExtractor):
'vid': full_id,
'profile': profile,
})
url_xml = self._download_webpage(
url_doc = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
video_id, note=False)
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
format_url = url_doc.find('result/url').text
formats.append({
'url': format_url,
@@ -60,7 +57,7 @@ class DaumIE(InfoExtractor):
'format_id': profile,
})
info = {
return {
'id': video_id,
'title': info.find('TITLE').text,
'formats': formats,
@@ -69,6 +66,3 @@ class DaumIE(InfoExtractor):
'duration': int(info.find('DURATION').text),
'upload_date': info.find('REGDTTM').text[:8],
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -1,7 +1,6 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -12,7 +11,7 @@ from ..utils import (
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TEST = {
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
u'file': u'36983.webm',
@@ -30,8 +29,7 @@ class DreiSatIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
thumbnail_els = details_doc.findall('.//teaserimage')
thumbnails = [{
@@ -67,7 +65,7 @@ class DreiSatIE(InfoExtractor):
return (qidx, prefer_http, format['video_bitrate'])
formats.sort(key=_sortkey)
info = {
return {
'_type': 'video',
'id': video_id,
'title': video_title,
@@ -78,8 +76,3 @@ class DreiSatIE(InfoExtractor):
'uploader': video_uploader,
'upload_date': upload_date,
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import determine_ext
@@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
config_xml = self._download_webpage(
config = self._download_xml(
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
video_url = config.find('file').text
return {

View File

@@ -10,7 +10,7 @@ from ..utils import (
class EightTracksIE(InfoExtractor):
IE_NAME = '8tracks'
_VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
_VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
_TEST = {
u"name": u"EightTracks",
u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",

View File

@@ -8,7 +8,7 @@ class ExfmIE(InfoExtractor):
IE_NAME = u'exfm'
IE_DESC = u'ex.fm'
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
_TESTS = [
{
u'url': u'http://ex.fm/song/eh359',

View File

@@ -1,6 +1,5 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -10,7 +9,7 @@ from ..utils import (
class FazIE(InfoExtractor):
IE_NAME = u'faz.net'
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html'
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
_TEST = {
u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
@@ -28,9 +27,8 @@ class FazIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
u'config xml url')
config_xml = self._download_webpage(config_xml_url, video_id,
config = self._download_xml(config_xml_url, video_id,
u'Downloading config xml')
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
encodings = config.find('ENCODINGS')
formats = []
@@ -46,13 +44,10 @@ class FazIE(InfoExtractor):
})
descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
info = {
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': descr,
'thumbnail': config.find('STILL/STILL_BIG').text,
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -12,7 +12,7 @@ from ..utils import (
class FKTVIE(InfoExtractor):
IE_NAME = u'fernsehkritik.tv'
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
_TEST = {
u'url': u'http://fernsehkritik.tv/folge-1',
@@ -52,7 +52,7 @@ class FKTVIE(InfoExtractor):
class FKTVPosteckeIE(InfoExtractor):
IE_NAME = u'fernsehkritik.tv:postecke'
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
_TEST = {
u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
u'file': u'0120.flv',

View File

@@ -1,6 +1,5 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
import json
from .common import InfoExtractor
@@ -11,11 +10,10 @@ from ..utils import (
class FranceTVBaseInfoExtractor(InfoExtractor):
def _extract_video(self, video_id):
xml_desc = self._download_webpage(
info = self._download_xml(
'http://www.francetvinfo.fr/appftv/webservices/video/'
'getInfosOeuvre.php?id-diffusion='
+ video_id, video_id, 'Downloading XML config')
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
manifest_url = info.find('videos/video/url').text
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
@@ -23,7 +21,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
thumbnail_path = info.find('image').text
return {'id': video_id,
'ext': 'mp4',
'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
'url': video_url,
'title': info.find('titre').text,
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
@@ -47,7 +45,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = u'francetvinfo.fr'
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
_TEST = {
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
@@ -68,35 +66,101 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
return self._extract_video(video_id)
class France2IE(FranceTVBaseInfoExtractor):
IE_NAME = u'france2.fr'
_VALID_URL = r'''(?x)https?://www\.france2\.fr/
class FranceTVIE(FranceTVBaseInfoExtractor):
IE_NAME = u'francetv'
IE_DESC = u'France 2, 3, 4, 5 and Ô'
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
(?:
emissions/.*?/videos/(?P<id>\d+)
| emission/(?P<key>[^/?]+)
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
| (emissions?|jt)/(?P<key>[^/?]+)
)'''
_TEST = {
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
u'file': u'75540104.mp4',
u'info_dict': {
u'title': u'13h15, le samedi...',
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
_TESTS = [
# france2
{
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
u'file': u'75540104.mp4',
u'info_dict': {
u'title': u'13h15, le samedi...',
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
},
u'params': {
# m3u8 download
u'skip_download': True,
},
},
u'params': {
u'skip_download': True,
# france3
{
u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
u'info_dict': {
u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
u'ext': u'flv',
u'title': u'Le scandale du prix des médicaments',
u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
},
u'params': {
# rtmp download
u'skip_download': True,
},
},
}
# france4
{
u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
u'info_dict': {
u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
u'ext': u'flv',
u'title': u'Hero Corp Making of - Extrait 1',
u'description': u'md5:c87d54871b1790679aec1197e73d650a',
},
u'params': {
# rtmp download
u'skip_download': True,
},
},
# france5
{
u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
u'info_dict': {
u'id': u'92837968',
u'ext': u'mp4',
u'title': u'C à dire ?!',
u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
},
u'params': {
# m3u8 download
u'skip_download': True,
},
},
# franceo
{
u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
u'info_dict': {
u'id': u'92327925',
u'ext': u'mp4',
u'title': u'Infô-Afrique',
u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
},
u'params': {
# m3u8 download
u'skip_download': True,
},
u'skip': u'The id changes frequently',
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj.group('key'):
webpage = self._download_webpage(url, mobj.group('key'))
video_id = self._html_search_regex(
r'''(?x)<div\s+class="video-player">\s*
id_res = [
(r'''(?x)<div\s+class="video-player">\s*
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
class="francetv-video-player">''',
webpage, u'video ID')
class="francetv-video-player">'''),
(r'<a id="player_direct" href="http://info\.francetelevisions'
'\.fr/\?id-video=([^"/&]+)'),
(r'<a class="video" id="ftv_player_(.+?)"'),
]
video_id = self._html_search_regex(id_res, webpage, u'video ID')
else:
video_id = mobj.group('id')
return self._extract_video(video_id)

View File

@@ -4,7 +4,7 @@ from .common import InfoExtractor
class GamekingsIE(InfoExtractor):
_VALID_URL = r'http?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
_TEST = {
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
u'file': u'20130811.mp4',

View File

@@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor):
'format_id': q,
})
info = {
return {
'id': data_video['guid'],
'title': compat_urllib_parse.unquote(data_video['title']),
'formats': formats,
'description': get_meta_content('description', webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -1,13 +1,10 @@
import re
from .mtv import MTVIE, _media_xml_tag
from .mtv import MTVServicesInfoExtractor
class GametrailersIE(MTVIE):
"""
Gametrailers use the same videos system as MTVIE, it just changes the feed
url, where the uri is and the method to get the thumbnails.
"""
_VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
class GametrailersIE(MTVServicesInfoExtractor):
_VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
_TEST = {
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
@@ -17,15 +14,9 @@ class GametrailersIE(MTVIE):
u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
},
}
# Overwrite MTVIE properties we don't want
_TESTS = []
_FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
return itemdoc.find(search_path).attrib['url']
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')

View File

@@ -169,8 +169,13 @@ class GenericIE(InfoExtractor):
# Site Name | Video Title
# Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical
video_title = self._html_search_regex(r'<title>(.*)</title>',
webpage, u'video title', default=u'video', flags=re.DOTALL)
video_title = self._html_search_regex(
r'(?s)<title>(.*?)</title>', webpage, u'video title',
default=u'video')
# video uploader is domain name
video_uploader = self._search_regex(
r'^(?:https?://)?([^/]*)/.*', url, u'video uploader')
# Look for BrightCove:
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
@@ -188,13 +193,35 @@ class GenericIE(InfoExtractor):
# Look for embedded YouTube player
matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for embedded Dailymotion player
matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for embedded Wistia player
match = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
if match:
return {
'_type': 'url_transparent',
'url': unescapeHTML(match.group('url')),
'ie_key': 'Wistia',
'uploader': video_uploader,
'title': video_title,
'id': video_id,
}
# Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None:
@@ -209,7 +236,7 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
if mobj is None:
# Broaden the search a little bit: JWPlayer JS loader
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
if mobj is None:
# Try to find twitter cards info
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
@@ -236,18 +263,11 @@ class GenericIE(InfoExtractor):
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
# here's a fun little line of code for you:
video_extension = os.path.splitext(video_id)[1][1:]
video_id = os.path.splitext(video_id)[0]
# video uploader is domain name
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
url, u'video uploader')
return [{
return {
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'upload_date': None,
'title': video_title,
'ext': video_extension,
}]
}

View File

@@ -11,7 +11,7 @@ class HotNewHipHopIE(InfoExtractor):
u'file': u'1435540.mp3',
u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
u'info_dict': {
u"title": u"Freddie Gibbs - Lay It Down"
u"title": u'Freddie Gibbs "Lay It Down"'
}
}

View File

@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
_TEST = {
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
u'file': u'390161.mp4',
u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
u'md5': u'8b743df908c42f60cf6496586c7f12c3',
u'info_dict': {
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
u"title": u"How to Tie a Square Knot Properly"

View File

@@ -44,7 +44,7 @@ class IGNIE(InfoExtractor):
{
u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
u'info_dict': {
u'title': u'GTA 5\'s Twisted Beauty in Super Slow Motion',
u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
},
},
@@ -103,7 +103,7 @@ class IGNIE(InfoExtractor):
class OneUPIE(IGNIE):
"""Extractor for 1up.com, it uses the ign videos system."""
_VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
IE_NAME = '1up.com'
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

View File

@@ -0,0 +1,57 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
get_element_by_attribute,
)
class ImdbIE(InfoExtractor):
IE_NAME = u'imdb'
IE_DESC = u'Internet Movie Database trailers'
_VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
_TEST = {
u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
u'info_dict': {
u'id': u'2524815897',
u'ext': u'mp4',
u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url,video_id)
descr = get_element_by_attribute('itemprop', 'description', webpage)
available_formats = re.findall(
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
flags=re.MULTILINE)
formats = []
for f_id, f_path in available_formats:
f_path = f_path.strip()
format_page = self._download_webpage(
compat_urlparse.urljoin(url, f_path),
u'Downloading info for %s format' % f_id)
json_data = self._search_regex(
r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
format_page, u'json data', flags=re.DOTALL)
info = json.loads(json_data)
format_info = info['videoPlayerObject']['video']
formats.append({
'format_id': f_id,
'url': format_info['url'],
})
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': descr,
'thumbnail': format_info['slate'],
}

View File

@@ -3,7 +3,7 @@ import re
from .common import InfoExtractor
class InstagramIE(InfoExtractor):
_VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
_VALID_URL = r'(?:http://)?instagram\.com/p/(.*?)/'
_TEST = {
u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
u'file': u'aye83DjauH.mp4',

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -43,9 +42,8 @@ class InternetVideoArchiveIE(InfoExtractor):
video_id = query_dic['publishedid'][0]
url = self._build_url(query)
flashconfiguration_xml = self._download_webpage(url, video_id,
flashconfiguration = self._download_xml(url, video_id,
u'Downloading flash configuration')
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
file_url = flashconfiguration.find('file').text
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
# Replace some of the parameters in the query to get the best quality
@@ -53,9 +51,8 @@ class InternetVideoArchiveIE(InfoExtractor):
file_url = re.sub(r'(?<=\?)(.+)$',
lambda m: self._clean_query(m.group()),
file_url)
info_xml = self._download_webpage(file_url, video_id,
info = self._download_xml(file_url, video_id,
u'Downloading video info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
item = info.find('channel/item')
def _bp(p):

View File

@@ -2,7 +2,6 @@
import json
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -32,12 +31,9 @@ class JeuxVideoIE(InfoExtractor):
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
xml_link, u'video ID')
xml_config = self._download_webpage(
config = self._download_xml(
xml_link, title, u'Downloading XML config')
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
info_json = self._search_regex(
r'(?sm)<format\.json>(.*?)</format\.json>',
xml_config, u'JSON information')
info_json = config.find('format.json').text
info = json.loads(info_json)['versions'][0]
video_url = 'http://video720.jeuxvideo.com/' + info['file']

View File

@@ -8,7 +8,7 @@ from ..utils import (
)
class JukeboxIE(InfoExtractor):
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+).html'
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'

View File

@@ -1,7 +1,6 @@
import json
import os
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor):
archive_id = m.group(1)
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
chapter_info_xml = self._download_webpage(api, chapter_id,
doc = self._download_xml(api, chapter_id,
note=u'Downloading chapter information',
errnote=u'Chapter information download failed')
doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
for a in doc.findall('.//archive'):
if archive_id == a.find('./id').text:
break

View File

@@ -8,7 +8,7 @@ from ..utils import (
class LiveLeakIE(InfoExtractor):
_VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
IE_NAME = u'liveleak'
_TEST = {
u'url': u'http://www.liveleak.com/view?i=757_1364311680',

View File

@@ -1,6 +1,5 @@
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -12,7 +11,7 @@ from ..utils import (
class LivestreamIE(InfoExtractor):
IE_NAME = u'livestream'
_VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
_TEST = {
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
u'file': u'4719370.mp4',
@@ -80,8 +79,7 @@ class LivestreamOriginalIE(InfoExtractor):
user = mobj.group('user')
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
api_response = self._download_webpage(api_url, video_id)
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
info = self._download_xml(api_url, video_id)
item = info.find('channel').find('item')
ns = {'media': 'http://search.yahoo.com/mrss'}
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']

View File

@@ -1,14 +1,10 @@
import re
import socket
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_parse_qs,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
compat_str,
determine_ext,
ExtractorError,
)
@@ -69,6 +65,21 @@ class MetacafeIE(InfoExtractor):
u'age_limit': 18,
},
},
# cbs video
{
u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
u'info_dict': {
u'id': u'0rOxMBabDXN6',
u'ext': u'flv',
u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
u'duration': 129,
},
u'params': {
# rtmp download
u'skip_download': True,
},
},
]
@@ -78,12 +89,8 @@ class MetacafeIE(InfoExtractor):
def _real_initialize(self):
# Retrieve disclaimer
request = compat_urllib_request.Request(self._DISCLAIMER)
try:
self.report_disclaimer()
compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to retrieve disclaimer: %s' % compat_str(err))
self.report_disclaimer()
self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
# Confirm age
disclaimer_form = {
@@ -92,11 +99,8 @@ class MetacafeIE(InfoExtractor):
}
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try:
self.report_age_confirmation()
compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
self.report_age_confirmation()
self._download_webpage(request, None, False, u'Unable to confirm age')
def _real_extract(self, url):
# Extract id and simplified title from URL
@@ -106,10 +110,16 @@ class MetacafeIE(InfoExtractor):
video_id = mobj.group(1)
# Check if video comes from YouTube
mobj2 = re.match(r'^yt-(.*)$', video_id)
if mobj2 is not None:
return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]
# the video may come from an external site
m_external = re.match('^(\w{2})-(.*)$', video_id)
if m_external is not None:
prefix, ext_id = m_external.groups()
# Check if video comes from YouTube
if prefix == 'yt':
return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube')
# CBS videos use theplatform.com
if prefix == 'cb':
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
# Retrieve video webpage to extract further information
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)

View File

@@ -1,8 +1,10 @@
import re
import xml.etree.ElementTree
import operator
from .common import InfoExtractor
from ..utils import (
fix_xml_all_ampersand,
)
class MetacriticIE(InfoExtractor):
@@ -23,9 +25,8 @@ class MetacriticIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
# The xml is not well formatted, there are raw '&'
info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id,
video_id, u'Downloading info xml').replace('&', '&amp;')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
video_id, u'Downloading info xml', transform_source=fix_xml_all_ampersand)
clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
formats = []
@@ -43,13 +44,10 @@ class MetacriticIE(InfoExtractor):
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
webpage, u'description', flags=re.DOTALL)
info = {
return {
'id': video_id,
'title': clip.find('title').text,
'formats': formats,
'description': description,
'duration': int(clip.find('duration').text),
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -1,13 +1,10 @@
import json
import re
import socket
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_request,
unified_strdate,
ExtractorError,
)
@@ -31,13 +28,18 @@ class MixcloudIE(InfoExtractor):
"""Returns 1st active url from list"""
for url in url_list:
try:
compat_urllib_request.urlopen(url)
# We only want to know if the request succeed
# don't download the whole file
self._request_webpage(url, None, False)
return url
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
except ExtractorError:
url = None
return None
def _get_url(self, template_url):
return self.check_urls(template_url % i for i in range(30))
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -53,14 +55,19 @@ class MixcloudIE(InfoExtractor):
preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
final_song_url = self.check_urls(template_url % i for i in range(30))
final_song_url = self._get_url(template_url)
if final_song_url is None:
self.to_screen('Trying with m4a extension')
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
final_song_url = self._get_url(template_url)
if final_song_url is None:
raise ExtractorError(u'Unable to extract track url')
return {
'id': track_id,
'title': info['name'],
'url': final_song_url,
'ext': 'mp3',
'description': info['description'],
'description': info.get('description'),
'thumbnail': info['pictures'].get('extra_large'),
'uploader': info['user']['name'],
'uploader_id': info['user']['username'],

View File

@@ -10,35 +10,8 @@ from ..utils import (
def _media_xml_tag(tag):
return '{http://search.yahoo.com/mrss/}%s' % tag
class MTVIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
_TESTS = [
{
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
u'file': u'853555.mp4',
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
u'info_dict': {
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
},
},
{
u'add_ie': ['Vevo'],
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
u'file': u'USCJY1331283.mp4',
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
u'info_dict': {
u'title': u'Everything Has Changed',
u'upload_date': u'20130606',
u'uploader': u'Taylor Swift',
},
u'skip': u'VEVO is only available in some countries',
},
]
class MTVServicesInfoExtractor(InfoExtractor):
@staticmethod
def _id_from_uri(uri):
return uri.split(':')[-1]
@@ -53,7 +26,12 @@ class MTVIE(InfoExtractor):
return base + m.group('finalid')
def _get_thumbnail_url(self, uri, itemdoc):
return 'http://mtv.mtvnimages.com/uri/' + uri
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
thumb_node = itemdoc.find(search_path)
if thumb_node is None:
return None
else:
return thumb_node.attrib['url']
def _extract_video_formats(self, metadataXml):
if '/error_country_block.swf' in metadataXml:
@@ -93,7 +71,7 @@ class MTVIE(InfoExtractor):
else:
description = None
info = {
return {
'title': itemdoc.find('title').text,
'formats': self._extract_video_formats(mediagen_page),
'id': video_id,
@@ -101,19 +79,51 @@ class MTVIE(InfoExtractor):
'description': description,
}
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
return info
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
data = compat_urllib_parse.urlencode({'uri': uri})
infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
u'Downloading info')
idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
def fix_ampersand(s):
""" Fix unencoded ampersand in XML """
return s.replace(u'& ', '&amp; ')
idoc = self._download_xml(
self._FEED_URL + '?' + data, video_id,
u'Downloading info', transform_source=fix_ampersand)
return [self._get_video_info(item) for item in idoc.findall('.//item')]
class MTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
_TESTS = [
{
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
u'file': u'853555.mp4',
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
u'info_dict': {
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
},
},
{
u'add_ie': ['Vevo'],
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
u'file': u'USCJY1331283.mp4',
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
u'info_dict': {
u'title': u'Everything Has Changed',
u'upload_date': u'20130606',
u'uploader': u'Taylor Swift',
},
u'skip': u'VEVO is only available in some countries',
},
]
def _get_thumbnail_url(self, uri, itemdoc):
return 'http://mtv.mtvnimages.com/uri/' + uri
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')

View File

@@ -9,7 +9,7 @@ from ..utils import (
class MuzuTVIE(InfoExtractor):
_VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)'
_VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
IE_NAME = u'muzu.tv'
_TEST = {

View File

@@ -1,5 +1,4 @@
import os.path
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -10,7 +9,7 @@ from ..utils import (
class MySpassIE(InfoExtractor):
_VALID_URL = r'http://www.myspass.de/.*'
_VALID_URL = r'http://www\.myspass\.de/.*'
_TEST = {
u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
u'file': u'11741.mp4',
@@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor):
# get metadata
metadata_url = META_DATA_URL_TEMPLATE % video_id
metadata_text = self._download_webpage(metadata_url, video_id)
metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
metadata = self._download_xml(metadata_url, video_id)
# extract values from metadata
url_flv_el = metadata.find('url_flv')

View File

@@ -1,6 +1,5 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -10,7 +9,7 @@ from ..utils import (
class NaverIE(InfoExtractor):
_VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
_TEST = {
u'url': u'http://tvcast.naver.com/v/81652',
@@ -38,14 +37,12 @@ class NaverIE(InfoExtractor):
'protocol': 'p2p',
'inKey': key,
})
info_xml = self._download_webpage(
info = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
video_id, u'Downloading video info')
urls_xml = self._download_webpage(
urls = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
formats = []
for format_el in urls.findall('EncodingOptions/EncodingOption'):
@@ -59,7 +56,7 @@ class NaverIE(InfoExtractor):
'height': int(format_el.find('height').text),
})
info = {
return {
'id': video_id,
'title': info.find('Subject').text,
'formats': formats,
@@ -68,6 +65,3 @@ class NaverIE(InfoExtractor):
'upload_date': info.find('WriteDate').text.replace('.', ''),
'view_count': int(info.find('PlayCount').text),
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import find_xpath_attr, compat_str
@@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
info = all_info.find('video')
return {'id': video_id,
'title': info.find('headline').text,

View File

@@ -0,0 +1,66 @@
import json
import re
import time
from .common import InfoExtractor
from ..utils import month_by_name
class NDTVIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
_TEST = {
u"url": u"http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710",
u"file": u"300710.mp4",
u"md5": u"39f992dbe5fb531c395d8bbedb1e5e88",
u"info_dict": {
u"title": u"NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
u"description": u"In an exclusive interview to NDTV, Aam Aadmi Party's Arvind Kejriwal says it makes no difference to him that Rahul Gandhi said the Congress needs to learn from his party.",
u"upload_date": u"20131208",
u"duration": 1327,
u"thumbnail": u"http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg",
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
filename = self._search_regex(
r"__filename='([^']+)'", webpage, u'video filename')
video_url = (u'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
filename)
duration_str = filename = self._search_regex(
r"__duration='([^']+)'", webpage, u'duration', fatal=False)
duration = None if duration_str is None else int(duration_str)
date_m = re.search(r'''(?x)
<p\s+class="vod_dateline">\s*
Published\s+On:\s*
(?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
''', webpage)
upload_date = None
assert date_m
if date_m is not None:
month = month_by_name(date_m.group('monthname'))
if month is not None:
upload_date = '%s%02d%02d' % (
date_m.group('year'), month, int(date_m.group('day')))
description = self._og_search_description(webpage)
READ_MORE = u' (Read more)'
if description.endswith(READ_MORE):
description = description[:-len(READ_MORE)]
return {
'id': video_id,
'url': video_url,
'title': self._og_search_title(webpage),
'description': description,
'thumbnail': self._og_search_thumbnail(webpage),
'duration': duration,
'upload_date': upload_date,
}

View File

@@ -1,6 +1,5 @@
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
})
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
path_response = self._download_webpage(path_url, video_id,
path_doc = self._download_xml(path_url, video_id,
u'Downloading final video url')
path_doc = xml.etree.ElementTree.fromstring(path_response)
video_url = path_doc.find('path').text
join = compat_urlparse.urljoin

View File

@@ -0,0 +1,127 @@
# encoding: utf-8
import re
import socket
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
compat_str,
ExtractorError,
unified_strdate,
)
class NiconicoIE(InfoExtractor):
IE_NAME = u'niconico'
IE_DESC = u'ニコニコ動画'
_TEST = {
u'url': u'http://www.nicovideo.jp/watch/sm22312215',
u'file': u'sm22312215.mp4',
u'md5': u'd1a75c0823e2f629128c43e1212760f9',
u'info_dict': {
u'title': u'Big Buck Bunny',
u'uploader': u'takuya0301',
u'uploader_id': u'2698420',
u'upload_date': u'20131123',
u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
},
u'params': {
u'username': u'ydl.niconico@gmail.com',
u'password': u'youtube-dl',
},
}
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
_NETRC_MACHINE = 'niconico'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = True
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
# No authentication to be performed
if username is None:
if self._LOGIN_REQUIRED:
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return False
# Log in
login_form_strs = {
u'mail': username,
u'password': password,
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
request = compat_urllib_request.Request(
u'https://secure.nicovideo.jp/secure/login', login_data)
login_results = self._download_webpage(
request, u'', note=u'Logging in', errnote=u'Unable to log in')
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
self._downloader.report_warning(u'unable to log in: bad username or password')
return False
return True
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
# Get video webpage. We are not actually interested in it, but need
# the cookies in order to be able to download the info webpage
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
video_info = self._download_xml(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note=u'Downloading video info page')
# Get flv info
flv_info_webpage = self._download_webpage(
u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
video_id, u'Downloading flv info')
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information
video_title = video_info.find('.//title').text
video_extension = video_info.find('.//movie_type').text
video_format = video_extension.upper()
video_thumbnail = video_info.find('.//thumbnail_url').text
video_description = video_info.find('.//description').text
video_uploader_id = video_info.find('.//user_id').text
video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
video_view_count = video_info.find('.//view_counter').text
video_webpage_url = video_info.find('.//watch_url').text
# uploader
video_uploader = video_uploader_id
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
try:
user_info = self._download_xml(
url, video_id, note=u'Downloading user information')
video_uploader = user_info.find('.//nickname').text
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
return {
'id': video_id,
'url': video_real_url,
'title': video_title,
'ext': video_extension,
'format': video_format,
'thumbnail': video_thumbnail,
'description': video_description,
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'view_count': video_view_count,
'webpage_url': video_webpage_url,
}

View File

@@ -0,0 +1,43 @@
import json
import re
from .common import InfoExtractor
class NineGagIE(InfoExtractor):
IE_NAME = '9gag'
_VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
_TEST = {
u"url": u"http://9gag.tv/v/1912",
u"file": u"1912.mp4",
u"info_dict": {
u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
},
u'add_ie': [u'Youtube']
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
data_json = self._html_search_regex(r'''(?x)
<div\s*id="tv-video"\s*data-video-source="youtube"\s*
data-video-meta="([^"]+)"''', webpage, u'video metadata')
data = json.loads(data_json)
return {
'_type': 'url_transparent',
'url': data['youtubeVideoId'],
'ie_key': 'Youtube',
'id': video_id,
'title': data['title'],
'description': data['description'],
'view_count': int(data['view_count']),
'like_count': int(data['statistic']['like']),
'dislike_count': int(data['statistic']['dislike']),
'thumbnail': data['thumbnail_url'],
}

View File

@@ -12,7 +12,7 @@ from ..utils import (
)
class ORFIE(InfoExtractor):
_VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
_VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -5,7 +5,7 @@ from .common import InfoExtractor
class PBSIE(InfoExtractor):
_VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
_VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
_TEST = {
u'url': u'http://video.pbs.org/video/2365006249/',

View File

@@ -0,0 +1,49 @@
import json
import re
from .common import InfoExtractor
class PodomaticIE(InfoExtractor):
IE_NAME = 'podomatic'
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
_TEST = {
u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
u"file": u"2009-01-02T16_03_35-08_00.mp3",
u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
u"info_dict": {
u"uploader": u"Science Teaching Tips",
u"uploader_id": u"scienceteachingtips",
u"title": u"64. When the Moon Hits Your Eye",
u"duration": 446,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
channel = mobj.group('channel')
json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
'?permalink=true&rtmp=0') %
(mobj.group('proto'), channel, video_id))
data_json = self._download_webpage(
json_url, video_id, note=u'Downloading video info')
data = json.loads(data_json)
video_url = data['downloadLink']
uploader = data['podcast']
title = data['title']
thumbnail = data['imageLocation']
duration = int(data['length'] / 1000.0)
return {
'id': video_id,
'url': video_url,
'title': title,
'uploader': uploader,
'uploader_id': channel,
'thumbnail': thumbnail,
'duration': duration,
}

View File

@@ -12,7 +12,7 @@ from ..aes import (
)
class PornHubIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))'
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))'
_TEST = {
u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
u'file': u'648719015.mp4',

View File

@@ -0,0 +1,51 @@
import re
import os
from .common import InfoExtractor
class PyvideoIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
_TESTS = [{
u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
u'file': u'24_4WWkSmNo.mp4',
u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
u'info_dict': {
u"title": u"Become a logging expert in 30 minutes",
u"description": u"md5:9665350d466c67fb5b1598de379021f7",
u"upload_date": u"20130320",
u"uploader": u"NextDayVideo",
u"uploader_id": u"NextDayVideo",
},
u'add_ie': ['Youtube'],
},
{
u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
u'info_dict': {
u'id': u'2542',
u'ext': u'm4v',
u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
if m_youtube is not None:
return self.url_result(m_youtube.group(1), 'Youtube')
title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
webpage, u'title', flags=re.DOTALL)
video_url = self._search_regex([r'<source src="(.*?)"',
r'<dt>Download</dt>.*?<a href="(.+?)"'],
webpage, u'video url', flags=re.DOTALL)
return {
'id': video_id,
'title': os.path.splitext(title)[0],
'url': video_url,
}

View File

@@ -30,7 +30,7 @@ class RedTubeIE(InfoExtractor):
r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
video_title = self._html_search_regex(
r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
webpage, u'title')
# No self-labeling, but they describe themselves as

View File

@@ -7,14 +7,15 @@ from ..utils import (
ExtractorError,
)
class RTLnowIE(InfoExtractor):
"""Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?rtlnitronow\.de/|(?:www\.)?superrtlnow\.de/|(?:www\.)?n-tvnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
_TESTS = [{
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
u'file': u'90419.flv',
u'info_dict': {
u'upload_date': u'20070416',
u'upload_date': u'20070416',
u'title': u'Ahornallee - Folge 1 - Der Einzug',
u'description': u'Folge 1 - Der Einzug',
},

View File

@@ -11,7 +11,7 @@ from ..utils import (
class RutubeIE(InfoExtractor):
_VALID_URL = r'https?://rutube.ru/video/(?P<long_id>\w+)'
_VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'
_TEST = {
u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',

View File

@@ -1,7 +1,6 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -35,12 +34,11 @@ class SinaIE(InfoExtractor):
def _extract_video(self, video_id):
data = compat_urllib_parse.urlencode({'vid': video_id})
url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
video_id, u'Downloading video url')
image_page = self._download_webpage(
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
video_id, u'Downloading thumbnail info')
url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
return {'id': video_id,
'url': url_doc.find('./durl/url').text,

View File

@@ -4,7 +4,7 @@ from .common import InfoExtractor
class SlashdotIE(InfoExtractor):
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
_VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'
_TEST = {
u'add_ie': ['Ooyala'],

View File

@@ -0,0 +1,356 @@
# encoding: utf-8
import re
import json
import hashlib
import uuid
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
)
class SmotriIE(InfoExtractor):
IE_DESC = u'Smotri.com'
IE_NAME = u'smotri'
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
_TESTS = [
# real video id 2610366
{
u'url': u'http://smotri.com/video/view/?id=v261036632ab',
u'file': u'v261036632ab.mp4',
u'md5': u'2a7b08249e6f5636557579c368040eb9',
u'info_dict': {
u'title': u'катастрофа с камер видеонаблюдения',
u'uploader': u'rbc2008',
u'uploader_id': u'rbc08',
u'upload_date': u'20131118',
u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
},
},
# real video id 57591
{
u'url': u'http://smotri.com/video/view/?id=v57591cb20',
u'file': u'v57591cb20.flv',
u'md5': u'830266dfc21f077eac5afd1883091bcd',
u'info_dict': {
u'title': u'test',
u'uploader': u'Support Photofile@photofile',
u'uploader_id': u'support-photofile',
u'upload_date': u'20070704',
u'description': u'test, видео test',
u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
},
},
# video-password
{
u'url': u'http://smotri.com/video/view/?id=v1390466a13c',
u'file': u'v1390466a13c.mp4',
u'md5': u'f6331cef33cad65a0815ee482a54440b',
u'info_dict': {
u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
u'uploader': u'timoxa40',
u'uploader_id': u'timoxa40',
u'upload_date': u'20100404',
u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
},
u'params': {
u'videopassword': u'qwerty',
},
},
# age limit + video-password
{
u'url': u'http://smotri.com/video/view/?id=v15408898bcf',
u'file': u'v15408898bcf.flv',
u'md5': u'91e909c9f0521adf5ee86fbe073aad70',
u'info_dict': {
u'title': u'этот ролик не покажут по ТВ',
u'uploader': u'zzxxx',
u'uploader_id': u'ueggb',
u'upload_date': u'20101001',
u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
u'age_limit': 18,
u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
},
u'params': {
u'videopassword': u'333'
}
}
]
_SUCCESS = 0
_PASSWORD_NOT_VERIFIED = 1
_PASSWORD_DETECTED = 2
_VIDEO_NOT_FOUND = 3
def _search_meta(self, name, html, display_name=None):
if display_name is None:
display_name = name
return self._html_search_regex(
r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
html, display_name, fatal=False)
return self._html_search_meta(name, html, display_name)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
real_video_id = mobj.group('realvideoid')
# Download video JSON data
video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON')
video_json = json.loads(video_json_page)
status = video_json['status']
if status == self._VIDEO_NOT_FOUND:
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
# video-password set
video_password = self._downloader.params.get('videopassword', None)
if not video_password:
raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True)
video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)')
video_json = json.loads(video_json_page)
status = video_json['status']
if status == self._PASSWORD_NOT_VERIFIED:
raise ExtractorError(u'Video password is invalid', expected=True)
if status != self._SUCCESS:
raise ExtractorError(u'Unexpected status value %s' % status)
# Extract the URL of the video
video_url = video_json['file_data']
# Video JSON does not provide enough meta data
# We will extract some from the video web page instead
video_page_url = 'http://' + mobj.group('url')
video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
# Adult content
if re.search(u'EroConfirmText">', video_page) is not None:
self.report_age_confirmation()
confirm_string = self._html_search_regex(
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
video_page, u'confirm string')
confirm_url = video_page_url + '&confirm=%s' % confirm_string
video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)')
adult_content = True
else:
adult_content = False
# Extract the rest of meta data
video_title = self._search_meta(u'name', video_page, u'title')
if not video_title:
video_title = video_url.rsplit('/', 1)[-1]
video_description = self._search_meta(u'description', video_page)
END_TEXT = u' на сайте Smotri.com'
if video_description.endswith(END_TEXT):
video_description = video_description[:-len(END_TEXT)]
START_TEXT = u'Смотреть онлайн ролик '
if video_description.startswith(START_TEXT):
video_description = video_description[len(START_TEXT):]
video_thumbnail = self._search_meta(u'thumbnail', video_page)
upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
video_upload_date = (
(
upload_date_m.group('year') +
upload_date_m.group('month') +
upload_date_m.group('day')
)
if upload_date_m else None
)
duration_str = self._search_meta(u'duration', video_page)
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
video_duration = (
(
(int(duration_m.group('hours')) * 60 * 60) +
(int(duration_m.group('minutes')) * 60) +
int(duration_m.group('seconds'))
)
if duration_m else None
)
video_uploader = self._html_search_regex(
u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
video_uploader_id = self._html_search_regex(
u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
video_view_count = self._html_search_regex(
u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
return {
'id': video_id,
'url': video_url,
'title': video_title,
'thumbnail': video_thumbnail,
'description': video_description,
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'video_duration': video_duration,
'view_count': video_view_count,
'age_limit': 18 if adult_content else 0,
'video_page_url': video_page_url
}
class SmotriCommunityIE(InfoExtractor):
IE_DESC = u'Smotri.com community videos'
IE_NAME = u'smotri:community'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
community_id = mobj.group('communityid')
url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
rss = self._download_xml(url, community_id, u'Downloading community RSS')
entries = [self.url_result(video_url.text, 'Smotri')
for video_url in rss.findall('./channel/item/link')]
description_text = rss.find('./channel/description').text
community_title = self._html_search_regex(
u'^Видео сообщества "([^"]+)"$', description_text, u'community title')
return self.playlist_result(entries, community_id, community_title)
class SmotriUserIE(InfoExtractor):
IE_DESC = u'Smotri.com user videos'
IE_NAME = u'smotri:user'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('userid')
url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
rss = self._download_xml(url, user_id, u'Downloading user RSS')
entries = [self.url_result(video_url.text, 'Smotri')
for video_url in rss.findall('./channel/item/link')]
description_text = rss.find('./channel/description').text
user_nickname = self._html_search_regex(
u'^Видео режиссера (.*)$', description_text,
u'user nickname')
return self.playlist_result(entries, user_id, user_nickname)
class SmotriBroadcastIE(InfoExtractor):
IE_DESC = u'Smotri.com broadcasts'
IE_NAME = u'smotri:broadcast'
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
broadcast_id = mobj.group('broadcastid')
broadcast_url = 'http://' + mobj.group('url')
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, u'Downloading broadcast page')
if re.search(u'>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
raise ExtractorError(u'Broadcast %s does not exist' % broadcast_id, expected=True)
# Adult content
if re.search(u'EroConfirmText">', broadcast_page) is not None:
(username, password) = self._get_login_info()
if username is None:
raise ExtractorError(u'Erotic broadcasts allowed only for registered users, '
u'use --username and --password options to provide account credentials.', expected=True)
# Log in
login_form_strs = {
u'login-hint53': '1',
u'confirm_erotic': '1',
u'login': username,
u'password': password,
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
login_url = broadcast_url + '/?no_redirect=1'
request = compat_urllib_request.Request(login_url, login_data)
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
broadcast_page = self._download_webpage(
request, broadcast_id, note=u'Logging in and confirming age')
if re.search(u'>Неверный логин или пароль<', broadcast_page) is not None:
raise ExtractorError(u'Unable to log in: bad username or password', expected=True)
adult_content = True
else:
adult_content = False
ticket = self._html_search_regex(
u'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
broadcast_page, u'broadcast ticket')
url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
broadcast_password = self._downloader.params.get('videopassword', None)
if broadcast_password:
url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
broadcast_json_page = self._download_webpage(url, broadcast_id, u'Downloading broadcast JSON')
try:
broadcast_json = json.loads(broadcast_json_page)
protected_broadcast = broadcast_json['_pass_protected'] == 1
if protected_broadcast and not broadcast_password:
raise ExtractorError(u'This broadcast is protected by a password, use the --video-password option', expected=True)
broadcast_offline = broadcast_json['is_play'] == 0
if broadcast_offline:
raise ExtractorError(u'Broadcast %s is offline' % broadcast_id, expected=True)
rtmp_url = broadcast_json['_server']
if not rtmp_url.startswith('rtmp://'):
raise ExtractorError(u'Unexpected broadcast rtmp URL')
broadcast_playpath = broadcast_json['_streamName']
broadcast_thumbnail = broadcast_json['_imgURL']
broadcast_title = broadcast_json['title']
broadcast_description = broadcast_json['description']
broadcaster_nick = broadcast_json['nick']
broadcaster_login = broadcast_json['login']
rtmp_conn = 'S:%s' % uuid.uuid4().hex
except KeyError:
if protected_broadcast:
raise ExtractorError(u'Bad broadcast password', expected=True)
raise ExtractorError(u'Unexpected broadcast JSON')
return {
'id': broadcast_id,
'url': rtmp_url,
'title': broadcast_title,
'thumbnail': broadcast_thumbnail,
'description': broadcast_description,
'uploader': broadcaster_nick,
'uploader_id': broadcaster_login,
'age_limit': 18 if adult_content else 0,
'ext': 'flv',
'play_path': broadcast_playpath,
'rtmp_live': True,
'rtmp_conn': rtmp_conn
}

View File

@@ -1,3 +1,4 @@
# encoding: utf-8
import json
import re
import itertools
@@ -23,9 +24,12 @@ class SoundcloudIE(InfoExtractor):
"""
_VALID_URL = r'''^(?:https?://)?
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
(?:(?:(?:www\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/
(?!sets/)(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|(?P<widget>w.soundcloud.com/player/?.*?url=.*)
|(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
)
'''
IE_NAME = u'soundcloud'
@@ -56,6 +60,32 @@ class SoundcloudIE(InfoExtractor):
u'skip_download': True,
},
},
# private link
{
u'url': u'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
u'md5': u'aa0dd32bfea9b0c5ef4f02aacd080604',
u'info_dict': {
u'id': u'123998367',
u'ext': u'mp3',
u'title': u'Youtube - Dl Test Video \'\' Ä↭',
u'uploader': u'jaimeMF',
u'description': u'test chars: \"\'/\\ä↭',
u'upload_date': u'20131209',
},
},
# downloadable song
{
u'url': u'https://soundcloud.com/simgretina/just-your-problem-baby-1',
u'md5': u'56a8b69568acaa967b4c49f9d1d52d19',
u'info_dict': {
u'id': u'105614606',
u'ext': u'wav',
u'title': u'Just Your Problem Baby (Acapella)',
u'description': u'Vocals',
u'uploader': u'Sim Gretina',
u'upload_date': u'20130815',
},
},
]
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
@@ -73,47 +103,83 @@ class SoundcloudIE(InfoExtractor):
def _resolv_url(cls, url):
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
def _extract_info_dict(self, info, full_title=None, quiet=False):
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
track_id = compat_str(info['id'])
name = full_title or track_id
if quiet == False:
if quiet:
self.report_extraction(name)
thumbnail = info['artwork_url']
if thumbnail is not None:
thumbnail = thumbnail.replace('-large', '-t500x500')
ext = u'mp3'
result = {
'id': track_id,
'id': track_id,
'uploader': info['user']['username'],
'upload_date': unified_strdate(info['created_at']),
'title': info['title'],
'ext': info.get('original_format', u'mp3'),
'title': info['title'],
'description': info['description'],
'thumbnail': thumbnail,
}
if info.get('downloadable', False):
# We can build a direct link to the song
result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
format_url = (
u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
track_id, self._CLIENT_ID))
result['formats'] = [{
'format_id': 'download',
'ext': info.get('original_format', u'mp3'),
'url': format_url,
'vcodec': 'none',
}]
else:
# We have to retrieve the url
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
stream_json = self._download_webpage(
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
streams_url,
track_id, u'Downloading track url')
# There should be only one entry in the dictionary
key, stream_url = list(json.loads(stream_json).items())[0]
if key.startswith(u'http'):
result['url'] = stream_url
elif key.startswith(u'rtmp'):
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = stream_url.split('mp3:', 1)
result.update({
'url': url,
'play_path': 'mp3:' + path,
})
else:
formats = []
format_dict = json.loads(stream_json)
for key, stream_url in format_dict.items():
if key.startswith(u'http'):
formats.append({
'format_id': key,
'ext': ext,
'url': stream_url,
'vcodec': 'none',
})
elif key.startswith(u'rtmp'):
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = stream_url.split('mp3:', 1)
formats.append({
'format_id': key,
'url': url,
'play_path': 'mp3:' + path,
'ext': ext,
'vcodec': 'none',
})
if not formats:
# We fallback to the stream_url in the original info, this
# cannot be always used, sometimes it can give an HTTP 404 error
result['url'] = info['stream_url'] + '?client_id=' + self._CLIENT_ID,
formats.append({
'format_id': u'fallback',
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
'ext': ext,
'vcodec': 'none',
})
def format_pref(f):
if f['format_id'].startswith('http'):
return 2
if f['format_id'].startswith('rtmp'):
return 1
return 0
formats.sort(key=format_pref)
result['formats'] = formats
return result
@@ -123,6 +189,7 @@ class SoundcloudIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
track_id = mobj.group('track_id')
token = None
if track_id is not None:
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
full_title = track_id
@@ -131,19 +198,22 @@ class SoundcloudIE(InfoExtractor):
return self.url_result(query['url'][0], ie='Soundcloud')
else:
# extract uploader (which is in the url)
uploader = mobj.group(1)
uploader = mobj.group('uploader')
# extract simple title (uploader + slug of song title)
slug_title = mobj.group(2)
full_title = '%s/%s' % (uploader, slug_title)
slug_title = mobj.group('title')
token = mobj.group('token')
full_title = resolve_title = '%s/%s' % (uploader, slug_title)
if token:
resolve_title += '/%s' % token
self.report_resolve(full_title)
url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
url = 'http://soundcloud.com/%s' % resolve_title
info_json_url = self._resolv_url(url)
info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')
info = json.loads(info_json)
return self._extract_info_dict(info, full_title)
return self._extract_info_dict(info, full_title, secret_token=token)
class SoundcloudSetIE(SoundcloudIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
@@ -183,7 +253,7 @@ class SoundcloudSetIE(SoundcloudIE):
class SoundcloudUserIE(SoundcloudIE):
_VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
_VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
IE_NAME = u'soundcloud:user'
# it's in tests/test_playlists.py

View File

@@ -1,15 +1,14 @@
import re
from .mtv import MTVIE, _media_xml_tag
from .mtv import MTVServicesInfoExtractor
class SouthParkStudiosIE(MTVIE):
class SouthParkStudiosIE(MTVServicesInfoExtractor):
IE_NAME = u'southparkstudios.com'
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
# Overwrite MTVIE properties we don't want
_TESTS = [{
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
@@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE):
},
}]
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
thumb_node = itemdoc.find(search_path)
if thumb_node is None:
return None
else:
return thumb_node.attrib['url']
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
url = u'http://www.' + mobj.group(u'url')

View File

@@ -6,7 +6,7 @@ from ..utils import RegexNotFoundError, ExtractorError
class SpaceIE(InfoExtractor):
_VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html'
_VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
_TEST = {
u'add_ie': ['Brightcove'],
u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -33,12 +32,10 @@ class SpiegelIE(InfoExtractor):
r'<div class="module-title">(.*?)</div>', webpage, u'title')
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
xml_code = self._download_webpage(
idoc = self._download_xml(
xml_url, video_id,
note=u'Downloading XML', errnote=u'Failed to download XML')
idoc = xml.etree.ElementTree.fromstring(xml_code)
formats = [
{
'format_id': n.tag.rpartition('type')[2],

View File

@@ -1,14 +1,7 @@
import re
import socket
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_str,
compat_urllib_error,
compat_urllib_request,
ExtractorError,
orderedSet,
unescapeHTML,
@@ -18,7 +11,7 @@ from ..utils import (
class StanfordOpenClassroomIE(InfoExtractor):
IE_NAME = u'stanfordoc'
IE_DESC = u'Stanford Open ClassRoom'
_VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
_VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
_TEST = {
u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
u'file': u'PracticalUnix_intro-environment.mp4',
@@ -45,11 +38,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
self.report_extraction(info['id'])
baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
xmlUrl = baseUrl + video + '.xml'
try:
metaXml = compat_urllib_request.urlopen(xmlUrl).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
mdoc = xml.etree.ElementTree.fromstring(metaXml)
mdoc = self._download_xml(xmlUrl, info['id'])
try:
info['title'] = mdoc.findall('./title')[0].text
info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
@@ -95,12 +84,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
'upload_date': None,
}
self.report_download_webpage(info['id'])
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
try:
rootpage = compat_urllib_request.urlopen(rootURL).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to download course info page: ' + compat_str(err))
rootpage = self._download_webpage(rootURL, info['id'],
errnote=u'Unable to download course info page')
info['title'] = info['id']

View File

@@ -21,6 +21,7 @@ class StreamcloudIE(InfoExtractor):
u'title': u'youtube-dl test video \'/\\ ä ↭',
u'duration': 9,
},
u'skip': u'Only available from the EU'
}
def _real_extract(self, url):

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -32,8 +31,7 @@ class TeamcocoIE(InfoExtractor):
self.report_extraction(video_id)
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
data = self._download_xml(data_url, video_id, 'Downloading data webpage')
qualities = ['500k', '480p', '1000k', '720p', '1080p']

View File

@@ -7,7 +7,7 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player."""
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
_VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
_TEST = {
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
u'file': u'10635995.mp4',

View File

@@ -0,0 +1,80 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
xpath_with_ns,
)
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
class ThePlatformIE(InfoExtractor):
_VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)'
_TEST = {
# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
u'info_dict': {
u'id': u'e9I_cZgTgIPd',
u'ext': u'flv',
u'title': u'Blackberry\'s big, bold Z30',
u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
u'duration': 247,
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
def _get_info(self, video_id):
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
'format=smil&mbr=true'.format(video_id))
meta = self._download_xml(smil_url, video_id)
try:
error_msg = next(
n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref'))
if n.attrib.get('title') == u'Geographic Restriction')
except StopIteration:
pass
else:
raise ExtractorError(error_msg, expected=True)
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)
head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body'))
base_url = head.find(_x('smil:meta')).attrib['base']
switch = body.find(_x('smil:switch'))
formats = []
for f in switch.findall(_x('smil:video')):
attr = f.attrib
formats.append({
'url': base_url,
'play_path': 'mp4:' + attr['src'],
'ext': 'flv',
'width': int(attr['width']),
'height': int(attr['height']),
'vbr': int(attr['system-bitrate']),
})
formats.sort(key=lambda f: (f['height'], f['width'], f['vbr']))
return {
'id': video_id,
'title': info['title'],
'formats': formats,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
'duration': info['duration']//1000,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
return self._get_info(video_id)

View File

@@ -1,6 +1,5 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -40,11 +39,9 @@ class TouTvIE(InfoExtractor):
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
streams_webpage = self._download_webpage(
streams_doc = self._download_xml(
streams_url, video_id, note=u'Downloading stream list')
streams_doc = xml.etree.ElementTree.fromstring(
streams_webpage.encode('utf-8'))
video_url = next(n.text
for n in streams_doc.findall('.//choice/url')
if u'//ad.doubleclick' not in n.text)

View File

@@ -1,6 +1,5 @@
import json
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -36,12 +35,10 @@ class TriluliluIE(InfoExtractor):
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
u'video-formats2' % log)
format_str = self._download_webpage(
format_doc = self._download_xml(
format_url, video_id,
note=u'Downloading formats',
errnote=u'Error while downloading formats')
format_doc = xml.etree.ElementTree.fromstring(format_str)
video_url_template = (
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
@@ -58,7 +55,7 @@ class TriluliluIE(InfoExtractor):
for fnode in format_doc.findall('./formats/format')
]
info = {
return {
'_type': 'video',
'id': video_id,
'formats': formats,
@@ -67,7 +64,3 @@ class TriluliluIE(InfoExtractor):
'thumbnail': thumbnail,
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -3,7 +3,7 @@ import re
from .common import InfoExtractor
class UnistraIE(InfoExtractor):
_VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)'
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
_TEST = {
u'url': u'http://utv.unistra.fr/video.php?id_video=154',

View File

@@ -9,7 +9,7 @@ from ..utils import (
)
class VeeHDIE(InfoExtractor):
_VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)'
_VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
_TEST = {
u'url': u'http://veehd.com/video/4686958',

View File

@@ -15,7 +15,7 @@ class VevoIE(InfoExtractor):
Accepts urls from vevo.com or in the format 'vevo:{id}'
(currently used by MTVIE)
"""
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
_VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
_TESTS = [{
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
u'file': u'GB1101300280.mp4',
@@ -24,7 +24,7 @@ class VevoIE(InfoExtractor):
u"upload_date": u"20130624",
u"uploader": u"Hurts",
u"title": u"Somebody to Die For",
u"duration": 230,
u"duration": 230.12,
u"width": 1920,
u"height": 1080,
}

Some files were not shown because too many files have changed in this diff Show More