Compare commits
	
		
			81 Commits
		
	
	
		
			2015.02.02
			...
			2015.02.10
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 34814eb66e | ||
|  | 3a5bcd0326 | ||
|  | 99c2398bc6 | ||
|  | 28f1272870 | ||
|  | f18e3a2fc0 | ||
|  | c4c5dc27cb | ||
|  | 2caf182f37 | ||
|  | 43f244b6d5 | ||
|  | 1309b396d0 | ||
|  | ba61796458 | ||
|  | 3255fe7141 | ||
|  | e98b8e79ea | ||
|  | 196121c51b | ||
|  | 5269028951 | ||
|  | f7bc056b5a | ||
|  | a0f7198544 | ||
|  | dd8930684e | ||
|  | bdb186f3b0 | ||
|  | 64f9baa084 | ||
|  | b29231c040 | ||
|  | 6128bf07a9 | ||
|  | 2ec19e9558 | ||
|  | 9ddb6925bf | ||
|  | 12931e1c6e | ||
|  | 41c23b0da5 | ||
|  | 2578ab19e4 | ||
|  | d87ec897e9 | ||
|  | 3bd4bffb1c | ||
|  | c36b09a502 | ||
|  | 641eb10d34 | ||
|  | 955c5505e7 | ||
|  | 69319969de | ||
|  | a14292e848 | ||
|  | 5d678df64a | ||
|  | 8ca8cbe2bd | ||
|  | ba322d8209 | ||
|  | 2f38289b79 | ||
|  | f23a3ca699 | ||
|  | 77d2b106cc | ||
|  | c0e46412e9 | ||
|  | 0161353d7d | ||
|  | 2b4ecde2c8 | ||
|  | b3a286d69d | ||
|  | 467d3c9a0c | ||
|  | ad5747bad1 | ||
|  | d6eb66ed3c | ||
|  | 7f2a9f1b49 | ||
|  | 1e1896f2de | ||
|  | c831973366 | ||
|  | 1a2548d9e9 | ||
|  | 3900eec27c | ||
|  | a02d212638 | ||
|  | 9c91a8fa70 | ||
|  | 41469f335e | ||
|  | 67ce4f8820 | ||
|  | bc63d56cca | ||
|  | c893d70805 | ||
|  | 3ee6e02564 | ||
|  | e3aaace400 | ||
|  | 300753a069 | ||
|  | f13b88c616 | ||
|  | 60ca389c64 | ||
|  | 1b0f3919c1 | ||
|  | 6a348cf7d5 | ||
|  | 9e91449c8d | ||
|  | 25e5ebf382 | ||
|  | 7dfc356625 | ||
|  | 58ba6c0160 | ||
|  | f076b63821 | ||
|  | 12f0454cd6 | ||
|  | cd7342755f | ||
|  | 9bb8e0a3f9 | ||
|  | 1a6373ef39 | ||
|  | f6c24009be | ||
|  | d862042301 | ||
|  | 23d9ded655 | ||
|  | 4c1a017e69 | ||
|  | ee623d9247 | ||
|  | 330537d08a | ||
|  | 2cf0ecac7b | ||
|  | d200b11c7e | 
							
								
								
									
										2
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -108,3 +108,5 @@ Enam Mijbah Noor | ||||
| David Luhmer | ||||
| Shaya Goldberg | ||||
| Paul Hartmann | ||||
| Frans de Jonge | ||||
| Robin de Rooij | ||||
|   | ||||
| @@ -1,4 +1,6 @@ | ||||
| Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. | ||||
| **Please include the full output of youtube-dl when run with `-v`**. | ||||
|  | ||||
| The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. | ||||
|  | ||||
| Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist): | ||||
|  | ||||
| @@ -122,7 +124,7 @@ If you want to add support for a new site, you can follow this quick list (assum | ||||
| 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). | ||||
| 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. | ||||
| 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. | ||||
| 8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501). | ||||
| 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). | ||||
| 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: | ||||
|  | ||||
|         $ git add youtube_dl/extractor/__init__.py | ||||
|   | ||||
							
								
								
									
										5
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								Makefile
									
									
									
									
									
								
							| @@ -1,10 +1,7 @@ | ||||
| all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites | ||||
|  | ||||
| clean: | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp | ||||
|  | ||||
| cleanall: clean | ||||
| 	rm -f youtube-dl youtube-dl.exe | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe | ||||
|  | ||||
| PREFIX ?= /usr/local | ||||
| BINDIR ?= $(PREFIX)/bin | ||||
|   | ||||
							
								
								
									
										36
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										36
									
								
								README.md
									
									
									
									
									
								
							| @@ -292,18 +292,20 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      video results by putting a condition in | ||||
|                                      brackets, as in -f "best[height=720]" (or | ||||
|                                      -f "[filesize>10M]").  This works for | ||||
|                                      filesize, height, width, tbr, abr, vbr, and | ||||
|                                      fps and the comparisons <, <=, >, >=, =, != | ||||
|                                      . Formats for which the value is not known | ||||
|                                      are excluded unless you put a question mark | ||||
|                                      (?) after the operator. You can combine | ||||
|                                      format filters, so  -f "[height <=? | ||||
|                                      720][tbr>500]" selects up to 720p videos | ||||
|                                      (or videos where the height is not known) | ||||
|                                      with a bitrate of at least 500 KBit/s. By | ||||
|                                      default, youtube-dl will pick the best | ||||
|                                      quality. Use commas to download multiple | ||||
|                                      audio formats, such as -f | ||||
|                                      filesize, height, width, tbr, abr, vbr, | ||||
|                                      asr, and fps and the comparisons <, <=, >, | ||||
|                                      >=, =, != and for ext, acodec, vcodec, | ||||
|                                      container, and protocol and the comparisons | ||||
|                                      =, != . Formats for which the value is not | ||||
|                                      known are excluded unless you put a | ||||
|                                      question mark (?) after the operator. You | ||||
|                                      can combine format filters, so  -f "[height | ||||
|                                      <=? 720][tbr>500]" selects up to 720p | ||||
|                                      videos (or videos where the height is not | ||||
|                                      known) with a bitrate of at least 500 | ||||
|                                      KBit/s. By default, youtube-dl will pick | ||||
|                                      the best quality. Use commas to download | ||||
|                                      multiple audio formats, such as -f | ||||
|                                      136/137/mp4/bestvideo,140/m4a/bestaudio. | ||||
|                                      You can merge the video and audio of two | ||||
|                                      formats into a single file using -f <video- | ||||
| @@ -532,6 +534,14 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt | ||||
|     youtube-dl -- -wNyEUrxzFU | ||||
|     youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU" | ||||
|  | ||||
| ### Can you add support for this anime video site, or site which shows current movies for free? | ||||
|  | ||||
| As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion in youtube-dl. | ||||
|  | ||||
| A note on the service that they don't host the infringing content, but just link to those who do, is evidence that the service should **not** be included into youtube-dl. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization. | ||||
|  | ||||
| Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content. | ||||
|  | ||||
| ### How can I detect whether a given URL is supported by youtube-dl? | ||||
|  | ||||
| For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug. | ||||
| @@ -728,7 +738,7 @@ In particular, every site support request issue should only pertain to services | ||||
|  | ||||
| ###  Is anyone going to need the feature? | ||||
|  | ||||
| Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. | ||||
| Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. | ||||
|  | ||||
| ###  Is your question about youtube-dl? | ||||
|  | ||||
|   | ||||
| @@ -35,7 +35,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us | ||||
| if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi | ||||
|  | ||||
| /bin/echo -e "\n### First of all, testing..." | ||||
| make cleanall | ||||
| make clean | ||||
| if $skip_tests ; then | ||||
|     echo 'SKIPPING TESTS' | ||||
| else | ||||
| @@ -45,9 +45,9 @@ fi | ||||
| /bin/echo -e "\n### Changing version in version.py..." | ||||
| sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py | ||||
|  | ||||
| /bin/echo -e "\n### Committing README.md and youtube_dl/version.py..." | ||||
| make README.md | ||||
| git add README.md youtube_dl/version.py | ||||
| /bin/echo -e "\n### Committing documentation and youtube_dl/version.py..." | ||||
| make README.md CONTRIBUTING.md supportedsites | ||||
| git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py | ||||
| git commit -m "release $version" | ||||
|  | ||||
| /bin/echo -e "\n### Now tagging, signing and pushing..." | ||||
|   | ||||
| @@ -9,16 +9,21 @@ | ||||
|  - **8tracks** | ||||
|  - **9gag** | ||||
|  - **abc.net.au** | ||||
|  - **Abc7News** | ||||
|  - **AcademicEarth:Course** | ||||
|  - **AddAnime** | ||||
|  - **AdobeTV** | ||||
|  - **AdultSwim** | ||||
|  - **Aftenposten** | ||||
|  - **Aftonbladet** | ||||
|  - **AlJazeera** | ||||
|  - **Allocine** | ||||
|  - **AlphaPorno** | ||||
|  - **anitube.se** | ||||
|  - **AnySex** | ||||
|  - **Aparat** | ||||
|  - **AppleDailyAnimationNews** | ||||
|  - **AppleDailyRealtimeNews** | ||||
|  - **AppleTrailers** | ||||
|  - **archive.org**: archive.org videos | ||||
|  - **ARD** | ||||
| @@ -30,8 +35,10 @@ | ||||
|  - **arte.tv:ddc** | ||||
|  - **arte.tv:embed** | ||||
|  - **arte.tv:future** | ||||
|  - **AtresPlayer** | ||||
|  - **ATTTechChannel** | ||||
|  - **audiomack** | ||||
|  - **AUEngine** | ||||
|  - **audiomack:album** | ||||
|  - **Azubu** | ||||
|  - **bambuser** | ||||
|  - **bambuser:channel** | ||||
| @@ -71,8 +78,10 @@ | ||||
|  - **cmt.com** | ||||
|  - **CNET** | ||||
|  - **CNN** | ||||
|  - **CNNArticle** | ||||
|  - **CNNBlogs** | ||||
|  - **CollegeHumor** | ||||
|  - **CollegeRama** | ||||
|  - **ComCarCoff** | ||||
|  - **ComedyCentral** | ||||
|  - **ComedyCentralShows**: The Daily Show / The Colbert Report | ||||
| @@ -82,23 +91,27 @@ | ||||
|  - **Crunchyroll** | ||||
|  - **crunchyroll:playlist** | ||||
|  - **CSpan**: C-SPAN | ||||
|  - **CtsNews** | ||||
|  - **culturebox.francetvinfo.fr** | ||||
|  - **dailymotion** | ||||
|  - **dailymotion:playlist** | ||||
|  - **dailymotion:user** | ||||
|  - **daum.net** | ||||
|  - **DBTV** | ||||
|  - **DctpTv** | ||||
|  - **DeezerPlaylist** | ||||
|  - **defense.gouv.fr** | ||||
|  - **Discovery** | ||||
|  - **divxstage**: DivxStage | ||||
|  - **Dotsub** | ||||
|  - **DRBonanza** | ||||
|  - **Dropbox** | ||||
|  - **DrTuber** | ||||
|  - **DRTV** | ||||
|  - **Dump** | ||||
|  - **dvtv**: http://video.aktualne.cz/ | ||||
|  - **EbaumsWorld** | ||||
|  - **EchoMsk** | ||||
|  - **eHow** | ||||
|  - **Einthusan** | ||||
|  - **eitb.tv** | ||||
| @@ -108,6 +121,7 @@ | ||||
|  - **EMPFlix** | ||||
|  - **Engadget** | ||||
|  - **Eporner** | ||||
|  - **EroProfile** | ||||
|  - **Escapist** | ||||
|  - **EveryonesMixtape** | ||||
|  - **exfm**: ex.fm | ||||
| @@ -143,6 +157,7 @@ | ||||
|  - **GDCVault** | ||||
|  - **generic**: Generic downloader that works on some sites | ||||
|  - **GiantBomb** | ||||
|  - **Giga** | ||||
|  - **Glide**: Glide mobile video messages (glide.me) | ||||
|  - **Globo** | ||||
|  - **GodTube** | ||||
| @@ -153,9 +168,14 @@ | ||||
|  - **Grooveshark** | ||||
|  - **Groupon** | ||||
|  - **Hark** | ||||
|  - **HearThisAt** | ||||
|  - **Heise** | ||||
|  - **HellPorno** | ||||
|  - **Helsinki**: helsinki.fi | ||||
|  - **HentaiStigma** | ||||
|  - **HistoricFilms** | ||||
|  - **hitbox** | ||||
|  - **hitbox:live** | ||||
|  - **HornBunny** | ||||
|  - **HostingBulk** | ||||
|  - **HotNewHipHop** | ||||
| @@ -182,6 +202,7 @@ | ||||
|  - **jpopsuki.tv** | ||||
|  - **Jukebox** | ||||
|  - **Kankan** | ||||
|  - **Karaoketv** | ||||
|  - **keek** | ||||
|  - **KeezMovies** | ||||
|  - **KhanAcademy** | ||||
| @@ -195,6 +216,7 @@ | ||||
|  - **LiveLeak** | ||||
|  - **livestream** | ||||
|  - **livestream:original** | ||||
|  - **LnkGo** | ||||
|  - **lrt.lt** | ||||
|  - **lynda**: lynda.com videos | ||||
|  - **lynda:course**: lynda.com online courses | ||||
| @@ -235,6 +257,7 @@ | ||||
|  - **MySpass** | ||||
|  - **myvideo** | ||||
|  - **MyVidster** | ||||
|  - **n-tv.de** | ||||
|  - **Naver** | ||||
|  - **NBA** | ||||
|  - **NBC** | ||||
| @@ -242,11 +265,16 @@ | ||||
|  - **ndr**: NDR.de - Mediathek | ||||
|  - **NDTV** | ||||
|  - **NerdCubedFeed** | ||||
|  - **Nerdist** | ||||
|  - **Netzkino** | ||||
|  - **Newgrounds** | ||||
|  - **Newstube** | ||||
|  - **NextMedia** | ||||
|  - **NextMediaActionNews** | ||||
|  - **nfb**: National Film Board of Canada | ||||
|  - **nfl.com** | ||||
|  - **nhl.com** | ||||
|  - **nhl.com:news**: NHL news | ||||
|  - **nhl.com:videocenter**: NHL videocenter category | ||||
|  - **niconico**: ニコニコ動画 | ||||
|  - **NiconicoPlaylist** | ||||
| @@ -257,18 +285,20 @@ | ||||
|  - **Nowness** | ||||
|  - **nowvideo**: NowVideo | ||||
|  - **npo.nl** | ||||
|  - **npo.nl:live** | ||||
|  - **NRK** | ||||
|  - **NRKTV** | ||||
|  - **NTV** | ||||
|  - **ntv.ru** | ||||
|  - **Nuvid** | ||||
|  - **NYTimes** | ||||
|  - **ocw.mit.edu** | ||||
|  - **OktoberfestTV** | ||||
|  - **on.aol.com** | ||||
|  - **Ooyala** | ||||
|  - **OpenFilm** | ||||
|  - **orf:fm4**: radio FM4 | ||||
|  - **orf:oe1**: Radio Österreich 1 | ||||
|  - **orf:tvthek**: ORF TVthek | ||||
|  - **ORFFM4**: radio FM4 | ||||
|  - **parliamentlive.tv**: UK parliament videos | ||||
|  - **Patreon** | ||||
|  - **PBS** | ||||
| @@ -290,6 +320,7 @@ | ||||
|  - **Pyvideo** | ||||
|  - **QuickVid** | ||||
|  - **radio.de** | ||||
|  - **radiobremen** | ||||
|  - **radiofrance** | ||||
|  - **Rai** | ||||
|  - **RBMARadio** | ||||
| @@ -300,6 +331,8 @@ | ||||
|  - **RottenTomatoes** | ||||
|  - **Roxwel** | ||||
|  - **RTBF** | ||||
|  - **Rte** | ||||
|  - **RTL2** | ||||
|  - **RTLnow** | ||||
|  - **rtlxl.nl** | ||||
|  - **RTP** | ||||
| @@ -309,6 +342,7 @@ | ||||
|  - **RUHD** | ||||
|  - **rutube**: Rutube videos | ||||
|  - **rutube:channel**: Rutube channels | ||||
|  - **rutube:embed**: Rutube embedded videos | ||||
|  - **rutube:movie**: Rutube movies | ||||
|  - **rutube:person**: Rutube person videos | ||||
|  - **RUTV**: RUTV.RU | ||||
| @@ -351,12 +385,14 @@ | ||||
|  - **Sport5** | ||||
|  - **SportBox** | ||||
|  - **SportDeutschland** | ||||
|  - **SRMediathek**: Süddeutscher Rundfunk | ||||
|  - **SRMediathek**: Saarländischer Rundfunk | ||||
|  - **stanfordoc**: Stanford Open ClassRoom | ||||
|  - **Steam** | ||||
|  - **streamcloud.eu** | ||||
|  - **StreamCZ** | ||||
|  - **StreetVoice** | ||||
|  - **SunPorno** | ||||
|  - **SVTPlay** | ||||
|  - **SWRMediathek** | ||||
|  - **Syfy** | ||||
|  - **SztvHu** | ||||
| @@ -375,7 +411,9 @@ | ||||
|  - **TeleBruxelles** | ||||
|  - **telecinco.es** | ||||
|  - **TeleMB** | ||||
|  - **TeleTask** | ||||
|  - **TenPlay** | ||||
|  - **TestTube** | ||||
|  - **TF1** | ||||
|  - **TheOnion** | ||||
|  - **ThePlatform** | ||||
| @@ -403,8 +441,16 @@ | ||||
|  - **tv.dfb.de** | ||||
|  - **tvigle**: Интернет-телевидение Tvigle.ru | ||||
|  - **tvp.pl** | ||||
|  - **tvp.pl:Series** | ||||
|  - **TVPlay**: TV3Play and related services | ||||
|  - **Twitch** | ||||
|  - **Tweakers** | ||||
|  - **twitch:bookmarks** | ||||
|  - **twitch:chapter** | ||||
|  - **twitch:past_broadcasts** | ||||
|  - **twitch:profile** | ||||
|  - **twitch:stream** | ||||
|  - **twitch:video** | ||||
|  - **twitch:vod** | ||||
|  - **Ubu** | ||||
|  - **udemy** | ||||
|  - **udemy:course** | ||||
| @@ -433,6 +479,8 @@ | ||||
|  - **videoweed**: VideoWeed | ||||
|  - **Vidme** | ||||
|  - **Vidzi** | ||||
|  - **vier** | ||||
|  - **vier:videos** | ||||
|  - **viki** | ||||
|  - **vimeo** | ||||
|  - **vimeo:album** | ||||
| @@ -460,11 +508,13 @@ | ||||
|  - **WDR** | ||||
|  - **wdr:mobile** | ||||
|  - **WDRMaus**: Sendung mit der Maus | ||||
|  - **WebOfStories** | ||||
|  - **Weibo** | ||||
|  - **Wimp** | ||||
|  - **Wistia** | ||||
|  - **WorldStarHipHop** | ||||
|  - **wrzuta.pl** | ||||
|  - **WSJ**: Wall Street Journal | ||||
|  - **XBef** | ||||
|  - **XboxClips** | ||||
|  - **XHamster** | ||||
| @@ -472,7 +522,9 @@ | ||||
|  - **XNXX** | ||||
|  - **XTube** | ||||
|  - **XTubeUser**: XTube user profile | ||||
|  - **Xuite** | ||||
|  - **XVideos** | ||||
|  - **XXXYMovies** | ||||
|  - **Yahoo**: Yahoo screen and movies | ||||
|  - **YesJapan** | ||||
|  - **Ynet** | ||||
| @@ -491,7 +543,6 @@ | ||||
|  - **youtube:search_url**: YouTube.com search URLs | ||||
|  - **youtube:show**: YouTube.com (multi-season) shows | ||||
|  - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) | ||||
|  - **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks") | ||||
|  - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) | ||||
|  - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) | ||||
|  - **ZDF** | ||||
|   | ||||
| @@ -103,6 +103,16 @@ def expect_info_dict(self, got_dict, expected_dict): | ||||
|             self.assertTrue( | ||||
|                 match_rex.match(got), | ||||
|                 'field %s (value: %r) should match %r' % (info_field, got, match_str)) | ||||
|         elif isinstance(expected, compat_str) and expected.startswith('startswith:'): | ||||
|             got = got_dict.get(info_field) | ||||
|             start_str = expected[len('startswith:'):] | ||||
|             self.assertTrue( | ||||
|                 isinstance(got, compat_str), | ||||
|                 'Expected a %s object, but got %s for field %s' % ( | ||||
|                     compat_str.__name__, type(got).__name__, info_field)) | ||||
|             self.assertTrue( | ||||
|                 got.startswith(start_str), | ||||
|                 'field %s (value: %r) should start with %r' % (info_field, got, start_str)) | ||||
|         elif isinstance(expected, type): | ||||
|             got = got_dict.get(info_field) | ||||
|             self.assertTrue(isinstance(got, expected), | ||||
|   | ||||
| @@ -13,6 +13,7 @@ import copy | ||||
| from test.helper import FakeYDL, assertRegexpMatches | ||||
| from youtube_dl import YoutubeDL | ||||
| from youtube_dl.extractor import YoutubeIE | ||||
| from youtube_dl.postprocessor.common import PostProcessor | ||||
|  | ||||
|  | ||||
| class YDL(FakeYDL): | ||||
| @@ -370,5 +371,35 @@ class TestFormatSelection(unittest.TestCase): | ||||
|             'vbr': 10, | ||||
|         }), '^\s*10k$') | ||||
|  | ||||
|     def test_postprocessors(self): | ||||
|         filename = 'post-processor-testfile.mp4' | ||||
|         audiofile = filename + '.mp3' | ||||
|  | ||||
|         class SimplePP(PostProcessor): | ||||
|             def run(self, info): | ||||
|                 with open(audiofile, 'wt') as f: | ||||
|                     f.write('EXAMPLE') | ||||
|                 info['filepath'] | ||||
|                 return False, info | ||||
|  | ||||
|         def run_pp(params): | ||||
|             with open(filename, 'wt') as f: | ||||
|                 f.write('EXAMPLE') | ||||
|             ydl = YoutubeDL(params) | ||||
|             ydl.add_post_processor(SimplePP()) | ||||
|             ydl.post_process(filename, {'filepath': filename}) | ||||
|  | ||||
|         run_pp({'keepvideo': True}) | ||||
|         self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) | ||||
|         self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) | ||||
|         os.unlink(filename) | ||||
|         os.unlink(audiofile) | ||||
|  | ||||
|         run_pp({'keepvideo': False}) | ||||
|         self.assertFalse(os.path.exists(filename), '%s exists' % filename) | ||||
|         self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) | ||||
|         os.unlink(audiofile) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -156,6 +156,9 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual( | ||||
|             unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False), | ||||
|             '20141126') | ||||
|         self.assertEqual( | ||||
|             unified_strdate('2/2/2015 6:47:40 PM', day_first=False), | ||||
|             '20150202') | ||||
|  | ||||
|     def test_find_xpath_attr(self): | ||||
|         testxml = '''<root> | ||||
|   | ||||
| @@ -826,27 +826,44 @@ class YoutubeDL(object): | ||||
|             '!=': operator.ne, | ||||
|         } | ||||
|         operator_rex = re.compile(r'''(?x)\s*\[ | ||||
|             (?P<key>width|height|tbr|abr|vbr|filesize|fps) | ||||
|             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps) | ||||
|             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* | ||||
|             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) | ||||
|             \]$ | ||||
|             ''' % '|'.join(map(re.escape, OPERATORS.keys()))) | ||||
|         m = operator_rex.search(format_spec) | ||||
|         if m: | ||||
|             try: | ||||
|                 comparison_value = int(m.group('value')) | ||||
|             except ValueError: | ||||
|                 comparison_value = parse_filesize(m.group('value')) | ||||
|                 if comparison_value is None: | ||||
|                     comparison_value = parse_filesize(m.group('value') + 'B') | ||||
|                 if comparison_value is None: | ||||
|                     raise ValueError( | ||||
|                         'Invalid value %r in format specification %r' % ( | ||||
|                             m.group('value'), format_spec)) | ||||
|             op = OPERATORS[m.group('op')] | ||||
|  | ||||
|         if not m: | ||||
|             STR_OPERATORS = { | ||||
|                 '=': operator.eq, | ||||
|                 '!=': operator.ne, | ||||
|             } | ||||
|             str_operator_rex = re.compile(r'''(?x)\s*\[ | ||||
|                 \s*(?P<key>ext|acodec|vcodec|container|protocol) | ||||
|                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)? | ||||
|                 \s*(?P<value>[a-zA-Z0-9_-]+) | ||||
|                 \s*\]$ | ||||
|                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) | ||||
|             m = str_operator_rex.search(format_spec) | ||||
|             if m: | ||||
|                 comparison_value = m.group('value') | ||||
|                 op = STR_OPERATORS[m.group('op')] | ||||
|  | ||||
|         if not m: | ||||
|             raise ValueError('Invalid format specification %r' % format_spec) | ||||
|  | ||||
|         try: | ||||
|             comparison_value = int(m.group('value')) | ||||
|         except ValueError: | ||||
|             comparison_value = parse_filesize(m.group('value')) | ||||
|             if comparison_value is None: | ||||
|                 comparison_value = parse_filesize(m.group('value') + 'B') | ||||
|             if comparison_value is None: | ||||
|                 raise ValueError( | ||||
|                     'Invalid value %r in format specification %r' % ( | ||||
|                         m.group('value'), format_spec)) | ||||
|         op = OPERATORS[m.group('op')] | ||||
|  | ||||
|         def _filter(f): | ||||
|             actual_value = f.get(m.group('key')) | ||||
|             if actual_value is None: | ||||
| @@ -938,6 +955,9 @@ class YoutubeDL(object): | ||||
|             def has_header(self, h): | ||||
|                 return h in self.headers | ||||
|  | ||||
|             def get_header(self, h, default=None): | ||||
|                 return self.headers.get(h, default) | ||||
|  | ||||
|         pr = _PseudoRequest(info_dict['url']) | ||||
|         self.cookiejar.add_cookie_header(pr) | ||||
|         return pr.headers.get('Cookie') | ||||
| @@ -964,9 +984,11 @@ class YoutubeDL(object): | ||||
|             thumbnails.sort(key=lambda t: ( | ||||
|                 t.get('preference'), t.get('width'), t.get('height'), | ||||
|                 t.get('id'), t.get('url'))) | ||||
|             for t in thumbnails: | ||||
|             for i, t in enumerate(thumbnails): | ||||
|                 if 'width' in t and 'height' in t: | ||||
|                     t['resolution'] = '%dx%d' % (t['width'], t['height']) | ||||
|                 if t.get('id') is None: | ||||
|                     t['id'] = '%d' % i | ||||
|  | ||||
|         if thumbnails and 'thumbnail' not in info_dict: | ||||
|             info_dict['thumbnail'] = thumbnails[-1]['url'] | ||||
| @@ -1074,7 +1096,8 @@ class YoutubeDL(object): | ||||
|                                 else self.params['merge_output_format']) | ||||
|                             selected_format = { | ||||
|                                 'requested_formats': formats_info, | ||||
|                                 'format': rf, | ||||
|                                 'format': '%s+%s' % (formats_info[0].get('format'), | ||||
|                                                      formats_info[1].get('format')), | ||||
|                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'), | ||||
|                                                         formats_info[1].get('format_id')), | ||||
|                                 'width': formats_info[0].get('width'), | ||||
| @@ -1523,7 +1546,6 @@ class YoutubeDL(object): | ||||
|             line(f, idlen) for f in formats | ||||
|             if f.get('preference') is None or f['preference'] >= -1000] | ||||
|         if len(formats) > 1: | ||||
|             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' | ||||
|             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' | ||||
|  | ||||
|         header_line = line({ | ||||
|   | ||||
| @@ -6,6 +6,7 @@ from .academicearth import AcademicEarthCourseIE | ||||
| from .addanime import AddAnimeIE | ||||
| from .adobetv import AdobeTVIE | ||||
| from .adultswim import AdultSwimIE | ||||
| from .aftenposten import AftenpostenIE | ||||
| from .aftonbladet import AftonbladetIE | ||||
| from .aljazeera import AlJazeeraIE | ||||
| from .alphaporno import AlphaPornoIE | ||||
| @@ -427,6 +428,7 @@ from .streamcloud import StreamcloudIE | ||||
| from .streamcz import StreamCZIE | ||||
| from .streetvoice import StreetVoiceIE | ||||
| from .sunporno import SunPornoIE | ||||
| from .svtplay import SVTPlayIE | ||||
| from .swrmediathek import SWRMediathekIE | ||||
| from .syfy import SyfyIE | ||||
| from .sztvhu import SztvHuIE | ||||
| @@ -475,6 +477,7 @@ from .tutv import TutvIE | ||||
| from .tvigle import TvigleIE | ||||
| from .tvp import TvpIE, TvpSeriesIE | ||||
| from .tvplay import TVPlayIE | ||||
| from .tweakers import TweakersIE | ||||
| from .twentyfourvideo import TwentyFourVideoIE | ||||
| from .twitch import ( | ||||
|     TwitchVideoIE, | ||||
| @@ -554,6 +557,7 @@ from .wimp import WimpIE | ||||
| from .wistia import WistiaIE | ||||
| from .worldstarhiphop import WorldStarHipHopIE | ||||
| from .wrzuta import WrzutaIE | ||||
| from .wsj import WSJIE | ||||
| from .xbef import XBefIE | ||||
| from .xboxclips import XboxClipsIE | ||||
| from .xhamster import XHamsterIE | ||||
|   | ||||
							
								
								
									
										103
									
								
								youtube_dl/extractor/aftenposten.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								youtube_dl/extractor/aftenposten.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     xpath_with_ns, | ||||
|     xpath_text, | ||||
|     find_xpath_attr, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AftenpostenIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish', | ||||
|         'md5': 'fd828cd29774a729bf4d4425fe192972', | ||||
|         'info_dict': { | ||||
|             'id': '21039', | ||||
|             'ext': 'mov', | ||||
|             'title': 'TRAILER: "Sweatshop" - I can´t take any more', | ||||
|             'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238', | ||||
|             'timestamp': 1416927969, | ||||
|             'upload_date': '20141125', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_id = self._html_search_regex( | ||||
|             r'data-xs-id="(\d+)"', webpage, 'video id') | ||||
|  | ||||
|         data = self._download_xml( | ||||
|             'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id) | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'atom': 'http://www.w3.org/2005/Atom', | ||||
|             'xt': 'http://xstream.dk/', | ||||
|             'media': 'http://search.yahoo.com/mrss/', | ||||
|         } | ||||
|  | ||||
|         entry = data.find(xpath_with_ns('./atom:entry', NS_MAP)) | ||||
|  | ||||
|         title = xpath_text( | ||||
|             entry, xpath_with_ns('./atom:title', NS_MAP), 'title') | ||||
|         description = xpath_text( | ||||
|             entry, xpath_with_ns('./atom:summary', NS_MAP), 'description') | ||||
|         timestamp = parse_iso8601(xpath_text( | ||||
|             entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date')) | ||||
|  | ||||
|         formats = [] | ||||
|         media_group = entry.find(xpath_with_ns('./media:group', NS_MAP)) | ||||
|         for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)): | ||||
|             media_url = media_content.get('url') | ||||
|             if not media_url: | ||||
|                 continue | ||||
|             tbr = int_or_none(media_content.get('bitrate')) | ||||
|             mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url) | ||||
|             if mobj: | ||||
|                 formats.append({ | ||||
|                     'url': mobj.group('url'), | ||||
|                     'play_path': 'mp4:%s' % mobj.group('playpath'), | ||||
|                     'app': mobj.group('app'), | ||||
|                     'ext': 'flv', | ||||
|                     'tbr': tbr, | ||||
|                     'format_id': 'rtmp-%d' % tbr, | ||||
|                 }) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': media_url, | ||||
|                     'tbr': tbr, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         link = find_xpath_attr( | ||||
|             entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original') | ||||
|         if link is not None: | ||||
|             formats.append({ | ||||
|                 'url': link.get('href'), | ||||
|                 'format_id': link.get('rel'), | ||||
|             }) | ||||
|  | ||||
|         thumbnails = [{ | ||||
|             'url': splash.get('url'), | ||||
|             'width': int_or_none(splash.get('width')), | ||||
|             'height': int_or_none(splash.get('height')), | ||||
|         } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|             'formats': formats, | ||||
|             'thumbnails': thumbnails, | ||||
|         } | ||||
| @@ -1,8 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -21,9 +19,7 @@ class AftonbladetIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.search(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('video_id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # find internal video meta data | ||||
|   | ||||
| @@ -20,6 +20,7 @@ class AparatIE(InfoExtractor): | ||||
|             'id': 'wP8On', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'تیم گلکسی 11 - زومیت', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         # 'skip': 'Extremely unreliable', | ||||
|     } | ||||
| @@ -34,7 +35,8 @@ class AparatIE(InfoExtractor): | ||||
|                      video_id + '/vt/frame') | ||||
|         webpage = self._download_webpage(embed_url, video_id) | ||||
|  | ||||
|         video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage) | ||||
|         video_urls = [video_url.replace('\\/', '/') for video_url in re.findall( | ||||
|             r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)] | ||||
|         for i, video_url in enumerate(video_urls): | ||||
|             req = HEADRequest(video_url) | ||||
|             res = self._request_webpage( | ||||
| @@ -46,7 +48,7 @@ class AparatIE(InfoExtractor): | ||||
|  | ||||
|         title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') | ||||
|         thumbnail = self._search_regex( | ||||
|             r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) | ||||
|             r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -54,4 +56,5 @@ class AparatIE(InfoExtractor): | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'thumbnail': thumbnail, | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|         } | ||||
|   | ||||
| @@ -72,26 +72,29 @@ class BandcampIE(InfoExtractor): | ||||
|  | ||||
|         download_link = m_download.group(1) | ||||
|         video_id = self._search_regex( | ||||
|             r'var TralbumData = {.*?id: (?P<id>\d+),?$', | ||||
|             webpage, 'video id', flags=re.MULTILINE | re.DOTALL) | ||||
|             r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$', | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') | ||||
|         # We get the dictionary of the track from some javascript code | ||||
|         info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1) | ||||
|         info = json.loads(info)[0] | ||||
|         all_info = self._parse_json(self._search_regex( | ||||
|             r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id) | ||||
|         info = all_info[0] | ||||
|         # We pick mp3-320 for now, until format selection can be easily implemented. | ||||
|         mp3_info = info['downloads']['mp3-320'] | ||||
|         # If we try to use this url it says the link has expired | ||||
|         initial_url = mp3_info['url'] | ||||
|         re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' | ||||
|         m_url = re.match(re_url, initial_url) | ||||
|         m_url = re.match( | ||||
|             r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$', | ||||
|             initial_url) | ||||
|         # We build the url we will use to get the final track url | ||||
|         # This url is built in Bandcamp in the script download_bunde_*.js | ||||
|         request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) | ||||
|         final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') | ||||
|         # If we could correctly generate the .rand field the url would be | ||||
|         # in the "download_url" key | ||||
|         final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) | ||||
|         final_url = self._search_regex( | ||||
|             r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -145,6 +145,7 @@ class InfoExtractor(object): | ||||
|     thumbnail:      Full URL to a video thumbnail image. | ||||
|     description:    Full video description. | ||||
|     uploader:       Full name of the video uploader. | ||||
|     creator:        The main artist who created the video. | ||||
|     timestamp:      UNIX timestamp of the moment the video became available. | ||||
|     upload_date:    Video upload date (YYYYMMDD). | ||||
|                     If not explicitly set, calculated from timestamp. | ||||
| @@ -263,8 +264,15 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def extract(self, url): | ||||
|         """Extracts URL information and returns it in list of dicts.""" | ||||
|         self.initialize() | ||||
|         return self._real_extract(url) | ||||
|         try: | ||||
|             self.initialize() | ||||
|             return self._real_extract(url) | ||||
|         except ExtractorError: | ||||
|             raise | ||||
|         except compat_http_client.IncompleteRead as e: | ||||
|             raise ExtractorError('A network error has occured.', cause=e, expected=True) | ||||
|         except (KeyError,) as e: | ||||
|             raise ExtractorError('An extractor error has occured.', cause=e) | ||||
|  | ||||
|     def set_downloader(self, downloader): | ||||
|         """Sets the downloader for this IE.""" | ||||
| @@ -655,6 +663,21 @@ class InfoExtractor(object): | ||||
|         } | ||||
|         return RATING_TABLE.get(rating.lower(), None) | ||||
|  | ||||
|     def _family_friendly_search(self, html): | ||||
|         # See http://schema.org/VideoObject | ||||
|         family_friendly = self._html_search_meta('isFamilyFriendly', html) | ||||
|  | ||||
|         if not family_friendly: | ||||
|             return None | ||||
|  | ||||
|         RATING_TABLE = { | ||||
|             '1': 0, | ||||
|             'true': 0, | ||||
|             '0': 18, | ||||
|             'false': 18, | ||||
|         } | ||||
|         return RATING_TABLE.get(family_friendly.lower(), None) | ||||
|  | ||||
|     def _twitter_search_player(self, html): | ||||
|         return self._html_search_meta('twitter:player', html, | ||||
|                                       'twitter card player') | ||||
| @@ -704,11 +727,11 @@ class InfoExtractor(object): | ||||
|                 preference, | ||||
|                 f.get('language_preference') if f.get('language_preference') is not None else -1, | ||||
|                 f.get('quality') if f.get('quality') is not None else -1, | ||||
|                 f.get('tbr') if f.get('tbr') is not None else -1, | ||||
|                 f.get('vbr') if f.get('vbr') is not None else -1, | ||||
|                 f.get('height') if f.get('height') is not None else -1, | ||||
|                 f.get('width') if f.get('width') is not None else -1, | ||||
|                 ext_preference, | ||||
|                 f.get('tbr') if f.get('tbr') is not None else -1, | ||||
|                 f.get('vbr') if f.get('vbr') is not None else -1, | ||||
|                 f.get('abr') if f.get('abr') is not None else -1, | ||||
|                 audio_ext_preference, | ||||
|                 f.get('fps') if f.get('fps') is not None else -1, | ||||
| @@ -764,7 +787,7 @@ class InfoExtractor(object): | ||||
|         self.to_screen(msg) | ||||
|         time.sleep(timeout) | ||||
|  | ||||
|     def _extract_f4m_formats(self, manifest_url, video_id): | ||||
|     def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None): | ||||
|         manifest = self._download_xml( | ||||
|             manifest_url, video_id, 'Downloading f4m manifest', | ||||
|             'Unable to download f4m manifest') | ||||
| @@ -777,26 +800,28 @@ class InfoExtractor(object): | ||||
|             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') | ||||
|         for i, media_el in enumerate(media_nodes): | ||||
|             if manifest_version == '2.0': | ||||
|                 manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href') | ||||
|                 manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' | ||||
|                                 + (media_el.attrib.get('href') or media_el.attrib.get('url'))) | ||||
|             tbr = int_or_none(media_el.attrib.get('bitrate')) | ||||
|             format_id = 'f4m-%d' % (i if tbr is None else tbr) | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), | ||||
|                 'url': manifest_url, | ||||
|                 'ext': 'flv', | ||||
|                 'tbr': tbr, | ||||
|                 'width': int_or_none(media_el.attrib.get('width')), | ||||
|                 'height': int_or_none(media_el.attrib.get('height')), | ||||
|                 'preference': preference, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return formats | ||||
|  | ||||
|     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, | ||||
|                               entry_protocol='m3u8', preference=None): | ||||
|                               entry_protocol='m3u8', preference=None, | ||||
|                               m3u8_id=None): | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'm3u8-meta', | ||||
|             'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])), | ||||
|             'url': m3u8_url, | ||||
|             'ext': ext, | ||||
|             'protocol': 'm3u8', | ||||
| @@ -832,9 +857,8 @@ class InfoExtractor(object): | ||||
|                     formats.append({'url': format_url(line)}) | ||||
|                     continue | ||||
|                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) | ||||
|  | ||||
|                 f = { | ||||
|                     'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), | ||||
|                     'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])), | ||||
|                     'url': format_url(line.strip()), | ||||
|                     'tbr': tbr, | ||||
|                     'ext': ext, | ||||
|   | ||||
| @@ -1,77 +1,69 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FranceCultureIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.franceculture.fr/player/reecouter?play=4795174', | ||||
|         'info_dict': { | ||||
|             'id': '4795174', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Rendez-vous au pays des geeks', | ||||
|             'alt_title': 'Carnet nomade | 13-14', | ||||
|             'vcodec': 'none', | ||||
|             'uploader': 'Colette Fellous', | ||||
|             'upload_date': '20140301', | ||||
|             'duration': 3601, | ||||
|             'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$', | ||||
|             'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...', | ||||
|             'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats', | ||||
|             'timestamp': 1393700400, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         baseurl = mobj.group('baseurl') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         params_code = self._search_regex( | ||||
|             r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />", | ||||
|             webpage, 'parameter code') | ||||
|         params = compat_parse_qs(params_code) | ||||
|         video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0]) | ||||
|  | ||||
|         video_path = self._search_regex( | ||||
|             r'<a id="player".*?href="([^"]+)"', webpage, 'video path') | ||||
|         video_url = compat_urlparse.urljoin(url, video_path) | ||||
|         timestamp = int_or_none(self._search_regex( | ||||
|             r'<a id="player".*?data-date="([0-9]+)"', | ||||
|             webpage, 'upload date', fatal=False)) | ||||
|         thumbnail = self._search_regex( | ||||
|             r'<a id="player".*?>\s+<img src="([^"]+)"', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title') | ||||
|             r'<span class="title-diffusion">(.*?)</span>', webpage, 'title') | ||||
|         alt_title = self._html_search_regex( | ||||
|             r'<span class="title">(.*?)</span>', | ||||
|             webpage, 'alt_title', fatal=False) | ||||
|         description = self._html_search_regex( | ||||
|             r'<span class="description">(.*?)</span>', | ||||
|             webpage, 'description', fatal=False) | ||||
|  | ||||
|         uploader = self._html_search_regex( | ||||
|             r'(?s)<div id="emission".*?<span class="author">(.*?)</span>', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         thumbnail_part = self._html_search_regex( | ||||
|             r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage, | ||||
|             'thumbnail', fatal=False) | ||||
|         if thumbnail_part is None: | ||||
|             thumbnail = None | ||||
|         else: | ||||
|             thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part) | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<p class="desc">(.*?)</p>', webpage, 'description') | ||||
|  | ||||
|         info = json.loads(params['infoData'][0])[0] | ||||
|         duration = info.get('media_length') | ||||
|         upload_date_candidate = info.get('media_section5') | ||||
|         upload_date = ( | ||||
|             upload_date_candidate | ||||
|             if (upload_date_candidate is not None and | ||||
|                 re.match(r'[0-9]{8}$', upload_date_candidate)) | ||||
|             else None) | ||||
|             webpage, 'uploader', default=None) | ||||
|         vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'vcodec': 'none' if video_url.lower().endswith('.mp3') else None, | ||||
|             'duration': duration, | ||||
|             'vcodec': vcodec, | ||||
|             'uploader': uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'timestamp': timestamp, | ||||
|             'title': title, | ||||
|             'alt_title': alt_title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': description, | ||||
|         } | ||||
|   | ||||
| @@ -1,41 +1,67 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GamekingsIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', | ||||
|         # MD5 is flaky, seems to change regularly | ||||
|         # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', | ||||
|         'info_dict': { | ||||
|             'id': '20130811', | ||||
|             'id': 'phoenix-wright-ace-attorney-dual-destinies-review', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', | ||||
|             'description': 'md5:36fd701e57e8c15ac8682a2374c99731', | ||||
|         } | ||||
|     } | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     }, { | ||||
|         # vimeo video | ||||
|         'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/', | ||||
|         'md5': '12bf04dfd238e70058046937657ea68d', | ||||
|         'info_dict': { | ||||
|             'id': 'the-legend-of-zelda-majoras-mask', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The Legend of Zelda: Majora’s Mask', | ||||
|             'description': 'md5:9917825fe0e9f4057601fe1e38860de3', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         video_url = self._og_search_video_url(webpage) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video = re.search(r'[0-9]+', video_url) | ||||
|         video_id = video.group(0) | ||||
|         playlist_id = self._search_regex( | ||||
|             r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id') | ||||
|  | ||||
|         # Todo: add medium format | ||||
|         video_url = video_url.replace(video_id, 'large/' + video_id) | ||||
|         playlist = self._download_xml( | ||||
|             'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id, | ||||
|             video_id) | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'jwplayer': 'http://rss.jwpcdn.com/' | ||||
|         } | ||||
|  | ||||
|         item = playlist.find('./channel/item') | ||||
|  | ||||
|         thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail') | ||||
|         video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'ext': 'mp4', | ||||
|             'url': video_url, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|   | ||||
| @@ -140,6 +140,19 @@ class GenericIE(InfoExtractor): | ||||
|             }, | ||||
|             'add_ie': ['Ooyala'], | ||||
|         }, | ||||
|         # multiple ooyala embeds on SBN network websites | ||||
|         { | ||||
|             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', | ||||
|             'info_dict': { | ||||
|                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok', | ||||
|                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com', | ||||
|             }, | ||||
|             'playlist_mincount': 3, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'add_ie': ['Ooyala'], | ||||
|         }, | ||||
|         # google redirect | ||||
|         { | ||||
|             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', | ||||
| @@ -511,6 +524,19 @@ class GenericIE(InfoExtractor): | ||||
|                 'upload_date': '20150126', | ||||
|             }, | ||||
|             'add_ie': ['Viddler'], | ||||
|         }, | ||||
|         # jwplayer YouTube | ||||
|         { | ||||
|             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/', | ||||
|             'info_dict': { | ||||
|                 'id': 'Mrj4DVp2zeA', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20150204', | ||||
|                 'uploader': 'The National Archives UK', | ||||
|                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', | ||||
|                 'uploader_id': 'NationalArchives08', | ||||
|                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue', | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -882,10 +908,19 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for Ooyala videos | ||||
|         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or | ||||
|                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)) | ||||
|                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or | ||||
|                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)) | ||||
|         if mobj is not None: | ||||
|             return OoyalaIE._build_url_result(mobj.group('ec')) | ||||
|  | ||||
|         # Look for multiple Ooyala embeds on SBN network websites | ||||
|         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) | ||||
|         if mobj is not None: | ||||
|             embeds = self._parse_json(mobj.group(1), video_id, fatal=False) | ||||
|             if embeds: | ||||
|                 return _playlist_from_matches( | ||||
|                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala') | ||||
|  | ||||
|         # Look for Aparat videos | ||||
|         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage) | ||||
|         if mobj is not None: | ||||
| @@ -1012,7 +1047,12 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for embedded sbs.com.au player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1', | ||||
|             r'''(?x) | ||||
|             (?: | ||||
|                 <meta\s+property="og:video"\s+content=| | ||||
|                 <iframe[^>]+?src= | ||||
|             ) | ||||
|             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'SBS') | ||||
| @@ -1043,6 +1083,8 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result(mobj.group('url'), 'Livestream') | ||||
|  | ||||
|         def check_video(vurl): | ||||
|             if YoutubeIE.suitable(vurl): | ||||
|                 return True | ||||
|             vpath = compat_urlparse.urlparse(vurl).path | ||||
|             vext = determine_ext(vpath) | ||||
|             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml') | ||||
| @@ -1060,7 +1102,8 @@ class GenericIE(InfoExtractor): | ||||
|                     JWPlayerOptions| | ||||
|                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup | ||||
|                 ) | ||||
|                 .*?file\s*:\s*["\'](.*?)["\']''', webpage)) | ||||
|                 .*? | ||||
|                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage)) | ||||
|         if not found: | ||||
|             # Broaden the search a little bit | ||||
|             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) | ||||
|   | ||||
| @@ -34,8 +34,6 @@ class GoshgayIE(InfoExtractor): | ||||
|         duration = parse_duration(self._html_search_regex( | ||||
|             r'<span class="duration">\s*-?\s*(.*?)</span>', | ||||
|             webpage, 'duration', fatal=False)) | ||||
|         family_friendly = self._html_search_meta( | ||||
|             'isFamilyFriendly', webpage, default='false') | ||||
|  | ||||
|         flashvars = compat_parse_qs(self._html_search_regex( | ||||
|             r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"', | ||||
| @@ -49,5 +47,5 @@ class GoshgayIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'age_limit': 0 if family_friendly == 'true' else 18, | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|         } | ||||
|   | ||||
| @@ -80,9 +80,6 @@ class IzleseneIE(InfoExtractor): | ||||
|             r'comment_count\s*=\s*\'([^\']+)\';', | ||||
|             webpage, 'comment_count', fatal=False) | ||||
|  | ||||
|         family_friendly = self._html_search_meta( | ||||
|             'isFamilyFriendly', webpage, 'age limit', fatal=False) | ||||
|  | ||||
|         content_url = self._html_search_meta( | ||||
|             'contentURL', webpage, 'content URL', fatal=False) | ||||
|         ext = determine_ext(content_url, 'mp4') | ||||
| @@ -120,6 +117,6 @@ class IzleseneIE(InfoExtractor): | ||||
|             'duration': duration, | ||||
|             'view_count': int_or_none(view_count), | ||||
|             'comment_count': int_or_none(comment_count), | ||||
|             'age_limit': 18 if family_friendly == 'False' else 0, | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ class MixcloudIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)' | ||||
|     IE_NAME = 'mixcloud' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', | ||||
|         'info_dict': { | ||||
|             'id': 'dholbach-cryptkeeper', | ||||
| @@ -33,7 +33,20 @@ class MixcloudIE(InfoExtractor): | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', | ||||
|         'info_dict': { | ||||
|             'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat', | ||||
|             'ext': 'm4a', | ||||
|             'title': 'Electric Relaxation vol. 3', | ||||
|             'description': 'md5:2b8aec6adce69f9d41724647c65875e8', | ||||
|             'uploader': 'Daniel Drumz', | ||||
|             'uploader_id': 'gillespeterson', | ||||
|             'thumbnail': 're:https?://.*\.jpg', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _get_url(self, track_id, template_url): | ||||
|         server_count = 30 | ||||
| @@ -60,7 +73,7 @@ class MixcloudIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, track_id) | ||||
|  | ||||
|         preview_url = self._search_regex( | ||||
|             r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url') | ||||
|             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url') | ||||
|         song_url = preview_url.replace('/previews/', '/c/originals/') | ||||
|         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) | ||||
|         final_song_url = self._get_url(track_id, template_url) | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     fix_xml_ampersands, | ||||
|     parse_duration, | ||||
| @@ -11,7 +11,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NPOBaseIE(InfoExtractor): | ||||
| class NPOBaseIE(SubtitlesInfoExtractor): | ||||
|     def _get_token(self, video_id): | ||||
|         token_page = self._download_webpage( | ||||
|             'http://ida.omroep.nl/npoplayer/i.js', | ||||
| @@ -161,6 +161,16 @@ class NPOIE(NPOBaseIE): | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         if metadata.get('tt888') == 'ja': | ||||
|             subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         subtitles = self.extract_subtitles(video_id, subtitles) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': metadata['titel'], | ||||
| @@ -169,6 +179,7 @@ class NPOIE(NPOBaseIE): | ||||
|             'upload_date': unified_strdate(metadata.get('gidsdatum')), | ||||
|             'duration': parse_duration(metadata.get('tijdsduur')), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -91,6 +91,15 @@ class RTLnowIE(InfoExtractor): | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5', | ||||
|             'info_dict': { | ||||
|                 'id': '188729', | ||||
|                 'ext': 'flv', | ||||
|                 'upload_date': '20150204', | ||||
|                 'description': 'md5:5e1ce23095e61a79c166d134b683cecc', | ||||
|                 'title': 'Der Bachelor - Folge 4', | ||||
|             } | ||||
|         }, { | ||||
|             'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
| @@ -134,9 +143,18 @@ class RTLnowIE(InfoExtractor): | ||||
|                     'player_url': video_page_url + 'includes/vodplayer.swf', | ||||
|                 } | ||||
|             else: | ||||
|                 fmt = { | ||||
|                     'url': filename.text, | ||||
|                 } | ||||
|                 mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text) | ||||
|                 if mobj: | ||||
|                     fmt = { | ||||
|                         'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'), | ||||
|                         'play_path': 'mp4:' + mobj.group('play_path'), | ||||
|                         'page_url': url, | ||||
|                         'player_url': video_page_url + 'includes/vodplayer.swf', | ||||
|                     } | ||||
|                 else: | ||||
|                     fmt = { | ||||
|                         'url': filename.text, | ||||
|                     } | ||||
|             fmt.update({ | ||||
|                 'width': int_or_none(filename.get('width')), | ||||
|                 'height': int_or_none(filename.get('height')), | ||||
|   | ||||
| @@ -1,16 +1,16 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import js_to_json | ||||
|  | ||||
|  | ||||
| class RTPIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', | ||||
|         'md5': 'e736ce0c665e459ddb818546220b4ef8', | ||||
|         'info_dict': { | ||||
|             'id': 'e174042', | ||||
|             'ext': 'mp3', | ||||
| @@ -18,9 +18,6 @@ class RTPIE(InfoExtractor): | ||||
|             'description': 'As paixões musicais de António Cartaxo e António Macedo', | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # RTMP download | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', | ||||
|         'only_matching': True, | ||||
| @@ -37,20 +34,48 @@ class RTPIE(InfoExtractor): | ||||
|  | ||||
|         player_config = self._search_regex( | ||||
|             r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config') | ||||
|         config = json.loads(js_to_json(player_config)) | ||||
|         config = self._parse_json(player_config, video_id) | ||||
|  | ||||
|         path, ext = config.get('file').rsplit('.', 1) | ||||
|         formats = [{ | ||||
|             'format_id': 'rtmp', | ||||
|             'ext': ext, | ||||
|             'vcodec': config.get('type') == 'audio' and 'none' or None, | ||||
|             'preference': -2, | ||||
|             'url': 'rtmp://{streamer:s}/{application:s}'.format(**config), | ||||
|             'app': config.get('application'), | ||||
|             'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path), | ||||
|             'page_url': url, | ||||
|             'url': 'rtmp://{streamer:s}/{application:s}'.format(**config), | ||||
|             'rtmp_live': config.get('live', False), | ||||
|             'ext': ext, | ||||
|             'vcodec': config.get('type') == 'audio' and 'none' or None, | ||||
|             'player_url': 'http://programas.rtp.pt/play/player.swf?v3', | ||||
|             'rtmp_real_time': True, | ||||
|         }] | ||||
|  | ||||
|         # Construct regular HTTP download URLs | ||||
|         replacements = { | ||||
|             'audio': { | ||||
|                 'format_id': 'mp3', | ||||
|                 'pattern': r'^nas2\.share/wavrss/', | ||||
|                 'repl': 'http://rsspod.rtp.pt/podcasts/', | ||||
|                 'vcodec': 'none', | ||||
|             }, | ||||
|             'video': { | ||||
|                 'format_id': 'mp4_h264', | ||||
|                 'pattern': r'^nas2\.share/h264/', | ||||
|                 'repl': 'http://rsspod.rtp.pt/videocasts/', | ||||
|                 'vcodec': 'h264', | ||||
|             }, | ||||
|         } | ||||
|         r = replacements[config['type']] | ||||
|         if re.match(r['pattern'], config['file']) is not None: | ||||
|             formats.append({ | ||||
|                 'format_id': r['format_id'], | ||||
|                 'url': re.sub(r['pattern'], r['repl'], config['file']), | ||||
|                 'vcodec': r['vcodec'], | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|   | ||||
| @@ -6,12 +6,14 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     unescapeHTML, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -159,11 +161,27 @@ class RTSIE(InfoExtractor): | ||||
|             return int_or_none(self._search_regex( | ||||
|                 r'-([0-9]+)k\.', url, 'bitrate', default=None)) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': fid, | ||||
|             'url': furl, | ||||
|             'tbr': extract_bitrate(furl), | ||||
|         } for fid, furl in info['streams'].items()] | ||||
|         formats = [] | ||||
|         for format_id, format_url in info['streams'].items(): | ||||
|             if format_url.endswith('.f4m'): | ||||
|                 token = self._download_xml( | ||||
|                     'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path, | ||||
|                     video_id, 'Downloading %s token' % format_id) | ||||
|                 auth_params = xpath_text(token, './/authparams', 'auth params') | ||||
|                 if not auth_params: | ||||
|                     continue | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     '%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params), | ||||
|                     video_id, f4m_id=format_id)) | ||||
|             elif format_url.endswith('.m3u8'): | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     format_url, video_id, 'mp4', m3u8_id=format_id)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'url': format_url, | ||||
|                     'tbr': extract_bitrate(format_url), | ||||
|                 }) | ||||
|  | ||||
|         if 'media' in info: | ||||
|             formats.extend([{ | ||||
|   | ||||
| @@ -1,80 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     HEADRequest, | ||||
|     urlhandle_detect_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SoulAnimeWatchingIE(InfoExtractor): | ||||
|     IE_NAME = "soulanime:watching" | ||||
|     IE_DESC = "SoulAnime video" | ||||
|     _TEST = { | ||||
|         'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/', | ||||
|         'md5': '05fae04abf72298098b528e98abf4298', | ||||
|         'info_dict': { | ||||
|             'id': 'seirei-tsukai-no-blade-dance-episode-9', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'seirei-tsukai-no-blade-dance-episode-9', | ||||
|             'description': 'seirei-tsukai-no-blade-dance-episode-9' | ||||
|         } | ||||
|     } | ||||
|     _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         domain = mobj.group('domain') | ||||
|  | ||||
|         page = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url_encoded = self._html_search_regex( | ||||
|             r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url') | ||||
|         video_url = "http://www.soul-anime." + domain + video_url_encoded | ||||
|  | ||||
|         ext_req = HEADRequest(video_url) | ||||
|         ext_handle = self._request_webpage( | ||||
|             ext_req, video_id, note='Determining extension') | ||||
|         ext = urlhandle_detect_ext(ext_handle) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': ext, | ||||
|             'title': video_id, | ||||
|             'description': video_id | ||||
|         } | ||||
|  | ||||
|  | ||||
| class SoulAnimeSeriesIE(InfoExtractor): | ||||
|     IE_NAME = "soulanime:series" | ||||
|     IE_DESC = "SoulAnime Series" | ||||
|  | ||||
|     _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)' | ||||
|  | ||||
|     _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/', | ||||
|         'info_dict': { | ||||
|             'id': 'black-rock-shooter-tv' | ||||
|         }, | ||||
|         'playlist_count': 8 | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         series_id = mobj.group('id') | ||||
|         domain = mobj.group('domain') | ||||
|  | ||||
|         pattern = re.compile(self._EPISODE_REGEX) | ||||
|  | ||||
|         page = self._download_webpage(url, series_id, "Downloading series page") | ||||
|         mobj = pattern.findall(page) | ||||
|  | ||||
|         entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj] | ||||
|  | ||||
|         return self.playlist_result(entries, series_id) | ||||
							
								
								
									
										56
									
								
								youtube_dl/extractor/svtplay.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								youtube_dl/extractor/svtplay.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,56 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SVTPlayIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?svtplay\.se/video/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final', | ||||
|         'md5': 'f4a184968bc9c802a9b41316657aaa80', | ||||
|         'info_dict': { | ||||
|             'id': '2609989', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'SM veckan vinter, Örebro - Rally, final', | ||||
|             'duration': 4500, | ||||
|             'thumbnail': 're:^https?://.*[\.-]jpg$', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         info = self._download_json( | ||||
|             'http://www.svtplay.se/video/%s?output=json' % video_id, video_id) | ||||
|  | ||||
|         title = info['context']['title'] | ||||
|         thumbnail = info['context'].get('thumbnailImage') | ||||
|  | ||||
|         video_info = info['video'] | ||||
|         formats = [] | ||||
|         for vr in video_info['videoReferences']: | ||||
|             vurl = vr['url'] | ||||
|             if determine_ext(vurl) == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     vurl, video_id, | ||||
|                     ext='mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id=vr.get('playerType'))) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'format_id': vr.get('playerType'), | ||||
|                     'url': vurl, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         duration = video_info.get('materialLength') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|         } | ||||
| @@ -15,7 +15,8 @@ class TeamcocoIE(InfoExtractor): | ||||
|                 'id': '80187', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Conan Becomes A Mary Kay Beauty Consultant', | ||||
|                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.' | ||||
|                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.', | ||||
|                 'age_limit': 0, | ||||
|             } | ||||
|         }, { | ||||
|             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', | ||||
| @@ -24,7 +25,8 @@ class TeamcocoIE(InfoExtractor): | ||||
|                 'id': '19705', | ||||
|                 'ext': 'mp4', | ||||
|                 "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", | ||||
|                 "title": "Louis C.K. Interview Pt. 1 11/3/11" | ||||
|                 "title": "Louis C.K. Interview Pt. 1 11/3/11", | ||||
|                 'age_limit': 0, | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
| @@ -83,4 +85,5 @@ class TeamcocoIE(InfoExtractor): | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|         } | ||||
|   | ||||
| @@ -1,40 +1,55 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class TriluliluIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/video-[^/]+/(?P<id>[^/]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/(?:video-[^/]+/)?(?P<id>[^/#\?]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1', | ||||
|         'md5': 'c1450a00da251e2769b74b9005601cac', | ||||
|         'info_dict': { | ||||
|             'id': 'big-buck-bunny-1', | ||||
|             'id': 'ae2899e124140b', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Big Buck Bunny', | ||||
|             'description': ':) pentru copilul din noi', | ||||
|         }, | ||||
|         # Server ignores Range headers (--test) | ||||
|         'params': { | ||||
|             'skip_download': True | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         if re.search(r'Fişierul nu este disponibil pentru vizionare în ţara dumneavoastră', webpage): | ||||
|             raise ExtractorError( | ||||
|                 'This video is not available in your country.', expected=True) | ||||
|         elif re.search('Fişierul poate fi accesat doar de către prietenii lui', webpage): | ||||
|             raise ExtractorError('This video is private.', expected=True) | ||||
|  | ||||
|         flashvars_str = self._search_regex( | ||||
|             r'block_flash_vars\s*=\s*(\{[^\}]+\})', webpage, 'flashvars', fatal=False, default=None) | ||||
|  | ||||
|         if flashvars_str: | ||||
|             flashvars = self._parse_json(flashvars_str, display_id) | ||||
|         else: | ||||
|             raise ExtractorError( | ||||
|                 'This page does not contain videos', expected=True) | ||||
|  | ||||
|         if flashvars['isMP3'] == 'true': | ||||
|             raise ExtractorError( | ||||
|                 'Audio downloads are currently not supported', expected=True) | ||||
|  | ||||
|         video_id = flashvars['hash'] | ||||
|         title = self._og_search_title(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|  | ||||
|         log_str = self._search_regex( | ||||
|             r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, 'log info') | ||||
|         log = json.loads(log_str) | ||||
|         description = self._og_search_description(webpage, default=None) | ||||
|  | ||||
|         format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/' | ||||
|                       'video-formats2' % log) | ||||
|                       'video-formats2' % flashvars) | ||||
|         format_doc = self._download_xml( | ||||
|             format_url, video_id, | ||||
|             note='Downloading formats', | ||||
| @@ -44,10 +59,10 @@ class TriluliluIE(InfoExtractor): | ||||
|             'http://fs%(server)s.trilulilu.ro/stream.php?type=video' | ||||
|             '&source=site&hash=%(hash)s&username=%(userid)s&' | ||||
|             'key=ministhebest&format=%%s&sig=&exp=' % | ||||
|             log) | ||||
|             flashvars) | ||||
|         formats = [ | ||||
|             { | ||||
|                 'format': fnode.text, | ||||
|                 'format_id': fnode.text.partition('-')[2], | ||||
|                 'url': video_url_template % fnode.text, | ||||
|                 'ext': fnode.text.partition('-')[0] | ||||
|             } | ||||
| @@ -56,8 +71,8 @@ class TriluliluIE(InfoExtractor): | ||||
|         ] | ||||
|  | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'formats': formats, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
| @@ -11,7 +13,7 @@ from ..utils import ( | ||||
| class TvigleIE(InfoExtractor): | ||||
|     IE_NAME = 'tvigle' | ||||
|     IE_DESC = 'Интернет-телевидение Tvigle.ru' | ||||
|     _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -38,16 +40,22 @@ class TvigleIE(InfoExtractor): | ||||
|                 'duration': 186.080, | ||||
|                 'age_limit': 0, | ||||
|             }, | ||||
|         }, | ||||
|         }, { | ||||
|             'url': 'https://cloud.tvigle.ru/video/5267604/', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_id = self._html_search_regex( | ||||
|             r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id') | ||||
|         if not video_id: | ||||
|             webpage = self._download_webpage(url, display_id) | ||||
|             video_id = self._html_search_regex( | ||||
|                 r'<li class="video-preview current_playing" id="(\d+)">', | ||||
|                 webpage, 'video id') | ||||
|  | ||||
|         video_data = self._download_json( | ||||
|             'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id) | ||||
|   | ||||
							
								
								
									
										65
									
								
								youtube_dl/extractor/tweakers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								youtube_dl/extractor/tweakers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
|     int_or_none, | ||||
|     float_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TweakersIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html', | ||||
|         'md5': '1b5afa817403bb5baa08359dca31e6df', | ||||
|         'info_dict': { | ||||
|             'id': '9926', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'New Nintendo 3DS XL - Op alle fronten beter', | ||||
|             'description': 'md5:f97324cc71e86e11c853f0763820e3ba', | ||||
|             'thumbnail': 're:^https?://.*\.jpe?g$', | ||||
|             'duration': 386, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         playlist = self._download_xml( | ||||
|             'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id, | ||||
|             video_id) | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'xspf': 'http://xspf.org/ns/0/', | ||||
|             's1': 'http://static.streamone.nl/player/ns/0', | ||||
|         } | ||||
|  | ||||
|         track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)) | ||||
|  | ||||
|         title = xpath_text( | ||||
|             track, xpath_with_ns('./xspf:title', NS_MAP), 'title') | ||||
|         description = xpath_text( | ||||
|             track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description') | ||||
|         thumbnail = xpath_text( | ||||
|             track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail') | ||||
|         duration = float_or_none( | ||||
|             xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), | ||||
|             1000) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': location.text, | ||||
|             'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), | ||||
|             'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), | ||||
|             'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), | ||||
|         } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										89
									
								
								youtube_dl/extractor/wsj.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								youtube_dl/extractor/wsj.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,89 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class WSJIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=(?P<id>[a-zA-Z0-9-]+)' | ||||
|     IE_DESC = 'Wall Street Journal' | ||||
|     _TEST = { | ||||
|         'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A', | ||||
|         'md5': '9747d7a6ebc2f4df64b981e1dde9efa9', | ||||
|         'info_dict': { | ||||
|             'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20150202', | ||||
|             'uploader_id': 'bbright', | ||||
|             'creator': 'bbright', | ||||
|             'categories': list,  # a long list | ||||
|             'duration': 90, | ||||
|             'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         bitrates = [128, 174, 264, 320, 464, 664, 1264] | ||||
|         api_url = ( | ||||
|             'http://video-api.wsj.com/api-video/find_all_videos.asp?' | ||||
|             'type=guid&count=1&query=%s&' | ||||
|             'fields=hls,adZone,thumbnailList,guid,state,secondsUntilStartTime,' | ||||
|             'author,description,name,linkURL,videoStillURL,duration,videoURL,' | ||||
|             'adCategory,catastrophic,linkShortURL,doctypeID,youtubeID,' | ||||
|             'titletag,rssURL,wsj-section,wsj-subsection,allthingsd-section,' | ||||
|             'allthingsd-subsection,sm-section,sm-subsection,provider,' | ||||
|             'formattedCreationDate,keywords,keywordsOmniture,column,editor,' | ||||
|             'emailURL,emailPartnerID,showName,omnitureProgramName,' | ||||
|             'omnitureVideoFormat,linkRelativeURL,touchCastID,' | ||||
|             'omniturePublishDate,%s') % ( | ||||
|                 video_id, ','.join('video%dkMP4Url' % br for br in bitrates)) | ||||
|         info = self._download_json(api_url, video_id)['items'][0] | ||||
|  | ||||
|         # Thumbnails are conveniently in the correct format already | ||||
|         thumbnails = info.get('thumbnailList') | ||||
|         creator = info.get('author') | ||||
|         uploader_id = info.get('editor') | ||||
|         categories = info.get('keywords') | ||||
|         duration = int_or_none(info.get('duration')) | ||||
|         upload_date = unified_strdate( | ||||
|             info.get('formattedCreationDate'), day_first=False) | ||||
|         title = info.get('name', info.get('titletag')) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'f4m', | ||||
|             'format_note': 'f4m (meta URL)', | ||||
|             'url': info['videoURL'], | ||||
|         }] | ||||
|         if info.get('hls'): | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 info['hls'], video_id, ext='mp4', | ||||
|                 preference=0, entry_protocol='m3u8_native')) | ||||
|         for br in bitrates: | ||||
|             field = 'video%dkMP4Url' % br | ||||
|             if info.get(field): | ||||
|                 formats.append({ | ||||
|                     'format_id': 'mp4-%d' % br, | ||||
|                     'container': 'mp4', | ||||
|                     'tbr': br, | ||||
|                     'url': info[field], | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'thumbnails': thumbnails, | ||||
|             'creator': creator, | ||||
|             'uploader_id': uploader_id, | ||||
|             'duration': duration, | ||||
|             'upload_date': upload_date, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'categories': categories, | ||||
|         } | ||||
| @@ -780,8 +780,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                     fo for fo in formats | ||||
|                     if fo['format_id'] == format_id) | ||||
|             except StopIteration: | ||||
|                 f.update(self._formats.get(format_id, {}).items()) | ||||
|                 formats.append(f) | ||||
|                 full_info = self._formats.get(format_id, {}).copy() | ||||
|                 full_info.update(f) | ||||
|                 formats.append(full_info) | ||||
|             else: | ||||
|                 existing_format.update(f) | ||||
|         return formats | ||||
|   | ||||
| @@ -297,8 +297,10 @@ def parseOpts(overrideArguments=None): | ||||
|             ' You can filter the video results by putting a condition in' | ||||
|             ' brackets, as in -f "best[height=720]"' | ||||
|             ' (or -f "[filesize>10M]"). ' | ||||
|             ' This works for filesize, height, width, tbr, abr, vbr, and fps' | ||||
|             ' and the comparisons <, <=, >, >=, =, != .' | ||||
|             ' This works for filesize, height, width, tbr, abr, vbr, asr, and fps' | ||||
|             ' and the comparisons <, <=, >, >=, =, !=' | ||||
|             ' and for ext, acodec, vcodec, container, and protocol' | ||||
|             ' and the comparisons =, != .' | ||||
|             ' Formats for which the value is not known are excluded unless you' | ||||
|             ' put a question mark (?) after the operator.' | ||||
|             ' You can combine format filters, so  ' | ||||
|   | ||||
| @@ -166,14 +166,13 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|         if filecodec is None: | ||||
|             raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe') | ||||
|  | ||||
|         uses_avconv = self._uses_avconv() | ||||
|         more_opts = [] | ||||
|         if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'): | ||||
|             if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']: | ||||
|                 # Lossless, but in another container | ||||
|                 acodec = 'copy' | ||||
|                 extension = 'm4a' | ||||
|                 more_opts = ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc'] | ||||
|                 more_opts = ['-bsf:a', 'aac_adtstoasc'] | ||||
|             elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']: | ||||
|                 # Lossless if possible | ||||
|                 acodec = 'copy' | ||||
| @@ -189,9 +188,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|                 more_opts = [] | ||||
|                 if self._preferredquality is not None: | ||||
|                     if int(self._preferredquality) < 10: | ||||
|                         more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality] | ||||
|                         more_opts += ['-q:a', self._preferredquality] | ||||
|                     else: | ||||
|                         more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k'] | ||||
|                         more_opts += ['-b:a', self._preferredquality + 'k'] | ||||
|         else: | ||||
|             # We convert the audio (lossy) | ||||
|             acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec] | ||||
| @@ -200,13 +199,13 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|             if self._preferredquality is not None: | ||||
|                 # The opus codec doesn't support the -aq option | ||||
|                 if int(self._preferredquality) < 10 and extension != 'opus': | ||||
|                     more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality] | ||||
|                     more_opts += ['-q:a', self._preferredquality] | ||||
|                 else: | ||||
|                     more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k'] | ||||
|                     more_opts += ['-b:a', self._preferredquality + 'k'] | ||||
|             if self._preferredcodec == 'aac': | ||||
|                 more_opts += ['-f', 'adts'] | ||||
|             if self._preferredcodec == 'm4a': | ||||
|                 more_opts += ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc'] | ||||
|                 more_opts += ['-bsf:a', 'aac_adtstoasc'] | ||||
|             if self._preferredcodec == 'vorbis': | ||||
|                 extension = 'ogg' | ||||
|             if self._preferredcodec == 'wav': | ||||
| @@ -511,8 +510,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor): | ||||
|             metadata['artist'] = info['uploader_id'] | ||||
|         if info.get('description') is not None: | ||||
|             metadata['description'] = info['description'] | ||||
|             metadata['comment'] = info['description'] | ||||
|         if info.get('webpage_url') is not None: | ||||
|             metadata['comment'] = info['webpage_url'] | ||||
|             metadata['purl'] = info['webpage_url'] | ||||
|  | ||||
|         if not metadata: | ||||
|             self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add') | ||||
|   | ||||
| @@ -701,7 +701,7 @@ def unified_strdate(date_str, day_first=True): | ||||
|     # %z (UTC offset) is only supported in python>=3.2 | ||||
|     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) | ||||
|     # Remove AM/PM + timezone | ||||
|     date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str) | ||||
|     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) | ||||
|  | ||||
|     format_expressions = [ | ||||
|         '%d %B %Y', | ||||
|   | ||||
| @@ -1,3 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __version__ = '2015.02.02.5' | ||||
| __version__ = '2015.02.10' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user