Compare commits
	
		
			192 Commits
		
	
	
		
			2016.07.09
			...
			2016.08.06
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 491c42e690 | ||
|  | 7f2339c617 | ||
|  | 8122e79fef | ||
|  | fe3ad1d456 | ||
|  | 038a5e1a65 | ||
|  | 84bc23b41b | ||
|  | 46933a15d6 | ||
|  | 3859ebeee6 | ||
|  | d50aca41f8 | ||
|  | 0ca057b965 | ||
|  | 5ca968d0a6 | ||
|  | f0d31c624e | ||
|  | 08c655906c | ||
|  | 5a993e1692 | ||
|  | a7d2953073 | ||
|  | fdd0b8f8e0 | ||
|  | f65dc41b72 | ||
|  | 962250f7ea | ||
|  | 7dc2a74e0a | ||
|  | b02b960c6b | ||
|  | 4f427c4be8 | ||
|  | 8a00ea567b | ||
|  | 8895be01fc | ||
|  | 52e7fcfeb7 | ||
|  | 2396062c74 | ||
|  | 14704aeff6 | ||
|  | 3c2c3af059 | ||
|  | 1891ea2d76 | ||
|  | 1094074c04 | ||
|  | 217d5ae013 | ||
|  | 8b40854529 | ||
|  | 6bb0fbf9fb | ||
|  | 8d3b226b83 | ||
|  | 42b7a5afe0 | ||
|  | 899d2bea63 | ||
|  | 9cb0e65d7e | ||
|  | b070564efb | ||
|  | ce28252c48 | ||
|  | 3aa9a73554 | ||
|  | 6a9b3b61ea | ||
|  | 45408eb075 | ||
|  | eafc66855d | ||
|  | e03d3e6453 | ||
|  | a70e45f80a | ||
|  | 697655a7c0 | ||
|  | e382b953f0 | ||
|  | 116e7e0d04 | ||
|  | cf03e34ad3 | ||
|  | 2903137292 | ||
|  | 9361f2169c | ||
|  | 35aa6c538f | ||
|  | fa9f1d16b8 | ||
|  | 485fedf6fd | ||
|  | da0baba5c8 | ||
|  | bb9f3bfedf | ||
|  | dbc0b39b91 | ||
|  | 481c5c5137 | ||
|  | 0cacae2807 | ||
|  | d9d56deadf | ||
|  | 74ba450a81 | ||
|  | db19df6ca0 | ||
|  | fbdf8d15d1 | ||
|  | 94aae01548 | ||
|  | 39eef54cf0 | ||
|  | 05c8268c81 | ||
|  | 289a16b4f3 | ||
|  | 7935926baa | ||
|  | dcbb07c35a | ||
|  | 40090e8d51 | ||
|  | 3e050d51d4 | ||
|  | ced70c8640 | ||
|  | 9a700deea4 | ||
|  | dc35ba0eba | ||
|  | 88bd486b9a | ||
|  | 7f8b92e3cf | ||
|  | 35f6e0ff36 | ||
|  | 326fa4e6e5 | ||
|  | c74299a72c | ||
|  | 10a1bb3a78 | ||
|  | 4d3e543c73 | ||
|  | 05d1e7aaa9 | ||
|  | a3aa814b77 | ||
|  | 5c32a77cad | ||
|  | 14a28e705b | ||
|  | cc99d4f826 | ||
|  | 712c7530ff | ||
|  | 0a147785e8 | ||
|  | 59eaf69e33 | ||
|  | e8be2943a7 | ||
|  | 8fdc538b46 | ||
|  | 9513c1eb17 | ||
|  | ae6fff4e64 | ||
|  | 5a65668e25 | ||
|  | f75e6890db | ||
|  | d9cb92c840 | ||
|  | 94c04a3c79 | ||
|  | f094834857 | ||
|  | 111de00289 | ||
|  | b4a131e1a5 | ||
|  | f1991ce928 | ||
|  | 6548030a17 | ||
|  | 3a8947650b | ||
|  | 1979969f91 | ||
|  | 0673741af3 | ||
|  | c8e170b209 | ||
|  | bbe1f3634a | ||
|  | 4671dd41b2 | ||
|  | f164b97123 | ||
|  | 5275efe30d | ||
|  | b13647cf3c | ||
|  | add7d2a0e2 | ||
|  | e298d3a08c | ||
|  | fd8c8c7dcd | ||
|  | 9158af16cc | ||
|  | c6668e4ad1 | ||
|  | 84e8cca48b | ||
|  | 790b06b7d4 | ||
|  | 740d7c49c2 | ||
|  | 4e51ec5f57 | ||
|  | 05087d1b4c | ||
|  | a66a73ee90 | ||
|  | 8188b923db | ||
|  | d993a1354d | ||
|  | e8882e7043 | ||
|  | 1056821799 | ||
|  | 890e6d3309 | ||
|  | 246080d378 | ||
|  | b1ea680270 | ||
|  | 45550d1039 | ||
|  | 7cdfc4c90f | ||
|  | af21f56f98 | ||
|  | 1a8f0773b6 | ||
|  | 59cc5bd8bf | ||
|  | 49bc16b95e | ||
|  | a2f9ca1e67 | ||
|  | 371ddb14fe | ||
|  | 998895dffa | ||
|  | aadd3ce21f | ||
|  | ae7b846203 | ||
|  | 21ba7d0981 | ||
|  | 691fbe7f98 | ||
|  | 2e221ca3a8 | ||
|  | 317f7ab634 | ||
|  | 23495d6a39 | ||
|  | 224db034ab | ||
|  | ad27649be3 | ||
|  | 84571be645 | ||
|  | 7b0d333a7e | ||
|  | 342f0c3682 | ||
|  | 38e0f16a94 | ||
|  | e910fe2fe4 | ||
|  | 233b58dec7 | ||
|  | c39b2ed990 | ||
|  | 35ec86689c | ||
|  | c485959034 | ||
|  | a0560d8ab8 | ||
|  | 0385aa6199 | ||
|  | 00f4764cb7 | ||
|  | 51c2cd0b83 | ||
|  | 5f5a9d6158 | ||
|  | 2d19fb5072 | ||
|  | 9d865a1af6 | ||
|  | 41aa44259d | ||
|  | 381ff44756 | ||
|  | 7f29cf545a | ||
|  | 7d1219f3e0 | ||
|  | f1b4af7d79 | ||
|  | 8a8590a617 | ||
|  | 4a7a5e41f7 | ||
|  | 2a49d01600 | ||
|  | b99af8a51c | ||
|  | 8e7020daef | ||
|  | a26bcc61c1 | ||
|  | 5c4dcf8172 | ||
|  | e9fb6a4bbe | ||
|  | e2dbcaa1bf | ||
|  | ae01850165 | ||
|  | c3baaedfc8 | ||
|  | 0b68de3cc1 | ||
|  | 39e9d524e5 | ||
|  | 865b087224 | ||
|  | 3121b25639 | ||
|  | 0286b85c79 | ||
|  | ab52bb5137 | ||
|  | 61a98b8623 | ||
|  | 6daf34a045 | ||
|  | c03adf90bd | ||
|  | 0ece114b7b | ||
|  | 5b6a74856b | ||
|  | 59bbe4911a | ||
|  | 4f3c5e0627 | ||
|  | ccff2c404d | 
							
								
								
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							| @@ -6,8 +6,8 @@ | ||||
|  | ||||
| --- | ||||
|  | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.09.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.09.1** | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.06** | ||||
|  | ||||
| ### Before submitting an *issue* make sure you have: | ||||
| - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections | ||||
| @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> | ||||
| [debug] User config: [] | ||||
| [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] | ||||
| [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 | ||||
| [debug] youtube-dl version 2016.07.09.1 | ||||
| [debug] youtube-dl version 2016.08.06 | ||||
| [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 | ||||
| [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 | ||||
| [debug] Proxy map: {} | ||||
|   | ||||
| @@ -7,9 +7,6 @@ python: | ||||
|   - "3.4" | ||||
|   - "3.5" | ||||
| sudo: false | ||||
| install: | ||||
|   - bash ./devscripts/install_srelay.sh | ||||
|   - export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6 | ||||
| script: nosetests test --verbose | ||||
| notifications: | ||||
|   email: | ||||
|   | ||||
							
								
								
									
										2
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -177,3 +177,5 @@ Roman Tsiupa | ||||
| Artur Krysiak | ||||
| Jakub Adam Wieczorek | ||||
| Aleksandar Topuzović | ||||
| Nehal Patel | ||||
| Rob van Bekkum | ||||
|   | ||||
							
								
								
									
										307
									
								
								ChangeLog
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										307
									
								
								ChangeLog
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,307 @@ | ||||
| version 2016.08.06 | ||||
|  | ||||
| Core | ||||
| * Add support for JSON-LD root list entries (#10203) | ||||
| * Improve unified_timestamp | ||||
| * Lower preference of RTSP formats in generic sorting | ||||
| + Add support for multiple properties in _og_search_property | ||||
| * Improve password hiding from verbose output | ||||
|  | ||||
| Extractors | ||||
| + [adultswim] Add support for trailers (#10235) | ||||
| * [archiveorg] Improve extraction (#10219) | ||||
| + [jwplatform] Add support for playlists | ||||
| + [jwplatform] Add support for relative URLs | ||||
| * [jwplatform] Improve audio detection | ||||
| + [tvplay] Capture and output native error message | ||||
| + [tvplay] Extract series metadata | ||||
| + [tvplay] Add support for subtitles (#10194) | ||||
| * [tvp] Improve extraction (#7799) | ||||
| * [cbslocal] Fix timestamp parsing (#10213) | ||||
| + [naver] Add support for subtitles (#8096) | ||||
| * [naver] Improve extraction | ||||
| * [condenast] Improve extraction | ||||
| * [engadget] Relax URL regular expression | ||||
| * [5min] Fix extraction | ||||
| + [nationalgeographic] Add support for Episode Guide | ||||
| + [kaltura] Add support for subtitles | ||||
| * [kaltura] Optimize network requests | ||||
| + [vodplatform] Add extractor for vod-platform.net | ||||
| - [gamekings] Remove extractor | ||||
| * [limelight] Extract HTTP formats | ||||
| * [ntvru] Fix extraction | ||||
| + [comedycentral] Re-add :tds and :thedailyshow shortnames | ||||
|  | ||||
|  | ||||
| version 2016.08.01 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - [yandexmusic:track] Adapt to changes in track location JSON (#10193) | ||||
| - [bloomberg] Support another form of player (#10187) | ||||
| - [limelight] Skip DRM protected videos | ||||
| - [safari] Relax regular expressions for URL matching (#10202) | ||||
| - [cwtv] Add support for cwtvpr.com (#10196) | ||||
|  | ||||
|  | ||||
| version 2016.07.30 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - [twitch:clips] Sort formats | ||||
| - [tv2] Use m3u8_native | ||||
| - [tv2:article] Fix video detection (#10188) | ||||
| - rtve (#10076) | ||||
| - [dailymotion:playlist] Optimize download archive processing (#10180) | ||||
|  | ||||
|  | ||||
| version 2016.07.28 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - shared (#10170) | ||||
| - soundcloud (#10179) | ||||
| - twitch (#9767) | ||||
|  | ||||
|  | ||||
| version 2016.07.26.2 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - smotri | ||||
| - camdemy | ||||
| - mtv | ||||
| - comedycentral | ||||
| - cmt | ||||
| - cbc | ||||
| - mgtv | ||||
| - orf | ||||
|  | ||||
|  | ||||
| version 2016.07.24 | ||||
|  | ||||
| New extractors | ||||
| - arkena (#8682) | ||||
| - lcp (#8682) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - facebook (#10151) | ||||
| - dailymail | ||||
| - telegraaf | ||||
| - dcn | ||||
| - onet | ||||
| - tvp | ||||
|  | ||||
| Miscellaneous | ||||
| - Support $Time$ in DASH manifests | ||||
|  | ||||
|  | ||||
| version 2016.07.22 | ||||
|  | ||||
| New extractors | ||||
| - odatv (#9285) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - bbc | ||||
| - youjizz (#10131) | ||||
| - youtube (#10140) | ||||
| - pornhub (#10138) | ||||
| - eporner (#10139) | ||||
|  | ||||
|  | ||||
| version 2016.07.17 | ||||
|  | ||||
| New extractors | ||||
| - nintendo (#9986) | ||||
| - streamable (#9122) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - ard (#10095) | ||||
| - mtv | ||||
| - comedycentral (#10101) | ||||
| - viki (#10098) | ||||
| - spike (#10106) | ||||
|  | ||||
| Miscellaneous | ||||
| - Improved twitter player detection (#10090) | ||||
|  | ||||
|  | ||||
| version 2016.07.16 | ||||
|  | ||||
| New extractors | ||||
| - ninenow (#5181) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - rtve (#10076) | ||||
| - brightcove | ||||
| - 3qsdn | ||||
| - syfy (#9087, #3820, #2388) | ||||
| - youtube (#10083) | ||||
|  | ||||
| Miscellaneous | ||||
| - Fix subtitle embedding for video-only and audio-only files (#10081) | ||||
|  | ||||
|  | ||||
| version 2016.07.13 | ||||
|  | ||||
| New extractors | ||||
| - rudo | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - biobiochiletv | ||||
| - tvplay | ||||
| - dbtv | ||||
| - brightcove | ||||
| - tmz | ||||
| - youtube (#10059) | ||||
| - shahid (#10062) | ||||
| - vk | ||||
| - ellentv (#10067) | ||||
|  | ||||
|  | ||||
| version 2016.07.11 | ||||
|  | ||||
| New extractors | ||||
| - roosterteeth (#9864) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - miomio (#9605) | ||||
| - vuclip | ||||
| - youtube | ||||
| - vidzi (#10058) | ||||
|  | ||||
|  | ||||
| version 2016.07.09.2 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - vimeo (#1638) | ||||
| - facebook (#10048) | ||||
| - lynda (#10047) | ||||
| - animeondemand | ||||
|  | ||||
| Fixed/improved features | ||||
| - Embedding subtitles no longer throws an error with problematic inputs (#9063) | ||||
|  | ||||
|  | ||||
| version 2016.07.09.1 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - youtube | ||||
| - ard | ||||
| - srmediatek (#9373) | ||||
|  | ||||
|  | ||||
| version 2016.07.09 | ||||
|  | ||||
| New extractors | ||||
| - Flipagram (#9898) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - telecinco | ||||
| - toutv | ||||
| - radiocanada | ||||
| - tweakers (#9516) | ||||
| - lynda | ||||
| - nick (#7542) | ||||
| - polskieradio (#10028) | ||||
| - le | ||||
| - facebook (#9851) | ||||
| - mgtv | ||||
| - animeondemand (#10031) | ||||
|  | ||||
| Fixed/improved features | ||||
| - `--postprocessor-args` and `--downloader-args` now accept non-ASCII inputs | ||||
|   on non-Windows systems | ||||
|  | ||||
|  | ||||
| version 2016.07.07 | ||||
|  | ||||
| New extractors | ||||
| - kamcord (#10001) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - spiegel (#10018) | ||||
| - metacafe (#8539, #3253) | ||||
| - onet (#9950) | ||||
| - francetv (#9955) | ||||
| - brightcove (#9965) | ||||
| - daum (#9972) | ||||
|  | ||||
|  | ||||
| version 2016.07.06 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - youtube (#10007, #10009) | ||||
| - xuite | ||||
| - stitcher | ||||
| - spiegel | ||||
| - slideshare | ||||
| - sandia | ||||
| - rtvnh | ||||
| - prosiebensat1 | ||||
| - onionstudios | ||||
|  | ||||
|  | ||||
| version 2016.07.05 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - brightcove | ||||
| - yahoo (#9995) | ||||
| - pornhub (#9997) | ||||
| - iqiyi | ||||
| - kaltura (#5557) | ||||
| - la7 | ||||
| Changed features | ||||
| - Rename --cn-verification-proxy to --geo-verification-proxy | ||||
| Miscellaneous | ||||
| - Add script for displaying downloads statistics | ||||
|  | ||||
|  | ||||
| version 2016.07.03.1 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - theplatform | ||||
| - aenetworks | ||||
| - nationalgeographic | ||||
| - hrti (#9482) | ||||
| - facebook (#5701) | ||||
| - buzzfeed (#5701) | ||||
| - rai (#8617, #9157, #9232, #8552, #8551) | ||||
| - nationalgeographic (#9991) | ||||
| - iqiyi | ||||
|  | ||||
|  | ||||
| version 2016.07.03 | ||||
|  | ||||
| New extractors | ||||
| - hrti (#9482) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - vk (#9981) | ||||
| - facebook (#9938) | ||||
| - xtube (#9953, #9961) | ||||
|  | ||||
|  | ||||
| version 2016.07.02 | ||||
|  | ||||
| New extractors | ||||
| - fusion (#9958) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - twitch (#9975) | ||||
| - vine (#9970) | ||||
| - periscope (#9967) | ||||
| - pornhub (#8696) | ||||
|  | ||||
|  | ||||
| version 2016.07.01 | ||||
|  | ||||
| New extractors | ||||
| - 9c9media | ||||
| - ctvnews (#2156) | ||||
| - ctv (#4077) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - rds | ||||
| - meta (#8789) | ||||
| - pornhub (#9964) | ||||
| - sixplay (#2183) | ||||
|  | ||||
| New features | ||||
| - Accept quoted strings across multiple lines (#9940) | ||||
							
								
								
									
										4
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								Makefile
									
									
									
									
									
								
							| @@ -94,7 +94,7 @@ _EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'la | ||||
| youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) | ||||
| 	$(PYTHON) devscripts/make_lazy_extractors.py $@ | ||||
|  | ||||
| youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish | ||||
| youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog | ||||
| 	@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \ | ||||
| 		--exclude '*.DS_Store' \ | ||||
| 		--exclude '*.kate-swp' \ | ||||
| @@ -107,7 +107,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- | ||||
| 		--exclude 'docs/_build' \ | ||||
| 		-- \ | ||||
| 		bin devscripts test youtube_dl docs \ | ||||
| 		LICENSE README.md README.txt \ | ||||
| 		ChangeLog LICENSE README.md README.txt \ | ||||
| 		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \ | ||||
| 		youtube-dl.zsh youtube-dl.fish setup.py \ | ||||
| 		youtube-dl | ||||
|   | ||||
| @@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms | ||||
|  | ||||
| To install it right away for all UNIX users (Linux, OS X, etc.), type: | ||||
|  | ||||
|     sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl | ||||
|     sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl | ||||
|     sudo chmod a+rx /usr/local/bin/youtube-dl | ||||
|  | ||||
| If you do not have curl, you can alternatively use a recent wget: | ||||
|   | ||||
| @@ -15,13 +15,9 @@ data = urllib.request.urlopen(URL).read() | ||||
| with open('download.html.in', 'r', encoding='utf-8') as tmplf: | ||||
|     template = tmplf.read() | ||||
|  | ||||
| md5sum = hashlib.md5(data).hexdigest() | ||||
| sha1sum = hashlib.sha1(data).hexdigest() | ||||
| sha256sum = hashlib.sha256(data).hexdigest() | ||||
| template = template.replace('@PROGRAM_VERSION@', version) | ||||
| template = template.replace('@PROGRAM_URL@', URL) | ||||
| template = template.replace('@PROGRAM_MD5SUM@', md5sum) | ||||
| template = template.replace('@PROGRAM_SHA1SUM@', sha1sum) | ||||
| template = template.replace('@PROGRAM_SHA256SUM@', sha256sum) | ||||
| template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0]) | ||||
| template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1]) | ||||
|   | ||||
| @@ -1,8 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| mkdir -p tmp && cd tmp | ||||
| wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz | ||||
| tar zxvf srelay-0.4.8b6.tar.gz | ||||
| cd srelay-0.4.8b6 | ||||
| ./configure | ||||
| make | ||||
| @@ -71,9 +71,12 @@ fi | ||||
| /bin/echo -e "\n### Changing version in version.py..." | ||||
| sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py | ||||
|  | ||||
| /bin/echo -e "\n### Changing version in ChangeLog..." | ||||
| sed -i "s/<unreleased>/$version/" ChangeLog | ||||
|  | ||||
| /bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." | ||||
| make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites | ||||
| git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py | ||||
| git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py ChangeLog | ||||
| git commit $gpg_sign_commits -m "release $version" | ||||
|  | ||||
| /bin/echo -e "\n### Now tagging, signing and pushing..." | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| #!/usr/bin/env python | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| @@ -21,21 +22,26 @@ def format_size(bytes): | ||||
|  | ||||
| total_bytes = 0 | ||||
|  | ||||
| releases = json.loads(compat_urllib_request.urlopen( | ||||
|     'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8')) | ||||
| for page in itertools.count(1): | ||||
|     releases = json.loads(compat_urllib_request.urlopen( | ||||
|         'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page | ||||
|     ).read().decode('utf-8')) | ||||
|  | ||||
| for release in releases: | ||||
|     compat_print(release['name']) | ||||
|     for asset in release['assets']: | ||||
|         asset_name = asset['name'] | ||||
|         total_bytes += asset['download_count'] * asset['size'] | ||||
|         if all(not re.match(p, asset_name) for p in ( | ||||
|                 r'^youtube-dl$', | ||||
|                 r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$', | ||||
|                 r'^youtube-dl\.exe$')): | ||||
|             continue | ||||
|         compat_print( | ||||
|             ' %s size: %s downloads: %d' | ||||
|             % (asset_name, format_size(asset['size']), asset['download_count'])) | ||||
|     if not releases: | ||||
|         break | ||||
|  | ||||
|     for release in releases: | ||||
|         compat_print(release['name']) | ||||
|         for asset in release['assets']: | ||||
|             asset_name = asset['name'] | ||||
|             total_bytes += asset['download_count'] * asset['size'] | ||||
|             if all(not re.match(p, asset_name) for p in ( | ||||
|                     r'^youtube-dl$', | ||||
|                     r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$', | ||||
|                     r'^youtube-dl\.exe$')): | ||||
|                 continue | ||||
|             compat_print( | ||||
|                 ' %s size: %s downloads: %d' | ||||
|                 % (asset_name, format_size(asset['size']), asset['download_count'])) | ||||
|  | ||||
| compat_print('total downloads traffic: %s' % format_size(total_bytes)) | ||||
|   | ||||
| @@ -14,6 +14,7 @@ | ||||
|  - **8tracks** | ||||
|  - **91porn** | ||||
|  - **9gag** | ||||
|  - **9now.com.au** | ||||
|  - **abc.net.au** | ||||
|  - **Abc7News** | ||||
|  - **abcnews** | ||||
| @@ -45,6 +46,7 @@ | ||||
|  - **archive.org**: archive.org videos | ||||
|  - **ARD** | ||||
|  - **ARD:mediathek** | ||||
|  - **Arkena** | ||||
|  - **arte.tv** | ||||
|  - **arte.tv:+7** | ||||
|  - **arte.tv:cinema** | ||||
| @@ -140,7 +142,8 @@ | ||||
|  - **CollegeRama** | ||||
|  - **ComCarCoff** | ||||
|  - **ComedyCentral** | ||||
|  - **ComedyCentralShows**: The Daily Show / The Colbert Report | ||||
|  - **ComedyCentralShortname** | ||||
|  - **ComedyCentralTV** | ||||
|  - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED | ||||
|  - **Coub** | ||||
|  - **Cracked** | ||||
| @@ -245,7 +248,6 @@ | ||||
|  - **FunnyOrDie** | ||||
|  - **Fusion** | ||||
|  - **GameInformer** | ||||
|  - **Gamekings** | ||||
|  - **GameOne** | ||||
|  - **gameone:playlist** | ||||
|  - **Gamersyde** | ||||
| @@ -335,6 +337,8 @@ | ||||
|  - **kuwo:song**: 酷我音乐 | ||||
|  - **la7.it** | ||||
|  - **Laola1Tv** | ||||
|  - **Lcp** | ||||
|  - **LcpPlay** | ||||
|  - **Le**: 乐视网 | ||||
|  - **Learnr** | ||||
|  - **Lecture2Go** | ||||
| @@ -396,7 +400,6 @@ | ||||
|  - **MSN** | ||||
|  - **MTV** | ||||
|  - **mtv.de** | ||||
|  - **mtviggy.com** | ||||
|  - **mtvservices:embedded** | ||||
|  - **MuenchenTV**: münchen.tv | ||||
|  - **MusicPlayOn** | ||||
| @@ -412,7 +415,8 @@ | ||||
|  - **MyVidster** | ||||
|  - **n-tv.de** | ||||
|  - **natgeo** | ||||
|  - **natgeo:channel** | ||||
|  - **natgeo:episodeguide** | ||||
|  - **natgeo:video** | ||||
|  - **Naver** | ||||
|  - **NBA** | ||||
|  - **NBC** | ||||
| @@ -436,7 +440,6 @@ | ||||
|  - **Newstube** | ||||
|  - **NextMedia**: 蘋果日報 | ||||
|  - **NextMediaActionNews**: 蘋果日報 - 動新聞 | ||||
|  - **nextmovie.com** | ||||
|  - **nfb**: National Film Board of Canada | ||||
|  - **nfl.com** | ||||
|  - **nhl.com** | ||||
| @@ -448,6 +451,7 @@ | ||||
|  - **niconico**: ニコニコ動画 | ||||
|  - **NiconicoPlaylist** | ||||
|  - **NineCNineMedia** | ||||
|  - **Nintendo** | ||||
|  - **njoy**: N-JOY | ||||
|  - **njoy:embed** | ||||
|  - **Noco** | ||||
| @@ -475,6 +479,7 @@ | ||||
|  - **NYTimes** | ||||
|  - **NYTimesArticle** | ||||
|  - **ocw.mit.edu** | ||||
|  - **OdaTV** | ||||
|  - **Odnoklassniki** | ||||
|  - **OktoberfestTV** | ||||
|  - **on.aol.com** | ||||
| @@ -554,6 +559,7 @@ | ||||
|  - **RICE** | ||||
|  - **RingTV** | ||||
|  - **RockstarGames** | ||||
|  - **RoosterTeeth** | ||||
|  - **RottenTomatoes** | ||||
|  - **Roxwel** | ||||
|  - **RTBF** | ||||
| @@ -566,7 +572,9 @@ | ||||
|  - **rtve.es:alacarta**: RTVE a la carta | ||||
|  - **rtve.es:infantil**: RTVE infantil | ||||
|  - **rtve.es:live**: RTVE.es live streams | ||||
|  - **rtve.es:television** | ||||
|  - **RTVNH** | ||||
|  - **Rudo** | ||||
|  - **RUHD** | ||||
|  - **RulePorn** | ||||
|  - **rutube**: Rutube videos | ||||
| @@ -641,6 +649,7 @@ | ||||
|  - **stanfordoc**: Stanford Open ClassRoom | ||||
|  - **Steam** | ||||
|  - **Stitcher** | ||||
|  - **Streamable** | ||||
|  - **streamcloud.eu** | ||||
|  - **StreamCZ** | ||||
|  - **StreetVoice** | ||||
| @@ -688,6 +697,7 @@ | ||||
|  - **TNAFlix** | ||||
|  - **TNAFlixNetworkEmbed** | ||||
|  - **toggle** | ||||
|  - **Tosh**: Tosh.0 | ||||
|  - **tou.tv** | ||||
|  - **Toypics**: Toypics user profile | ||||
|  - **ToypicsUser**: Toypics user profile | ||||
| @@ -717,6 +727,7 @@ | ||||
|  - **tvigle**: Интернет-телевидение Tvigle.ru | ||||
|  - **tvland.com** | ||||
|  - **tvp**: Telewizja Polska | ||||
|  - **tvp:embed**: Telewizja Polska | ||||
|  - **tvp:series** | ||||
|  - **TVPlay**: TV3Play and related services | ||||
|  - **Tweakers** | ||||
| @@ -793,8 +804,10 @@ | ||||
|  - **vine:user** | ||||
|  - **vk**: VK | ||||
|  - **vk:uservideos**: VK - User's Videos | ||||
|  - **vk:wallpost** | ||||
|  - **vlive** | ||||
|  - **Vodlocker** | ||||
|  - **VODPlatform** | ||||
|  - **VoiceRepublic** | ||||
|  - **VoxMedia** | ||||
|  - **Vporn** | ||||
|   | ||||
| @@ -48,6 +48,9 @@ class TestInfoExtractor(unittest.TestCase): | ||||
|         self.assertEqual(ie._og_search_property('foobar', html), 'Foo') | ||||
|         self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') | ||||
|         self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') | ||||
|         self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') | ||||
|         self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) | ||||
|         self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) | ||||
|  | ||||
|     def test_html_search_meta(self): | ||||
|         ie = self.ie | ||||
|   | ||||
| @@ -335,6 +335,40 @@ class TestFormatSelection(unittest.TestCase): | ||||
|             downloaded = ydl.downloaded_info_dicts[0] | ||||
|             self.assertEqual(downloaded['format_id'], f1['format_id']) | ||||
|  | ||||
|     def test_audio_only_extractor_format_selection(self): | ||||
|         # For extractors with incomplete formats (all formats are audio-only or | ||||
|         # video-only) best and worst should fallback to corresponding best/worst | ||||
|         # video-only or audio-only formats (as per | ||||
|         # https://github.com/rg3/youtube-dl/pull/5556) | ||||
|         formats = [ | ||||
|             {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, | ||||
|             {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, | ||||
|         ] | ||||
|         info_dict = _make_result(formats) | ||||
|  | ||||
|         ydl = YDL({'format': 'best'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'high') | ||||
|  | ||||
|         ydl = YDL({'format': 'worst'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'low') | ||||
|  | ||||
|     def test_format_not_available(self): | ||||
|         formats = [ | ||||
|             {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL}, | ||||
|             {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, | ||||
|         ] | ||||
|         info_dict = _make_result(formats) | ||||
|  | ||||
|         # This must fail since complete video-audio format does not match filter | ||||
|         # and extractor does not provide incomplete only formats (i.e. only | ||||
|         # video-only or audio-only). | ||||
|         ydl = YDL({'format': 'best[height>360]'}) | ||||
|         self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) | ||||
|  | ||||
|     def test_invalid_format_specs(self): | ||||
|         def assert_syntax_error(format_spec): | ||||
|             ydl = YDL({'format': format_spec}) | ||||
|   | ||||
| @@ -101,8 +101,6 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         self.assertMatch(':ytsubs', ['youtube:subscriptions']) | ||||
|         self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) | ||||
|         self.assertMatch(':ythistory', ['youtube:history']) | ||||
|         self.assertMatch(':thedailyshow', ['ComedyCentralShows']) | ||||
|         self.assertMatch(':tds', ['ComedyCentralShows']) | ||||
|  | ||||
|     def test_vimeo_matching(self): | ||||
|         self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel']) | ||||
|   | ||||
| @@ -81,6 +81,7 @@ from youtube_dl.utils import ( | ||||
|     cli_option, | ||||
|     cli_valueless_option, | ||||
|     cli_bool_option, | ||||
|     parse_codecs, | ||||
| ) | ||||
| from youtube_dl.compat import ( | ||||
|     compat_chr, | ||||
| @@ -307,6 +308,7 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(unified_timestamp('25-09-2014'), 1411603200) | ||||
|         self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) | ||||
|         self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) | ||||
|         self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) | ||||
|  | ||||
|     def test_determine_ext(self): | ||||
|         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') | ||||
| @@ -608,6 +610,29 @@ class TestUtil(unittest.TestCase): | ||||
|             limit_length('foo bar baz asd', 12).startswith('foo bar')) | ||||
|         self.assertTrue('...' in limit_length('foo bar baz asd', 12)) | ||||
|  | ||||
|     def test_parse_codecs(self): | ||||
|         self.assertEqual(parse_codecs(''), {}) | ||||
|         self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { | ||||
|             'vcodec': 'avc1.77.30', | ||||
|             'acodec': 'mp4a.40.2', | ||||
|         }) | ||||
|         self.assertEqual(parse_codecs('mp4a.40.2'), { | ||||
|             'vcodec': 'none', | ||||
|             'acodec': 'mp4a.40.2', | ||||
|         }) | ||||
|         self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), { | ||||
|             'vcodec': 'avc1.42001e', | ||||
|             'acodec': 'mp4a.40.5', | ||||
|         }) | ||||
|         self.assertEqual(parse_codecs('avc3.640028'), { | ||||
|             'vcodec': 'avc3.640028', | ||||
|             'acodec': 'none', | ||||
|         }) | ||||
|         self.assertEqual(parse_codecs(', h264,,newcodec,aac'), { | ||||
|             'vcodec': 'h264', | ||||
|             'acodec': 'aac', | ||||
|         }) | ||||
|  | ||||
|     def test_escape_rfc3986(self): | ||||
|         reserved = "!*'();:@&=+$,/?#[]" | ||||
|         unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~' | ||||
|   | ||||
							
								
								
									
										70
									
								
								test/test_verbose_output.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								test/test_verbose_output.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,70 @@ | ||||
| #!/usr/bin/env python | ||||
| # coding: utf-8 | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import unittest | ||||
|  | ||||
| import sys | ||||
| import os | ||||
| import subprocess | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||||
|  | ||||
|  | ||||
| class TestVerboseOutput(unittest.TestCase): | ||||
|     def test_private_info_arg(self): | ||||
|         outp = subprocess.Popen( | ||||
|             [ | ||||
|                 sys.executable, 'youtube_dl/__main__.py', '-v', | ||||
|                 '--username', 'johnsmith@gmail.com', | ||||
|                 '--password', 'secret', | ||||
|             ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         sout, serr = outp.communicate() | ||||
|         self.assertTrue('--username' in serr) | ||||
|         self.assertTrue('johnsmith' not in serr) | ||||
|         self.assertTrue('--password' in serr) | ||||
|         self.assertTrue('secret' not in serr) | ||||
|  | ||||
|     def test_private_info_shortarg(self): | ||||
|         outp = subprocess.Popen( | ||||
|             [ | ||||
|                 sys.executable, 'youtube_dl/__main__.py', '-v', | ||||
|                 '-u', 'johnsmith@gmail.com', | ||||
|                 '-p', 'secret', | ||||
|             ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         sout, serr = outp.communicate() | ||||
|         self.assertTrue('-u' in serr) | ||||
|         self.assertTrue('johnsmith' not in serr) | ||||
|         self.assertTrue('-p' in serr) | ||||
|         self.assertTrue('secret' not in serr) | ||||
|  | ||||
|     def test_private_info_eq(self): | ||||
|         outp = subprocess.Popen( | ||||
|             [ | ||||
|                 sys.executable, 'youtube_dl/__main__.py', '-v', | ||||
|                 '--username=johnsmith@gmail.com', | ||||
|                 '--password=secret', | ||||
|             ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         sout, serr = outp.communicate() | ||||
|         self.assertTrue('--username' in serr) | ||||
|         self.assertTrue('johnsmith' not in serr) | ||||
|         self.assertTrue('--password' in serr) | ||||
|         self.assertTrue('secret' not in serr) | ||||
|  | ||||
|     def test_private_info_shortarg_eq(self): | ||||
|         outp = subprocess.Popen( | ||||
|             [ | ||||
|                 sys.executable, 'youtube_dl/__main__.py', '-v', | ||||
|                 '-u=johnsmith@gmail.com', | ||||
|                 '-p=secret', | ||||
|             ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         sout, serr = outp.communicate() | ||||
|         self.assertTrue('-u' in serr) | ||||
|         self.assertTrue('johnsmith' not in serr) | ||||
|         self.assertTrue('-p' in serr) | ||||
|         self.assertTrue('secret' not in serr) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals | ||||
|  | ||||
| import collections | ||||
| import contextlib | ||||
| import copy | ||||
| import datetime | ||||
| import errno | ||||
| import fileinput | ||||
| @@ -1051,9 +1052,9 @@ class YoutubeDL(object): | ||||
|             if isinstance(selector, list): | ||||
|                 fs = [_build_selector_function(s) for s in selector] | ||||
|  | ||||
|                 def selector_function(formats): | ||||
|                 def selector_function(ctx): | ||||
|                     for f in fs: | ||||
|                         for format in f(formats): | ||||
|                         for format in f(ctx): | ||||
|                             yield format | ||||
|                 return selector_function | ||||
|             elif selector.type == GROUP: | ||||
| @@ -1061,17 +1062,17 @@ class YoutubeDL(object): | ||||
|             elif selector.type == PICKFIRST: | ||||
|                 fs = [_build_selector_function(s) for s in selector.selector] | ||||
|  | ||||
|                 def selector_function(formats): | ||||
|                 def selector_function(ctx): | ||||
|                     for f in fs: | ||||
|                         picked_formats = list(f(formats)) | ||||
|                         picked_formats = list(f(ctx)) | ||||
|                         if picked_formats: | ||||
|                             return picked_formats | ||||
|                     return [] | ||||
|             elif selector.type == SINGLE: | ||||
|                 format_spec = selector.selector | ||||
|  | ||||
|                 def selector_function(formats): | ||||
|                     formats = list(formats) | ||||
|                 def selector_function(ctx): | ||||
|                     formats = list(ctx['formats']) | ||||
|                     if not formats: | ||||
|                         return | ||||
|                     if format_spec == 'all': | ||||
| @@ -1084,9 +1085,10 @@ class YoutubeDL(object): | ||||
|                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] | ||||
|                         if audiovideo_formats: | ||||
|                             yield audiovideo_formats[format_idx] | ||||
|                         # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format | ||||
|                         elif (all(f.get('acodec') != 'none' for f in formats) or | ||||
|                               all(f.get('vcodec') != 'none' for f in formats)): | ||||
|                         # for extractors with incomplete formats (audio only (soundcloud) | ||||
|                         # or video only (imgur)) we will fall back to the best/worst | ||||
|                         # {video,audio}-only format | ||||
|                         elif ctx['incomplete_formats']: | ||||
|                             yield formats[format_idx] | ||||
|                     elif format_spec == 'bestaudio': | ||||
|                         audio_formats = [ | ||||
| @@ -1160,17 +1162,18 @@ class YoutubeDL(object): | ||||
|                     } | ||||
|                 video_selector, audio_selector = map(_build_selector_function, selector.selector) | ||||
|  | ||||
|                 def selector_function(formats): | ||||
|                     formats = list(formats) | ||||
|                     for pair in itertools.product(video_selector(formats), audio_selector(formats)): | ||||
|                 def selector_function(ctx): | ||||
|                     for pair in itertools.product( | ||||
|                             video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))): | ||||
|                         yield _merge(pair) | ||||
|  | ||||
|             filters = [self._build_format_filter(f) for f in selector.filters] | ||||
|  | ||||
|             def final_selector(formats): | ||||
|             def final_selector(ctx): | ||||
|                 ctx_copy = copy.deepcopy(ctx) | ||||
|                 for _filter in filters: | ||||
|                     formats = list(filter(_filter, formats)) | ||||
|                 return selector_function(formats) | ||||
|                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats'])) | ||||
|                 return selector_function(ctx_copy) | ||||
|             return final_selector | ||||
|  | ||||
|         stream = io.BytesIO(format_spec.encode('utf-8')) | ||||
| @@ -1377,7 +1380,34 @@ class YoutubeDL(object): | ||||
|             req_format_list.append('best') | ||||
|             req_format = '/'.join(req_format_list) | ||||
|         format_selector = self.build_format_selector(req_format) | ||||
|         formats_to_download = list(format_selector(formats)) | ||||
|  | ||||
|         # During format selection we may need access to the original format | ||||
|         # set in order to calculate some metrics or do some processing. | ||||
|         # For now we need to be able to guess whether the original formats provided | ||||
|         # by the extractor are incomplete or not (i.e. whether the extractor provides | ||||
|         # only video-only or audio-only formats) for proper format selection for | ||||
|         # extractors with such incomplete formats (see | ||||
|         # https://github.com/rg3/youtube-dl/pull/5556). | ||||
|         # Since formats may be filtered during format selection and may no longer | ||||
|         # match the original formats, the results may be incorrect. Thus the original | ||||
|         # formats or pre-calculated metrics should be passed to the format selection | ||||
|         # routines as well. | ||||
|         # We will pass a context object containing all necessary additional data | ||||
|         # instead of just formats. | ||||
|         # This fixes an incorrect format selection issue (see | ||||
|         # https://github.com/rg3/youtube-dl/issues/10083). | ||||
|         incomplete_formats = ( | ||||
|             # All formats are video-only or | ||||
|             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or | ||||
|             # all formats are audio-only | ||||
|             all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) | ||||
|  | ||||
|         ctx = { | ||||
|             'formats': formats, | ||||
|             'incomplete_formats': incomplete_formats, | ||||
|         } | ||||
|  | ||||
|         formats_to_download = list(format_selector(ctx)) | ||||
|         if not formats_to_download: | ||||
|             raise ExtractorError('requested format not available', | ||||
|                                  expected=True) | ||||
|   | ||||
| @@ -83,6 +83,20 @@ class AdultSwimIE(InfoExtractor): | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         # heroMetadata.trailer | ||||
|         'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', | ||||
|         'info_dict': { | ||||
|             'id': 'I0LQFQkaSUaFp8PnAWHhoQ', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Decker - Inside Decker: A New Hero', | ||||
|             'description': 'md5:c916df071d425d62d70c86d4399d3ee0', | ||||
|             'duration': 249.008, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -133,20 +147,26 @@ class AdultSwimIE(InfoExtractor): | ||||
|             if video_info is None: | ||||
|                 if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path: | ||||
|                     video_info = bootstrapped_data['slugged_video'] | ||||
|                 else: | ||||
|                     raise ExtractorError('Unable to find video info') | ||||
|             if not video_info: | ||||
|                 video_info = bootstrapped_data.get('heroMetadata', {}).get('trailer').get('video') | ||||
|             if not video_info: | ||||
|                 raise ExtractorError('Unable to find video info') | ||||
|  | ||||
|             show = bootstrapped_data['show'] | ||||
|             show_title = show['title'] | ||||
|             stream = video_info.get('stream') | ||||
|             clips = [stream] if stream else video_info.get('clips') | ||||
|             if not clips: | ||||
|             if stream and stream.get('videoPlaybackID'): | ||||
|                 segment_ids = [stream['videoPlaybackID']] | ||||
|             elif video_info.get('clips'): | ||||
|                 segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] | ||||
|             elif video_info.get('videoPlaybackID'): | ||||
|                 segment_ids = [video_info['videoPlaybackID']] | ||||
|             else: | ||||
|                 raise ExtractorError( | ||||
|                     'This video is only available via cable service provider subscription that' | ||||
|                     ' is not currently supported. You may want to use --cookies.' | ||||
|                     if video_info.get('auth') is True else 'Unable to find stream or clips', | ||||
|                     expected=True) | ||||
|             segment_ids = [clip['videoPlaybackID'] for clip in clips] | ||||
|  | ||||
|         episode_id = video_info['id'] | ||||
|         episode_title = video_info['title'] | ||||
|   | ||||
| @@ -203,7 +203,7 @@ class AnimeOnDemandIE(InfoExtractor): | ||||
|                 'formats': formats, | ||||
|             } | ||||
|  | ||||
|         def extract_entries(html, video_id, common_info, num): | ||||
|         def extract_entries(html, video_id, common_info, num=None): | ||||
|             info = extract_info(html, video_id, num) | ||||
|  | ||||
|             if info['formats']: | ||||
|   | ||||
| @@ -1,67 +1,65 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
| from .jwplatform import JWPlatformBaseIE | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     clean_html, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ArchiveOrgIE(InfoExtractor): | ||||
| class ArchiveOrgIE(JWPlatformBaseIE): | ||||
|     IE_NAME = 'archive.org' | ||||
|     IE_DESC = 'archive.org videos' | ||||
|     _VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect', | ||||
|         'md5': '8af1d4cf447933ed3c7f4871162602db', | ||||
|         'info_dict': { | ||||
|             'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect', | ||||
|             'ext': 'ogv', | ||||
|             'ext': 'ogg', | ||||
|             'title': '1968 Demo - FJCC Conference Presentation Reel #1', | ||||
|             'description': 'md5:1780b464abaca9991d8968c877bb53ed', | ||||
|             'description': 'md5:da45c349df039f1cc8075268eb1b5c25', | ||||
|             'upload_date': '19681210', | ||||
|             'uploader': 'SRI International' | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://archive.org/details/Cops1922', | ||||
|         'md5': '18f2a19e6d89af8425671da1cf3d4e04', | ||||
|         'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba', | ||||
|         'info_dict': { | ||||
|             'id': 'Cops1922', | ||||
|             'ext': 'ogv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Buster Keaton\'s "Cops" (1922)', | ||||
|             'description': 'md5:70f72ee70882f713d4578725461ffcc3', | ||||
|             'description': 'md5:b4544662605877edd99df22f9620d858', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage( | ||||
|             'http://archive.org/embed/' + video_id, video_id) | ||||
|         jwplayer_playlist = self._parse_json(self._search_regex( | ||||
|             r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\);", | ||||
|             webpage, 'jwplayer playlist'), video_id) | ||||
|         info = self._parse_jwplayer_data( | ||||
|             {'playlist': jwplayer_playlist}, video_id, base_url=url) | ||||
|  | ||||
|         json_url = url + ('&' if '?' in url else '?') + 'output=json' | ||||
|         data = self._download_json(json_url, video_id) | ||||
|         def get_optional(metadata, field): | ||||
|             return metadata.get(field, [None])[0] | ||||
|  | ||||
|         def get_optional(data_dict, field): | ||||
|             return data_dict['metadata'].get(field, [None])[0] | ||||
|  | ||||
|         title = get_optional(data, 'title') | ||||
|         description = get_optional(data, 'description') | ||||
|         uploader = get_optional(data, 'creator') | ||||
|         upload_date = unified_strdate(get_optional(data, 'date')) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'format': fdata['format'], | ||||
|                 'url': 'http://' + data['server'] + data['dir'] + fn, | ||||
|                 'file_size': int(fdata['size']), | ||||
|             } | ||||
|             for fn, fdata in data['files'].items() | ||||
|             if 'Video' in fdata['format']] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|             'uploader': uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'thumbnail': data.get('misc', {}).get('image'), | ||||
|         } | ||||
|         metadata = self._download_json( | ||||
|             'http://archive.org/details/' + video_id, video_id, query={ | ||||
|                 'output': 'json', | ||||
|             })['metadata'] | ||||
|         info.update({ | ||||
|             'title': get_optional(metadata, 'title') or info.get('title'), | ||||
|             'description': clean_html(get_optional(metadata, 'description')), | ||||
|         }) | ||||
|         if info.get('_type') != 'playlist': | ||||
|             info.update({ | ||||
|                 'uploader': get_optional(metadata, 'creator'), | ||||
|                 'upload_date': unified_strdate(get_optional(metadata, 'date')), | ||||
|             }) | ||||
|         return info | ||||
|   | ||||
| @@ -20,7 +20,7 @@ from ..compat import compat_etree_fromstring | ||||
|  | ||||
| class ARDMediathekIE(InfoExtractor): | ||||
|     IE_NAME = 'ARD:mediathek' | ||||
|     _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' | ||||
|     _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114', | ||||
| @@ -62,6 +62,18 @@ class ARDMediathekIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # audio | ||||
|         'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158', | ||||
|         'md5': '4e8f00631aac0395fee17368ac0e9867', | ||||
|         'info_dict': { | ||||
|             'id': '30796318', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Vor dem Fest', | ||||
|             'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb', | ||||
|             'duration': 3287, | ||||
|         }, | ||||
|         'skip': 'Video is no longer available', | ||||
|     }] | ||||
|  | ||||
|     def _extract_media_info(self, media_info_url, webpage, video_id): | ||||
|   | ||||
							
								
								
									
										115
									
								
								youtube_dl/extractor/arkena.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								youtube_dl/extractor/arkena.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,115 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     mimetype2ext, | ||||
|     parse_iso8601, | ||||
|     strip_jsonp, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ArkenaIE(InfoExtractor): | ||||
|     # Extractor for play.arkena.com player URLs (both the /embed/ and the | ||||
|     # /config/ forms); captures the media id and the numeric account id. | ||||
|     _VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411', | ||||
|         'md5': 'b96f2f71b359a8ecd05ce4e1daa72365', | ||||
|         'info_dict': { | ||||
|             'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Big Buck Bunny', | ||||
|             'description': 'Royalty free test video', | ||||
|             'timestamp': 1432816365, | ||||
|             'upload_date': '20150528', | ||||
|             'is_live': False, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
|         # Return the src of the first Arkena embed iframe found in webpage; | ||||
|         # falls through (returning None) when no such iframe is present. | ||||
|         # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1', | ||||
|             webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Build the info dict (id, title, formats, ...) for a matched URL. | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         account_id = mobj.group('account_id') | ||||
|  | ||||
|         # The v2 config endpoint is always queried, whichever URL form was | ||||
|         # matched; only the media id and account id are taken from the input | ||||
|         # URL (the path segment between them is fixed to 0). The response is | ||||
|         # requested with a dummy callback and unwrapped by strip_jsonp. | ||||
|         playlist = self._download_json( | ||||
|             'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_' | ||||
|             % (video_id, account_id), | ||||
|             video_id, transform_source=strip_jsonp)['Playlist'][0] | ||||
|  | ||||
|         # MediaInfo, Title and MediaFiles are mandatory: a missing key raises | ||||
|         media_info = playlist['MediaInfo'] | ||||
|         title = media_info['Title'] | ||||
|         media_files = playlist['MediaFiles'] | ||||
|  | ||||
|         is_live = False | ||||
|         formats = [] | ||||
|         # media_files is iterated as a mapping of kind name -> list of file | ||||
|         # dicts; the lowercased kind name is used as the format id prefix. | ||||
|         for kind_case, kind_formats in media_files.items(): | ||||
|             kind = kind_case.lower() | ||||
|             for f in kind_formats: | ||||
|                 f_url = f.get('Url') | ||||
|                 if not f_url: | ||||
|                     continue | ||||
|                 # NOTE(review): is_live is reassigned for every file that has | ||||
|                 # a URL, so the value returned below is that of the last such | ||||
|                 # file processed - confirm this is intended. | ||||
|                 is_live = f.get('Live') == 'true' | ||||
|                 # Pick the manifest type from the kind name, the declared | ||||
|                 # MIME type or the URL extension. | ||||
|                 exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None)) | ||||
|                 if kind == 'm3u8' or 'm3u8' in exts: | ||||
|                     # Live files use the 'm3u8' entry protocol, all others | ||||
|                     # use 'm3u8_native' | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         f_url, video_id, 'mp4', | ||||
|                         entry_protocol='m3u8' if is_live else 'm3u8_native', | ||||
|                         m3u8_id=kind, fatal=False, live=is_live)) | ||||
|                 elif kind == 'flash' or 'f4m' in exts: | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         f_url, video_id, f4m_id=kind, fatal=False)) | ||||
|                 elif kind == 'dash' or 'mpd' in exts: | ||||
|                     formats.extend(self._extract_mpd_formats( | ||||
|                         f_url, video_id, mpd_id=kind, fatal=False)) | ||||
|                 elif kind == 'silverlight': | ||||
|                     # TODO: process when ism is supported (see | ||||
|                     # https://github.com/rg3/youtube-dl/issues/8118) | ||||
|                     continue | ||||
|                 else: | ||||
|                     # Anything else is added as a single format with the URL | ||||
|                     # as is; the bitrate is appended to the format id only | ||||
|                     # when it is known. | ||||
|                     tbr = float_or_none(f.get('Bitrate'), 1000) | ||||
|                     formats.append({ | ||||
|                         'url': f_url, | ||||
|                         'format_id': '%s-%d' % (kind, tbr) if tbr else kind, | ||||
|                         'tbr': tbr, | ||||
|                     }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         description = media_info.get('Description') | ||||
|         # Prefer the id reported in MediaInfo, falling back to the one taken | ||||
|         # from the URL | ||||
|         video_id = media_info.get('VideoId') or video_id | ||||
|         timestamp = parse_iso8601(media_info.get('PublishDate')) | ||||
|         # Poster entries without a Url are dropped; a missing Poster list | ||||
|         # results in no thumbnails | ||||
|         thumbnails = [{ | ||||
|             'url': thumbnail['Url'], | ||||
|             'width': int_or_none(thumbnail.get('Size')), | ||||
|         } for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|             'is_live': is_live, | ||||
|             'thumbnails': thumbnails, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -589,7 +589,8 @@ class BBCIE(BBCCoUkIE): | ||||
|         'info_dict': { | ||||
|             'id': '150615_telabyad_kentin_cogu', | ||||
|             'ext': 'mp4', | ||||
|             'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde", | ||||
|             'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi", | ||||
|             'description': 'md5:33a4805a855c9baf7115fcbde57e7025', | ||||
|             'timestamp': 1434397334, | ||||
|             'upload_date': '20150615', | ||||
|         }, | ||||
| @@ -603,6 +604,7 @@ class BBCIE(BBCCoUkIE): | ||||
|             'id': '150619_video_honduras_militares_hospitales_corrupcion_aw', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción', | ||||
|             'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8', | ||||
|             'timestamp': 1434713142, | ||||
|             'upload_date': '20150619', | ||||
|         }, | ||||
| @@ -818,8 +820,20 @@ class BBCIE(BBCCoUkIE): | ||||
|                         # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani) | ||||
|                         playlist = data_playable.get('otherSettings', {}).get('playlist', {}) | ||||
|                         if playlist: | ||||
|                             entries.append(self._extract_from_playlist_sxml( | ||||
|                                 playlist.get('progressiveDownloadUrl'), playlist_id, timestamp)) | ||||
|                             for key in ('progressiveDownload', 'streaming'): | ||||
|                                 playlist_url = playlist.get('%sUrl' % key) | ||||
|                                 if not playlist_url: | ||||
|                                     continue | ||||
|                                 try: | ||||
|                                     entries.append(self._extract_from_playlist_sxml( | ||||
|                                         playlist_url, playlist_id, timestamp)) | ||||
|                                 except Exception as e: | ||||
|                                     # Some playlist URL may fail with 500, at the same time | ||||
|                                     # the other one may work fine (e.g. | ||||
|                                     # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu) | ||||
|                                     if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: | ||||
|                                         continue | ||||
|                                     raise | ||||
|  | ||||
|         if entries: | ||||
|             return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) | ||||
| @@ -998,10 +1012,10 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): | ||||
|  | ||||
| class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): | ||||
|     IE_NAME = 'bbc.co.uk:iplayer:playlist' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX | ||||
|     _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s' | ||||
|     _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v', | ||||
|         'info_dict': { | ||||
|             'id': 'b05rcz9v', | ||||
| @@ -1009,7 +1023,17 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): | ||||
|             'description': 'French thriller serial about a missing teenager.', | ||||
|         }, | ||||
|         'playlist_mincount': 6, | ||||
|     } | ||||
|         'skip': 'This programme is not currently available on BBC iPlayer', | ||||
|     }, { | ||||
|         # Available for over a year unlike 30 days for most other programmes | ||||
|         'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32', | ||||
|         'info_dict': { | ||||
|             'id': 'p02tcc32', | ||||
|             'title': 'Bohemian Icons', | ||||
|             'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', | ||||
|         }, | ||||
|         'playlist_mincount': 10, | ||||
|     }] | ||||
|  | ||||
|     def _extract_title_and_description(self, webpage): | ||||
|         title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False) | ||||
|   | ||||
| @@ -12,7 +12,7 @@ class BigflixIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537', | ||||
|         'md5': 'ec76aa9b1129e2e5b301a474e54fab74', | ||||
|         'md5': 'dc1b4aebb46e3a7077ecc0d9f43f61e3', | ||||
|         'info_dict': { | ||||
|             'id': '16537', | ||||
|             'ext': 'mp4', | ||||
| @@ -26,7 +26,7 @@ class BigflixIE(InfoExtractor): | ||||
|             'id': '16070', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Madarasapatinam', | ||||
|             'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca', | ||||
|             'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b', | ||||
|             'formats': 'mincount:2', | ||||
|         }, | ||||
|         'params': { | ||||
|   | ||||
| @@ -2,11 +2,15 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import remove_end | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     remove_end, | ||||
| ) | ||||
| from .rudo import RudoIE | ||||
|  | ||||
|  | ||||
| class BioBioChileTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml' | ||||
|     _VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml', | ||||
| @@ -18,6 +22,7 @@ class BioBioChileTVIE(InfoExtractor): | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'uploader': 'Fernando Atria', | ||||
|         }, | ||||
|         'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html', | ||||
|     }, { | ||||
|         # different uploader layout | ||||
|         'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml', | ||||
| @@ -32,6 +37,16 @@ class BioBioChileTVIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html', | ||||
|     }, { | ||||
|         'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml', | ||||
|         'info_dict': { | ||||
|             'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos', | ||||
|             'ext': 'mp4', | ||||
|             'uploader': '(none)', | ||||
|             'upload_date': '20160708', | ||||
|             'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', | ||||
|         'only_matching': True, | ||||
| @@ -45,42 +60,22 @@ class BioBioChileTVIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         rudo_url = RudoIE._extract_url(webpage) | ||||
|         if not rudo_url: | ||||
|             raise ExtractorError('No videos found') | ||||
|  | ||||
|         title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV') | ||||
|  | ||||
|         file_url = self._search_regex( | ||||
|             r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1', | ||||
|             webpage, 'file url', group='url') | ||||
|  | ||||
|         base_url = self._search_regex( | ||||
|             r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage, | ||||
|             'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/', | ||||
|             group='url') | ||||
|  | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             '%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4', | ||||
|             entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) | ||||
|         f = { | ||||
|             'url': '%s%s' % (base_url, file_url), | ||||
|             'format_id': 'http', | ||||
|             'protocol': 'http', | ||||
|             'preference': 1, | ||||
|         } | ||||
|         if formats: | ||||
|             f_copy = formats[-1].copy() | ||||
|             f_copy.update(f) | ||||
|             f = f_copy | ||||
|         formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         uploader = self._html_search_regex( | ||||
|             r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>', | ||||
|             r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': rudo_url, | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| @@ -20,6 +21,18 @@ class BloombergIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'format': 'best[format_id^=hds]', | ||||
|         }, | ||||
|     }, { | ||||
|         # video ID in BPlayer(...) | ||||
|         'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/', | ||||
|         'info_dict': { | ||||
|             'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Meet the Real-Life Tech Wizards of Middle Earth', | ||||
|             'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.', | ||||
|         }, | ||||
|         'params': { | ||||
|             'format': 'best[format_id^=hds]', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets', | ||||
|         'only_matching': True, | ||||
| @@ -33,7 +46,11 @@ class BloombergIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         video_id = self._search_regex( | ||||
|             r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1', | ||||
|             webpage, 'id', group='url') | ||||
|             webpage, 'id', group='url', default=None) | ||||
|         if not video_id: | ||||
|             bplayer_data = self._parse_json(self._search_regex( | ||||
|                 r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name) | ||||
|             video_id = bplayer_data['id'] | ||||
|         title = re.sub(': Video$', '', self._og_search_title(webpage)) | ||||
|  | ||||
|         embed_info = self._download_json( | ||||
|   | ||||
| @@ -26,6 +26,8 @@ from ..utils import ( | ||||
|     unescapeHTML, | ||||
|     unsmuggle_url, | ||||
|     update_url_query, | ||||
|     clean_html, | ||||
|     mimetype2ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -544,14 +546,16 @@ class BrightcoveNewIE(InfoExtractor): | ||||
|         formats = [] | ||||
|         for source in json_data.get('sources', []): | ||||
|             container = source.get('container') | ||||
|             source_type = source.get('type') | ||||
|             ext = mimetype2ext(source.get('type')) | ||||
|             src = source.get('src') | ||||
|             if source_type == 'application/x-mpegURL' or container == 'M2TS': | ||||
|             if ext == 'ism': | ||||
|                 continue | ||||
|             elif ext == 'm3u8' or container == 'M2TS': | ||||
|                 if not src: | ||||
|                     continue | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) | ||||
|             elif source_type == 'application/dash+xml': | ||||
|             elif ext == 'mpd': | ||||
|                 if not src: | ||||
|                     continue | ||||
|                 formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False)) | ||||
| @@ -567,7 +571,7 @@ class BrightcoveNewIE(InfoExtractor): | ||||
|                     'tbr': tbr, | ||||
|                     'filesize': int_or_none(source.get('size')), | ||||
|                     'container': container, | ||||
|                     'ext': container.lower(), | ||||
|                     'ext': ext or container.lower(), | ||||
|                 } | ||||
|                 if width == 0 and height == 0: | ||||
|                     f.update({ | ||||
| @@ -620,7 +624,7 @@ class BrightcoveNewIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': json_data.get('description'), | ||||
|             'description': clean_html(json_data.get('description')), | ||||
|             'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), | ||||
|             'duration': float_or_none(json_data.get('duration'), 1000), | ||||
|             'timestamp': parse_iso8601(json_data.get('published_at')), | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -10,8 +9,10 @@ from ..compat import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     clean_html, | ||||
|     parse_duration, | ||||
|     str_to_int, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -26,14 +27,14 @@ class CamdemyIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ch1-1 Introduction, Signals (02-23-2012)', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': '', | ||||
|             'creator': 'ss11spring', | ||||
|             'duration': 1591, | ||||
|             'upload_date': '20130114', | ||||
|             'timestamp': 1358154556, | ||||
|             'view_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         # With non-empty description | ||||
|         # webpage returns "No permission or not login" | ||||
|         'url': 'http://www.camdemy.com/media/13885', | ||||
|         'md5': '4576a3bb2581f86c61044822adbd1249', | ||||
|         'info_dict': { | ||||
| @@ -41,64 +42,71 @@ class CamdemyIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'EverCam + Camdemy QuickStart', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': 'md5:050b62f71ed62928f8a35f1a41e186c9', | ||||
|             'description': 'md5:2a9f989c2b153a2342acee579c6e7db6', | ||||
|             'creator': 'evercam', | ||||
|             'upload_date': '20140620', | ||||
|             'timestamp': 1403271569, | ||||
|             'duration': 318, | ||||
|         } | ||||
|     }, { | ||||
|         # External source | ||||
|         # External source (YouTube) | ||||
|         'url': 'http://www.camdemy.com/media/14842', | ||||
|         'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7', | ||||
|         'info_dict': { | ||||
|             'id': '2vsYQzNIsJo', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Excel 2013 Tutorial - How to add Password Protection', | ||||
|             'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection', | ||||
|             'upload_date': '20130211', | ||||
|             'uploader': 'Hun Kim', | ||||
|             'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection', | ||||
|             'uploader_id': 'hunkimtutorials', | ||||
|             'title': 'Excel 2013 Tutorial - How to add Password Protection', | ||||
|         } | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         page = self._download_webpage(url, video_id) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         src_from = self._html_search_regex( | ||||
|             r"<div class='srcFrom'>Source: <a title='([^']+)'", page, | ||||
|             'external source', default=None) | ||||
|             r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1", | ||||
|             webpage, 'external source', default=None, group='url') | ||||
|         if src_from: | ||||
|             return self.url_result(src_from) | ||||
|  | ||||
|         oembed_obj = self._download_json( | ||||
|             'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) | ||||
|  | ||||
|         title = oembed_obj['title'] | ||||
|         thumb_url = oembed_obj['thumbnail_url'] | ||||
|         video_folder = compat_urlparse.urljoin(thumb_url, 'video/') | ||||
|         file_list_doc = self._download_xml( | ||||
|             compat_urlparse.urljoin(video_folder, 'fileList.xml'), | ||||
|             video_id, 'Filelist XML') | ||||
|             video_id, 'Downloading filelist XML') | ||||
|         file_name = file_list_doc.find('./video/item/fileName').text | ||||
|         video_url = compat_urlparse.urljoin(video_folder, file_name) | ||||
|  | ||||
|         timestamp = parse_iso8601(self._html_search_regex( | ||||
|             r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||
|             page, 'creation time', fatal=False), | ||||
|             delimiter=' ', timezone=datetime.timedelta(hours=8)) | ||||
|         view_count = str_to_int(self._html_search_regex( | ||||
|             r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||
|             page, 'view count', fatal=False)) | ||||
|         # Some URLs return "No permission or not login" in a webpage despite being | ||||
|         # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885) | ||||
|         upload_date = unified_strdate(self._search_regex( | ||||
|             r'>published on ([^<]+)<', webpage, | ||||
|             'upload date', default=None)) | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views', | ||||
|             webpage, 'view count', default=None)) | ||||
|         description = self._html_search_meta( | ||||
|             'description', webpage, default=None) or clean_html( | ||||
|             oembed_obj.get('description')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': oembed_obj['title'], | ||||
|             'title': title, | ||||
|             'thumbnail': thumb_url, | ||||
|             'description': self._html_search_meta('description', page), | ||||
|             'creator': oembed_obj['author_name'], | ||||
|             'duration': oembed_obj['duration'], | ||||
|             'timestamp': timestamp, | ||||
|             'description': description, | ||||
|             'creator': oembed_obj.get('author_name'), | ||||
|             'duration': parse_duration(oembed_obj.get('duration')), | ||||
|             'upload_date': upload_date, | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|  | ||||
|   | ||||
| @@ -4,9 +4,11 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
|     smuggle_url, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -25,8 +27,22 @@ class CBCIE(InfoExtractor): | ||||
|             'upload_date': '20160203', | ||||
|             'uploader': 'CBCC-NEW', | ||||
|         }, | ||||
|         'skip': 'Geo-restricted to Canada', | ||||
|     }, { | ||||
|         # with clipId | ||||
|         # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com | ||||
|         'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4', | ||||
|         'md5': '162adfa070274b144f4fdc3c3b8207db', | ||||
|         'info_dict': { | ||||
|             'id': '2414435309', | ||||
|             'ext': 'mp4', | ||||
|             'title': '22 Minutes Update: What Not To Wear Quebec', | ||||
|             'description': "This week's latest Canadian top political story is What Not To Wear Quebec.", | ||||
|             'upload_date': '20131025', | ||||
|             'uploader': 'CBCC-NEW', | ||||
|             'timestamp': 1382717907, | ||||
|         }, | ||||
|     }, { | ||||
|         # with clipId, feed only available via tpfeed.cbc.ca | ||||
|         'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', | ||||
|         'md5': '0274a90b51a9b4971fe005c63f592f12', | ||||
|         'info_dict': { | ||||
| @@ -64,6 +80,7 @@ class CBCIE(InfoExtractor): | ||||
|                 'uploader': 'CBCC-NEW', | ||||
|             }, | ||||
|         }], | ||||
|         'skip': 'Geo-restricted to Canada', | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
| @@ -81,9 +98,15 @@ class CBCIE(InfoExtractor): | ||||
|             media_id = player_info.get('mediaId') | ||||
|             if not media_id: | ||||
|                 clip_id = player_info['clipId'] | ||||
|                 media_id = self._download_json( | ||||
|                     'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, | ||||
|                     clip_id)['entries'][0]['id'].split('/')[-1] | ||||
|                 feed = self._download_json( | ||||
|                     'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, | ||||
|                     clip_id, fatal=False) | ||||
|                 if feed: | ||||
|                     media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) | ||||
|                 if not media_id: | ||||
|                     media_id = self._download_json( | ||||
|                         'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, | ||||
|                         clip_id)['entries'][0]['id'].split('/')[-1] | ||||
|             return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) | ||||
|         else: | ||||
|             entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)] | ||||
| @@ -104,6 +127,7 @@ class CBCPlayerIE(InfoExtractor): | ||||
|             'upload_date': '20160210', | ||||
|             'uploader': 'CBCC-NEW', | ||||
|         }, | ||||
|         'skip': 'Geo-restricted to Canada', | ||||
|     }, { | ||||
|         # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ | ||||
|         'url': 'http://www.cbc.ca/player/play/2657631896', | ||||
|   | ||||
| @@ -1,12 +1,10 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import calendar | ||||
| import datetime | ||||
|  | ||||
| from .anvato import AnvatoIE | ||||
| from .sendtonews import SendtoNewsIE | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import unified_timestamp | ||||
|  | ||||
|  | ||||
| class CBSLocalIE(AnvatoIE): | ||||
| @@ -71,10 +69,7 @@ class CBSLocalIE(AnvatoIE): | ||||
|  | ||||
|         time_str = self._html_search_regex( | ||||
|             r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) | ||||
|         timestamp = None | ||||
|         if time_str: | ||||
|             timestamp = calendar.timegm(datetime.datetime.strptime( | ||||
|                 time_str, '%b %d, %Y %I:%M %p').timetuple()) | ||||
|         timestamp = unified_timestamp(time_str) | ||||
|  | ||||
|         info_dict.update({ | ||||
|             'display_id': display_id, | ||||
|   | ||||
| @@ -26,6 +26,7 @@ class CBSNewsIE(CBSBaseIE): | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Subscribers only', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', | ||||
| @@ -69,7 +70,7 @@ class CBSNewsLiveVideoIE(InfoExtractor): | ||||
|     IE_DESC = 'CBS News Live Videos' | ||||
|     _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', | ||||
|         'info_dict': { | ||||
|             'id': 'clinton-sanders-prepare-to-face-off-in-nh', | ||||
| @@ -77,7 +78,15 @@ class CBSNewsLiveVideoIE(InfoExtractor): | ||||
|             'title': 'Clinton, Sanders Prepare To Face Off In NH', | ||||
|             'duration': 334, | ||||
|         }, | ||||
|     } | ||||
|         'skip': 'Video gone, redirected to http://www.cbsnews.com/live/', | ||||
|     }, { | ||||
|         'url': 'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/', | ||||
|         'info_dict': { | ||||
|             'id': 'video-shows-intense-paragliding-accident', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Video Shows Intense Paragliding Accident', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|   | ||||
| @@ -23,7 +23,7 @@ class CliphunterIE(InfoExtractor): | ||||
|         (?P<id>[0-9]+)/ | ||||
|         (?P<seo>.+?)(?:$|[#\?]) | ||||
|     ''' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo', | ||||
|         'md5': 'b7c9bbd4eb3a226ab91093714dcaa480', | ||||
|         'info_dict': { | ||||
| @@ -32,8 +32,19 @@ class CliphunterIE(InfoExtractor): | ||||
|             'title': 'Fun Jynx Maze solo', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|         }, | ||||
|         'skip': 'Video gone', | ||||
|     }, { | ||||
|         'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz', | ||||
|         'md5': '55a723c67bfc6da6b0cfa00d55da8a27', | ||||
|         'info_dict': { | ||||
|             'id': '2019449', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'age_limit': 18, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|   | ||||
| @@ -6,7 +6,6 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_HTTPError, | ||||
| ) | ||||
| from ..utils import ( | ||||
| @@ -17,37 +16,26 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class CloudyIE(InfoExtractor): | ||||
|     _IE_DESC = 'cloudy.ec and videoraj.ch' | ||||
|     _IE_DESC = 'cloudy.ec' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/ | ||||
|         https?://(?:www\.)?cloudy\.ec/ | ||||
|         (?:v/|embed\.php\?id=) | ||||
|         (?P<id>[A-Za-z0-9]+) | ||||
|         ''' | ||||
|     _EMBED_URL = 'http://www.%s/embed.php?id=%s' | ||||
|     _API_URL = 'http://www.%s/api/player.api.php?%s' | ||||
|     _EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s' | ||||
|     _API_URL = 'http://www.cloudy.ec/api/player.api.php' | ||||
|     _MAX_TRIES = 2 | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'https://www.cloudy.ec/v/af511e2527aac', | ||||
|             'md5': '5cb253ace826a42f35b4740539bedf07', | ||||
|             'info_dict': { | ||||
|                 'id': 'af511e2527aac', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Funny Cats and Animals Compilation june 2013', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.videoraj.to/v/47f399fd8bb60', | ||||
|             'md5': '7d0f8799d91efd4eda26587421c3c3b0', | ||||
|             'info_dict': { | ||||
|                 'id': '47f399fd8bb60', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?', | ||||
|             } | ||||
|     _TEST = { | ||||
|         'url': 'https://www.cloudy.ec/v/af511e2527aac', | ||||
|         'md5': '5cb253ace826a42f35b4740539bedf07', | ||||
|         'info_dict': { | ||||
|             'id': 'af511e2527aac', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Funny Cats and Animals Compilation june 2013', | ||||
|         } | ||||
|     ] | ||||
|     } | ||||
|  | ||||
|     def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0): | ||||
|     def _extract_video(self, video_id, file_key, error_url=None, try_num=0): | ||||
|  | ||||
|         if try_num > self._MAX_TRIES - 1: | ||||
|             raise ExtractorError('Unable to extract video URL', expected=True) | ||||
| @@ -64,9 +52,8 @@ class CloudyIE(InfoExtractor): | ||||
|                 'errorUrl': error_url, | ||||
|             }) | ||||
|  | ||||
|         data_url = self._API_URL % (video_host, compat_urllib_parse_urlencode(form)) | ||||
|         player_data = self._download_webpage( | ||||
|             data_url, video_id, 'Downloading player data') | ||||
|             self._API_URL, video_id, 'Downloading player data', query=form) | ||||
|         data = compat_parse_qs(player_data) | ||||
|  | ||||
|         try_num += 1 | ||||
| @@ -88,7 +75,7 @@ class CloudyIE(InfoExtractor): | ||||
|             except ExtractorError as e: | ||||
|                 if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]: | ||||
|                     self.report_warning('Invalid video URL, requesting another', video_id) | ||||
|                     return self._extract_video(video_host, video_id, file_key, video_url, try_num) | ||||
|                     return self._extract_video(video_id, file_key, video_url, try_num) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -98,14 +85,13 @@ class CloudyIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_host = mobj.group('host') | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         url = self._EMBED_URL % (video_host, video_id) | ||||
|         url = self._EMBED_URL % video_id | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         file_key = self._search_regex( | ||||
|             [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'], | ||||
|             webpage, 'file_key') | ||||
|  | ||||
|         return self._extract_video(video_host, video_id, file_key) | ||||
|         return self._extract_video(video_id, file_key) | ||||
|   | ||||
| @@ -1,5 +1,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .mtv import MTVIE | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class CMTIE(MTVIE): | ||||
| @@ -16,7 +18,27 @@ class CMTIE(MTVIE): | ||||
|             'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"', | ||||
|             'description': 'Blame It All On My Roots', | ||||
|         }, | ||||
|         'skip': 'Video not available', | ||||
|     }, { | ||||
|         'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908', | ||||
|         'md5': 'e61a801ca4a183a466c08bd98dccbb1c', | ||||
|         'info_dict': { | ||||
|             'id': '1504699', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Still The King Ep. 109 in 3 Minutes', | ||||
|             'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.', | ||||
|             'timestamp': 1469421000.0, | ||||
|             'upload_date': '20160725', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
|     def _transform_rtmp_url(cls, rtmp_video_url): | ||||
|         if 'error_not_available.swf' in rtmp_video_url: | ||||
|             raise ExtractorError( | ||||
|                 '%s said: video is not available' % cls.IE_NAME, expected=True) | ||||
|  | ||||
|         return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url) | ||||
|   | ||||
| @@ -1,17 +1,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse_urlencode, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     unified_strdate, | ||||
| ) | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class ComedyCentralIE(MTVServicesInfoExtractor): | ||||
| @@ -26,8 +16,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother', | ||||
|             'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother', | ||||
|             'description': 'After a certain point, breastfeeding becomes c**kblocking.', | ||||
|             'timestamp': 1376798400, | ||||
|             'upload_date': '20130818', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview', | ||||
| @@ -35,241 +27,92 @@ class ComedyCentralIE(MTVServicesInfoExtractor): | ||||
|     }] | ||||
|  | ||||
|  | ||||
| class ComedyCentralShowsIE(MTVServicesInfoExtractor): | ||||
|     IE_DESC = 'The Daily Show / The Colbert Report' | ||||
|     # urls can be abbreviations like :thedailyshow | ||||
|     # urls for episodes like: | ||||
|     # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day | ||||
|     #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news | ||||
|     #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524 | ||||
|     _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow) | ||||
|                       |https?://(:www\.)? | ||||
|                           (?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/ | ||||
|                          ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| | ||||
|                           (?P<clip> | ||||
|                               (?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) | ||||
|                               |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) | ||||
|                               |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) | ||||
|                           )| | ||||
|                           (?P<interview> | ||||
|                               extended-interviews/(?P<interID>[0-9a-z]+)/ | ||||
|                               (?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?) | ||||
|                               (?:/[^/?#]?|[?#]|$)))) | ||||
|                      ''' | ||||
| class ToshIE(MTVServicesInfoExtractor): | ||||
|     IE_DESC = 'Tosh.0' | ||||
|     _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)' | ||||
|     _FEED_URL = 'http://tosh.cc.com/feeds/mrss' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart', | ||||
|         'md5': '4e2f5cb088a83cd8cdb7756132f9739d', | ||||
|         'info_dict': { | ||||
|             'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20121213', | ||||
|             'description': 'Kristen Stewart learns to let loose in "On the Road."', | ||||
|             'uploader': 'thedailyshow', | ||||
|             'title': 'thedailyshow kristen-stewart part 1', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview', | ||||
|         'info_dict': { | ||||
|             'id': 'sarah-chayes-extended-interview', | ||||
|             'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."', | ||||
|             'title': 'thedailyshow Sarah Chayes Extended Interview', | ||||
|         }, | ||||
|         'playlist': [ | ||||
|             { | ||||
|                 'info_dict': { | ||||
|                     'id': '0baad492-cbec-4ec1-9e50-ad91c291127f', | ||||
|                     'ext': 'mp4', | ||||
|                     'upload_date': '20150129', | ||||
|                     'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."', | ||||
|                     'uploader': 'thedailyshow', | ||||
|                     'title': 'thedailyshow sarah-chayes-extended-interview part 1', | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 'info_dict': { | ||||
|                     'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283', | ||||
|                     'ext': 'mp4', | ||||
|                     'upload_date': '20150129', | ||||
|                     'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."', | ||||
|                     'uploader': 'thedailyshow', | ||||
|                     'title': 'thedailyshow sarah-chayes-extended-interview part 2', | ||||
|                 }, | ||||
|             }, | ||||
|         ], | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans', | ||||
|         'info_dict': { | ||||
|             'description': 'Tosh asked fans to share their summer plans.', | ||||
|             'title': 'Twitter Users Share Summer Plans', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'md5': 'f269e88114c1805bb6d7653fecea9e06', | ||||
|             'info_dict': { | ||||
|                 'id': '90498ec2-ed00-11e0-aca6-0026b9414f30', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans', | ||||
|                 'description': 'Tosh asked fans to share their summer plans.', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|                 # It's really reported to be published on year 2077 | ||||
|                 'upload_date': '20770610', | ||||
|                 'timestamp': 3390510600, | ||||
|                 'subtitles': { | ||||
|                     'en': 'mincount:3', | ||||
|                 }, | ||||
|             }, | ||||
|         }] | ||||
|     }, { | ||||
|         'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] | ||||
|     @classmethod | ||||
|     def _transform_rtmp_url(cls, rtmp_video_url): | ||||
|         new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url) | ||||
|         new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm') | ||||
|         return new_urls | ||||
|  | ||||
|     _video_extensions = { | ||||
|         '3500': 'mp4', | ||||
|         '2200': 'mp4', | ||||
|         '1700': 'mp4', | ||||
|         '1200': 'mp4', | ||||
|         '750': 'mp4', | ||||
|         '400': 'mp4', | ||||
|     } | ||||
|     _video_dimensions = { | ||||
|         '3500': (1280, 720), | ||||
|         '2200': (960, 540), | ||||
|         '1700': (768, 432), | ||||
|         '1200': (640, 360), | ||||
|         '750': (512, 288), | ||||
|         '400': (384, 216), | ||||
|     } | ||||
|  | ||||
| class ComedyCentralTVIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4', | ||||
|         'info_dict': { | ||||
|             'id': 'local_playlist-f99b626bdfe13568579a', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.comedycentral.tv/shows/1074-workaholics', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         if mobj.group('shortname'): | ||||
|             return self.url_result('http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         if mobj.group('clip'): | ||||
|             if mobj.group('videotitle'): | ||||
|                 epTitle = mobj.group('videotitle') | ||||
|             elif mobj.group('showname') == 'thedailyshow': | ||||
|                 epTitle = mobj.group('tdstitle') | ||||
|             else: | ||||
|                 epTitle = mobj.group('cntitle') | ||||
|             dlNewest = False | ||||
|         elif mobj.group('interview'): | ||||
|             epTitle = mobj.group('interview_title') | ||||
|             dlNewest = False | ||||
|         else: | ||||
|             dlNewest = not mobj.group('episode') | ||||
|             if dlNewest: | ||||
|                 epTitle = mobj.group('showname') | ||||
|             else: | ||||
|                 epTitle = mobj.group('episode') | ||||
|         show_name = mobj.group('showname') | ||||
|         mrss_url = self._search_regex( | ||||
|             r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1', | ||||
|             webpage, 'mrss url', group='url') | ||||
|  | ||||
|         webpage, htmlHandle = self._download_webpage_handle(url, epTitle) | ||||
|         if dlNewest: | ||||
|             url = htmlHandle.geturl() | ||||
|             mobj = re.match(self._VALID_URL, url, re.VERBOSE) | ||||
|             if mobj is None: | ||||
|                 raise ExtractorError('Invalid redirected URL: ' + url) | ||||
|             if mobj.group('episode') == '': | ||||
|                 raise ExtractorError('Redirected URL is still not specific: ' + url) | ||||
|             epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1] | ||||
|         return self._get_videos_info_from_url(mrss_url, video_id) | ||||
|  | ||||
|         mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage) | ||||
|         if len(mMovieParams) == 0: | ||||
|             # The Colbert Report embeds the information in a without | ||||
|             # a URL prefix; so extract the alternate reference | ||||
|             # and then add the URL prefix manually. | ||||
|  | ||||
|             altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage) | ||||
|             if len(altMovieParams) == 0: | ||||
|                 raise ExtractorError('unable to find Flash URL in webpage ' + url) | ||||
|             else: | ||||
|                 mMovieParams = [('http://media.mtvnservices.com/' + altMovieParams[0], altMovieParams[0])] | ||||
| class ComedyCentralShortnameIE(InfoExtractor): | ||||
|     _VALID_URL = r'^:(?P<id>tds|thedailyshow)$' | ||||
|     _TESTS = [{ | ||||
|         'url': ':tds', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': ':thedailyshow', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|         uri = mMovieParams[0][1] | ||||
|         # Correct cc.com in uri | ||||
|         uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri) | ||||
|  | ||||
|         index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri})) | ||||
|         idoc = self._download_xml( | ||||
|             index_url, epTitle, | ||||
|             'Downloading show index', 'Unable to download episode index') | ||||
|  | ||||
|         title = idoc.find('./channel/title').text | ||||
|         description = idoc.find('./channel/description').text | ||||
|  | ||||
|         entries = [] | ||||
|         item_els = idoc.findall('.//item') | ||||
|         for part_num, itemEl in enumerate(item_els): | ||||
|             upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text) | ||||
|             thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url') | ||||
|  | ||||
|             content = itemEl.find('.//{http://search.yahoo.com/mrss/}content') | ||||
|             duration = float_or_none(content.attrib.get('duration')) | ||||
|             mediagen_url = content.attrib['url'] | ||||
|             guid = itemEl.find('./guid').text.rpartition(':')[-1] | ||||
|  | ||||
|             cdoc = self._download_xml( | ||||
|                 mediagen_url, epTitle, | ||||
|                 'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els))) | ||||
|  | ||||
|             turls = [] | ||||
|             for rendition in cdoc.findall('.//rendition'): | ||||
|                 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) | ||||
|                 turls.append(finfo) | ||||
|  | ||||
|             formats = [] | ||||
|             for format, rtmp_video_url in turls: | ||||
|                 w, h = self._video_dimensions.get(format, (None, None)) | ||||
|                 formats.append({ | ||||
|                     'format_id': 'vhttp-%s' % format, | ||||
|                     'url': self._transform_rtmp_url(rtmp_video_url), | ||||
|                     'ext': self._video_extensions.get(format, 'mp4'), | ||||
|                     'height': h, | ||||
|                     'width': w, | ||||
|                 }) | ||||
|                 formats.append({ | ||||
|                     'format_id': 'rtmp-%s' % format, | ||||
|                     'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'), | ||||
|                     'ext': self._video_extensions.get(format, 'mp4'), | ||||
|                     'height': h, | ||||
|                     'width': w, | ||||
|                 }) | ||||
|                 self._sort_formats(formats) | ||||
|  | ||||
|             subtitles = self._extract_subtitles(cdoc, guid) | ||||
|  | ||||
|             virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1) | ||||
|             entries.append({ | ||||
|                 'id': guid, | ||||
|                 'title': virtual_id, | ||||
|                 'formats': formats, | ||||
|                 'uploader': show_name, | ||||
|                 'upload_date': upload_date, | ||||
|                 'duration': duration, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'description': description, | ||||
|                 'subtitles': subtitles, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': epTitle, | ||||
|             'entries': entries, | ||||
|             'title': show_name + ' ' + title, | ||||
|             'description': description, | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         shortcut_map = { | ||||
|             'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', | ||||
|             'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', | ||||
|         } | ||||
|         return self.url_result(shortcut_map[video_id]) | ||||
|   | ||||
| @@ -55,6 +55,8 @@ from ..utils import ( | ||||
|     update_Request, | ||||
|     update_url_query, | ||||
|     parse_m3u8_attributes, | ||||
|     extract_attributes, | ||||
|     parse_codecs, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -725,9 +727,14 @@ class InfoExtractor(object): | ||||
|                     [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop) | ||||
|  | ||||
|     def _og_search_property(self, prop, html, name=None, **kargs): | ||||
|         if not isinstance(prop, (list, tuple)): | ||||
|             prop = [prop] | ||||
|         if name is None: | ||||
|             name = 'OpenGraph %s' % prop | ||||
|         escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs) | ||||
|             name = 'OpenGraph %s' % prop[0] | ||||
|         og_regexes = [] | ||||
|         for p in prop: | ||||
|             og_regexes.extend(self._og_regexes(p)) | ||||
|         escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs) | ||||
|         if escaped is None: | ||||
|             return None | ||||
|         return unescapeHTML(escaped) | ||||
| @@ -821,41 +828,47 @@ class InfoExtractor(object): | ||||
|         if not json_ld: | ||||
|             return {} | ||||
|         info = {} | ||||
|         if json_ld.get('@context') == 'http://schema.org': | ||||
|             item_type = json_ld.get('@type') | ||||
|             if expected_type is not None and expected_type != item_type: | ||||
|                 return info | ||||
|             if item_type == 'TVEpisode': | ||||
|                 info.update({ | ||||
|                     'episode': unescapeHTML(json_ld.get('name')), | ||||
|                     'episode_number': int_or_none(json_ld.get('episodeNumber')), | ||||
|                     'description': unescapeHTML(json_ld.get('description')), | ||||
|                 }) | ||||
|                 part_of_season = json_ld.get('partOfSeason') | ||||
|                 if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason': | ||||
|                     info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) | ||||
|                 part_of_series = json_ld.get('partOfSeries') | ||||
|                 if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries': | ||||
|                     info['series'] = unescapeHTML(part_of_series.get('name')) | ||||
|             elif item_type == 'Article': | ||||
|                 info.update({ | ||||
|                     'timestamp': parse_iso8601(json_ld.get('datePublished')), | ||||
|                     'title': unescapeHTML(json_ld.get('headline')), | ||||
|                     'description': unescapeHTML(json_ld.get('articleBody')), | ||||
|                 }) | ||||
|             elif item_type == 'VideoObject': | ||||
|                 info.update({ | ||||
|                     'url': json_ld.get('contentUrl'), | ||||
|                     'title': unescapeHTML(json_ld.get('name')), | ||||
|                     'description': unescapeHTML(json_ld.get('description')), | ||||
|                     'thumbnail': json_ld.get('thumbnailUrl'), | ||||
|                     'duration': parse_duration(json_ld.get('duration')), | ||||
|                     'timestamp': unified_timestamp(json_ld.get('uploadDate')), | ||||
|                     'filesize': float_or_none(json_ld.get('contentSize')), | ||||
|                     'tbr': int_or_none(json_ld.get('bitrate')), | ||||
|                     'width': int_or_none(json_ld.get('width')), | ||||
|                     'height': int_or_none(json_ld.get('height')), | ||||
|                 }) | ||||
|         if not isinstance(json_ld, (list, tuple, dict)): | ||||
|             return info | ||||
|         if isinstance(json_ld, dict): | ||||
|             json_ld = [json_ld] | ||||
|         for e in json_ld: | ||||
|             if e.get('@context') == 'http://schema.org': | ||||
|                 item_type = e.get('@type') | ||||
|                 if expected_type is not None and expected_type != item_type: | ||||
|                     return info | ||||
|                 if item_type == 'TVEpisode': | ||||
|                     info.update({ | ||||
|                         'episode': unescapeHTML(e.get('name')), | ||||
|                         'episode_number': int_or_none(e.get('episodeNumber')), | ||||
|                         'description': unescapeHTML(e.get('description')), | ||||
|                     }) | ||||
|                     part_of_season = e.get('partOfSeason') | ||||
|                     if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason': | ||||
|                         info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) | ||||
|                     part_of_series = e.get('partOfSeries') | ||||
|                     if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries': | ||||
|                         info['series'] = unescapeHTML(part_of_series.get('name')) | ||||
|                 elif item_type == 'Article': | ||||
|                     info.update({ | ||||
|                         'timestamp': parse_iso8601(e.get('datePublished')), | ||||
|                         'title': unescapeHTML(e.get('headline')), | ||||
|                         'description': unescapeHTML(e.get('articleBody')), | ||||
|                     }) | ||||
|                 elif item_type == 'VideoObject': | ||||
|                     info.update({ | ||||
|                         'url': e.get('contentUrl'), | ||||
|                         'title': unescapeHTML(e.get('name')), | ||||
|                         'description': unescapeHTML(e.get('description')), | ||||
|                         'thumbnail': e.get('thumbnailUrl'), | ||||
|                         'duration': parse_duration(e.get('duration')), | ||||
|                         'timestamp': unified_timestamp(e.get('uploadDate')), | ||||
|                         'filesize': float_or_none(e.get('contentSize')), | ||||
|                         'tbr': int_or_none(e.get('bitrate')), | ||||
|                         'width': int_or_none(e.get('width')), | ||||
|                         'height': int_or_none(e.get('height')), | ||||
|                     }) | ||||
|                 break | ||||
|         return dict((k, v) for k, v in info.items() if v is not None) | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -909,7 +922,8 @@ class InfoExtractor(object): | ||||
|                 if f.get('ext') in ['f4f', 'f4m']:  # Not yet supported | ||||
|                     preference -= 0.5 | ||||
|  | ||||
|             proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1 | ||||
|             protocol = f.get('protocol') or determine_protocol(f) | ||||
|             proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1) | ||||
|  | ||||
|             if f.get('vcodec') == 'none':  # audio only | ||||
|                 preference -= 50 | ||||
| @@ -1205,6 +1219,7 @@ class InfoExtractor(object): | ||||
|                     'url': format_url(line.strip()), | ||||
|                     'tbr': tbr, | ||||
|                     'ext': ext, | ||||
|                     'fps': float_or_none(last_info.get('FRAME-RATE')), | ||||
|                     'protocol': entry_protocol, | ||||
|                     'preference': preference, | ||||
|                 } | ||||
| @@ -1213,24 +1228,17 @@ class InfoExtractor(object): | ||||
|                     width_str, height_str = resolution.split('x') | ||||
|                     f['width'] = int(width_str) | ||||
|                     f['height'] = int(height_str) | ||||
|                 codecs = last_info.get('CODECS') | ||||
|                 if codecs: | ||||
|                     vcodec, acodec = [None] * 2 | ||||
|                     va_codecs = codecs.split(',') | ||||
|                     if len(va_codecs) == 1: | ||||
|                         # Audio only entries usually come with single codec and | ||||
|                         # no resolution. For more robustness we also check it to | ||||
|                         # be mp4 audio. | ||||
|                         if not resolution and va_codecs[0].startswith('mp4a'): | ||||
|                             vcodec, acodec = 'none', va_codecs[0] | ||||
|                         else: | ||||
|                             vcodec = va_codecs[0] | ||||
|                     else: | ||||
|                         vcodec, acodec = va_codecs[:2] | ||||
|                 # Unified Streaming Platform | ||||
|                 mobj = re.search( | ||||
|                     r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url']) | ||||
|                 if mobj: | ||||
|                     abr, vbr = mobj.groups() | ||||
|                     abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000) | ||||
|                     f.update({ | ||||
|                         'acodec': acodec, | ||||
|                         'vcodec': vcodec, | ||||
|                         'vbr': vbr, | ||||
|                         'abr': abr, | ||||
|                     }) | ||||
|                 f.update(parse_codecs(last_info.get('CODECS'))) | ||||
|                 if last_media is not None: | ||||
|                     f['m3u8_media'] = last_media | ||||
|                     last_media = None | ||||
| @@ -1485,6 +1493,13 @@ class InfoExtractor(object): | ||||
|             compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict) | ||||
|  | ||||
|     def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}): | ||||
|         """ | ||||
|         Parse formats from MPD manifest. | ||||
|         References: | ||||
|          1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E), | ||||
|             http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip | ||||
|          2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP | ||||
|         """ | ||||
|         if mpd_doc.get('type') == 'dynamic': | ||||
|             return [] | ||||
|  | ||||
| @@ -1517,8 +1532,16 @@ class InfoExtractor(object): | ||||
|                         s_e = segment_timeline.findall(_add_ns('S')) | ||||
|                         if s_e: | ||||
|                             ms_info['total_number'] = 0 | ||||
|                             ms_info['s'] = [] | ||||
|                             for s in s_e: | ||||
|                                 ms_info['total_number'] += 1 + int(s.get('r', '0')) | ||||
|                                 r = int(s.get('r', 0)) | ||||
|                                 ms_info['total_number'] += 1 + r | ||||
|                                 ms_info['s'].append({ | ||||
|                                     't': int(s.get('t', 0)), | ||||
|                                     # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60]) | ||||
|                                     'd': int(s.attrib['d']), | ||||
|                                     'r': r, | ||||
|                                 }) | ||||
|                     else: | ||||
|                         timescale = segment_template.get('timescale') | ||||
|                         if timescale: | ||||
| @@ -1555,7 +1578,7 @@ class InfoExtractor(object): | ||||
|                         continue | ||||
|                     representation_attrib = adaptation_set.attrib.copy() | ||||
|                     representation_attrib.update(representation.attrib) | ||||
|                     # According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory | ||||
|                     # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory | ||||
|                     mime_type = representation_attrib['mimeType'] | ||||
|                     content_type = mime_type.split('/')[0] | ||||
|                     if content_type == 'text': | ||||
| @@ -1599,16 +1622,40 @@ class InfoExtractor(object): | ||||
|                                 representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) | ||||
|                             media_template = representation_ms_info['media_template'] | ||||
|                             media_template = media_template.replace('$RepresentationID$', representation_id) | ||||
|                             media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template) | ||||
|                             media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template) | ||||
|                             media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template) | ||||
|                             media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template) | ||||
|                             media_template.replace('$$', '$') | ||||
|                             representation_ms_info['segment_urls'] = [ | ||||
|                                 media_template % { | ||||
|                                     'Number': segment_number, | ||||
|                                     'Bandwidth': representation_attrib.get('bandwidth')} | ||||
|                                 for segment_number in range( | ||||
|                                     representation_ms_info['start_number'], | ||||
|                                     representation_ms_info['total_number'] + representation_ms_info['start_number'])] | ||||
|  | ||||
|                             # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ | ||||
|                             # can't be used at the same time | ||||
|                             if '%(Number' in media_template: | ||||
|                                 representation_ms_info['segment_urls'] = [ | ||||
|                                     media_template % { | ||||
|                                         'Number': segment_number, | ||||
|                                         'Bandwidth': representation_attrib.get('bandwidth'), | ||||
|                                     } | ||||
|                                     for segment_number in range( | ||||
|                                         representation_ms_info['start_number'], | ||||
|                                         representation_ms_info['total_number'] + representation_ms_info['start_number'])] | ||||
|                             else: | ||||
|                                 representation_ms_info['segment_urls'] = [] | ||||
|                                 segment_time = 0 | ||||
|  | ||||
|                                 def add_segment_url(): | ||||
|                                     representation_ms_info['segment_urls'].append( | ||||
|                                         media_template % { | ||||
|                                             'Time': segment_time, | ||||
|                                             'Bandwidth': representation_attrib.get('bandwidth'), | ||||
|                                         } | ||||
|                                     ) | ||||
|  | ||||
|                                 for num, s in enumerate(representation_ms_info['s']): | ||||
|                                     segment_time = s.get('t') or segment_time | ||||
|                                     add_segment_url() | ||||
|                                     for r in range(s.get('r', 0)): | ||||
|                                         segment_time += s['d'] | ||||
|                                         add_segment_url() | ||||
|                                     segment_time += s['d'] | ||||
|                         if 'segment_urls' in representation_ms_info: | ||||
|                             f.update({ | ||||
|                                 'segment_urls': representation_ms_info['segment_urls'], | ||||
| @@ -1635,6 +1682,62 @@ class InfoExtractor(object): | ||||
|                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) | ||||
|         return formats | ||||
|  | ||||
|     def _parse_html5_media_entries(self, base_url, webpage): | ||||
|         def absolute_url(video_url): | ||||
|             return compat_urlparse.urljoin(base_url, video_url) | ||||
|  | ||||
|         def parse_content_type(content_type): | ||||
|             if not content_type: | ||||
|                 return {} | ||||
|             ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type) | ||||
|             if ctr: | ||||
|                 mimetype, codecs = ctr.groups() | ||||
|                 f = parse_codecs(codecs) | ||||
|                 f['ext'] = mimetype2ext(mimetype) | ||||
|                 return f | ||||
|             return {} | ||||
|  | ||||
|         entries = [] | ||||
|         for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage): | ||||
|             media_info = { | ||||
|                 'formats': [], | ||||
|                 'subtitles': {}, | ||||
|             } | ||||
|             media_attributes = extract_attributes(media_tag) | ||||
|             src = media_attributes.get('src') | ||||
|             if src: | ||||
|                 media_info['formats'].append({ | ||||
|                     'url': absolute_url(src), | ||||
|                     'vcodec': 'none' if media_type == 'audio' else None, | ||||
|                 }) | ||||
|             media_info['thumbnail'] = media_attributes.get('poster') | ||||
|             if media_content: | ||||
|                 for source_tag in re.findall(r'<source[^>]+>', media_content): | ||||
|                     source_attributes = extract_attributes(source_tag) | ||||
|                     src = source_attributes.get('src') | ||||
|                     if not src: | ||||
|                         continue | ||||
|                     f = parse_content_type(source_attributes.get('type')) | ||||
|                     f.update({ | ||||
|                         'url': absolute_url(src), | ||||
|                         'vcodec': 'none' if media_type == 'audio' else None, | ||||
|                     }) | ||||
|                     media_info['formats'].append(f) | ||||
|                 for track_tag in re.findall(r'<track[^>]+>', media_content): | ||||
|                     track_attributes = extract_attributes(track_tag) | ||||
|                     kind = track_attributes.get('kind') | ||||
|                     if not kind or kind == 'subtitles': | ||||
|                         src = track_attributes.get('src') | ||||
|                         if not src: | ||||
|                             continue | ||||
|                         lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label') | ||||
|                         media_info['subtitles'].setdefault(lang, []).append({ | ||||
|                             'url': absolute_url(src), | ||||
|                         }) | ||||
|             if media_info['formats']: | ||||
|                 entries.append(media_info) | ||||
|         return entries | ||||
|  | ||||
|     def _live_title(self, name): | ||||
|         """ Generate the title for a live video """ | ||||
|         now = datetime.datetime.now() | ||||
| @@ -1695,7 +1798,7 @@ class InfoExtractor(object): | ||||
|  | ||||
|         any_restricted = False | ||||
|         for tc in self.get_testcases(include_onlymatching=False): | ||||
|             if 'playlist' in tc: | ||||
|             if tc.get('playlist', []): | ||||
|                 tc = tc['playlist'][0] | ||||
|             is_restricted = age_restricted( | ||||
|                 tc.get('info_dict', {}).get('age_limit'), age_limit) | ||||
|   | ||||
| @@ -5,13 +5,17 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     orderedSet, | ||||
|     remove_end, | ||||
|     extract_attributes, | ||||
|     mimetype2ext, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -58,6 +62,9 @@ class CondeNastIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': '3D Printed Speakers Lit With LED', | ||||
|             'description': 'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', | ||||
|             'uploader': 'wired', | ||||
|             'upload_date': '20130314', | ||||
|             'timestamp': 1363219200, | ||||
|         } | ||||
|     }, { | ||||
|         # JS embed | ||||
| @@ -67,70 +74,84 @@ class CondeNastIE(InfoExtractor): | ||||
|             'id': '55f9cf8b61646d1acf00000c', | ||||
|             'ext': 'mp4', | ||||
|             'title': '3D printed TSA Travel Sentry keys really do open TSA locks', | ||||
|             'uploader': 'arstechnica', | ||||
|             'upload_date': '20150916', | ||||
|             'timestamp': 1442434955, | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _extract_series(self, url, webpage): | ||||
|         title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>', | ||||
|                                         webpage, 'series title', flags=re.DOTALL) | ||||
|         title = self._html_search_regex( | ||||
|             r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>', | ||||
|             webpage, 'series title') | ||||
|         url_object = compat_urllib_parse_urlparse(url) | ||||
|         base_url = '%s://%s' % (url_object.scheme, url_object.netloc) | ||||
|         m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', | ||||
|                               webpage, flags=re.DOTALL) | ||||
|         m_paths = re.finditer( | ||||
|             r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage) | ||||
|         paths = orderedSet(m.group(1) for m in m_paths) | ||||
|         build_url = lambda path: compat_urlparse.urljoin(base_url, path) | ||||
|         entries = [self.url_result(build_url(path), 'CondeNast') for path in paths] | ||||
|         return self.playlist_result(entries, playlist_title=title) | ||||
|  | ||||
|     def _extract_video(self, webpage, url_type): | ||||
|         if url_type != 'embed': | ||||
|             description = self._html_search_regex( | ||||
|                 [ | ||||
|                     r'<div class="cne-video-description">(.+?)</div>', | ||||
|                     r'<div class="video-post-content">(.+?)</div>', | ||||
|                 ], | ||||
|                 webpage, 'description', fatal=False, flags=re.DOTALL) | ||||
|         query = {} | ||||
|         params = self._search_regex( | ||||
|             r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None) | ||||
|         if params: | ||||
|             query.update({ | ||||
|                 'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'), | ||||
|                 'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'), | ||||
|                 'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'), | ||||
|             }) | ||||
|         else: | ||||
|             description = None | ||||
|         params = self._search_regex(r'var params = {(.+?)}[;,]', webpage, | ||||
|                                     'player params', flags=re.DOTALL) | ||||
|         video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') | ||||
|         player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id') | ||||
|         target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target') | ||||
|         data = compat_urllib_parse_urlencode({'videoId': video_id, | ||||
|                                               'playerId': player_id, | ||||
|                                               'target': target, | ||||
|                                               }) | ||||
|         base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]', | ||||
|                                            webpage, 'base info url', | ||||
|                                            default='http://player.cnevids.com/player/loader.js?') | ||||
|         info_url = base_info_url + data | ||||
|         info_page = self._download_webpage(info_url, video_id, | ||||
|                                            'Downloading video info') | ||||
|         video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info') | ||||
|         video_info = self._parse_json(video_info, video_id) | ||||
|             params = extract_attributes(self._search_regex( | ||||
|                 r'(<[^>]+data-js="video-player"[^>]+>)', | ||||
|                 webpage, 'player params element')) | ||||
|             query.update({ | ||||
|                 'videoId': params['data-video'], | ||||
|                 'playerId': params['data-player'], | ||||
|                 'target': params['id'], | ||||
|             }) | ||||
|         video_id = query['videoId'] | ||||
|         info_page = self._download_webpage( | ||||
|             'http://player.cnevids.com/player/video.js', | ||||
|             video_id, 'Downloading video info', query=query) | ||||
|         video_info = self._parse_json(self._search_regex( | ||||
|             r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video'] | ||||
|         title = video_info['title'] | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']), | ||||
|             'url': fdata['src'], | ||||
|             'ext': fdata['type'].split('/')[-1], | ||||
|             'quality': 1 if fdata['quality'] == 'high' else 0, | ||||
|         } for fdata in video_info['sources'][0]] | ||||
|         formats = [] | ||||
|         for fdata in video_info.get('sources', [{}])[0]: | ||||
|             src = fdata.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             ext = mimetype2ext(fdata.get('type')) or determine_ext(src) | ||||
|             quality = fdata.get('quality') | ||||
|             formats.append({ | ||||
|                 'format_id': ext + ('-%s' % quality if quality else ''), | ||||
|                 'url': src, | ||||
|                 'ext': ext, | ||||
|                 'quality': 1 if quality == 'high' else 0, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|         info = self._search_json_ld(webpage, video_id) if url_type != 'embed' else {} | ||||
|         info.update({ | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'title': video_info['title'], | ||||
|             'thumbnail': video_info['poster_frame'], | ||||
|             'description': description, | ||||
|         } | ||||
|             'title': title, | ||||
|             'thumbnail': video_info.get('poster_frame'), | ||||
|             'uploader': video_info.get('brand'), | ||||
|             'duration': int_or_none(video_info.get('duration')), | ||||
|             'tags': video_info.get('tags'), | ||||
|             'series': video_info.get('series_title'), | ||||
|             'season': video_info.get('season_title'), | ||||
|             'timestamp': parse_iso8601(video_info.get('premiere_date')), | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         site = mobj.group('site') | ||||
|         url_type = mobj.group('type') | ||||
|         item_id = mobj.group('id') | ||||
|         site, url_type, item_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         # Convert JS embed to regular embed | ||||
|         if url_type == 'embedjs': | ||||
|   | ||||
| @@ -51,8 +51,11 @@ class CSpanIE(InfoExtractor): | ||||
|         'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', | ||||
|         'info_dict': { | ||||
|             'id': 'judiciary031715', | ||||
|             'ext': 'flv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Immigration Reforms Needed to Protect Skilled American Workers', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # m3u8 downloads | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|   | ||||
| @@ -9,7 +9,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class CWTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/(?:shows/)?(?:[^/]+/){2}\?.*\bplay=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' | ||||
|     _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63', | ||||
|         'info_dict': { | ||||
| @@ -51,6 +51,12 @@ class CWTVIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -5,19 +5,20 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     determine_protocol, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DailyMailIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html', | ||||
|         'md5': '2f639d446394f53f3a33658b518b6615', | ||||
|         'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html', | ||||
|         'md5': 'f6129624562251f628296c3a9ffde124', | ||||
|         'info_dict': { | ||||
|             'id': '1288527', | ||||
|             'id': '1295863', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Turn any video into an impressionist masterpiece', | ||||
|             'description': 'md5:88ddbcb504367987b2708bb38677c9d2', | ||||
|             'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'', | ||||
|             'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -26,7 +27,7 @@ class DailyMailIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_data = self._parse_json(self._search_regex( | ||||
|             r"data-opts='({.+?})'", webpage, 'video data'), video_id) | ||||
|         title = video_data['title'] | ||||
|         title = unescapeHTML(video_data['title']) | ||||
|         video_sources = self._download_json(video_data.get( | ||||
|             'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id) | ||||
|  | ||||
| @@ -55,7 +56,7 @@ class DailyMailIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': video_data.get('descr'), | ||||
|             'description': unescapeHTML(video_data.get('descr')), | ||||
|             'thumbnail': video_data.get('poster') or video_data.get('thumbnail'), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -331,7 +331,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|  | ||||
|             for video_id in re.findall(r'data-xid="(.+?)"', webpage): | ||||
|                 if video_id not in video_ids: | ||||
|                     yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') | ||||
|                     yield self.url_result( | ||||
|                         'http://www.dailymotion.com/video/%s' % video_id, | ||||
|                         DailymotionIE.ie_key(), video_id) | ||||
|                     video_ids.add(video_id) | ||||
|  | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: | ||||
|   | ||||
| @@ -4,78 +4,47 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     clean_html, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DBTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:(?:lazyplayer|player)/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', | ||||
|         'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', | ||||
|         'md5': '2e24f67936517b143a234b4cadf792ec', | ||||
|         'info_dict': { | ||||
|             'id': '33100', | ||||
|             'id': '3649835190001', | ||||
|             'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', | ||||
|             'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
|             'timestamp': 1404039863.438, | ||||
|             'thumbnail': 're:https?://.*\.jpg', | ||||
|             'timestamp': 1404039863, | ||||
|             'upload_date': '20140629', | ||||
|             'duration': 69.544, | ||||
|             'view_count': int, | ||||
|             'categories': list, | ||||
|         } | ||||
|             'uploader_id': '1027729757001', | ||||
|         }, | ||||
|         'add_ie': ['BrightcoveNew'] | ||||
|     }, { | ||||
|         'url': 'http://dbtv.no/3649835190001', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.dbtv.no/lazyplayer/4631135248001', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://dbtv.no/vice/5000634109001', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://dbtv.no/filmtrailer/3359293614001', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') or video_id | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'http://api.dbtv.no/discovery/%s' % video_id, display_id) | ||||
|  | ||||
|         video = data['playlist'][0] | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': f['URL'], | ||||
|             'vcodec': f.get('container'), | ||||
|             'width': int_or_none(f.get('width')), | ||||
|             'height': int_or_none(f.get('height')), | ||||
|             'vbr': float_or_none(f.get('rate'), 1000), | ||||
|             'filesize': int_or_none(f.get('size')), | ||||
|         } for f in video['renditions'] if 'URL' in f] | ||||
|  | ||||
|         if not formats: | ||||
|             for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]: | ||||
|                 if url_key in video: | ||||
|                     formats.append({ | ||||
|                         'url': video[url_key], | ||||
|                         'format_id': format_id, | ||||
|                     }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|         video_id, display_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         return { | ||||
|             'id': compat_str(video['id']), | ||||
|             '_type': 'url_transparent', | ||||
|             'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id, | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': video['title'], | ||||
|             'description': clean_html(video['desc']), | ||||
|             'thumbnail': video.get('splash') or video.get('thumb'), | ||||
|             'timestamp': float_or_none(video.get('publishedAt'), 1000), | ||||
|             'duration': float_or_none(video.get('length'), 1000), | ||||
|             'view_count': int_or_none(video.get('views')), | ||||
|             'categories': video.get('tags'), | ||||
|             'formats': formats, | ||||
|             'ie_key': 'BrightcoveNew', | ||||
|         } | ||||
|   | ||||
| @@ -62,11 +62,9 @@ class DCNBaseIE(InfoExtractor): | ||||
|                 r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8', | ||||
|                 r'<a[^>]+href="rtsp(://[^"]+)"' | ||||
|             ], webpage, 'format url') | ||||
|         # TODO: Current DASH formats are broken - $Time$ pattern in | ||||
|         # <SegmentTemplate> not implemented yet | ||||
|         # formats.extend(self._extract_mpd_formats( | ||||
|         #     format_url_base + '/manifest.mpd', | ||||
|         #     video_id, mpd_id='dash', fatal=False)) | ||||
|         formats.extend(self._extract_mpd_formats( | ||||
|             format_url_base + '/manifest.mpd', | ||||
|             video_id, mpd_id='dash', fatal=False)) | ||||
|         formats.extend(self._extract_m3u8_formats( | ||||
|             format_url_base + '/playlist.m3u8', video_id, 'mp4', | ||||
|             m3u8_entry_protocol, m3u8_id='hls', fatal=False)) | ||||
|   | ||||
| @@ -17,8 +17,12 @@ class DreiSatIE(ZDFIE): | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Waidmannsheil', | ||||
|                 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', | ||||
|                 'uploader': '3sat', | ||||
|                 'uploader': 'SCHWEIZWEIT', | ||||
|                 'uploader_id': '100000210', | ||||
|                 'upload_date': '20140913' | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # m3u8 downloads | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|   | ||||
| @@ -6,12 +6,13 @@ import json | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     NO_DEFAULT, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EllenTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', | ||||
|         'md5': '4294cf98bc165f218aaa0b89e0fd8042', | ||||
|         'info_dict': { | ||||
| @@ -22,24 +23,47 @@ class EllenTVIE(InfoExtractor): | ||||
|             'timestamp': 1428035648, | ||||
|             'upload_date': '20150403', | ||||
|             'uploader_id': 'batchUser', | ||||
|         } | ||||
|     } | ||||
|         }, | ||||
|     }, { | ||||
|         # not available via http://widgets.ellentube.com/ | ||||
|         'url': 'http://www.ellentv.com/videos/1-szkgu2m2/', | ||||
|         'info_dict': { | ||||
|             'id': '1_szkgu2m2', | ||||
|             'ext': 'flv', | ||||
|             'title': "Ellen's Amazingly Talented Audience", | ||||
|             'description': 'md5:86ff1e376ff0d717d7171590e273f0a5', | ||||
|             'timestamp': 1255140900, | ||||
|             'upload_date': '20091010', | ||||
|             'uploader_id': 'ellenkaltura@gmail.com', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://widgets.ellentube.com/videos/%s' % video_id, | ||||
|             video_id) | ||||
|         URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url) | ||||
|  | ||||
|         partner_id = self._search_regex( | ||||
|             r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id') | ||||
|         for num, url_ in enumerate(URLS, 1): | ||||
|             webpage = self._download_webpage( | ||||
|                 url_, video_id, fatal=num == len(URLS)) | ||||
|  | ||||
|         kaltura_id = self._search_regex( | ||||
|             [r'id="kaltura_player_([^"]+)"', | ||||
|              r"_wb_entry_id\s*:\s*'([^']+)", | ||||
|              r'data-kaltura-entry-id="([^"]+)'], | ||||
|             webpage, 'kaltura id') | ||||
|             default = NO_DEFAULT if num == len(URLS) else None | ||||
|  | ||||
|             partner_id = self._search_regex( | ||||
|                 r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id', | ||||
|                 default=default) | ||||
|  | ||||
|             kaltura_id = self._search_regex( | ||||
|                 [r'id="kaltura_player_([^"]+)"', | ||||
|                  r"_wb_entry_id\s*:\s*'([^']+)", | ||||
|                  r'data-kaltura-entry-id="([^"]+)'], | ||||
|                 webpage, 'kaltura id', default=default) | ||||
|  | ||||
|             if partner_id and kaltura_id: | ||||
|                 break | ||||
|  | ||||
|         return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') | ||||
|  | ||||
|   | ||||
| @@ -4,9 +4,10 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class EngadgetIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www.engadget.com/video/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://www.engadget.com/video/(?P<id>[^/?#]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         # video with 5min ID | ||||
|         'url': 'http://www.engadget.com/video/518153925/', | ||||
|         'md5': 'c6820d4828a5064447a4d9fc73f312c9', | ||||
|         'info_dict': { | ||||
| @@ -15,8 +16,12 @@ class EngadgetIE(InfoExtractor): | ||||
|             'title': 'Samsung Galaxy Tab Pro 8.4 Review', | ||||
|         }, | ||||
|         'add_ie': ['FiveMin'], | ||||
|     } | ||||
|     }, { | ||||
|         # video with vidible ID | ||||
|         'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         return self.url_result('5min:%s' % video_id) | ||||
|         return self.url_result('aol-video:%s' % video_id) | ||||
|   | ||||
| @@ -4,19 +4,23 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     encode_base_n, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EpornerIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', | ||||
|         'md5': '39d486f046212d8e1b911c52ab4691f8', | ||||
|         'info_dict': { | ||||
|             'id': '95008', | ||||
|             'id': 'qlDUmNsj6VS', | ||||
|             'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Infamous Tiffany Teen Strip Tease Video', | ||||
| @@ -28,34 +32,72 @@ class EpornerIE(InfoExtractor): | ||||
|         # New (May 2016) URL layout | ||||
|         'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') | ||||
|         display_id = mobj.group('display_id') or video_id | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>(.*?) - EPORNER', webpage, 'title') | ||||
|         webpage, urlh = self._download_webpage_handle(url, display_id) | ||||
|  | ||||
|         redirect_url = 'http://www.eporner.com/config5/%s' % video_id | ||||
|         player_code = self._download_webpage( | ||||
|             redirect_url, display_id, note='Downloading player config') | ||||
|         video_id = self._match_id(compat_str(urlh.geturl())) | ||||
|  | ||||
|         sources = self._search_regex( | ||||
|             r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources') | ||||
|         hash = self._search_regex( | ||||
|             r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash') | ||||
|  | ||||
|         title = self._og_search_title(webpage, default=None) or self._html_search_regex( | ||||
|             r'<title>(.+?) - EPORNER', webpage, 'title') | ||||
|  | ||||
|         # Reverse engineered from vjs.js | ||||
|         def calc_hash(s): | ||||
|             return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8))) | ||||
|  | ||||
|         video = self._download_json( | ||||
|             'http://www.eporner.com/xhr/video/%s' % video_id, | ||||
|             display_id, note='Downloading video JSON', | ||||
|             query={ | ||||
|                 'hash': calc_hash(hash), | ||||
|                 'device': 'generic', | ||||
|                 'domain': 'www.eporner.com', | ||||
|                 'fallback': 'false', | ||||
|             }) | ||||
|  | ||||
|         if video.get('available') is False: | ||||
|             raise ExtractorError( | ||||
|                 '%s said: %s' % (self.IE_NAME, video['message']), expected=True) | ||||
|  | ||||
|         sources = video['sources'] | ||||
|  | ||||
|         formats = [] | ||||
|         for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources): | ||||
|             fmt = { | ||||
|                 'url': video_url, | ||||
|                 'format_id': format_id, | ||||
|             } | ||||
|             m = re.search(r'^(\d+)', format_id) | ||||
|             if m: | ||||
|                 fmt['height'] = int(m.group(1)) | ||||
|             formats.append(fmt) | ||||
|         for kind, formats_dict in sources.items(): | ||||
|             if not isinstance(formats_dict, dict): | ||||
|                 continue | ||||
|             for format_id, format_dict in formats_dict.items(): | ||||
|                 if not isinstance(format_dict, dict): | ||||
|                     continue | ||||
|                 src = format_dict.get('src') | ||||
|                 if not isinstance(src, compat_str) or not src.startswith('http'): | ||||
|                     continue | ||||
|                 if kind == 'hls': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         src, display_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                         m3u8_id=kind, fatal=False)) | ||||
|                 else: | ||||
|                     height = int_or_none(self._search_regex( | ||||
|                         r'(\d+)[pP]', format_id, 'height', default=None)) | ||||
|                     fps = int_or_none(self._search_regex( | ||||
|                         r'(\d+)fps', format_id, 'fps', default=None)) | ||||
|  | ||||
|                     formats.append({ | ||||
|                         'url': src, | ||||
|                         'format_id': format_id, | ||||
|                         'height': height, | ||||
|                         'fps': fps, | ||||
|                     }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         duration = parse_duration(self._html_search_meta('duration', webpage)) | ||||
|   | ||||
| @@ -44,6 +44,7 @@ from .appletrailers import ( | ||||
|     AppleTrailersSectionIE, | ||||
| ) | ||||
| from .archiveorg import ArchiveOrgIE | ||||
| from .arkena import ArkenaIE | ||||
| from .ard import ( | ||||
|     ARDIE, | ||||
|     ARDMediathekIE, | ||||
| @@ -156,7 +157,12 @@ from .cnn import ( | ||||
| ) | ||||
| from .coub import CoubIE | ||||
| from .collegerama import CollegeRamaIE | ||||
| from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE | ||||
| from .comedycentral import ( | ||||
|     ComedyCentralIE, | ||||
|     ComedyCentralShortnameIE, | ||||
|     ComedyCentralTVIE, | ||||
|     ToshIE, | ||||
| ) | ||||
| from .comcarcoff import ComCarCoffIE | ||||
| from .commonmistakes import CommonMistakesIE, UnicodeBOMIE | ||||
| from .commonprotocols import RtmpIE | ||||
| @@ -284,7 +290,6 @@ from .funimation import FunimationIE | ||||
| from .funnyordie import FunnyOrDieIE | ||||
| from .fusion import FusionIE | ||||
| from .gameinformer import GameInformerIE | ||||
| from .gamekings import GamekingsIE | ||||
| from .gameone import ( | ||||
|     GameOneIE, | ||||
|     GameOnePlaylistIE, | ||||
| @@ -393,6 +398,10 @@ from .kuwo import ( | ||||
| ) | ||||
| from .la7 import LA7IE | ||||
| from .laola1tv import Laola1TvIE | ||||
| from .lcp import ( | ||||
|     LcpPlayIE, | ||||
|     LcpIE, | ||||
| ) | ||||
| from .learnr import LearnrIE | ||||
| from .lecture2go import Lecture2GoIE | ||||
| from .lemonde import LemondeIE | ||||
| @@ -471,7 +480,6 @@ from .msn import MSNIE | ||||
| from .mtv import ( | ||||
|     MTVIE, | ||||
|     MTVServicesEmbeddedIE, | ||||
|     MTVIggyIE, | ||||
|     MTVDEIE, | ||||
| ) | ||||
| from .muenchentv import MuenchenTVIE | ||||
| @@ -483,8 +491,9 @@ from .myvi import MyviIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .myvidster import MyVidsterIE | ||||
| from .nationalgeographic import ( | ||||
|     NationalGeographicVideoIE, | ||||
|     NationalGeographicIE, | ||||
|     NationalGeographicChannelIE, | ||||
|     NationalGeographicEpisodeGuideIE, | ||||
| ) | ||||
| from .naver import NaverIE | ||||
| from .nba import NBAIE | ||||
| @@ -521,7 +530,6 @@ from .nextmedia import ( | ||||
|     NextMediaActionNewsIE, | ||||
|     AppleDailyIE, | ||||
| ) | ||||
| from .nextmovie import NextMovieIE | ||||
| from .nfb import NFBIE | ||||
| from .nfl import NFLIE | ||||
| from .nhl import ( | ||||
| @@ -537,6 +545,8 @@ from .nick import ( | ||||
| from .niconico import NiconicoIE, NiconicoPlaylistIE | ||||
| from .ninecninemedia import NineCNineMediaIE | ||||
| from .ninegag import NineGagIE | ||||
| from .ninenow import NineNowIE | ||||
| from .nintendo import NintendoIE | ||||
| from .noco import NocoIE | ||||
| from .normalboots import NormalbootsIE | ||||
| from .nosvideo import NosVideoIE | ||||
| @@ -581,6 +591,7 @@ from .nytimes import ( | ||||
|     NYTimesArticleIE, | ||||
| ) | ||||
| from .nuvid import NuvidIE | ||||
| from .odatv import OdaTVIE | ||||
| from .odnoklassniki import OdnoklassnikiIE | ||||
| from .oktoberfesttv import OktoberfestTVIE | ||||
| from .onet import ( | ||||
| @@ -680,6 +691,7 @@ from .rice import RICEIE | ||||
| from .ringtv import RingTVIE | ||||
| from .ro220 import Ro220IE | ||||
| from .rockstargames import RockstarGamesIE | ||||
| from .roosterteeth import RoosterTeethIE | ||||
| from .rottentomatoes import RottenTomatoesIE | ||||
| from .roxwel import RoxwelIE | ||||
| from .rtbf import RTBFIE | ||||
| @@ -688,8 +700,9 @@ from .rtlnl import RtlNlIE | ||||
| from .rtl2 import RTL2IE | ||||
| from .rtp import RTPIE | ||||
| from .rts import RTSIE | ||||
| from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE | ||||
| from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE | ||||
| from .rtvnh import RTVNHIE | ||||
| from .rudo import RudoIE | ||||
| from .ruhd import RUHDIE | ||||
| from .ruleporn import RulePornIE | ||||
| from .rutube import ( | ||||
| @@ -779,6 +792,7 @@ from .srmediathek import SRMediathekIE | ||||
| from .ssa import SSAIE | ||||
| from .stanfordoc import StanfordOpenClassroomIE | ||||
| from .steam import SteamIE | ||||
| from .streamable import StreamableIE | ||||
| from .streamcloud import StreamcloudIE | ||||
| from .streamcz import StreamCZIE | ||||
| from .streetvoice import StreetVoiceIE | ||||
| @@ -878,6 +892,7 @@ from .tvc import ( | ||||
| from .tvigle import TvigleIE | ||||
| from .tvland import TVLandIE | ||||
| from .tvp import ( | ||||
|     TVPEmbedIE, | ||||
|     TVPIE, | ||||
|     TVPSeriesIE, | ||||
| ) | ||||
| @@ -987,9 +1002,11 @@ from .viki import ( | ||||
| from .vk import ( | ||||
|     VKIE, | ||||
|     VKUserVideosIE, | ||||
|     VKWallPostIE, | ||||
| ) | ||||
| from .vlive import VLiveIE | ||||
| from .vodlocker import VodlockerIE | ||||
| from .vodplatform import VODPlatformIE | ||||
| from .voicerepublic import VoiceRepublicIE | ||||
| from .voxmedia import VoxMediaIE | ||||
| from .vporn import VpornIE | ||||
|   | ||||
| @@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|                 (?: | ||||
|                     https?:// | ||||
|                         (?:\w+\.)?facebook\.com/ | ||||
|                         (?:[\w-]+\.)?facebook\.com/ | ||||
|                         (?:[^#]*?\#!/)? | ||||
|                         (?: | ||||
|                             (?: | ||||
| @@ -127,6 +127,9 @@ class FacebookIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -231,11 +234,13 @@ class FacebookIE(InfoExtractor): | ||||
|                     continue | ||||
|                 if isinstance(f, dict): | ||||
|                     f = [f] | ||||
|                 if isinstance(f, list): | ||||
|                 if not isinstance(f, list): | ||||
|                     continue | ||||
|                 if f[0].get('video_id') == video_id: | ||||
|                     video_data = video_data_candidate | ||||
|                     break | ||||
|             if video_data: | ||||
|                 break | ||||
|  | ||||
|         def video_data_list2dict(video_data): | ||||
|             ret = {} | ||||
|   | ||||
| @@ -1,24 +1,11 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     parse_duration, | ||||
|     replace_extension, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FiveMinIE(InfoExtractor): | ||||
|     IE_NAME = '5min' | ||||
|     _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))' | ||||
|     _VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -29,8 +16,16 @@ class FiveMinIE(InfoExtractor): | ||||
|                 'id': '518013791', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'iPad Mini with Retina Display Review', | ||||
|                 'description': 'iPad mini with Retina Display review', | ||||
|                 'duration': 177, | ||||
|                 'uploader': 'engadget', | ||||
|                 'upload_date': '20131115', | ||||
|                 'timestamp': 1384515288, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247 | ||||
| @@ -44,108 +39,16 @@ class FiveMinIE(InfoExtractor): | ||||
|             }, | ||||
|             'skip': 'no longer available', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://embed.5min.com/518726732/', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://delivery.vidible.tv/aol?playList=518013791', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|     _ERRORS = { | ||||
|         'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.', | ||||
|         'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.', | ||||
|         'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.', | ||||
|         'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.', | ||||
|         'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.', | ||||
|         'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.', | ||||
|     } | ||||
|     _QUALITIES = { | ||||
|         1: { | ||||
|             'width': 640, | ||||
|             'height': 360, | ||||
|         }, | ||||
|         2: { | ||||
|             'width': 854, | ||||
|             'height': 480, | ||||
|         }, | ||||
|         4: { | ||||
|             'width': 1280, | ||||
|             'height': 720, | ||||
|         }, | ||||
|         8: { | ||||
|             'width': 1920, | ||||
|             'height': 1080, | ||||
|         }, | ||||
|         16: { | ||||
|             'width': 640, | ||||
|             'height': 360, | ||||
|         }, | ||||
|         32: { | ||||
|             'width': 854, | ||||
|             'height': 480, | ||||
|         }, | ||||
|         64: { | ||||
|             'width': 1280, | ||||
|             'height': 720, | ||||
|         }, | ||||
|         128: { | ||||
|             'width': 640, | ||||
|             'height': 360, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         sid = mobj.group('sid') | ||||
|  | ||||
|         if mobj.group('query'): | ||||
|             qs = compat_parse_qs(mobj.group('query')) | ||||
|             if not qs.get('playList'): | ||||
|                 raise ExtractorError('Invalid URL', expected=True) | ||||
|             video_id = qs['playList'][0] | ||||
|             if qs.get('sid'): | ||||
|                 sid = qs['sid'][0] | ||||
|  | ||||
|         embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id | ||||
|         if not sid: | ||||
|             embed_page = self._download_webpage(embed_url, video_id, | ||||
|                                                 'Downloading embed page') | ||||
|             sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid') | ||||
|  | ||||
|         response = self._download_json( | ||||
|             'https://syn.5min.com/handlers/SenseHandler.ashx?' + | ||||
|             compat_urllib_parse_urlencode({ | ||||
|                 'func': 'GetResults', | ||||
|                 'playlist': video_id, | ||||
|                 'sid': sid, | ||||
|                 'isPlayerSeed': 'true', | ||||
|                 'url': embed_url, | ||||
|             }), | ||||
|             video_id) | ||||
|         if not response['success']: | ||||
|             raise ExtractorError( | ||||
|                 '%s said: %s' % ( | ||||
|                     self.IE_NAME, | ||||
|                     self._ERRORS.get(response['errorMessage'], response['errorMessage'])), | ||||
|                 expected=True) | ||||
|         info = response['binding'][0] | ||||
|  | ||||
|         formats = [] | ||||
|         parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs( | ||||
|             compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0]) | ||||
|         for rendition in info['Renditions']: | ||||
|             if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8': | ||||
|                 continue | ||||
|             else: | ||||
|                 rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType']))) | ||||
|                 quality = self._QUALITIES.get(rendition['ID'], {}) | ||||
|                 formats.append({ | ||||
|                     'format_id': '%s-%d' % (rendition['RenditionType'], rendition['ID']), | ||||
|                     'url': rendition_url, | ||||
|                     'width': quality.get('width'), | ||||
|                     'height': quality.get('height'), | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info['Title'], | ||||
|             'thumbnail': info.get('ThumbURL'), | ||||
|             'duration': parse_duration(info.get('Duration')), | ||||
|             'formats': formats, | ||||
|         } | ||||
|         video_id = self._match_id(url) | ||||
|         return self.url_result('aol-video:%s' % video_id) | ||||
|   | ||||
| @@ -1,76 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
| from .youtube import YoutubeIE | ||||
|  | ||||
|  | ||||
| class GamekingsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         # YouTube embed video | ||||
|         'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/', | ||||
|         'md5': '5208d3a17adeaef829a7861887cb9029', | ||||
|         'info_dict': { | ||||
|             'id': 'HkSQKetlGOU', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Phoenix Wright: Ace Attorney - Dual Destinies Review', | ||||
|             'description': 'md5:db88c0e7f47e9ea50df3271b9dc72e1d', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'uploader_id': 'UCJugRGo4STYMeFr5RoOShtQ', | ||||
|             'uploader': 'Gamekings Vault', | ||||
|             'upload_date': '20151123', | ||||
|         }, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }, { | ||||
|         # vimeo video | ||||
|         'url': 'http://www.gamekings.nl/videos/the-legend-of-zelda-majoras-mask/', | ||||
|         'md5': '12bf04dfd238e70058046937657ea68d', | ||||
|         'info_dict': { | ||||
|             'id': 'the-legend-of-zelda-majoras-mask', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The Legend of Zelda: Majora’s Mask', | ||||
|             'description': 'md5:9917825fe0e9f4057601fe1e38860de3', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.gamekings.nl/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         playlist_id = self._search_regex( | ||||
|             r'gogoVideo\([^,]+,\s*"([^"]+)', webpage, 'playlist id') | ||||
|  | ||||
|         # Check if a YouTube embed is used | ||||
|         if YoutubeIE.suitable(playlist_id): | ||||
|             return self.url_result(playlist_id, ie='Youtube') | ||||
|  | ||||
|         playlist = self._download_xml( | ||||
|             'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id, | ||||
|             video_id) | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'jwplayer': 'http://rss.jwpcdn.com/' | ||||
|         } | ||||
|  | ||||
|         item = playlist.find('./channel/item') | ||||
|  | ||||
|         thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail') | ||||
|         video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
| @@ -28,10 +28,13 @@ class GameSpotIE(OnceIE): | ||||
|         'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/', | ||||
|         'info_dict': { | ||||
|             'id': 'gs-2300-6424837', | ||||
|             'ext': 'flv', | ||||
|             'title': 'The Witcher 3: Wild Hunt [Xbox ONE]  - Now Playing', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Now Playing - The Witcher 3: Wild Hunt', | ||||
|             'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # m3u8 downloads | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -62,6 +62,7 @@ from .videomore import VideomoreIE | ||||
| from .googledrive import GoogleDriveIE | ||||
| from .jwplatform import JWPlatformIE | ||||
| from .digiteka import DigitekaIE | ||||
| from .arkena import ArkenaIE | ||||
| from .instagram import InstagramIE | ||||
| from .liveleak import LiveLeakIE | ||||
| from .threeqsdn import ThreeQSDNIE | ||||
| @@ -70,6 +71,7 @@ from .vessel import VesselIE | ||||
| from .kaltura import KalturaIE | ||||
| from .eagleplatform import EaglePlatformIE | ||||
| from .facebook import FacebookIE | ||||
| from .soundcloud import SoundcloudIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -473,7 +475,7 @@ class GenericIE(InfoExtractor): | ||||
|             'url': 'http://www.vestifinance.ru/articles/25753', | ||||
|             'info_dict': { | ||||
|                 'id': '25753', | ||||
|                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"', | ||||
|                 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"', | ||||
|             }, | ||||
|             'playlist': [{ | ||||
|                 'info_dict': { | ||||
| @@ -640,6 +642,8 @@ class GenericIE(InfoExtractor): | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored', | ||||
|                 'description': 'Two valets share their love for movie star Liam Neesons.', | ||||
|                 'timestamp': 1349922600, | ||||
|                 'upload_date': '20121011', | ||||
|             }, | ||||
|         }, | ||||
|         # YouTube embed via <data-embed-url=""> | ||||
| @@ -781,6 +785,15 @@ class GenericIE(InfoExtractor): | ||||
|                 'upload_date': '20141029', | ||||
|             } | ||||
|         }, | ||||
|         # Soundcloud multiple embeds | ||||
|         { | ||||
|             'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809', | ||||
|             'info_dict': { | ||||
|                 'id': '52809', | ||||
|                 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO', | ||||
|             }, | ||||
|             'playlist_mincount': 7, | ||||
|         }, | ||||
|         # Livestream embed | ||||
|         { | ||||
|             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast', | ||||
| @@ -856,6 +869,7 @@ class GenericIE(InfoExtractor): | ||||
|                 'description': 'md5:601cb790edd05908957dae8aaa866465', | ||||
|                 'upload_date': '20150220', | ||||
|             }, | ||||
|             'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/', | ||||
|         }, | ||||
|         # jwplayer YouTube | ||||
|         { | ||||
| @@ -1249,6 +1263,20 @@ class GenericIE(InfoExtractor): | ||||
|                 'uploader': 'www.hudl.com', | ||||
|             }, | ||||
|         }, | ||||
|         # twitter:player:stream embed | ||||
|         { | ||||
|             'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288', | ||||
|             'info_dict': { | ||||
|                 'id': 'master', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine', | ||||
|                 'uploader': 'www.rtl.be', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 downloads | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # twitter:player embed | ||||
|         { | ||||
|             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/', | ||||
| @@ -1313,6 +1341,38 @@ class GenericIE(InfoExtractor): | ||||
|             }, | ||||
|             'add_ie': ['Kaltura'], | ||||
|         }, | ||||
|         { | ||||
|             # Non-standard Vimeo embed | ||||
|             'url': 'https://openclassrooms.com/courses/understanding-the-web', | ||||
|             'md5': '64d86f1c7d369afd9a78b38cbb88d80a', | ||||
|             'info_dict': { | ||||
|                 'id': '148867247', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Understanding the web - Teaser', | ||||
|                 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.', | ||||
|                 'upload_date': '20151214', | ||||
|                 'uploader': 'OpenClassrooms', | ||||
|                 'uploader_id': 'openclassrooms', | ||||
|             }, | ||||
|             'add_ie': ['Vimeo'], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video', | ||||
|             'md5': 'b96f2f71b359a8ecd05ce4e1daa72365', | ||||
|             'info_dict': { | ||||
|                 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Big Buck Bunny', | ||||
|                 'description': 'Royalty free test video', | ||||
|                 'timestamp': 1432816365, | ||||
|                 'upload_date': '20150528', | ||||
|                 'is_live': False, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'add_ie': [ArkenaIE.ie_key()], | ||||
|         }, | ||||
|         # { | ||||
|         #     # TODO: find another test | ||||
|         #     # http://schema.org/VideoObject | ||||
| @@ -1949,12 +2009,9 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result(myvi_url) | ||||
|  | ||||
|         # Look for embedded soundcloud player | ||||
|         mobj = re.search( | ||||
|             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             url = unescapeHTML(mobj.group('url')) | ||||
|             return self.url_result(url) | ||||
|         soundcloud_urls = SoundcloudIE._extract_urls(webpage) | ||||
|         if soundcloud_urls: | ||||
|             return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) | ||||
|  | ||||
|         # Look for embedded mtvservices player | ||||
|         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) | ||||
| @@ -2117,6 +2174,11 @@ class GenericIE(InfoExtractor): | ||||
|         if digiteka_url: | ||||
|             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key()) | ||||
|  | ||||
|         # Look for Arkena embeds | ||||
|         arkena_url = ArkenaIE._extract_url(webpage) | ||||
|         if arkena_url: | ||||
|             return self.url_result(arkena_url, ArkenaIE.ie_key()) | ||||
|  | ||||
|         # Look for Limelight embeds | ||||
|         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage) | ||||
|         if mobj: | ||||
| @@ -2145,6 +2207,14 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result( | ||||
|                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine') | ||||
|  | ||||
|         # Look for VODPlatform embeds | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result( | ||||
|                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform') | ||||
|  | ||||
|         # Look for Instagram embeds | ||||
|         instagram_embed_url = InstagramIE._extract_embed_url(webpage) | ||||
|         if instagram_embed_url is not None: | ||||
| @@ -2169,11 +2239,6 @@ class GenericIE(InfoExtractor): | ||||
|                 'uploader': video_uploader, | ||||
|             } | ||||
|  | ||||
|         # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser | ||||
|         embed_url = self._html_search_meta('twitter:player', webpage, default=None) | ||||
|         if embed_url: | ||||
|             return self.url_result(embed_url) | ||||
|  | ||||
|         # Looking for http://schema.org/VideoObject | ||||
|         json_ld = self._search_json_ld( | ||||
|             webpage, video_id, default=None, expected_type='VideoObject') | ||||
| @@ -2230,6 +2295,9 @@ class GenericIE(InfoExtractor): | ||||
|                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage) | ||||
|         if not found: | ||||
|             # Try to find twitter cards info | ||||
|             # twitter:player:stream should be checked before twitter:player since | ||||
|             # it is expected to contain a raw stream (see | ||||
|             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser) | ||||
|             found = filter_video(re.findall( | ||||
|                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)) | ||||
|         if not found: | ||||
| @@ -2263,6 +2331,15 @@ class GenericIE(InfoExtractor): | ||||
|                     '_type': 'url', | ||||
|                     'url': new_url, | ||||
|                 } | ||||
|  | ||||
|         if not found: | ||||
|             # twitter:player is a https URL to iframe player that may or may not | ||||
|             # be supported by youtube-dl thus this is checked the very last (see | ||||
|             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser) | ||||
|             embed_url = self._html_search_meta('twitter:player', webpage, default=None) | ||||
|             if embed_url: | ||||
|                 return self.url_result(embed_url) | ||||
|  | ||||
|         if not found: | ||||
|             raise UnsupportedError(url) | ||||
|  | ||||
|   | ||||
| @@ -36,7 +36,6 @@ class InstagramIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': 'BA-pQFBG8HZ', | ||||
|             'ext': 'mp4', | ||||
|             'uploader_id': 'britneyspears', | ||||
|             'title': 'Video by britneyspears', | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|             'timestamp': 1453760977, | ||||
|   | ||||
| @@ -4,10 +4,12 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     mimetype2ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -28,74 +30,84 @@ class JWPlatformBaseIE(InfoExtractor): | ||||
|         return self._parse_jwplayer_data( | ||||
|             jwplayer_data, video_id, *args, **kwargs) | ||||
|  | ||||
|     def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None): | ||||
|     def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None): | ||||
|         # JWPlayer backward compatibility: flattened playlists | ||||
|         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 | ||||
|         if 'playlist' not in jwplayer_data: | ||||
|             jwplayer_data = {'playlist': [jwplayer_data]} | ||||
|  | ||||
|         video_data = jwplayer_data['playlist'][0] | ||||
|         entries = [] | ||||
|         for video_data in jwplayer_data['playlist']: | ||||
|             # JWPlayer backward compatibility: flattened sources | ||||
|             # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 | ||||
|             if 'sources' not in video_data: | ||||
|                 video_data['sources'] = [video_data] | ||||
|  | ||||
|         # JWPlayer backward compatibility: flattened sources | ||||
|         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 | ||||
|         if 'sources' not in video_data: | ||||
|             video_data['sources'] = [video_data] | ||||
|  | ||||
|         formats = [] | ||||
|         for source in video_data['sources']: | ||||
|             source_url = self._proto_relative_url(source['file']) | ||||
|             source_type = source.get('type') or '' | ||||
|             if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) | ||||
|             elif source_type.startswith('audio'): | ||||
|                 formats.append({ | ||||
|                     'url': source_url, | ||||
|                     'vcodec': 'none', | ||||
|                 }) | ||||
|             else: | ||||
|                 a_format = { | ||||
|                     'url': source_url, | ||||
|                     'width': int_or_none(source.get('width')), | ||||
|                     'height': int_or_none(source.get('height')), | ||||
|                 } | ||||
|                 if source_url.startswith('rtmp'): | ||||
|                     a_format['ext'] = 'flv', | ||||
|  | ||||
|                     # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as | ||||
|                     # of jwplayer.flash.swf | ||||
|                     rtmp_url_parts = re.split( | ||||
|                         r'((?:mp4|mp3|flv):)', source_url, 1) | ||||
|                     if len(rtmp_url_parts) == 3: | ||||
|                         rtmp_url, prefix, play_path = rtmp_url_parts | ||||
|                         a_format.update({ | ||||
|                             'url': rtmp_url, | ||||
|                             'play_path': prefix + play_path, | ||||
|                         }) | ||||
|                     if rtmp_params: | ||||
|                         a_format.update(rtmp_params) | ||||
|                 formats.append(a_format) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         tracks = video_data.get('tracks') | ||||
|         if tracks and isinstance(tracks, list): | ||||
|             for track in tracks: | ||||
|                 if track.get('file') and track.get('kind') == 'captions': | ||||
|                     subtitles.setdefault(track.get('label') or 'en', []).append({ | ||||
|                         'url': self._proto_relative_url(track['file']) | ||||
|             formats = [] | ||||
|             for source in video_data['sources']: | ||||
|                 source_url = self._proto_relative_url(source['file']) | ||||
|                 if base_url: | ||||
|                     source_url = compat_urlparse.urljoin(base_url, source_url) | ||||
|                 source_type = source.get('type') or '' | ||||
|                 ext = mimetype2ext(source_type) or determine_ext(source_url) | ||||
|                 if source_type == 'hls' or ext == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) | ||||
|                 # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 | ||||
|                 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): | ||||
|                     formats.append({ | ||||
|                         'url': source_url, | ||||
|                         'vcodec': 'none', | ||||
|                         'ext': ext, | ||||
|                     }) | ||||
|                 else: | ||||
|                     a_format = { | ||||
|                         'url': source_url, | ||||
|                         'width': int_or_none(source.get('width')), | ||||
|                         'height': int_or_none(source.get('height')), | ||||
|                         'ext': ext, | ||||
|                     } | ||||
|                     if source_url.startswith('rtmp'): | ||||
|                         a_format['ext'] = 'flv', | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_data['title'] if require_title else video_data.get('title'), | ||||
|             'description': video_data.get('description'), | ||||
|             'thumbnail': self._proto_relative_url(video_data.get('image')), | ||||
|             'timestamp': int_or_none(video_data.get('pubdate')), | ||||
|             'duration': float_or_none(jwplayer_data.get('duration')), | ||||
|             'subtitles': subtitles, | ||||
|             'formats': formats, | ||||
|         } | ||||
|                         # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as | ||||
|                         # of jwplayer.flash.swf | ||||
|                         rtmp_url_parts = re.split( | ||||
|                             r'((?:mp4|mp3|flv):)', source_url, 1) | ||||
|                         if len(rtmp_url_parts) == 3: | ||||
|                             rtmp_url, prefix, play_path = rtmp_url_parts | ||||
|                             a_format.update({ | ||||
|                                 'url': rtmp_url, | ||||
|                                 'play_path': prefix + play_path, | ||||
|                             }) | ||||
|                         if rtmp_params: | ||||
|                             a_format.update(rtmp_params) | ||||
|                     formats.append(a_format) | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|             subtitles = {} | ||||
|             tracks = video_data.get('tracks') | ||||
|             if tracks and isinstance(tracks, list): | ||||
|                 for track in tracks: | ||||
|                     if track.get('file') and track.get('kind') == 'captions': | ||||
|                         subtitles.setdefault(track.get('label') or 'en', []).append({ | ||||
|                             'url': self._proto_relative_url(track['file']) | ||||
|                         }) | ||||
|  | ||||
|             entries.append({ | ||||
|                 'id': video_id, | ||||
|                 'title': video_data['title'] if require_title else video_data.get('title'), | ||||
|                 'description': video_data.get('description'), | ||||
|                 'thumbnail': self._proto_relative_url(video_data.get('image')), | ||||
|                 'timestamp': int_or_none(video_data.get('pubdate')), | ||||
|                 'duration': float_or_none(jwplayer_data.get('duration')), | ||||
|                 'subtitles': subtitles, | ||||
|                 'formats': formats, | ||||
|             }) | ||||
|         if len(entries) == 1: | ||||
|             return entries[0] | ||||
|         else: | ||||
|             return self.playlist_result(entries) | ||||
|  | ||||
|  | ||||
| class JWPlatformIE(JWPlatformBaseIE): | ||||
|   | ||||
| @@ -62,6 +62,11 @@ class KalturaIE(InfoExtractor): | ||||
|         { | ||||
|             'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             # video with subtitles | ||||
|             'url': 'kaltura:111032:1_cw786r8q', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -130,7 +135,6 @@ class KalturaIE(InfoExtractor): | ||||
|             video_id, actions, service_url, note='Downloading Kaltura signature')['ks'] | ||||
|  | ||||
|     def _get_video_info(self, video_id, partner_id, service_url=None): | ||||
|         signature = self._get_kaltura_signature(video_id, partner_id, service_url) | ||||
|         actions = [ | ||||
|             { | ||||
|                 'action': 'null', | ||||
| @@ -138,18 +142,30 @@ class KalturaIE(InfoExtractor): | ||||
|                 'clientTag': 'kdp:v3.8.5', | ||||
|                 'format': 1,  # JSON, 2 = XML, 3 = PHP | ||||
|                 'service': 'multirequest', | ||||
|                 'ks': signature, | ||||
|             }, | ||||
|             { | ||||
|                 'expiry': 86400, | ||||
|                 'service': 'session', | ||||
|                 'action': 'startWidgetSession', | ||||
|                 'widgetId': '_%s' % partner_id, | ||||
|             }, | ||||
|             { | ||||
|                 'action': 'get', | ||||
|                 'entryId': video_id, | ||||
|                 'service': 'baseentry', | ||||
|                 'version': '-1', | ||||
|                 'ks': '{1:result:ks}', | ||||
|             }, | ||||
|             { | ||||
|                 'action': 'getbyentryid', | ||||
|                 'entryId': video_id, | ||||
|                 'service': 'flavorAsset', | ||||
|                 'ks': '{1:result:ks}', | ||||
|             }, | ||||
|             { | ||||
|                 'action': 'list', | ||||
|                 'filter:entryIdEqual': video_id, | ||||
|                 'service': 'caption_captionasset', | ||||
|                 'ks': '{1:result:ks}', | ||||
|             }, | ||||
|         ] | ||||
|         return self._kaltura_api_call( | ||||
| @@ -161,8 +177,9 @@ class KalturaIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         partner_id, entry_id = mobj.group('partner_id', 'id') | ||||
|         ks = None | ||||
|         captions = None | ||||
|         if partner_id and entry_id: | ||||
|             info, flavor_assets = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url')) | ||||
|             _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url')) | ||||
|         else: | ||||
|             path, query = mobj.group('path', 'query') | ||||
|             if not path and not query: | ||||
| @@ -181,7 +198,7 @@ class KalturaIE(InfoExtractor): | ||||
|                 raise ExtractorError('Invalid URL', expected=True) | ||||
|             if 'entry_id' in params: | ||||
|                 entry_id = params['entry_id'][0] | ||||
|                 info, flavor_assets = self._get_video_info(entry_id, partner_id) | ||||
|                 _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id) | ||||
|             elif 'uiconf_id' in params and 'flashvars[referenceId]' in params: | ||||
|                 reference_id = params['flashvars[referenceId]'][0] | ||||
|                 webpage = self._download_webpage(url, reference_id) | ||||
| @@ -217,7 +234,7 @@ class KalturaIE(InfoExtractor): | ||||
|         formats = [] | ||||
|         for f in flavor_assets: | ||||
|             # Continue if asset is not ready | ||||
|             if f['status'] != 2: | ||||
|             if f.get('status') != 2: | ||||
|                 continue | ||||
|             video_url = sign_url( | ||||
|                 '%s/flavorId/%s' % (data_url, f['id'])) | ||||
| @@ -240,13 +257,24 @@ class KalturaIE(InfoExtractor): | ||||
|                 m3u8_url, entry_id, 'mp4', 'm3u8_native', | ||||
|                 m3u8_id='hls', fatal=False)) | ||||
|  | ||||
|         self._check_formats(formats, entry_id) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         if captions: | ||||
|             for caption in captions.get('objects', []): | ||||
|                 # Continue if caption is not ready | ||||
|                 if f.get('status') != 2: | ||||
|                     continue | ||||
|                 subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({ | ||||
|                     'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']), | ||||
|                     'ext': caption.get('fileExt'), | ||||
|                 }) | ||||
|  | ||||
|         return { | ||||
|             'id': entry_id, | ||||
|             'title': info['name'], | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'description': clean_html(info.get('description')), | ||||
|             'thumbnail': info.get('thumbnailUrl'), | ||||
|             'duration': info.get('duration'), | ||||
|   | ||||
							
								
								
									
										90
									
								
								youtube_dl/extractor/lcp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								youtube_dl/extractor/lcp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,90 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .arkena import ArkenaIE | ||||
|  | ||||
|  | ||||
| class LcpPlayIE(ArkenaIE): | ||||
|     _VALID_URL = r'https?://play\.lcp\.fr/embed/(?P<id>[^/]+)/(?P<account_id>[^/]+)/[^/]+/[^/]+' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0', | ||||
|         'md5': 'b8bd9298542929c06c1c15788b1f277a', | ||||
|         'info_dict': { | ||||
|             'id': '327336', | ||||
|             'ext': 'mp4', | ||||
|             'title': '327336', | ||||
|             'timestamp': 1456391602, | ||||
|             'upload_date': '20160225', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|  | ||||
| class LcpIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^/]+/)*(?P<id>[^/]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         # arkena embed | ||||
|         'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire', | ||||
|         'md5': 'b8bd9298542929c06c1c15788b1f277a', | ||||
|         'info_dict': { | ||||
|             'id': 'd56d03e9', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche', | ||||
|             'description': 'md5:96ad55009548da9dea19f4120c6c16a8', | ||||
|             'timestamp': 1456488895, | ||||
|             'upload_date': '20160226', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # dailymotion live stream | ||||
|         'url': 'http://www.lcp.fr/le-direct', | ||||
|         'info_dict': { | ||||
|             'id': 'xji3qy', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'La Chaine Parlementaire (LCP), Live TNT', | ||||
|             'description': 'md5:5c69593f2de0f38bd9a949f2c95e870b', | ||||
|             'uploader': 'LCP', | ||||
|             'uploader_id': 'xbz33d', | ||||
|             'timestamp': 1308923058, | ||||
|             'upload_date': '20110624', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 live stream | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.lcp.fr/emissions/277792-les-volontaires', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         play_url = self._search_regex( | ||||
|             r'<iframe[^>]+src=(["\'])(?P<url>%s?(?:(?!\1).)*)\1' % LcpPlayIE._VALID_URL, | ||||
|             webpage, 'play iframe', default=None, group='url') | ||||
|  | ||||
|         if not play_url: | ||||
|             return self.url_result(url, 'Generic') | ||||
|  | ||||
|         title = self._og_search_title(webpage, default=None) or self._html_search_meta( | ||||
|             'twitter:title', webpage, fatal=True) | ||||
|         description = self._html_search_meta( | ||||
|             ('description', 'twitter:description'), webpage) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': LcpPlayIE.ie_key(), | ||||
|             'url': play_url, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|         } | ||||
| @@ -37,11 +37,12 @@ class LimelightBaseIE(InfoExtractor): | ||||
|  | ||||
|         for stream in streams: | ||||
|             stream_url = stream.get('url') | ||||
|             if not stream_url: | ||||
|             if not stream_url or stream.get('drmProtected'): | ||||
|                 continue | ||||
|             if '.f4m' in stream_url: | ||||
|             ext = determine_ext(stream_url) | ||||
|             if ext == 'f4m': | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     stream_url, video_id, fatal=False)) | ||||
|                     stream_url, video_id, f4m_id='hds', fatal=False)) | ||||
|             else: | ||||
|                 fmt = { | ||||
|                     'url': stream_url, | ||||
| @@ -50,13 +51,19 @@ class LimelightBaseIE(InfoExtractor): | ||||
|                     'fps': float_or_none(stream.get('videoFrameRate')), | ||||
|                     'width': int_or_none(stream.get('videoWidthInPixels')), | ||||
|                     'height': int_or_none(stream.get('videoHeightInPixels')), | ||||
|                     'ext': determine_ext(stream_url) | ||||
|                     'ext': ext, | ||||
|                 } | ||||
|                 rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url) | ||||
|                 rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url) | ||||
|                 if rtmp: | ||||
|                     format_id = 'rtmp' | ||||
|                     if stream.get('videoBitRate'): | ||||
|                         format_id += '-%d' % int_or_none(stream['videoBitRate']) | ||||
|                     http_fmt = fmt.copy() | ||||
|                     http_fmt.update({ | ||||
|                         'url': 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]), | ||||
|                         'format_id': format_id.replace('rtmp', 'http'), | ||||
|                     }) | ||||
|                     formats.append(http_fmt) | ||||
|                     fmt.update({ | ||||
|                         'url': rtmp.group('url'), | ||||
|                         'play_path': rtmp.group('playpath'), | ||||
| @@ -68,18 +75,23 @@ class LimelightBaseIE(InfoExtractor): | ||||
|  | ||||
|         for mobile_url in mobile_urls: | ||||
|             media_url = mobile_url.get('mobileUrl') | ||||
|             if not media_url: | ||||
|                 continue | ||||
|             format_id = mobile_url.get('targetMediaPlatform') | ||||
|             if determine_ext(media_url) == 'm3u8': | ||||
|             if not media_url or format_id == 'Widevine': | ||||
|                 continue | ||||
|             ext = determine_ext(media_url) | ||||
|             if ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     media_url, video_id, 'mp4', 'm3u8_native', | ||||
|                     m3u8_id=format_id, fatal=False)) | ||||
|             elif ext == 'f4m': | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     stream_url, video_id, f4m_id=format_id, fatal=False)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': media_url, | ||||
|                     'format_id': format_id, | ||||
|                     'preference': -1, | ||||
|                     'ext': ext, | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
| @@ -145,7 +157,7 @@ class LimelightMediaIE(LimelightBaseIE): | ||||
|         'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', | ||||
|         'info_dict': { | ||||
|             'id': '3ffd040b522b4485b6d84effc750cd86', | ||||
|             'ext': 'flv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'HaP and the HB Prince Trailer', | ||||
|             'description': 'md5:8005b944181778e313d95c1237ddb640', | ||||
|             'thumbnail': 're:^https?://.*\.jpeg$', | ||||
| @@ -154,27 +166,23 @@ class LimelightMediaIE(LimelightBaseIE): | ||||
|             'upload_date': '20090604', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # video with subtitles | ||||
|         'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335', | ||||
|         'md5': '2fa3bad9ac321e23860ca23bc2c69e3d', | ||||
|         'info_dict': { | ||||
|             'id': 'a3e00274d4564ec4a9b29b9466432335', | ||||
|             'ext': 'flv', | ||||
|             'ext': 'mp4', | ||||
|             'title': '3Play Media Overview Video', | ||||
|             'description': '', | ||||
|             'thumbnail': 're:^https?://.*\.jpeg$', | ||||
|             'duration': 78.101, | ||||
|             'timestamp': 1338929955, | ||||
|             'upload_date': '20120605', | ||||
|             'subtitles': 'mincount:9', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452', | ||||
|         'only_matching': True, | ||||
|   | ||||
| @@ -100,7 +100,7 @@ class LyndaIE(LyndaBaseIE): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', | ||||
|         'md5': '679734f6786145da3546585de9a356be', | ||||
|         # md5 is unstable | ||||
|         'info_dict': { | ||||
|             'id': '114408', | ||||
|             'ext': 'mp4', | ||||
|   | ||||
| @@ -9,7 +9,7 @@ class MGTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' | ||||
|     IE_DESC = '芒果TV' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', | ||||
|         'md5': '1bdadcf760a0b90946ca68ee9a2db41a', | ||||
|         'info_dict': { | ||||
| @@ -20,7 +20,11 @@ class MGTVIE(InfoExtractor): | ||||
|             'duration': 7461, | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         # no tbr extracted from stream_url | ||||
|         'url': 'http://www.mgtv.com/v/1/1/f/3324755.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| @@ -41,7 +45,8 @@ class MGTVIE(InfoExtractor): | ||||
|             def extract_format(stream_url, format_id, idx, query={}): | ||||
|                 format_info = self._download_json( | ||||
|                     stream_url, video_id, | ||||
|                     note='Download video info for format %s' % format_id or '#%d' % idx, query=query) | ||||
|                     note='Download video info for format %s' % (format_id or '#%d' % idx), | ||||
|                     query=query) | ||||
|                 return { | ||||
|                     'format_id': format_id, | ||||
|                     'url': format_info['info'], | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | ||||
| import random | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     int_or_none, | ||||
| @@ -18,13 +19,16 @@ class MioMioIE(InfoExtractor): | ||||
|     _TESTS = [{ | ||||
|         # "type=video" in flashvars | ||||
|         'url': 'http://www.miomio.tv/watch/cc88912/', | ||||
|         'md5': '317a5f7f6b544ce8419b784ca8edae65', | ||||
|         'info_dict': { | ||||
|             'id': '88912', | ||||
|             'ext': 'flv', | ||||
|             'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕', | ||||
|             'duration': 5923, | ||||
|         }, | ||||
|         'params': { | ||||
|             # The server provides broken file | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.miomio.tv/watch/cc184024/', | ||||
|         'info_dict': { | ||||
| @@ -32,7 +36,7 @@ class MioMioIE(InfoExtractor): | ||||
|             'title': '《动漫同人插画绘制》', | ||||
|         }, | ||||
|         'playlist_mincount': 86, | ||||
|         'skip': 'This video takes time too long for retrieving the URL', | ||||
|         'skip': 'Unable to load videos', | ||||
|     }, { | ||||
|         'url': 'http://www.miomio.tv/watch/cc173113/', | ||||
|         'info_dict': { | ||||
| @@ -40,20 +44,23 @@ class MioMioIE(InfoExtractor): | ||||
|             'title': 'The New Macbook 2015 上手试玩与简评' | ||||
|         }, | ||||
|         'playlist_mincount': 2, | ||||
|         'skip': 'Unable to load videos', | ||||
|     }, { | ||||
|         # new 'h5' player | ||||
|         'url': 'http://www.miomio.tv/watch/cc273295/', | ||||
|         'md5': '', | ||||
|         'info_dict': { | ||||
|             'id': '273295', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'アウト×デラックス 20160526', | ||||
|         }, | ||||
|         'params': { | ||||
|             # intermittent HTTP 500 | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._html_search_meta( | ||||
|             'description', webpage, 'title', fatal=True) | ||||
|  | ||||
|         mioplayer_path = self._search_regex( | ||||
|             r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path') | ||||
|  | ||||
|         http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path} | ||||
|  | ||||
|     def _extract_mioplayer(self, webpage, video_id, title, http_headers): | ||||
|         xml_config = self._search_regex( | ||||
|             r'flashvars="type=(?:sina|video)&(.+?)&', | ||||
|             webpage, 'xml config') | ||||
| @@ -92,10 +99,34 @@ class MioMioIE(InfoExtractor): | ||||
|                 'http_headers': http_headers, | ||||
|             }) | ||||
|  | ||||
|         return entries | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._html_search_meta( | ||||
|             'description', webpage, 'title', fatal=True) | ||||
|  | ||||
|         mioplayer_path = self._search_regex( | ||||
|             r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path') | ||||
|  | ||||
|         if '_h5' in mioplayer_path: | ||||
|             player_url = compat_urlparse.urljoin(url, mioplayer_path) | ||||
|             player_webpage = self._download_webpage( | ||||
|                 player_url, video_id, | ||||
|                 note='Downloading player webpage', headers={'Referer': url}) | ||||
|             entries = self._parse_html5_media_entries(player_url, player_webpage) | ||||
|             http_headers = {'Referer': player_url} | ||||
|         else: | ||||
|             http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path} | ||||
|             entries = self._extract_mioplayer(webpage, video_id, title, http_headers) | ||||
|  | ||||
|         if len(entries) == 1: | ||||
|             segment = entries[0] | ||||
|             segment['id'] = video_id | ||||
|             segment['title'] = title | ||||
|             segment['http_headers'] = http_headers | ||||
|             return segment | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -15,6 +15,8 @@ from ..utils import ( | ||||
|     float_or_none, | ||||
|     HEADRequest, | ||||
|     sanitized_Request, | ||||
|     strip_or_none, | ||||
|     timeconvert, | ||||
|     unescapeHTML, | ||||
|     url_basename, | ||||
|     RegexNotFoundError, | ||||
| @@ -35,13 +37,13 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         return uri.split(':')[-1] | ||||
|  | ||||
|     # This was originally implemented for ComedyCentral, but it also works here | ||||
|     @staticmethod | ||||
|     def _transform_rtmp_url(rtmp_video_url): | ||||
|     @classmethod | ||||
|     def _transform_rtmp_url(cls, rtmp_video_url): | ||||
|         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url) | ||||
|         if not m: | ||||
|             return rtmp_video_url | ||||
|             return {'rtmp': rtmp_video_url} | ||||
|         base = 'http://viacommtvstrmfs.fplive.net/' | ||||
|         return base + m.group('finalid') | ||||
|         return {'http': base + m.group('finalid')} | ||||
|  | ||||
|     def _get_feed_url(self, uri): | ||||
|         return self._FEED_URL | ||||
| @@ -85,14 +87,14 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|                 rtmp_video_url = rendition.find('./src').text | ||||
|                 if rtmp_video_url.endswith('siteunavail.png'): | ||||
|                     continue | ||||
|                 new_url = self._transform_rtmp_url(rtmp_video_url) | ||||
|                 formats.append({ | ||||
|                 new_urls = self._transform_rtmp_url(rtmp_video_url) | ||||
|                 formats.extend([{ | ||||
|                     'ext': 'flv' if new_url.startswith('rtmp') else ext, | ||||
|                     'url': new_url, | ||||
|                     'format_id': rendition.get('bitrate'), | ||||
|                     'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])), | ||||
|                     'width': int(rendition.get('width')), | ||||
|                     'height': int(rendition.get('height')), | ||||
|                 }) | ||||
|                 } for kind, new_url in new_urls.items()]) | ||||
|             except (KeyError, TypeError): | ||||
|                 raise ExtractorError('Invalid rendition field.') | ||||
|         self._sort_formats(formats) | ||||
| @@ -133,7 +135,9 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|             message += item.text | ||||
|             raise ExtractorError(message, expected=True) | ||||
|  | ||||
|         description = xpath_text(itemdoc, 'description') | ||||
|         description = strip_or_none(xpath_text(itemdoc, 'description')) | ||||
|  | ||||
|         timestamp = timeconvert(xpath_text(itemdoc, 'pubDate')) | ||||
|  | ||||
|         title_el = None | ||||
|         if title_el is None: | ||||
| @@ -167,6 +171,7 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|             'thumbnail': self._get_thumbnail_url(uri, itemdoc), | ||||
|             'description': description, | ||||
|             'duration': float_or_none(content_el.attrib.get('duration')), | ||||
|             'timestamp': timestamp, | ||||
|         } | ||||
|  | ||||
|     def _get_feed_query(self, uri): | ||||
| @@ -185,8 +190,13 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         idoc = self._download_xml( | ||||
|             url, video_id, | ||||
|             'Downloading info', transform_source=fix_xml_ampersands) | ||||
|  | ||||
|         title = xpath_text(idoc, './channel/title') | ||||
|         description = xpath_text(idoc, './channel/description') | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             [self._get_video_info(item) for item in idoc.findall('.//item')]) | ||||
|             [self._get_video_info(item) for item in idoc.findall('.//item')], | ||||
|             playlist_title=title, playlist_description=description) | ||||
|  | ||||
|     def _extract_mgid(self, webpage): | ||||
|         try: | ||||
| @@ -232,6 +242,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds', | ||||
|             'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.', | ||||
|             'timestamp': 1400126400, | ||||
|             'upload_date': '20140515', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -274,6 +286,8 @@ class MTVIE(MTVServicesInfoExtractor): | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"', | ||||
|                 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', | ||||
|                 'timestamp': 1352610000, | ||||
|                 'upload_date': '20121111', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
| @@ -300,20 +314,6 @@ class MTVIE(MTVServicesInfoExtractor): | ||||
|         return self._get_videos_info(uri) | ||||
|  | ||||
|  | ||||
| class MTVIggyIE(MTVServicesInfoExtractor): | ||||
|     IE_NAME = 'mtviggy.com' | ||||
|     _VALID_URL = r'https?://www\.mtviggy\.com/videos/.+' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/', | ||||
|         'info_dict': { | ||||
|             'id': '984696', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet', | ||||
|         } | ||||
|     } | ||||
|     _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/' | ||||
|  | ||||
|  | ||||
| class MTVDEIE(MTVServicesInfoExtractor): | ||||
|     IE_NAME = 'mtv.de' | ||||
|     _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$' | ||||
| @@ -321,7 +321,7 @@ class MTVDEIE(MTVServicesInfoExtractor): | ||||
|         'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum', | ||||
|         'info_dict': { | ||||
|             'id': 'music_video-a50bc5f0b3aa4b3190aa', | ||||
|             'ext': 'mp4', | ||||
|             'ext': 'flv', | ||||
|             'title': 'MusicVideo_cro-traum', | ||||
|             'description': 'Cro - Traum', | ||||
|         }, | ||||
| @@ -329,20 +329,21 @@ class MTVDEIE(MTVServicesInfoExtractor): | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Blocked at Travis CI', | ||||
|     }, { | ||||
|         # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97) | ||||
|         'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen', | ||||
|         'info_dict': { | ||||
|             'id': 'local_playlist-f5ae778b9832cc837189', | ||||
|             'ext': 'mp4', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Blocked at Travis CI', | ||||
|     }, { | ||||
|         # single video in pagePlaylist with different id | ||||
|         'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3', | ||||
|         'info_dict': { | ||||
|             'id': 'local_playlist-4e760566473c4c8c5344', | ||||
| @@ -354,6 +355,7 @@ class MTVDEIE(MTVServicesInfoExtractor): | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Das Video kann zur Zeit nicht abgespielt werden.', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -366,11 +368,14 @@ class MTVDEIE(MTVServicesInfoExtractor): | ||||
|                 r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'), | ||||
|             video_id) | ||||
|  | ||||
|         def _mrss_url(item): | ||||
|             return item['mrss'] + item.get('mrssvars', '') | ||||
|  | ||||
|         # news pages contain single video in playlist with different id | ||||
|         if len(playlist) == 1: | ||||
|             return self._get_videos_info_from_url(playlist[0]['mrss'], video_id) | ||||
|             return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id) | ||||
|  | ||||
|         for item in playlist: | ||||
|             item_id = item.get('id') | ||||
|             if item_id and compat_str(item_id) == video_id: | ||||
|                 return self._get_videos_info_from_url(item['mrss'], video_id) | ||||
|                 return self._get_videos_info_from_url(_mrss_url(item), video_id) | ||||
|   | ||||
| @@ -1,16 +1,19 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .theplatform import ThePlatformIE | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
|     url_basename, | ||||
|     update_url_query, | ||||
|     get_element_by_class, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NationalGeographicIE(InfoExtractor): | ||||
|     IE_NAME = 'natgeo' | ||||
| class NationalGeographicVideoIE(InfoExtractor): | ||||
|     IE_NAME = 'natgeo:video' | ||||
|     _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -62,16 +65,16 @@ class NationalGeographicIE(InfoExtractor): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NationalGeographicChannelIE(ThePlatformIE): | ||||
|     IE_NAME = 'natgeo:channel' | ||||
|     _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)' | ||||
| class NationalGeographicIE(ThePlatformIE): | ||||
|     IE_NAME = 'natgeo' | ||||
|     _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/', | ||||
|             'md5': '518c9aa655686cf81493af5cc21e2a04', | ||||
|             'info_dict': { | ||||
|                 'id': 'nB5vIAfmyllm', | ||||
|                 'id': 'vKInpacll2pC', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Uncovering a Universal Knowledge', | ||||
|                 'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a', | ||||
| @@ -85,7 +88,7 @@ class NationalGeographicChannelIE(ThePlatformIE): | ||||
|             'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/', | ||||
|             'md5': 'c4912f656b4cbe58f3e000c489360989', | ||||
|             'info_dict': { | ||||
|                 'id': '3TmMv9OvGwIR', | ||||
|                 'id': 'Pok5lWCkiEFA', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'The Stunning Red Bird of Paradise', | ||||
|                 'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c', | ||||
| @@ -95,6 +98,10 @@ class NationalGeographicChannelIE(ThePlatformIE): | ||||
|             }, | ||||
|             'add_ie': ['ThePlatform'], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -122,3 +129,40 @@ class NationalGeographicChannelIE(ThePlatformIE): | ||||
|                 {'force_smil_url': True}), | ||||
|             'display_id': display_id, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NationalGeographicEpisodeGuideIE(ThePlatformIE): | ||||
|     IE_NAME = 'natgeo:episodeguide' | ||||
|     _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/', | ||||
|             'info_dict': { | ||||
|                 'id': 'the-story-of-god-with-morgan-freeman-season-1', | ||||
|                 'title': 'The Story of God with Morgan Freeman - Season 1', | ||||
|             }, | ||||
|             'playlist_mincount': 6, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2', | ||||
|             'info_dict': { | ||||
|                 'id': 'underworld-inc-season-2', | ||||
|                 'title': 'Underworld, Inc. - Season 2', | ||||
|             }, | ||||
|             'playlist_mincount': 7, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         show = get_element_by_class('show', webpage) | ||||
|         selected_season = self._search_regex( | ||||
|             r'<div[^>]+class="select-seasons[^"]*".*?<a[^>]*>(.*?)</a>', | ||||
|             webpage, 'selected season') | ||||
|         entries = [ | ||||
|             self.url_result(self._proto_relative_url(entry_url), 'NationalGeographic') | ||||
|             for entry_url in re.findall('(?s)<div[^>]+class="col-inner"[^>]*?>.*?<a[^>]+href="([^"]+)"', webpage)] | ||||
|         return self.playlist_result( | ||||
|             entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')), | ||||
|             '%s - %s' % (show, selected_season)) | ||||
|   | ||||
| @@ -4,12 +4,10 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -51,48 +49,74 @@ class NaverIE(InfoExtractor): | ||||
|             if error: | ||||
|                 raise ExtractorError(error, expected=True) | ||||
|             raise ExtractorError('couldn\'t extract vid and key') | ||||
|         vid = m_id.group(1) | ||||
|         key = m_id.group(2) | ||||
|         query = compat_urllib_parse_urlencode({'vid': vid, 'inKey': key, }) | ||||
|         query_urls = compat_urllib_parse_urlencode({ | ||||
|             'masterVid': vid, | ||||
|             'protocol': 'p2p', | ||||
|             'inKey': key, | ||||
|         }) | ||||
|         info = self._download_xml( | ||||
|             'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query, | ||||
|             video_id, 'Downloading video info') | ||||
|         urls = self._download_xml( | ||||
|             'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls, | ||||
|             video_id, 'Downloading video formats info') | ||||
|  | ||||
|         video_data = self._download_json( | ||||
|             'http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1), | ||||
|             video_id, query={ | ||||
|                 'key': m_id.group(2), | ||||
|             }) | ||||
|         meta = video_data['meta'] | ||||
|         title = meta['subject'] | ||||
|         formats = [] | ||||
|         for format_el in urls.findall('EncodingOptions/EncodingOption'): | ||||
|             domain = format_el.find('Domain').text | ||||
|             uri = format_el.find('uri').text | ||||
|             f = { | ||||
|                 'url': compat_urlparse.urljoin(domain, uri), | ||||
|                 'ext': 'mp4', | ||||
|                 'width': int(format_el.find('width').text), | ||||
|                 'height': int(format_el.find('height').text), | ||||
|             } | ||||
|             if domain.startswith('rtmp'): | ||||
|                 # urlparse does not support custom schemes | ||||
|                 # https://bugs.python.org/issue18828 | ||||
|                 f.update({ | ||||
|                     'url': domain + uri, | ||||
|                     'ext': 'flv', | ||||
|                     'rtmp_protocol': '1',  # rtmpt | ||||
|  | ||||
|         def extract_formats(streams, stream_type, query={}): | ||||
|             for stream in streams: | ||||
|                 stream_url = stream.get('source') | ||||
|                 if not stream_url: | ||||
|                     continue | ||||
|                 stream_url = update_url_query(stream_url, query) | ||||
|                 encoding_option = stream.get('encodingOption', {}) | ||||
|                 bitrate = stream.get('bitrate', {}) | ||||
|                 formats.append({ | ||||
|                     'format_id': '%s_%s' % (stream.get('type') or stream_type, encoding_option.get('id') or encoding_option.get('name')), | ||||
|                     'url': stream_url, | ||||
|                     'width': int_or_none(encoding_option.get('width')), | ||||
|                     'height': int_or_none(encoding_option.get('height')), | ||||
|                     'vbr': int_or_none(bitrate.get('video')), | ||||
|                     'abr': int_or_none(bitrate.get('audio')), | ||||
|                     'filesize': int_or_none(stream.get('size')), | ||||
|                     'protocol': 'm3u8_native' if stream_type == 'HLS' else None, | ||||
|                 }) | ||||
|             formats.append(f) | ||||
|  | ||||
|         extract_formats(video_data.get('videos', {}).get('list', []), 'H264') | ||||
|         for stream_set in video_data.get('streams', []): | ||||
|             query = {} | ||||
|             for param in stream_set.get('keys', []): | ||||
|                 query[param['name']] = param['value'] | ||||
|             stream_type = stream_set.get('type') | ||||
|             videos = stream_set.get('videos') | ||||
|             if videos: | ||||
|                 extract_formats(videos, stream_type, query) | ||||
|             elif stream_type == 'HLS': | ||||
|                 stream_url = stream_set.get('source') | ||||
|                 if not stream_url: | ||||
|                     continue | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     update_url_query(stream_url, query), video_id, | ||||
|                     'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False)) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for caption in video_data.get('captions', {}).get('list', []): | ||||
|             caption_url = caption.get('source') | ||||
|             if not caption_url: | ||||
|                 continue | ||||
|             subtitles.setdefault(caption.get('language') or caption.get('locale'), []).append({ | ||||
|                 'url': caption_url, | ||||
|             }) | ||||
|  | ||||
|         upload_date = self._search_regex( | ||||
|             r'<span[^>]+class="date".*?(\d{4}\.\d{2}\.\d{2})', | ||||
|             webpage, 'upload date', fatal=False) | ||||
|         if upload_date: | ||||
|             upload_date = upload_date.replace('.', '') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info.find('Subject').text, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'upload_date': info.find('WriteDate').text.replace('.', ''), | ||||
|             'view_count': int(info.find('PlayCount').text), | ||||
|             'thumbnail': meta.get('cover', {}).get('source') or self._og_search_thumbnail(webpage), | ||||
|             'view_count': int_or_none(meta.get('count')), | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
|   | ||||
| @@ -1,30 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
|  | ||||
|  | ||||
| class NextMovieIE(MTVServicesInfoExtractor): | ||||
|     IE_NAME = 'nextmovie.com' | ||||
|     _VALID_URL = r'https?://(?:www\.)?nextmovie\.com/shows/[^/]+/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)' | ||||
|     _FEED_URL = 'http://lite.dextr.mtvi.com/service1/dispatch.htm' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nextmovie.com/shows/exclusives/2013-03-10/mgid:uma:videolist:nextmovie.com:1715019/', | ||||
|         'md5': '09a9199f2f11f10107d04fcb153218aa', | ||||
|         'info_dict': { | ||||
|             'id': '961726', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The Muppets\' Gravity', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _get_feed_query(self, uri): | ||||
|         return compat_urllib_parse_urlencode({ | ||||
|             'feed': '1505', | ||||
|             'mgid': uri, | ||||
|         }) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mgid = self._match_id(url) | ||||
|         return self._get_videos_info(mgid) | ||||
| @@ -7,6 +7,7 @@ from ..utils import update_url_query | ||||
|  | ||||
|  | ||||
| class NickIE(MTVServicesInfoExtractor): | ||||
|     # None of videos on the website are still alive? | ||||
|     IE_NAME = 'nick.com' | ||||
|     _VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)' | ||||
|     _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' | ||||
|   | ||||
							
								
								
									
										72
									
								
								youtube_dl/extractor/ninenow.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								youtube_dl/extractor/ninenow.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     float_or_none, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NineNowIE(InfoExtractor): | ||||
|     IE_NAME = '9now.com.au' | ||||
|     _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)' | ||||
|     _TESTS = [{ | ||||
|         # clip | ||||
|         'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc', | ||||
|         'md5': '17cf47d63ec9323e562c9957a968b565', | ||||
|         'info_dict': { | ||||
|             'id': '16801', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'St. Kilda\'s Joey Montagna on the potential for a player\'s strike', | ||||
|             'description': 'Is a boycott of the NAB Cup "on the table"?', | ||||
|             'uploader_id': '4460760524001', | ||||
|             'upload_date': '20160713', | ||||
|             'timestamp': 1468421266, | ||||
|         }, | ||||
|         'skip': 'Only available in Australia', | ||||
|     }, { | ||||
|         # episode | ||||
|         'url': 'https://www.9now.com.au/afl-footy-show/2016/episode-19', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # DRM protected | ||||
|         'url': 'https://www.9now.com.au/andrew-marrs-history-of-the-world/season-1/episode-1', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         page_data = self._parse_json(self._search_regex( | ||||
|             r'window\.__data\s*=\s*({.*?});', webpage, | ||||
|             'page data'), display_id) | ||||
|         common_data = page_data.get('episode', {}).get('episode') or page_data.get('clip', {}).get('clip') | ||||
|         video_data = common_data['video'] | ||||
|  | ||||
|         if video_data.get('drm'): | ||||
|             raise ExtractorError('This video is DRM protected.', expected=True) | ||||
|  | ||||
|         brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId'] | ||||
|         video_id = compat_str(video_data.get('id') or brightcove_id) | ||||
|         title = common_data['name'] | ||||
|  | ||||
|         thumbnails = [{ | ||||
|             'id': thumbnail_id, | ||||
|             'url': thumbnail_url, | ||||
|             'width': int_or_none(thumbnail_id[1:]) | ||||
|         } for thumbnail_id, thumbnail_url in common_data.get('image', {}).get('sizes', {}).items()] | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': common_data.get('description'), | ||||
|             'duration': float_or_none(video_data.get('duration'), 1000), | ||||
|             'thumbnails': thumbnails, | ||||
|             'ie_key': 'BrightcoveNew', | ||||
|         } | ||||
							
								
								
									
										46
									
								
								youtube_dl/extractor/nintendo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								youtube_dl/extractor/nintendo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,46 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .ooyala import OoyalaIE | ||||
| from ..utils import unescapeHTML | ||||
|  | ||||
|  | ||||
| class NintendoIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nintendo.com/games/detail/yEiAzhU2eQI1KZ7wOHhngFoAHc1FpHwj', | ||||
|         'info_dict': { | ||||
|             'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Duck Hunt Wii U VC NES - Trailer', | ||||
|             'duration': 60.326, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     }, { | ||||
|         'url': 'http://www.nintendo.com/games/detail/tokyo-mirage-sessions-fe-wii-u', | ||||
|         'info_dict': { | ||||
|             'id': 'tokyo-mirage-sessions-fe-wii-u', | ||||
|             'title': 'Tokyo Mirage Sessions ♯FE', | ||||
|         }, | ||||
|         'playlist_count': 3, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         page_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|  | ||||
|         entries = [ | ||||
|             OoyalaIE._build_url_result(m.group('code')) | ||||
|             for m in re.finditer( | ||||
|                 r'class=(["\'])embed-video\1[^>]+data-video-code=(["\'])(?P<code>(?:(?!\2).)+)\2', | ||||
|                 webpage)] | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, page_id, unescapeHTML(self._og_search_title(webpage, fatal=False))) | ||||
| @@ -11,70 +11,64 @@ from ..utils import ( | ||||
|  | ||||
| class NTVRuIE(InfoExtractor): | ||||
|     IE_NAME = 'ntv.ru' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?P<id>.+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/novosti/863142/', | ||||
|             'md5': 'ba7ea172a91cb83eb734cad18c10e723', | ||||
|             'info_dict': { | ||||
|                 'id': '746000', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', | ||||
|                 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 136, | ||||
|             }, | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ntv.ru/novosti/863142/', | ||||
|         'md5': 'ba7ea172a91cb83eb734cad18c10e723', | ||||
|         'info_dict': { | ||||
|             'id': '746000', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', | ||||
|             'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', | ||||
|             'thumbnail': 're:^http://.*\.jpg', | ||||
|             'duration': 136, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/video/novosti/750370/', | ||||
|             'md5': 'adecff79691b4d71e25220a191477124', | ||||
|             'info_dict': { | ||||
|                 'id': '750370', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', | ||||
|                 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 172, | ||||
|             }, | ||||
|     }, { | ||||
|         'url': 'http://www.ntv.ru/video/novosti/750370/', | ||||
|         'md5': 'adecff79691b4d71e25220a191477124', | ||||
|         'info_dict': { | ||||
|             'id': '750370', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', | ||||
|             'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', | ||||
|             'thumbnail': 're:^http://.*\.jpg', | ||||
|             'duration': 172, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', | ||||
|             'md5': '82dbd49b38e3af1d00df16acbeab260c', | ||||
|             'info_dict': { | ||||
|                 'id': '747480', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '«Сегодня». 21 марта 2014 года. 16:00', | ||||
|                 'description': '«Сегодня». 21 марта 2014 года. 16:00', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 1496, | ||||
|             }, | ||||
|     }, { | ||||
|         'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', | ||||
|         'md5': '82dbd49b38e3af1d00df16acbeab260c', | ||||
|         'info_dict': { | ||||
|             'id': '747480', | ||||
|             'ext': 'mp4', | ||||
|             'title': '«Сегодня». 21 марта 2014 года. 16:00', | ||||
|             'description': '«Сегодня». 21 марта 2014 года. 16:00', | ||||
|             'thumbnail': 're:^http://.*\.jpg', | ||||
|             'duration': 1496, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/kino/Koma_film', | ||||
|             'md5': 'f825770930937aa7e5aca0dc0d29319a', | ||||
|             'info_dict': { | ||||
|                 'id': '1007609', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Остросюжетный фильм «Кома»', | ||||
|                 'description': 'Остросюжетный фильм «Кома»', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 5592, | ||||
|             }, | ||||
|     }, { | ||||
|         'url': 'http://www.ntv.ru/kino/Koma_film', | ||||
|         'md5': 'f825770930937aa7e5aca0dc0d29319a', | ||||
|         'info_dict': { | ||||
|             'id': '1007609', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Остросюжетный фильм «Кома»', | ||||
|             'description': 'Остросюжетный фильм «Кома»', | ||||
|             'thumbnail': 're:^http://.*\.jpg', | ||||
|             'duration': 5592, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/', | ||||
|             'md5': '9320cd0e23f3ea59c330dc744e06ff3b', | ||||
|             'info_dict': { | ||||
|                 'id': '751482', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '«Дело врачей»: «Деревце жизни»', | ||||
|                 'description': '«Дело врачей»: «Деревце жизни»', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 2590, | ||||
|             }, | ||||
|     }, { | ||||
|         'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/', | ||||
|         'md5': '9320cd0e23f3ea59c330dc744e06ff3b', | ||||
|         'info_dict': { | ||||
|             'id': '751482', | ||||
|             'ext': 'mp4', | ||||
|             'title': '«Дело врачей»: «Деревце жизни»', | ||||
|             'description': '«Дело врачей»: «Деревце жизни»', | ||||
|             'thumbnail': 're:^http://.*\.jpg', | ||||
|             'duration': 2590, | ||||
|         }, | ||||
|     ] | ||||
|     }] | ||||
|  | ||||
|     _VIDEO_ID_REGEXES = [ | ||||
|         r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)', | ||||
| @@ -87,11 +81,21 @@ class NTVRuIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id') | ||||
|         video_url = self._og_search_property( | ||||
|             ('video', 'video:iframe'), webpage, default=None) | ||||
|         if video_url: | ||||
|             video_id = self._search_regex( | ||||
|                 r'https?://(?:www\.)?ntv\.ru/video/(?:embed/)?(\d+)', | ||||
|                 video_url, 'video id', default=None) | ||||
|  | ||||
|         if not video_id: | ||||
|             video_id = self._html_search_regex( | ||||
|                 self._VIDEO_ID_REGEXES, webpage, 'video id') | ||||
|  | ||||
|         player = self._download_xml( | ||||
|             'http://www.ntv.ru/vi%s/' % video_id, | ||||
|             video_id, 'Downloading video XML') | ||||
|  | ||||
|         title = clean_html(xpath_text(player, './data/title', 'title', fatal=True)) | ||||
|         description = clean_html(xpath_text(player, './data/description', 'description')) | ||||
|  | ||||
|   | ||||
							
								
								
									
										50
									
								
								youtube_dl/extractor/odatv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								youtube_dl/extractor/odatv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     NO_DEFAULT, | ||||
|     remove_start | ||||
| ) | ||||
|  | ||||
|  | ||||
| class OdaTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?.*\bid=(?P<id>[^&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://odatv.com/vid_video.php?id=8E388', | ||||
|         'md5': 'dc61d052f205c9bf2da3545691485154', | ||||
|         'info_dict': { | ||||
|             'id': '8E388', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Artık Davutoğlu ile devam edemeyiz' | ||||
|         } | ||||
|     }, { | ||||
|         # mobile URL | ||||
|         'url': 'http://odatv.com/mob_video.php?id=8E388', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # no video | ||||
|         'url': 'http://odatv.com/mob_video.php?id=8E900', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         no_video = 'NO VIDEO!' in webpage | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'mp4\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, 'video url', | ||||
|             default=None if no_video else NO_DEFAULT, group='url') | ||||
|  | ||||
|         if no_video: | ||||
|             raise ExtractorError('Video %s does not exist' % video_id, expected=True) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': remove_start(self._og_search_title(webpage), 'Video: '), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|         } | ||||
| @@ -59,11 +59,8 @@ class OnetBaseIE(InfoExtractor): | ||||
|                         # TODO: Support Microsoft Smooth Streaming | ||||
|                         continue | ||||
|                     elif ext == 'mpd': | ||||
|                         # TODO: Current DASH formats are broken - $Time$ pattern in | ||||
|                         # <SegmentTemplate> not implemented yet | ||||
|                         # formats.extend(self._extract_mpd_formats( | ||||
|                         #    video_url, video_id, mpd_id='dash', fatal=False)) | ||||
|                         continue | ||||
|                         formats.extend(self._extract_mpd_formats( | ||||
|                             video_url, video_id, mpd_id='dash', fatal=False)) | ||||
|                     else: | ||||
|                         formats.append({ | ||||
|                             'url': video_url, | ||||
|   | ||||
| @@ -40,16 +40,16 @@ class ORFTVthekIE(InfoExtractor): | ||||
|         'skip': 'Blocked outside of Austria / Germany', | ||||
|     }, { | ||||
|         'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', | ||||
|         'playlist': [{ | ||||
|             'md5': '68f543909aea49d621dfc7703a11cfaf', | ||||
|             'info_dict': { | ||||
|                 'id': '7982259', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Best of Ingrid Thurnher', | ||||
|                 'upload_date': '20140527', | ||||
|                 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', | ||||
|             } | ||||
|         }], | ||||
|         'info_dict': { | ||||
|             'id': '7982259', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Best of Ingrid Thurnher', | ||||
|             'upload_date': '20140527', | ||||
|             'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # rtsp downloads | ||||
|         }, | ||||
|         '_skip': 'Blocked outside of Austria / Germany', | ||||
|     }] | ||||
|  | ||||
| @@ -137,13 +137,16 @@ class ORFTVthekIE(InfoExtractor): | ||||
| class ORFOE1IE(InfoExtractor): | ||||
|     IE_NAME = 'orf:oe1' | ||||
|     IE_DESC = 'Radio Österreich 1' | ||||
|     _VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole\?.*?\btrack_id=)(?P<id>[0-9]+)' | ||||
|  | ||||
|     # Audios on ORF radio are only available for 7 days, so we can't add tests. | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211', | ||||
|         'only_matching': True, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://oe1.orf.at/konsole?show=ondemand&track_id=443608&load_day=/programm/konsole/tag/20160726', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         show_id = self._match_id(url) | ||||
|   | ||||
| @@ -15,7 +15,7 @@ from ..utils import ( | ||||
|  | ||||
| class PlayvidIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', | ||||
|         'md5': 'ffa2f6b2119af359f544388d8c01eb6c', | ||||
|         'info_dict': { | ||||
| @@ -24,8 +24,19 @@ class PlayvidIE(InfoExtractor): | ||||
|             'title': 'md5:9256d01c6317e3f703848b5906880dc8', | ||||
|             'duration': 82, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|         }, | ||||
|         'skip': 'Video removed due to ToS', | ||||
|     }, { | ||||
|         'url': 'http://www.playvid.com/watch/hwb0GpNkzgH', | ||||
|         'md5': '39d49df503ad7b8f23a4432cbf046477', | ||||
|         'info_dict': { | ||||
|             'id': 'hwb0GpNkzgH', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park', | ||||
|             'age_limit': 18, | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|   | ||||
| @@ -111,7 +111,7 @@ class PornHubIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         error_msg = self._html_search_regex( | ||||
|             r'(?s)<div[^>]+class=(["\']).*?\b(?:removed|userMessageSection)\b.*?\1[^>]*>(?P<error>.+?)</div>', | ||||
|             r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>', | ||||
|             webpage, 'error message', default=None, group='error') | ||||
|         if error_msg: | ||||
|             error_msg = re.sub(r'\s+', ' ', error_msg) | ||||
|   | ||||
							
								
								
									
										148
									
								
								youtube_dl/extractor/roosterteeth.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										148
									
								
								youtube_dl/extractor/roosterteeth.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,148 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     strip_or_none, | ||||
|     unescapeHTML, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RoosterTeethIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)' | ||||
|     _LOGIN_URL = 'https://roosterteeth.com/login' | ||||
|     _NETRC_MACHINE = 'roosterteeth' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', | ||||
|         'md5': 'e2bd7764732d785ef797700a2489f212', | ||||
|         'info_dict': { | ||||
|             'id': '26576', | ||||
|             'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement', | ||||
|             'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5', | ||||
|             'thumbnail': 're:^https?://.*\.png$', | ||||
|             'series': 'Million Dollars, But...', | ||||
|             'episode': 'Million Dollars, But... The Game Announcement', | ||||
|             'comment_count': int, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # only available for FIRST members | ||||
|         'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             self._LOGIN_URL, None, | ||||
|             note='Downloading login page', | ||||
|             errnote='Unable to download login page') | ||||
|  | ||||
|         login_form = self._hidden_inputs(login_page) | ||||
|  | ||||
|         login_form.update({ | ||||
|             'username': username, | ||||
|             'password': password, | ||||
|         }) | ||||
|  | ||||
|         login_request = self._download_webpage( | ||||
|             self._LOGIN_URL, None, | ||||
|             note='Logging in as %s' % username, | ||||
|             data=urlencode_postdata(login_form), | ||||
|             headers={ | ||||
|                 'Referer': self._LOGIN_URL, | ||||
|             }) | ||||
|  | ||||
|         if not any(re.search(p, login_request) for p in ( | ||||
|                 r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"', | ||||
|                 r'>Sign Out<')): | ||||
|             error = self._html_search_regex( | ||||
|                 r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>', | ||||
|                 login_request, 'alert', default=None, group='error') | ||||
|             if error: | ||||
|                 raise ExtractorError('Unable to login: %s' % error, expected=True) | ||||
|             raise ExtractorError('Unable to log in') | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         episode = strip_or_none(unescapeHTML(self._search_regex( | ||||
|             (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', | ||||
|              r'<title>(?P<title>[^<]+)</title>'), webpage, 'title', | ||||
|             default=None, group='title'))) | ||||
|  | ||||
|         title = strip_or_none(self._og_search_title( | ||||
|             webpage, default=None)) or episode | ||||
|  | ||||
|         m3u8_url = self._search_regex( | ||||
|             r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1', | ||||
|             webpage, 'm3u8 url', default=None, group='url') | ||||
|  | ||||
|         if not m3u8_url: | ||||
|             if re.search(r'<div[^>]+class=["\']non-sponsor', webpage): | ||||
|                 self.raise_login_required( | ||||
|                     '%s is only available for FIRST members' % display_id) | ||||
|  | ||||
|             if re.search(r'<div[^>]+class=["\']golive-gate', webpage): | ||||
|                 self.raise_login_required('%s is not available yet' % display_id) | ||||
|  | ||||
|             raise ExtractorError('Unable to extract m3u8 URL') | ||||
|  | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             m3u8_url, display_id, ext='mp4', | ||||
|             entry_protocol='m3u8_native', m3u8_id='hls') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         description = strip_or_none(self._og_search_description(webpage)) | ||||
|         thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage)) | ||||
|  | ||||
|         series = self._search_regex( | ||||
|             (r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'), | ||||
|             webpage, 'series', fatal=False) | ||||
|  | ||||
|         comment_count = int_or_none(self._search_regex( | ||||
|             r'>Comments \((\d+)\)<', webpage, | ||||
|             'comment count', fatal=False)) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             (r'containerId\s*=\s*["\']episode-(\d+)\1', | ||||
|              r'<div[^<]+id=["\']episode-(\d+)'), webpage, | ||||
|             'video id', default=display_id) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'series': series, | ||||
|             'episode': episode, | ||||
|             'comment_count': comment_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -113,9 +113,9 @@ class RTVEALaCartaIE(InfoExtractor): | ||||
|         png = self._download_webpage(png_request, video_id, 'Downloading url information') | ||||
|         video_url = _decrypt_url(png) | ||||
|         if not video_url.endswith('.f4m'): | ||||
|             video_url = video_url.replace( | ||||
|                 'resources/', 'auth/resources/' | ||||
|             ).replace('.net.rtve', '.multimedia.cdn.rtve') | ||||
|             if '?' not in video_url: | ||||
|                 video_url = video_url.replace('resources/', 'auth/resources/') | ||||
|             video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve') | ||||
|  | ||||
|         subtitles = None | ||||
|         if info.get('sbtFile') is not None: | ||||
| @@ -222,3 +222,34 @@ class RTVELiveIE(InfoExtractor): | ||||
|             'formats': formats, | ||||
|             'is_live': True, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class RTVETelevisionIE(InfoExtractor): | ||||
|     IE_NAME = 'rtve.es:television' | ||||
|     _VALID_URL = r'https?://www\.rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', | ||||
|         'info_dict': { | ||||
|             'id': '3069778', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Documentos TV - La revolución del móvil', | ||||
|             'duration': 3496.948, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         page_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|  | ||||
|         alacarta_url = self._search_regex( | ||||
|             r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&', | ||||
|             webpage, 'alacarta url', default=None) | ||||
|         if alacarta_url is None: | ||||
|             raise ExtractorError( | ||||
|                 'The webpage doesn\'t contain any video', expected=True) | ||||
|  | ||||
|         return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key()) | ||||
|   | ||||
							
								
								
									
										53
									
								
								youtube_dl/extractor/rudo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								youtube_dl/extractor/rudo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .jwplatform import JWPlatformBaseIE | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
|     get_element_by_class, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RudoIE(JWPlatformBaseIE): | ||||
|     _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://rudo.video/vod/oTzw0MGnyG', | ||||
|         'md5': '2a03a5b32dd90a04c83b6d391cf7b415', | ||||
|         'info_dict': { | ||||
|             'id': 'oTzw0MGnyG', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Comentario Tomás Mosciatti', | ||||
|             'upload_date': '20160617', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     @classmethod | ||||
|     def _extract_url(self, webpage): | ||||
|         mobj = re.search( | ||||
|             '<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', | ||||
|             webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id, encoding='iso-8859-1') | ||||
|  | ||||
|         jwplayer_data = self._parse_json(self._search_regex( | ||||
|             r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id, | ||||
|             transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) | ||||
|  | ||||
|         info_dict = self._parse_jwplayer_data( | ||||
|             jwplayer_data, video_id, require_title=False, m3u8_id='hls') | ||||
|  | ||||
|         info_dict.update({ | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'upload_date': unified_strdate(get_element_by_class('date', webpage)), | ||||
|         }) | ||||
|  | ||||
|         return info_dict | ||||
| @@ -75,7 +75,7 @@ class SafariBaseIE(InfoExtractor): | ||||
| class SafariIE(SafariBaseIE): | ||||
|     IE_NAME = 'safari' | ||||
|     IE_DESC = 'safaribooksonline.com online video' | ||||
|     _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html' | ||||
|     _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', | ||||
| @@ -92,6 +92,9 @@ class SafariIE(SafariBaseIE): | ||||
|         # non-digits in course id | ||||
|         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -132,12 +135,15 @@ class SafariIE(SafariBaseIE): | ||||
|  | ||||
| class SafariApiIE(SafariBaseIE): | ||||
|     IE_NAME = 'safari:api' | ||||
|     _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html' | ||||
|     _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', | ||||
|         'only_matching': True, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|   | ||||
| @@ -2,11 +2,11 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     str_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -33,45 +33,27 @@ class ShahidIE(InfoExtractor): | ||||
|         'only_matching': True | ||||
|     }] | ||||
|  | ||||
|     def _handle_error(self, response): | ||||
|         if not isinstance(response, dict): | ||||
|             return | ||||
|         error = response.get('error') | ||||
|     def _call_api(self, path, video_id, note): | ||||
|         data = self._download_json( | ||||
|             'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={ | ||||
|                 'apiKey': 'sh@hid0nlin3', | ||||
|                 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', | ||||
|             }).get('data', {}) | ||||
|  | ||||
|         error = data.get('error') | ||||
|         if error: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), | ||||
|                 expected=True) | ||||
|  | ||||
|     def _download_json(self, url, video_id, note='Downloading JSON metadata'): | ||||
|         response = super(ShahidIE, self)._download_json(url, video_id, note)['data'] | ||||
|         self._handle_error(response) | ||||
|         return response | ||||
|         return data | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         api_vars = { | ||||
|             'id': video_id, | ||||
|             'type': 'player', | ||||
|             'url': 'http://api.shahid.net/api/v1_1', | ||||
|             'playerType': 'episode', | ||||
|         } | ||||
|  | ||||
|         flashvars = self._search_regex( | ||||
|             r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None) | ||||
|         if flashvars: | ||||
|             for key in api_vars.keys(): | ||||
|                 value = self._search_regex( | ||||
|                     r'\b%s\s*:\s*(?P<q>["\'])(?P<value>.+?)(?P=q)' % key, | ||||
|                     flashvars, 'type', default=None, group='value') | ||||
|                 if value: | ||||
|                     api_vars[key] = value | ||||
|  | ||||
|         player = self._download_json( | ||||
|             'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html' | ||||
|             % (video_id, api_vars['type']), video_id, 'Downloading player JSON') | ||||
|         player = self._call_api( | ||||
|             'Content/Episode/%s' % video_id, | ||||
|             video_id, 'Downloading player JSON') | ||||
|  | ||||
|         if player.get('drm'): | ||||
|             raise ExtractorError('This video is DRM protected.', expected=True) | ||||
| @@ -79,22 +61,11 @@ class ShahidIE(InfoExtractor): | ||||
|         formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         video = self._download_json( | ||||
|             '%s/%s/%s?%s' % ( | ||||
|                 api_vars['url'], api_vars['playerType'], api_vars['id'], | ||||
|                 compat_urllib_parse_urlencode({ | ||||
|                     'apiKey': 'sh@hid0nlin3', | ||||
|                     'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', | ||||
|                 })), | ||||
|             video_id, 'Downloading video JSON') | ||||
|  | ||||
|         video = video[api_vars['playerType']] | ||||
|         video = self._call_api( | ||||
|             'episode/%s' % video_id, video_id, | ||||
|             'Downloading video JSON')['episode'] | ||||
|  | ||||
|         title = video['title'] | ||||
|         description = video.get('description') | ||||
|         thumbnail = video.get('thumbnailUrl') | ||||
|         duration = int_or_none(video.get('duration')) | ||||
|         timestamp = parse_iso8601(video.get('referenceDate')) | ||||
|         categories = [ | ||||
|             category['name'] | ||||
|             for category in video.get('genres', []) if 'name' in category] | ||||
| @@ -102,10 +73,16 @@ class ShahidIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'timestamp': timestamp, | ||||
|             'description': video.get('description'), | ||||
|             'thumbnail': video.get('thumbnailUrl'), | ||||
|             'duration': int_or_none(video.get('duration')), | ||||
|             'timestamp': parse_iso8601(video.get('referenceDate')), | ||||
|             'categories': categories, | ||||
|             'series': video.get('showTitle') or video.get('showName'), | ||||
|             'season': video.get('seasonTitle'), | ||||
|             'season_number': int_or_none(video.get('seasonNumber')), | ||||
|             'season_id': str_or_none(video.get('seasonId')), | ||||
|             'episode_number': int_or_none(video.get('number')), | ||||
|             'episode_id': video_id, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -6,7 +6,6 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     sanitized_Request, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
| @@ -37,28 +36,33 @@ class SharedIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         webpage, urlh = self._download_webpage_handle(url, video_id) | ||||
|  | ||||
|         if '>File does not exist<' in webpage: | ||||
|             raise ExtractorError( | ||||
|                 'Video %s does not exist' % video_id, expected=True) | ||||
|  | ||||
|         download_form = self._hidden_inputs(webpage) | ||||
|         request = sanitized_Request( | ||||
|             url, urlencode_postdata(download_form)) | ||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|  | ||||
|         video_page = self._download_webpage( | ||||
|             request, video_id, 'Downloading video page') | ||||
|             urlh.geturl(), video_id, 'Downloading video page', | ||||
|             data=urlencode_postdata(download_form), | ||||
|             headers={ | ||||
|                 'Content-Type': 'application/x-www-form-urlencoded', | ||||
|                 'Referer': urlh.geturl(), | ||||
|             }) | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'data-url="([^"]+)"', video_page, 'video URL') | ||||
|             r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1', | ||||
|             video_page, 'video URL', group='url') | ||||
|         title = base64.b64decode(self._html_search_meta( | ||||
|             'full:title', webpage, 'title').encode('utf-8')).decode('utf-8') | ||||
|         filesize = int_or_none(self._html_search_meta( | ||||
|             'full:size', webpage, 'file size', fatal=False)) | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'data-poster="([^"]+)"', video_page, 'thumbnail', default=None) | ||||
|             r'data-poster=(["\'])(?P<url>(?:(?!\1).)+)\1', | ||||
|             video_page, 'thumbnail', default=None, group='url') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -13,20 +13,21 @@ from ..utils import ( | ||||
|     sanitized_Request, | ||||
|     unified_strdate, | ||||
|     urlencode_postdata, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SmotriIE(InfoExtractor): | ||||
|     IE_DESC = 'Smotri.com' | ||||
|     IE_NAME = 'smotri' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})' | ||||
|     _NETRC_MACHINE = 'smotri' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         # real video id 2610366 | ||||
|         { | ||||
|             'url': 'http://smotri.com/video/view/?id=v261036632ab', | ||||
|             'md5': '2a7b08249e6f5636557579c368040eb9', | ||||
|             'md5': '02c0dfab2102984e9c5bb585cc7cc321', | ||||
|             'info_dict': { | ||||
|                 'id': 'v261036632ab', | ||||
|                 'ext': 'mp4', | ||||
| @@ -174,11 +175,11 @@ class SmotriIE(InfoExtractor): | ||||
|         if video_password: | ||||
|             video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         request = sanitized_Request( | ||||
|             'http://smotri.com/video/view/url/bot/', urlencode_postdata(video_form)) | ||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|  | ||||
|         video = self._download_json(request, video_id, 'Downloading video JSON') | ||||
|         video = self._download_json( | ||||
|             'http://smotri.com/video/view/url/bot/', | ||||
|             video_id, 'Downloading video JSON', | ||||
|             data=urlencode_postdata(video_form), | ||||
|             headers={'Content-Type': 'application/x-www-form-urlencoded'}) | ||||
|  | ||||
|         video_url = video.get('_vidURL') or video.get('_vidURL_mp4') | ||||
|  | ||||
| @@ -196,11 +197,11 @@ class SmotriIE(InfoExtractor): | ||||
|                 raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|         title = video['title'] | ||||
|         thumbnail = video['_imgURL'] | ||||
|         upload_date = unified_strdate(video['added']) | ||||
|         uploader = video['userNick'] | ||||
|         uploader_id = video['userLogin'] | ||||
|         duration = int_or_none(video['duration']) | ||||
|         thumbnail = video.get('_imgURL') | ||||
|         upload_date = unified_strdate(video.get('added')) | ||||
|         uploader = video.get('userNick') | ||||
|         uploader_id = video.get('userLogin') | ||||
|         duration = int_or_none(video.get('duration')) | ||||
|  | ||||
|         # Video JSON does not provide enough meta data | ||||
|         # We will extract some from the video web page instead | ||||
| @@ -209,7 +210,7 @@ class SmotriIE(InfoExtractor): | ||||
|  | ||||
|         # Warning if video is unavailable | ||||
|         warning = self._html_search_regex( | ||||
|             r'<div class="videoUnModer">(.*?)</div>', webpage, | ||||
|             r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage, | ||||
|             'warning message', default=None) | ||||
|         if warning is not None: | ||||
|             self._downloader.report_warning( | ||||
| @@ -217,20 +218,22 @@ class SmotriIE(InfoExtractor): | ||||
|                 (video_id, warning)) | ||||
|  | ||||
|         # Adult content | ||||
|         if re.search('EroConfirmText">', webpage) is not None: | ||||
|         if 'EroConfirmText">' in webpage: | ||||
|             self.report_age_confirmation() | ||||
|             confirm_string = self._html_search_regex( | ||||
|                 r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id, | ||||
|                 r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id, | ||||
|                 webpage, 'confirm string') | ||||
|             confirm_url = webpage_url + '&confirm=%s' % confirm_string | ||||
|             webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)') | ||||
|             webpage = self._download_webpage( | ||||
|                 confirm_url, video_id, | ||||
|                 'Downloading video page (age confirmed)') | ||||
|             adult_content = True | ||||
|         else: | ||||
|             adult_content = False | ||||
|  | ||||
|         view_count = self._html_search_regex( | ||||
|             'Общее количество просмотров.*?<span class="Number">(\\d+)</span>', | ||||
|             webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL) | ||||
|             r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>', | ||||
|             webpage, 'view count', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -249,37 +252,33 @@ class SmotriIE(InfoExtractor): | ||||
| class SmotriCommunityIE(InfoExtractor): | ||||
|     IE_DESC = 'Smotri.com community videos' | ||||
|     IE_NAME = 'smotri:community' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://smotri.com/community/video/kommuna', | ||||
|         'info_dict': { | ||||
|             'id': 'kommuna', | ||||
|             'title': 'КПРФ', | ||||
|         }, | ||||
|         'playlist_mincount': 4, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         community_id = mobj.group('communityid') | ||||
|         community_id = self._match_id(url) | ||||
|  | ||||
|         url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id | ||||
|         rss = self._download_xml(url, community_id, 'Downloading community RSS') | ||||
|         rss = self._download_xml( | ||||
|             'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id, | ||||
|             community_id, 'Downloading community RSS') | ||||
|  | ||||
|         entries = [self.url_result(video_url.text, 'Smotri') | ||||
|                    for video_url in rss.findall('./channel/item/link')] | ||||
|         entries = [ | ||||
|             self.url_result(video_url.text, SmotriIE.ie_key()) | ||||
|             for video_url in rss.findall('./channel/item/link')] | ||||
|  | ||||
|         description_text = rss.find('./channel/description').text | ||||
|         community_title = self._html_search_regex( | ||||
|             '^Видео сообщества "([^"]+)"$', description_text, 'community title') | ||||
|  | ||||
|         return self.playlist_result(entries, community_id, community_title) | ||||
|         return self.playlist_result(entries, community_id) | ||||
|  | ||||
|  | ||||
| class SmotriUserIE(InfoExtractor): | ||||
|     IE_DESC = 'Smotri.com user videos' | ||||
|     IE_NAME = 'smotri:user' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://smotri.com/user/inspector', | ||||
|         'info_dict': { | ||||
| @@ -290,19 +289,19 @@ class SmotriUserIE(InfoExtractor): | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         user_id = mobj.group('userid') | ||||
|         user_id = self._match_id(url) | ||||
|  | ||||
|         url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id | ||||
|         rss = self._download_xml(url, user_id, 'Downloading user RSS') | ||||
|         rss = self._download_xml( | ||||
|             'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id, | ||||
|             user_id, 'Downloading user RSS') | ||||
|  | ||||
|         entries = [self.url_result(video_url.text, 'Smotri') | ||||
|                    for video_url in rss.findall('./channel/item/link')] | ||||
|  | ||||
|         description_text = rss.find('./channel/description').text | ||||
|         user_nickname = self._html_search_regex( | ||||
|             '^Видео режиссера (.*)$', description_text, | ||||
|             'user nickname') | ||||
|         description_text = xpath_text(rss, './channel/description') or '' | ||||
|         user_nickname = self._search_regex( | ||||
|             '^Видео режиссера (.+)$', description_text, | ||||
|             'user nickname', fatal=False) | ||||
|  | ||||
|         return self.playlist_result(entries, user_id, user_nickname) | ||||
|  | ||||
| @@ -310,11 +309,11 @@ class SmotriUserIE(InfoExtractor): | ||||
| class SmotriBroadcastIE(InfoExtractor): | ||||
|     IE_DESC = 'Smotri.com broadcasts' | ||||
|     IE_NAME = 'smotri:broadcast' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         broadcast_id = mobj.group('broadcastid') | ||||
|         broadcast_id = mobj.group('id') | ||||
|  | ||||
|         broadcast_url = 'http://' + mobj.group('url') | ||||
|         broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page') | ||||
| @@ -328,7 +327,8 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|  | ||||
|             (username, password) = self._get_login_info() | ||||
|             if username is None: | ||||
|                 self.raise_login_required('Erotic broadcasts allowed only for registered users') | ||||
|                 self.raise_login_required( | ||||
|                     'Erotic broadcasts allowed only for registered users') | ||||
|  | ||||
|             login_form = { | ||||
|                 'login-hint53': '1', | ||||
| @@ -343,8 +343,9 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|             broadcast_page = self._download_webpage( | ||||
|                 request, broadcast_id, 'Logging in and confirming age') | ||||
|  | ||||
|             if re.search('>Неверный логин или пароль<', broadcast_page) is not None: | ||||
|                 raise ExtractorError('Unable to log in: bad username or password', expected=True) | ||||
|             if '>Неверный логин или пароль<' in broadcast_page: | ||||
|                 raise ExtractorError( | ||||
|                     'Unable to log in: bad username or password', expected=True) | ||||
|  | ||||
|             adult_content = True | ||||
|         else: | ||||
| @@ -383,11 +384,11 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|  | ||||
|             broadcast_playpath = broadcast_json['_streamName'] | ||||
|             broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL']) | ||||
|             broadcast_thumbnail = broadcast_json['_imgURL'] | ||||
|             broadcast_thumbnail = broadcast_json.get('_imgURL') | ||||
|             broadcast_title = self._live_title(broadcast_json['title']) | ||||
|             broadcast_description = broadcast_json['description'] | ||||
|             broadcaster_nick = broadcast_json['nick'] | ||||
|             broadcaster_login = broadcast_json['login'] | ||||
|             broadcast_description = broadcast_json.get('description') | ||||
|             broadcaster_nick = broadcast_json.get('nick') | ||||
|             broadcaster_login = broadcast_json.get('login') | ||||
|             rtmp_conn = 'S:%s' % uuid.uuid4().hex | ||||
|         except KeyError: | ||||
|             if protected_broadcast: | ||||
|   | ||||
| @@ -119,6 +119,12 @@ class SoundcloudIE(InfoExtractor): | ||||
|     _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' | ||||
|     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_urls(webpage): | ||||
|         return [m.group('url') for m in re.finditer( | ||||
|             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1', | ||||
|             webpage)] | ||||
|  | ||||
|     def report_resolve(self, video_id): | ||||
|         """Report information extraction.""" | ||||
|         self.to_screen('%s: Resolving id' % video_id) | ||||
|   | ||||
| @@ -17,6 +17,8 @@ class SouthParkIE(MTVServicesInfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'South Park|Bat Daded', | ||||
|             'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', | ||||
|             'timestamp': 1112760000, | ||||
|             'upload_date': '20050406', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
| @@ -28,6 +30,10 @@ class SouthParkEsIE(SouthParkIE): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', | ||||
|         'info_dict': { | ||||
|             'title': 'Cartman Consigue Una Sonda Anal', | ||||
|             'description': 'Cartman Consigue Una Sonda Anal', | ||||
|         }, | ||||
|         'playlist_count': 4, | ||||
|     }] | ||||
|  | ||||
| @@ -42,17 +48,27 @@ class SouthParkDeIE(SouthParkIE): | ||||
|         'info_dict': { | ||||
|             'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The Government Won\'t Respect My Privacy', | ||||
|             'title': 'South Park|The Government Won\'t Respect My Privacy', | ||||
|             'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.', | ||||
|             'timestamp': 1380160800, | ||||
|             'upload_date': '20130926', | ||||
|         }, | ||||
|     }, { | ||||
|         # non-ASCII characters in initial URL | ||||
|         'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen', | ||||
|         'playlist_count': 4, | ||||
|         'info_dict': { | ||||
|             'title': 'Hashtag „Aufwärmen“', | ||||
|             'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.', | ||||
|         }, | ||||
|         'playlist_count': 3, | ||||
|     }, { | ||||
|         # non-ASCII characters in redirect URL | ||||
|         'url': 'http://www.southpark.de/alle-episoden/s18e09', | ||||
|         'playlist_count': 4, | ||||
|         'info_dict': { | ||||
|             'title': 'Hashtag „Aufwärmen“', | ||||
|             'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.', | ||||
|         }, | ||||
|         'playlist_count': 3, | ||||
|     }] | ||||
|  | ||||
|  | ||||
| @@ -63,7 +79,11 @@ class SouthParkNlIE(SouthParkIE): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free', | ||||
|         'playlist_count': 4, | ||||
|         'info_dict': { | ||||
|             'title': 'Freemium Isn\'t Free', | ||||
|             'description': 'Stan is addicted to the new Terrance and Phillip mobile game.', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
|     }] | ||||
|  | ||||
|  | ||||
| @@ -74,5 +94,9 @@ class SouthParkDkIE(SouthParkIE): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop', | ||||
|         'playlist_count': 4, | ||||
|         'info_dict': { | ||||
|             'title': 'Grounded Vindaloop', | ||||
|             'description': 'Butters is convinced he\'s living in a virtual reality.', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
|     }] | ||||
|   | ||||
| @@ -4,26 +4,31 @@ from .mtv import MTVServicesInfoExtractor | ||||
|  | ||||
|  | ||||
| class SpikeIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|         (?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+| | ||||
|          m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+)) | ||||
|         ''' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:[^/]+\.)?spike\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle', | ||||
|         'md5': '1a9265f32b0c375793d6c4ce45255256', | ||||
|         'info_dict': { | ||||
|             'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Auction Hunters|Can Allen Ride A Hundred Year-Old Motorcycle?', | ||||
|             'title': 'Auction Hunters|December 27, 2013|4|414|Can Allen Ride A Hundred Year-Old Motorcycle?', | ||||
|             'description': 'md5:fbed7e82ed5fad493615b3094a9499cb', | ||||
|             'timestamp': 1388120400, | ||||
|             'upload_date': '20131227', | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.spike.com/video-clips/lhtu8m/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.spike.com/video-clips/lhtu8m', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://bellator.spike.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://bellator.spike.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _FEED_URL = 'http://www.spike.com/feeds/mrss/' | ||||
|     _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobile_id = self._match_id(url) | ||||
|         if mobile_id: | ||||
|             url = 'http://www.spike.com/video-clips/%s' % mobile_id | ||||
|         return super(SpikeIE, self)._real_extract(url) | ||||
|   | ||||
							
								
								
									
										98
									
								
								youtube_dl/extractor/streamable.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								youtube_dl/extractor/streamable.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class StreamableIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://streamable\.com/(?:e/)?(?P<id>\w+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'https://streamable.com/dnd1', | ||||
|             'md5': '3e3bc5ca088b48c2d436529b64397fef', | ||||
|             'info_dict': { | ||||
|                 'id': 'dnd1', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol', | ||||
|                 'thumbnail': 're:https?://.*\.jpg$', | ||||
|                 'uploader': 'teabaker', | ||||
|                 'timestamp': 1454964157.35115, | ||||
|                 'upload_date': '20160208', | ||||
|                 'duration': 61.516, | ||||
|                 'view_count': int, | ||||
|             } | ||||
|         }, | ||||
|         # older video without bitrate, width/height, etc. info | ||||
|         { | ||||
|             'url': 'https://streamable.com/moo', | ||||
|             'md5': '2cf6923639b87fba3279ad0df3a64e73', | ||||
|             'info_dict': { | ||||
|                 'id': 'moo', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '"Please don\'t eat me!"', | ||||
|                 'thumbnail': 're:https?://.*\.jpg$', | ||||
|                 'timestamp': 1426115495, | ||||
|                 'upload_date': '20150311', | ||||
|                 'duration': 12, | ||||
|                 'view_count': int, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://streamable.com/e/dnd1', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         # Note: Using the ajax API, as the public Streamable API doesn't seem | ||||
|         # to return video info like the title properly sometimes, and doesn't | ||||
|         # include info like the video duration | ||||
|         video = self._download_json( | ||||
|             'https://streamable.com/ajax/videos/%s' % video_id, video_id) | ||||
|  | ||||
|         # Format IDs: | ||||
|         # 0 The video is being uploaded | ||||
|         # 1 The video is being processed | ||||
|         # 2 The video has at least one file ready | ||||
|         # 3 The video is unavailable due to an error | ||||
|         status = video.get('status') | ||||
|         if status != 2: | ||||
|             raise ExtractorError( | ||||
|                 'This video is currently unavailable. It may still be uploading or processing.', | ||||
|                 expected=True) | ||||
|  | ||||
|         title = video.get('reddit_title') or video['title'] | ||||
|  | ||||
|         formats = [] | ||||
|         for key, info in video['files'].items(): | ||||
|             if not info.get('url'): | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'format_id': key, | ||||
|                 'url': self._proto_relative_url(info['url']), | ||||
|                 'width': int_or_none(info.get('width')), | ||||
|                 'height': int_or_none(info.get('height')), | ||||
|                 'filesize': int_or_none(info.get('size')), | ||||
|                 'fps': int_or_none(info.get('framerate')), | ||||
|                 'vbr': float_or_none(info.get('bitrate'), 1000) | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': video.get('description'), | ||||
|             'thumbnail': self._proto_relative_url(video.get('thumbnail_url')), | ||||
|             'uploader': video.get('owner', {}).get('user_name'), | ||||
|             'timestamp': float_or_none(video.get('date_added')), | ||||
|             'duration': float_or_none(video.get('duration')), | ||||
|             'view_count': int_or_none(video.get('plays')), | ||||
|             'formats': formats | ||||
|         } | ||||
| @@ -1,46 +1,56 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .theplatform import ThePlatformIE | ||||
| from ..utils import ( | ||||
|     update_url_query, | ||||
|     smuggle_url, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SyfyIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P<id>[0-9]+)|(?!videos)(?P<video_name>[^/]+)(?:$|[?#]))' | ||||
|  | ||||
| class SyfyIE(ThePlatformIE): | ||||
|     _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458', | ||||
|         'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer', | ||||
|         'info_dict': { | ||||
|             'id': 'NmqMrGnXvmO1', | ||||
|             'ext': 'flv', | ||||
|             'title': 'George Lucas has Advice for his Daughter', | ||||
|             'description': 'Listen to what insights George Lucas give his daughter Amanda.', | ||||
|             'id': '2968097', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The Internet Ruined My Life: Season 1 Trailer', | ||||
|             'description': 'One tweet, one post, one click, can destroy everything.', | ||||
|             'uploader': 'NBCU-MPAT', | ||||
|             'upload_date': '20170113', | ||||
|             'timestamp': 1484345640, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     }, { | ||||
|         'url': 'http://www.syfy.com/wilwheaton', | ||||
|         'md5': '94dfa54ee3ccb63295b276da08c415f6', | ||||
|         'info_dict': { | ||||
|             'id': '4yoffOOXC767', | ||||
|             'ext': 'flv', | ||||
|             'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.', | ||||
|             'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|         'skip': 'Blocked outside the US', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_name = mobj.group('video_name') | ||||
|         if video_name: | ||||
|             generic_webpage = self._download_webpage(url, video_name) | ||||
|             video_id = self._search_regex( | ||||
|                 r'<iframe.*?class="video_iframe_page"\s+src="/_utils/video/thP_video_controller.php.*?_vid([0-9]+)">', | ||||
|                 generic_webpage, 'video ID') | ||||
|             url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % ( | ||||
|                 video_name, video_name, video_id) | ||||
|         else: | ||||
|             video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         return self.url_result(self._og_search_video_url(webpage)) | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         syfy_mpx = list(self._parse_json(self._search_regex( | ||||
|             r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'), | ||||
|             display_id)['syfy']['syfy_mpx'].values())[0] | ||||
|         video_id = syfy_mpx['mpxGUID'] | ||||
|         title = syfy_mpx['episodeTitle'] | ||||
|         query = { | ||||
|             'mbr': 'true', | ||||
|             'manifest': 'm3u', | ||||
|         } | ||||
|         if syfy_mpx.get('entitlement') == 'auth': | ||||
|             resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>syfy</title><item><title><![CDATA[%s]]></title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14')) | ||||
|             query['auth'] = self._extract_mvpd_auth( | ||||
|                 url, video_id, 'syfy', resource) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': 'ThePlatform', | ||||
|             'url': smuggle_url(update_url_query( | ||||
|                 self._proto_relative_url(syfy_mpx['releaseURL']), query), | ||||
|                 {'force_smil_url': True}), | ||||
|             'title': title, | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|         } | ||||
|   | ||||
| @@ -47,11 +47,10 @@ class TelegraafIE(InfoExtractor): | ||||
|             ext = determine_ext(manifest_url) | ||||
|             if ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     manifest_url, video_id, ext='mp4', m3u8_id='hls')) | ||||
|                     manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)) | ||||
|             elif ext == 'mpd': | ||||
|                 # TODO: Current DASH formats are broken - $Time$ pattern in | ||||
|                 # <SegmentTemplate> not implemented yet | ||||
|                 continue | ||||
|                 formats.extend(self._extract_mpd_formats( | ||||
|                     manifest_url, video_id, mpd_id='dash', fatal=False)) | ||||
|             else: | ||||
|                 self.report_warning('Unknown adaptive format %s' % ext) | ||||
|         for location in locations.get('progressive', []): | ||||
|   | ||||
| @@ -24,16 +24,20 @@ class ThreeQSDNIE(InfoExtractor): | ||||
|             'title': '0280d6b9-1215-11e6-b427-0cc47a188158', | ||||
|             'is_live': False, | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download MPD manifest'], | ||||
|         'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'], | ||||
|     }, { | ||||
|         # live video stream | ||||
|         'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true', | ||||
|         'info_dict': { | ||||
|             'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f', | ||||
|             'is_live': False, | ||||
|             'title': 're:^d755d94b-4ab9-11e3-9162-0025907ad44f [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # m3u8 downloads | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download MPD manifest'], | ||||
|     }, { | ||||
|         # live audio stream | ||||
|         'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48', | ||||
| @@ -114,7 +118,7 @@ class ThreeQSDNIE(InfoExtractor): | ||||
|                     'vcodec': 'none' if stream_type == 'audio' else None, | ||||
|                 }) | ||||
|  | ||||
|         for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js): | ||||
|         for item_js in re.findall(r'({[^{]*?\b(?:src|source)\s*:\s*["\'].+?})', js): | ||||
|             f = self._parse_json( | ||||
|                 item_js, video_id, transform_source=js_to_json, fatal=False) | ||||
|             if not f: | ||||
|   | ||||
| @@ -5,31 +5,27 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class TMZIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/]+)/?' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.tmz.com/videos/0_okj015ty/', | ||||
|         'md5': '791204e3bf790b1426cb2db0706184c0', | ||||
|         'md5': '4d22a51ef205b6c06395d8394f72d560', | ||||
|         'info_dict': { | ||||
|             'id': '0_okj015ty', | ||||
|             'url': 'http://tmz.vo.llnwd.net/o28/2014-03/13/0_okj015ty_0_rt8ro3si_2.mp4', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!', | ||||
|             'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie???  Or is she just showing off her amazing boobs?', | ||||
|             'thumbnail': r're:http://cdnbakmi\.kaltura\.com/.*thumbnail.*', | ||||
|             'timestamp': 1394747163, | ||||
|             'uploader_id': 'batchUser', | ||||
|             'upload_date': '20140313', | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.tmz.com/videos/0-cegprt2p/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': self._html_search_meta('VideoURL', webpage, fatal=True), | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._html_search_meta('ThumbURL', webpage), | ||||
|         } | ||||
|         video_id = self._match_id(url).replace('-', '_') | ||||
|         return self.url_result('kaltura:591531:%s' % video_id, 'Kaltura', video_id) | ||||
|  | ||||
|  | ||||
| class TMZArticleIE(InfoExtractor): | ||||
|   | ||||
| @@ -8,6 +8,7 @@ from ..utils import ( | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     float_or_none, | ||||
|     js_to_json, | ||||
|     parse_iso8601, | ||||
|     remove_end, | ||||
| ) | ||||
| @@ -54,10 +55,11 @@ class TV2IE(InfoExtractor): | ||||
|                 ext = determine_ext(video_url) | ||||
|                 if ext == 'f4m': | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         video_url, video_id, f4m_id=format_id)) | ||||
|                         video_url, video_id, f4m_id=format_id, fatal=False)) | ||||
|                 elif ext == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         video_url, video_id, 'mp4', m3u8_id=format_id)) | ||||
|                         video_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                         m3u8_id=format_id, fatal=False)) | ||||
|                 elif ext == 'ism' or video_url.endswith('.ism/Manifest'): | ||||
|                     pass | ||||
|                 else: | ||||
| @@ -105,7 +107,7 @@ class TV2ArticleIE(InfoExtractor): | ||||
|         'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542', | ||||
|         'info_dict': { | ||||
|             'id': '6930542', | ||||
|             'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret', | ||||
|             'title': 'Russen hetses etter pingvintyveri - innrømmer å ha åpnet luken på buret', | ||||
|             'description': 'md5:339573779d3eea3542ffe12006190954', | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
| @@ -119,9 +121,23 @@ class TV2ArticleIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         # Old embed pattern (looks unused nowadays) | ||||
|         assets = re.findall(r'data-assetid=["\'](\d+)', webpage) | ||||
|  | ||||
|         if not assets: | ||||
|             # New embed pattern | ||||
|             for v in re.findall('TV2ContentboxVideo\(({.+?})\)', webpage): | ||||
|                 video = self._parse_json( | ||||
|                     v, playlist_id, transform_source=js_to_json, fatal=False) | ||||
|                 if not video: | ||||
|                     continue | ||||
|                 asset = video.get('assetId') | ||||
|                 if asset: | ||||
|                     assets.append(asset) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2') | ||||
|             for video_id in re.findall(r'data-assetid="(\d+)"', webpage)] | ||||
|             self.url_result('http://www.tv2.no/v/%s' % asset_id, 'TV2') | ||||
|             for asset_id in assets] | ||||
|  | ||||
|         title = remove_end(self._og_search_title(webpage), ' - TV2.no') | ||||
|         description = remove_end(self._og_search_description(webpage), ' - TV2.no') | ||||
|   | ||||
| @@ -9,56 +9,23 @@ class TVLandIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)' | ||||
|     _FEED_URL = 'http://www.tvland.com/feeds/mrss/' | ||||
|     _TESTS = [{ | ||||
|         # Geo-restricted. Without a proxy metadata are still there. With a | ||||
|         # proxy it redirects to http://m.tvland.com/app/ | ||||
|         'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048', | ||||
|         'playlist': [ | ||||
|             { | ||||
|                 'md5': '227e9723b9669c05bf51098b10287aa7', | ||||
|                 'info_dict': { | ||||
|                     'id': 'bcbd3a83-3aca-4dca-809b-f78a87dcccdd', | ||||
|                     'ext': 'mp4', | ||||
|                     'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 1 of 5', | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 'md5': '9fa2b764ec0e8194fb3ebb01a83df88b', | ||||
|                 'info_dict': { | ||||
|                     'id': 'f4279548-6e13-40dd-92e8-860d27289197', | ||||
|                     'ext': 'mp4', | ||||
|                     'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 2 of 5', | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 'md5': 'fde4c3bccd7cc7e3576b338734153cec', | ||||
|                 'info_dict': { | ||||
|                     'id': '664e4a38-53ef-4115-9bc9-d0f789ec6334', | ||||
|                     'ext': 'mp4', | ||||
|                     'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 3 of 5', | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 'md5': '247f6780cda6891f2e49b8ae2b10e017', | ||||
|                 'info_dict': { | ||||
|                     'id': '9146ecf5-b15a-4d78-879c-6679b77f4960', | ||||
|                     'ext': 'mp4', | ||||
|                     'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 4 of 5', | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 'md5': 'fd269f33256e47bad5eb6c40de089ff6', | ||||
|                 'info_dict': { | ||||
|                     'id': '04334a2e-9a47-4214-a8c2-ae5792e2fab7', | ||||
|                     'ext': 'mp4', | ||||
|                     'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 5 of 5', | ||||
|                 } | ||||
|             } | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'description': 'md5:80973e81b916a324e05c14a3fb506d29', | ||||
|             'title': 'The Invasion', | ||||
|         }, | ||||
|         'playlist': [], | ||||
|     }, { | ||||
|         'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies', | ||||
|         'md5': 'e2c6389401cf485df26c79c247b08713', | ||||
|         'info_dict': { | ||||
|             'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Younger|Younger: Hilary Duff - Little Lies', | ||||
|             'description': 'md5:7d192f56ca8d958645c83f0de8ef0269' | ||||
|             'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies', | ||||
|             'description': 'md5:7d192f56ca8d958645c83f0de8ef0269', | ||||
|             'upload_date': '20151228', | ||||
|             'timestamp': 1451289600, | ||||
|         }, | ||||
|     }] | ||||
|   | ||||
| @@ -24,6 +24,7 @@ class TVPIE(InfoExtractor): | ||||
|             'id': '194536', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Czas honoru, I seria – odc. 13', | ||||
|             'description': 'md5:76649d2014f65c99477be17f23a4dead', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', | ||||
| @@ -32,6 +33,16 @@ class TVPIE(InfoExtractor): | ||||
|             'id': '17916176', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', | ||||
|             'description': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', | ||||
|         }, | ||||
|     }, { | ||||
|         # page id is not the same as video id(#7799) | ||||
|         'url': 'http://vod.tvp.pl/22704887/08122015-1500', | ||||
|         'md5': 'cf6a4705dfd1489aef8deb168d6ba742', | ||||
|         'info_dict': { | ||||
|             'id': '22680786', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Wiadomości, 08.12.2015, 15:00', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', | ||||
| @@ -53,6 +64,39 @@ class TVPIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         page_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|         video_id = self._search_regex([ | ||||
|             r'<iframe[^>]+src="[^"]*?object_id=(\d+)', | ||||
|             "object_id\s*:\s*'(\d+)'"], webpage, 'video id') | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': 'tvp:' + video_id, | ||||
|             'description': self._og_search_description(webpage, default=None), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'ie_key': 'TVPEmbed', | ||||
|         } | ||||
|  | ||||
|  | ||||
| class TVPEmbedIE(InfoExtractor): | ||||
|     IE_NAME = 'tvp:embed' | ||||
|     IE_DESC = 'Telewizja Polska' | ||||
|     _VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268', | ||||
|         'md5': '8c9cd59d16edabf39331f93bf8a766c7', | ||||
|         'info_dict': { | ||||
|             'id': '22670268', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Panorama, 07.12.2015, 15:40', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'tvp:22670268', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
| @@ -89,8 +133,8 @@ class TVPIE(InfoExtractor): | ||||
|             r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)', | ||||
|             video_url, 'video base url', default=None) | ||||
|         if video_url_base: | ||||
|             # TODO: Current DASH formats are broken - $Time$ pattern in | ||||
|             # <SegmentTemplate> not implemented yet | ||||
|             # TODO: <Group> found instead of <AdaptationSet> in MPD manifest. | ||||
|             # It's not mentioned in MPEG-DASH standard. Figure that out. | ||||
|             # formats.extend(self._extract_mpd_formats( | ||||
|             #     video_url_base + '.ism/video.mpd', | ||||
|             #     video_id, mpd_id='dash', fatal=False)) | ||||
|   | ||||
| @@ -4,47 +4,48 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     qualities, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TVPlayIE(InfoExtractor): | ||||
|     IE_DESC = 'TV3Play and related services' | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)? | ||||
|         (?:tvplay\.lv/parraides| | ||||
|            tv3play\.lt/programos| | ||||
|            play\.tv3\.lt/programos| | ||||
|            tv3play\.ee/sisu| | ||||
|            tv3play\.se/program| | ||||
|            tv6play\.se/program| | ||||
|            tv8play\.se/program| | ||||
|            tv10play\.se/program| | ||||
|            tv3play\.no/programmer| | ||||
|            viasat4play\.no/programmer| | ||||
|            tv6play\.no/programmer| | ||||
|            tv3play\.dk/programmer| | ||||
|         (?:tvplay(?:\.skaties)?\.lv/parraides| | ||||
|            (?:tv3play|play\.tv3)\.lt/programos| | ||||
|            tv3play(?:\.tv3)?\.ee/sisu| | ||||
|            tv(?:3|6|8|10)play\.se/program| | ||||
|            (?:(?:tv3play|viasat4play|tv6play)\.no|tv3play\.dk)/programmer| | ||||
|            play\.novatv\.bg/programi | ||||
|         )/[^/]+/(?P<id>\d+) | ||||
|         ''' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true', | ||||
|             'md5': 'a1612fe0849455423ad8718fe049be21', | ||||
|             'info_dict': { | ||||
|                 'id': '418113', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Kādi ir īri? - Viņas melo labāk', | ||||
|                 'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.', | ||||
|                 'series': 'Viņas melo labāk', | ||||
|                 'season': '2.sezona', | ||||
|                 'season_number': 2, | ||||
|                 'duration': 25, | ||||
|                 'timestamp': 1406097056, | ||||
|                 'upload_date': '20140723', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true', | ||||
| @@ -53,6 +54,10 @@ class TVPlayIE(InfoExtractor): | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Moterys meluoja geriau', | ||||
|                 'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e', | ||||
|                 'series': 'Moterys meluoja geriau', | ||||
|                 'episode_number': 47, | ||||
|                 'season': '1 sezonas', | ||||
|                 'season_number': 1, | ||||
|                 'duration': 1330, | ||||
|                 'timestamp': 1403769181, | ||||
|                 'upload_date': '20140626', | ||||
| @@ -82,7 +87,7 @@ class TVPlayIE(InfoExtractor): | ||||
|             'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true', | ||||
|             'info_dict': { | ||||
|                 'id': '395385', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Husräddarna S02E07', | ||||
|                 'description': 'md5:f210c6c89f42d4fc39faa551be813777', | ||||
|                 'duration': 2574, | ||||
| @@ -90,7 +95,6 @@ class TVPlayIE(InfoExtractor): | ||||
|                 'upload_date': '20140520', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
| @@ -98,7 +102,7 @@ class TVPlayIE(InfoExtractor): | ||||
|             'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true', | ||||
|             'info_dict': { | ||||
|                 'id': '266636', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Den sista dokusåpan S01E08', | ||||
|                 'description': 'md5:295be39c872520221b933830f660b110', | ||||
|                 'duration': 1492, | ||||
| @@ -107,7 +111,6 @@ class TVPlayIE(InfoExtractor): | ||||
|                 'age_limit': 18, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
| @@ -115,7 +118,7 @@ class TVPlayIE(InfoExtractor): | ||||
|             'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true', | ||||
|             'info_dict': { | ||||
|                 'id': '282756', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Antikjakten S01E10', | ||||
|                 'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8', | ||||
|                 'duration': 2646, | ||||
| @@ -123,7 +126,6 @@ class TVPlayIE(InfoExtractor): | ||||
|                 'upload_date': '20120925', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
| @@ -131,7 +133,7 @@ class TVPlayIE(InfoExtractor): | ||||
|             'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true', | ||||
|             'info_dict': { | ||||
|                 'id': '230898', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Anna Anka søker assistent - Ep. 8', | ||||
|                 'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474', | ||||
|                 'duration': 2656, | ||||
| @@ -139,7 +141,6 @@ class TVPlayIE(InfoExtractor): | ||||
|                 'upload_date': '20100628', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
| @@ -147,7 +148,7 @@ class TVPlayIE(InfoExtractor): | ||||
|             'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true', | ||||
|             'info_dict': { | ||||
|                 'id': '21873', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Budbringerne program 10', | ||||
|                 'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d', | ||||
|                 'duration': 1297, | ||||
| @@ -155,7 +156,6 @@ class TVPlayIE(InfoExtractor): | ||||
|                 'upload_date': '20090929', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
| @@ -163,7 +163,7 @@ class TVPlayIE(InfoExtractor): | ||||
|             'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true', | ||||
|             'info_dict': { | ||||
|                 'id': '361883', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Hotelinspektør Alex Polizzi - Ep. 10', | ||||
|                 'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81', | ||||
|                 'duration': 2594, | ||||
| @@ -171,7 +171,6 @@ class TVPlayIE(InfoExtractor): | ||||
|                 'upload_date': '20140224', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
| @@ -191,6 +190,14 @@ class TVPlayIE(InfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -199,51 +206,91 @@ class TVPlayIE(InfoExtractor): | ||||
|         video = self._download_json( | ||||
|             'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON') | ||||
|  | ||||
|         if video['is_geo_blocked']: | ||||
|             self.report_warning( | ||||
|                 'This content might not be available in your country due to copyright reasons') | ||||
|         title = video['title'] | ||||
|  | ||||
|         streams = self._download_json( | ||||
|             'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON') | ||||
|         try: | ||||
|             streams = self._download_json( | ||||
|                 'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, | ||||
|                 video_id, 'Downloading streams JSON') | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: | ||||
|                 msg = self._parse_json(e.cause.read().decode('utf-8'), video_id) | ||||
|                 raise ExtractorError(msg['msg'], expected=True) | ||||
|             raise | ||||
|  | ||||
|         quality = qualities(['hls', 'medium', 'high']) | ||||
|         formats = [] | ||||
|         for format_id, video_url in streams['streams'].items(): | ||||
|         for format_id, video_url in streams.get('streams', {}).items(): | ||||
|             if not video_url or not isinstance(video_url, compat_str): | ||||
|                 continue | ||||
|             fmt = { | ||||
|                 'format_id': format_id, | ||||
|                 'preference': quality(format_id), | ||||
|             } | ||||
|             if video_url.startswith('rtmp'): | ||||
|                 m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url) | ||||
|                 if not m: | ||||
|                     continue | ||||
|                 fmt.update({ | ||||
|                     'ext': 'flv', | ||||
|                     'url': m.group('url'), | ||||
|                     'app': m.group('app'), | ||||
|                     'play_path': m.group('playpath'), | ||||
|                 }) | ||||
|             elif video_url.endswith('.f4m'): | ||||
|             ext = determine_ext(video_url) | ||||
|             if ext == 'f4m': | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     video_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id)) | ||||
|                 continue | ||||
|                     update_url_query(video_url, { | ||||
|                         'hdcore': '3.5.0', | ||||
|                         'plugin': 'aasp-3.5.0.151.81' | ||||
|                     }), video_id, f4m_id='hds', fatal=False)) | ||||
|             elif ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     video_url, video_id, 'mp4', 'm3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|             else: | ||||
|                 fmt.update({ | ||||
|                     'url': video_url, | ||||
|                 }) | ||||
|             formats.append(fmt) | ||||
|                 fmt = { | ||||
|                     'format_id': format_id, | ||||
|                     'quality': quality(format_id), | ||||
|                     'ext': ext, | ||||
|                 } | ||||
|                 if video_url.startswith('rtmp'): | ||||
|                     m = re.search( | ||||
|                         r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url) | ||||
|                     if not m: | ||||
|                         continue | ||||
|                     fmt.update({ | ||||
|                         'ext': 'flv', | ||||
|                         'url': m.group('url'), | ||||
|                         'app': m.group('app'), | ||||
|                         'play_path': m.group('playpath'), | ||||
|                     }) | ||||
|                 else: | ||||
|                     fmt.update({ | ||||
|                         'url': video_url, | ||||
|                     }) | ||||
|                 formats.append(fmt) | ||||
|  | ||||
|         if not formats and video.get('is_geo_blocked'): | ||||
|             self.raise_geo_restricted( | ||||
|                 'This content might not be available in your country due to copyright reasons') | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         # TODO: webvtt in m3u8 | ||||
|         subtitles = {} | ||||
|         sami_path = video.get('sami_path') | ||||
|         if sami_path: | ||||
|             lang = self._search_regex( | ||||
|                 r'_([a-z]{2})\.xml', sami_path, 'lang', | ||||
|                 default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]) | ||||
|             subtitles[lang] = [{ | ||||
|                 'url': sami_path, | ||||
|             }] | ||||
|  | ||||
|         series = video.get('format_title') | ||||
|         episode_number = int_or_none(video.get('format_position', {}).get('episode')) | ||||
|         season = video.get('_embedded', {}).get('season', {}).get('title') | ||||
|         season_number = int_or_none(video.get('format_position', {}).get('season')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video['title'], | ||||
|             'description': video['description'], | ||||
|             'duration': video['duration'], | ||||
|             'timestamp': parse_iso8601(video['created_at']), | ||||
|             'view_count': video['views']['total'], | ||||
|             'age_limit': video.get('age_limit', 0), | ||||
|             'title': title, | ||||
|             'description': video.get('description'), | ||||
|             'series': series, | ||||
|             'episode_number': episode_number, | ||||
|             'season': season, | ||||
|             'season_number': season_number, | ||||
|             'duration': int_or_none(video.get('duration')), | ||||
|             'timestamp': parse_iso8601(video.get('created_at')), | ||||
|             'view_count': int_or_none(video.get('views', {}).get('total')), | ||||
|             'age_limit': int_or_none(video.get('age_limit', 0)), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|   | ||||
| @@ -461,7 +461,7 @@ class TwitchClipsIE(InfoExtractor): | ||||
|     IE_NAME = 'twitch:clips' | ||||
|     _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound', | ||||
|         'md5': '761769e1eafce0ffebfb4089cb3847cd', | ||||
|         'info_dict': { | ||||
| @@ -473,7 +473,11 @@ class TwitchClipsIE(InfoExtractor): | ||||
|             'uploader': 'stereotype_', | ||||
|             'uploader_id': 'stereotype_', | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         # multiple formats | ||||
|         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| @@ -485,15 +489,27 @@ class TwitchClipsIE(InfoExtractor): | ||||
|                 r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'), | ||||
|             video_id, transform_source=js_to_json) | ||||
|  | ||||
|         video_url = clip['clip_video_url'] | ||||
|         title = clip['channel_title'] | ||||
|         title = clip.get('channel_title') or self._og_search_title(webpage) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': option['source'], | ||||
|             'format_id': option.get('quality'), | ||||
|             'height': int_or_none(option.get('quality')), | ||||
|         } for option in clip.get('quality_options', []) if option.get('source')] | ||||
|  | ||||
|         if not formats: | ||||
|             formats = [{ | ||||
|                 'url': clip['clip_video_url'], | ||||
|             }] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'), | ||||
|             'uploader': clip.get('curator_login'), | ||||
|             'uploader_id': clip.get('curator_display_name'), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -9,8 +9,8 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class VidziIE(JWPlatformBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://vidzi.tv/cghql9yq6emu.html', | ||||
|         'md5': '4f16c71ca0c8c8635ab6932b5f3f1660', | ||||
|         'info_dict': { | ||||
| @@ -22,12 +22,16 @@ class VidziIE(JWPlatformBaseIE): | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html', | ||||
|         'skip_download': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         webpage = self._download_webpage( | ||||
|             'http://vidzi.tv/%s' % video_id, video_id) | ||||
|         title = self._html_search_regex( | ||||
|             r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title') | ||||
|  | ||||
|   | ||||
| @@ -130,7 +130,7 @@ class VikiIE(VikiBaseIE): | ||||
|     }, { | ||||
|         # clip | ||||
|         'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference', | ||||
|         'md5': 'feea2b1d7b3957f70886e6dfd8b8be84', | ||||
|         'md5': '86c0b5dbd4d83a6611a79987cc7a1989', | ||||
|         'info_dict': { | ||||
|             'id': '1067139v', | ||||
|             'ext': 'mp4', | ||||
| @@ -156,15 +156,11 @@ class VikiIE(VikiBaseIE): | ||||
|             'like_count': int, | ||||
|             'age_limit': 13, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Blocked in the US', | ||||
|     }, { | ||||
|         # episode | ||||
|         'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', | ||||
|         'md5': '1f54697dabc8f13f31bf06bb2e4de6db', | ||||
|         'md5': '5fa476a902e902783ac7a4d615cdbc7a', | ||||
|         'info_dict': { | ||||
|             'id': '44699v', | ||||
|             'ext': 'mp4', | ||||
| @@ -200,7 +196,7 @@ class VikiIE(VikiBaseIE): | ||||
|     }, { | ||||
|         # non-English description | ||||
|         'url': 'http://www.viki.com/videos/158036v-love-in-magic', | ||||
|         'md5': '013dc282714e22acf9447cad14ff1208', | ||||
|         'md5': '1713ae35df5a521b31f6dc40730e7c9c', | ||||
|         'info_dict': { | ||||
|             'id': '158036v', | ||||
|             'ext': 'mp4', | ||||
| @@ -281,9 +277,16 @@ class VikiIE(VikiBaseIE): | ||||
|                 r'^(\d+)[pP]$', format_id, 'height', default=None)) | ||||
|             for protocol, format_dict in stream_dict.items(): | ||||
|                 if format_id == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         format_dict['url'], video_id, 'mp4', 'm3u8_native', | ||||
|                         m3u8_id='m3u8-%s' % protocol, fatal=False)) | ||||
|                     m3u8_formats = self._extract_m3u8_formats( | ||||
|                         format_dict['url'], video_id, 'mp4', | ||||
|                         entry_protocol='m3u8_native', preference=-1, | ||||
|                         m3u8_id='m3u8-%s' % protocol, fatal=False) | ||||
|                     # Despite CODECS metadata in m3u8 all video-only formats | ||||
|                     # are actually video+audio | ||||
|                     for f in m3u8_formats: | ||||
|                         if f.get('acodec') == 'none' and f.get('vcodec') != 'none': | ||||
|                             f['acodec'] = None | ||||
|                     formats.extend(m3u8_formats) | ||||
|                 else: | ||||
|                     formats.append({ | ||||
|                         'url': format_dict['url'], | ||||
|   | ||||
| @@ -364,6 +364,11 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) | ||||
|         if mobj: | ||||
|             return mobj.group(1) | ||||
|         # Look more for non-standard embedded Vimeo player | ||||
|         mobj = re.search( | ||||
|             r'<video[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     def _verify_player_video_password(self, url, video_id): | ||||
|         password = self._downloader.params.get('videopassword') | ||||
|   | ||||
| @@ -6,11 +6,18 @@ import json | ||||
| import sys | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     get_element_by_class, | ||||
|     int_or_none, | ||||
|     orderedSet, | ||||
|     parse_duration, | ||||
|     remove_start, | ||||
|     str_to_int, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
| @@ -20,7 +27,55 @@ from .vimeo import VimeoIE | ||||
| from .pladform import PladformIE | ||||
|  | ||||
|  | ||||
| class VKIE(InfoExtractor): | ||||
| class VKBaseIE(InfoExtractor): | ||||
|     _NETRC_MACHINE = 'vk' | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         login_page, url_handle = self._download_webpage_handle( | ||||
|             'https://vk.com', None, 'Downloading login page') | ||||
|  | ||||
|         login_form = self._hidden_inputs(login_page) | ||||
|  | ||||
|         login_form.update({ | ||||
|             'email': username.encode('cp1251'), | ||||
|             'pass': password.encode('cp1251'), | ||||
|         }) | ||||
|  | ||||
|         # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header | ||||
|         # and expects the first one to be set rather than second (see | ||||
|         # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201). | ||||
|         # As of RFC6265 the newer one cookie should be set into cookie store | ||||
|         # what actually happens. | ||||
|         # We will workaround this VK issue by resetting the remixlhk cookie to | ||||
|         # the first one manually. | ||||
|         cookies = url_handle.headers.get('Set-Cookie') | ||||
|         if cookies: | ||||
|             if sys.version_info[0] >= 3: | ||||
|                 cookies = cookies.encode('iso-8859-1') | ||||
|             cookies = cookies.decode('utf-8') | ||||
|             remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) | ||||
|             if remixlhk: | ||||
|                 value, domain = remixlhk.groups() | ||||
|                 self._set_cookie(domain, 'remixlhk', value) | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             'https://login.vk.com/?act=login', None, | ||||
|             note='Logging in as %s' % username, | ||||
|             data=urlencode_postdata(login_form)) | ||||
|  | ||||
|         if re.search(r'onLoginFailed', login_page): | ||||
|             raise ExtractorError( | ||||
|                 'Unable to login, incorrect username and/or password', expected=True) | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|  | ||||
| class VKIE(VKBaseIE): | ||||
|     IE_NAME = 'vk' | ||||
|     IE_DESC = 'VK' | ||||
|     _VALID_URL = r'''(?x) | ||||
| @@ -38,8 +93,6 @@ class VKIE(InfoExtractor): | ||||
|                             (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))? | ||||
|                         ) | ||||
|                     ''' | ||||
|     _NETRC_MACHINE = 'vk' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', | ||||
| @@ -189,49 +242,6 @@ class VKIE(InfoExtractor): | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         login_page, url_handle = self._download_webpage_handle( | ||||
|             'https://vk.com', None, 'Downloading login page') | ||||
|  | ||||
|         login_form = self._hidden_inputs(login_page) | ||||
|  | ||||
|         login_form.update({ | ||||
|             'email': username.encode('cp1251'), | ||||
|             'pass': password.encode('cp1251'), | ||||
|         }) | ||||
|  | ||||
|         # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header | ||||
|         # and expects the first one to be set rather than second (see | ||||
|         # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201). | ||||
|         # As of RFC6265 the newer one cookie should be set into cookie store | ||||
|         # what actually happens. | ||||
|         # We will workaround this VK issue by resetting the remixlhk cookie to | ||||
|         # the first one manually. | ||||
|         cookies = url_handle.headers.get('Set-Cookie') | ||||
|         if sys.version_info[0] >= 3: | ||||
|             cookies = cookies.encode('iso-8859-1') | ||||
|         cookies = cookies.decode('utf-8') | ||||
|         remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) | ||||
|         if remixlhk: | ||||
|             value, domain = remixlhk.groups() | ||||
|             self._set_cookie(domain, 'remixlhk', value) | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             'https://login.vk.com/?act=login', None, | ||||
|             note='Logging in as %s' % username, | ||||
|             data=urlencode_postdata(login_form)) | ||||
|  | ||||
|         if re.search(r'onLoginFailed', login_page): | ||||
|             raise ExtractorError( | ||||
|                 'Unable to login, incorrect username and/or password', expected=True) | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
| @@ -355,7 +365,7 @@ class VKIE(InfoExtractor): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class VKUserVideosIE(InfoExtractor): | ||||
| class VKUserVideosIE(VKBaseIE): | ||||
|     IE_NAME = 'vk:uservideos' | ||||
|     IE_DESC = "VK - User's Videos" | ||||
|     _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' | ||||
| @@ -396,3 +406,121 @@ class VKUserVideosIE(InfoExtractor): | ||||
|             webpage, 'title', default=page_id)) | ||||
|  | ||||
|         return self.playlist_result(entries, page_id, title) | ||||
|  | ||||
|  | ||||
| class VKWallPostIE(VKBaseIE): | ||||
|     IE_NAME = 'vk:wallpost' | ||||
|     _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))' | ||||
|     _TESTS = [{ | ||||
|         # public page URL, audio playlist | ||||
|         'url': 'https://vk.com/bs.official?w=wall-23538238_35', | ||||
|         'info_dict': { | ||||
|             'id': '23538238_35', | ||||
|             'title': 'Black Shadow - Wall post 23538238_35', | ||||
|             'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'md5': '5ba93864ec5b85f7ce19a9af4af080f6', | ||||
|             'info_dict': { | ||||
|                 'id': '135220665_111806521', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Black Shadow - Слепое Верование', | ||||
|                 'duration': 370, | ||||
|                 'uploader': 'Black Shadow', | ||||
|                 'artist': 'Black Shadow', | ||||
|                 'track': 'Слепое Верование', | ||||
|             }, | ||||
|         }, { | ||||
|             'md5': '4cc7e804579122b17ea95af7834c9233', | ||||
|             'info_dict': { | ||||
|                 'id': '135220665_111802303', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!', | ||||
|                 'duration': 423, | ||||
|                 'uploader': 'Black Shadow', | ||||
|                 'artist': 'Black Shadow', | ||||
|                 'track': 'Война - Негасимое Бездны Пламя!', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }], | ||||
|         'skip': 'Requires vk account credentials', | ||||
|     }, { | ||||
|         # single YouTube embed, no leading - | ||||
|         'url': 'https://vk.com/wall85155021_6319', | ||||
|         'info_dict': { | ||||
|             'id': '85155021_6319', | ||||
|             'title': 'Sergey Gorbunov - Wall post 85155021_6319', | ||||
|         }, | ||||
|         'playlist_count': 1, | ||||
|         'skip': 'Requires vk account credentials', | ||||
|     }, { | ||||
|         # wall page URL | ||||
|         'url': 'https://vk.com/wall-23538238_35', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # mobile wall page URL | ||||
|         'url': 'https://m.vk.com/wall-23538238_35', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         post_id = self._match_id(url) | ||||
|  | ||||
|         wall_url = 'https://vk.com/wall%s' % post_id | ||||
|  | ||||
|         post_id = remove_start(post_id, '-') | ||||
|  | ||||
|         webpage = self._download_webpage(wall_url, post_id) | ||||
|  | ||||
|         error = self._html_search_regex( | ||||
|             r'>Error</div>\s*<div[^>]+class=["\']body["\'][^>]*>([^<]+)', | ||||
|             webpage, 'error', default=None) | ||||
|         if error: | ||||
|             raise ExtractorError('VK said: %s' % error, expected=True) | ||||
|  | ||||
|         description = clean_html(get_element_by_class('wall_post_text', webpage)) | ||||
|         uploader = clean_html(get_element_by_class( | ||||
|             'fw_post_author', webpage)) or self._og_search_description(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         entries = [] | ||||
|  | ||||
|         for audio in re.finditer(r'''(?sx) | ||||
|                             <input[^>]+ | ||||
|                                 id=(?P<q1>["\'])audio_info(?P<id>\d+_\d+).*?(?P=q1)[^>]+ | ||||
|                                 value=(?P<q2>["\'])(?P<url>http.+?)(?P=q2) | ||||
|                                 .+? | ||||
|                             </table>''', webpage): | ||||
|             audio_html = audio.group(0) | ||||
|             audio_id = audio.group('id') | ||||
|             duration = parse_duration(get_element_by_class('duration', audio_html)) | ||||
|             track = self._html_search_regex( | ||||
|                 r'<span[^>]+id=["\']title%s[^>]*>([^<]+)' % audio_id, | ||||
|                 audio_html, 'title', default=None) | ||||
|             artist = self._html_search_regex( | ||||
|                 r'>([^<]+)</a></b>\s*&ndash', audio_html, | ||||
|                 'artist', default=None) | ||||
|             entries.append({ | ||||
|                 'id': audio_id, | ||||
|                 'url': audio.group('url'), | ||||
|                 'title': '%s - %s' % (artist, track) if artist and track else audio_id, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'duration': duration, | ||||
|                 'uploader': uploader, | ||||
|                 'artist': artist, | ||||
|                 'track': track, | ||||
|             }) | ||||
|  | ||||
|         for video in re.finditer( | ||||
|                 r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage): | ||||
|             entries.append(self.url_result( | ||||
|                 compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key())) | ||||
|  | ||||
|         title = 'Wall post %s' % post_id | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             orderedSet(entries), post_id, | ||||
|             '%s - %s' % (uploader, title) if uploader else title, | ||||
|             description) | ||||
|   | ||||
							
								
								
									
										58
									
								
								youtube_dl/extractor/vodplatform.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								youtube_dl/extractor/vodplatform.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unescapeHTML | ||||
|  | ||||
|  | ||||
| class VODPlatformIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/embed/(?P<id>[^/?#]+)' | ||||
|     _TEST = { | ||||
|         # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar | ||||
|         'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw', | ||||
|         'md5': '1db2b7249ce383d6be96499006e951fc', | ||||
|         'info_dict': { | ||||
|             'id': 'RufMcytHDolTH1MuKHY9Fw', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = unescapeHTML(self._og_search_title(webpage)) | ||||
|         hidden_inputs = self._hidden_inputs(webpage) | ||||
|  | ||||
|         base_url = self._search_regex( | ||||
|             '(.*/)(?:playlist.m3u8|manifest.mpd)', | ||||
|             hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], | ||||
|             'base url') | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             base_url + 'playlist.m3u8', video_id, 'mp4', | ||||
|             'm3u8_native', m3u8_id='hls', fatal=False) | ||||
|         formats.extend(self._extract_mpd_formats( | ||||
|             base_url + 'manifest.mpd', video_id, | ||||
|             mpd_id='dash', fatal=False)) | ||||
|         rtmp_formats = self._extract_smil_formats( | ||||
|             base_url + 'jwplayer.smil', video_id, fatal=False) | ||||
|         for rtmp_format in rtmp_formats: | ||||
|             rtsp_format = rtmp_format.copy() | ||||
|             rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) | ||||
|             del rtsp_format['play_path'] | ||||
|             del rtsp_format['ext'] | ||||
|             rtsp_format.update({ | ||||
|                 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), | ||||
|                 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), | ||||
|                 'protocol': 'rtsp', | ||||
|             }) | ||||
|             formats.extend([rtmp_format, rtsp_format]) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': hidden_inputs.get('HiddenThumbnail') or self._og_search_thumbnail(webpage), | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -9,7 +9,7 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     parse_duration, | ||||
|     qualities, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -22,7 +22,7 @@ class VuClipIE(InfoExtractor): | ||||
|             'id': '922692425', | ||||
|             'ext': '3gp', | ||||
|             'title': 'The Toy Soldiers - Hollywood Movie Trailer', | ||||
|             'duration': 180, | ||||
|             'duration': 177, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -46,34 +46,21 @@ class VuClipIE(InfoExtractor): | ||||
|                 '%s said: %s' % (self.IE_NAME, error_msg), expected=True) | ||||
|  | ||||
|         # These clowns alternate between two page types | ||||
|         links_code = self._search_regex( | ||||
|             r'''(?xs) | ||||
|                 (?: | ||||
|                     <img\s+src="[^"]*/play.gif".*?>| | ||||
|                     <!--\ player\ end\ -->\s*</div><!--\ thumb\ end--> | ||||
|                 ) | ||||
|                 (.*?) | ||||
|                 (?: | ||||
|                     <a\s+href="fblike|<div\s+class="social"> | ||||
|                 ) | ||||
|             ''', webpage, 'links') | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip() | ||||
|         video_url = self._search_regex( | ||||
|             r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif', | ||||
|             webpage, 'video URL', default=None) | ||||
|         if video_url: | ||||
|             formats = [{ | ||||
|                 'url': video_url, | ||||
|             }] | ||||
|         else: | ||||
|             formats = self._parse_html5_media_entries(url, webpage)[0]['formats'] | ||||
|  | ||||
|         quality_order = qualities(['Reg', 'Hi']) | ||||
|         formats = [] | ||||
|         for url, q in re.findall( | ||||
|                 r'<a\s+href="(?P<url>[^"]+)".*?>(?:<button[^>]*>)?(?P<q>[^<]+)(?:</button>)?</a>', links_code): | ||||
|             format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'url': url, | ||||
|                 'quality': quality_order(q), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|         title = remove_end(self._html_search_regex( | ||||
|             r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip(), ' - Video') | ||||
|  | ||||
|         duration = parse_duration(self._search_regex( | ||||
|             r'\(([0-9:]+)\)</span>', webpage, 'duration', fatal=False)) | ||||
|         duration = parse_duration(self._html_search_regex( | ||||
|             r'[(>]([0-9]+:[0-9]+)(?:<span|\))', webpage, 'duration', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user