Compare commits
	
		
			341 Commits
		
	
	
		
			2015.02.02
			...
			2015.02.24
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 4f3b21e1c7 | ||
|  | 54233c9080 | ||
|  | db8e13ef71 | ||
|  | 5a42414b9c | ||
|  | 9c665ab72e | ||
|  | b665ba6aa6 | ||
|  | ec5913b5cd | ||
|  | 25ac63ed71 | ||
|  | 99209c2916 | ||
|  | 1fbaa0a521 | ||
|  | 3037b91e05 | ||
|  | ffdf972b91 | ||
|  | 459e5fbd5f | ||
|  | bfc993cc91 | ||
|  | 4432db35d9 | ||
|  | 591ab1dff9 | ||
|  | 5bca2424bc | ||
|  | bd61a9e770 | ||
|  | 3438e7acd2 | ||
|  | 09c200acf2 | ||
|  | 716889cab1 | ||
|  | 409693984f | ||
|  | 04e8c11080 | ||
|  | 80af2b73ab | ||
|  | 3cc57f9645 | ||
|  | a65d4e7f14 | ||
|  | b531cfc019 | ||
|  | 543ec2136b | ||
|  | 93b5071f73 | ||
|  | ddc369f073 | ||
|  | fcc3e6138b | ||
|  | 9fe6ef7ab2 | ||
|  | c010af6f19 | ||
|  | 35b7982303 | ||
|  | f311cfa231 | ||
|  | 80970e531b | ||
|  | b7bb76df05 | ||
|  | 98c70d6fc7 | ||
|  | ab84349b16 | ||
|  | 03091e372f | ||
|  | 4d17184817 | ||
|  | e086e0eb6c | ||
|  | 314368c822 | ||
|  | c5181ab410 | ||
|  | ea5152cae1 | ||
|  | 255fca5eea | ||
|  | 4aeccadf4e | ||
|  | 93540ee10e | ||
|  | 8fb3ac3649 | ||
|  | 77b2986b5b | ||
|  | 62b013df0d | ||
|  | fad6768bd1 | ||
|  | a78125f925 | ||
|  | a00a8bcc8a | ||
|  | 1e9a9e167d | ||
|  | 3da0db62e6 | ||
|  | e14ced7918 | ||
|  | ab9d02f53b | ||
|  | a461a11989 | ||
|  | 1bd838608f | ||
|  | 365577f567 | ||
|  | 50efb383f0 | ||
|  | 5da6bd0083 | ||
|  | 5e9a033e6e | ||
|  | fb7cb6823e | ||
|  | dd0a58f5f0 | ||
|  | a21420389e | ||
|  | 6140baf4e1 | ||
|  | 8fc642eb5b | ||
|  | e66e1a0046 | ||
|  | d5c69f1da4 | ||
|  | f13b1e7d7f | ||
|  | 5c8a3f862a | ||
|  | 8807f1277f | ||
|  | a3b9157f49 | ||
|  | b88ba05356 | ||
|  | b74d505577 | ||
|  | 9e2d7dca87 | ||
|  | d236b37ac9 | ||
|  | e880c66bd8 | ||
|  | 383456aa29 | ||
|  | 1a13940c8d | ||
|  | 3d54788495 | ||
|  | 71d53ace2f | ||
|  | f37e3f99f0 | ||
|  | bd03ffc16e | ||
|  | 1ac1af9b47 | ||
|  | 3bf5705316 | ||
|  | 1c2528c8a3 | ||
|  | 7bd15b1a03 | ||
|  | 6b961a85fd | ||
|  | 7707004043 | ||
|  | a025d3c5a5 | ||
|  | c460bdd56b | ||
|  | b81a359eb6 | ||
|  | d61aefb24c | ||
|  | d305dd73a3 | ||
|  | 93a16ba238 | ||
|  | 4f7cea6c53 | ||
|  | afbdd3acc3 | ||
|  | 85d5866177 | ||
|  | 9789d7535d | ||
|  | d8443cd3f7 | ||
|  | d47c26e168 | ||
|  | 01561da142 | ||
|  | 0af25f784b | ||
|  | b9b42f2ea0 | ||
|  | 311c393838 | ||
|  | 18c1c42405 | ||
|  | 37dd5d4629 | ||
|  | 81975f4693 | ||
|  | b8b928d5cb | ||
|  | 3eff81fbf7 | ||
|  | 785521bf4f | ||
|  | 6d1a55a521 | ||
|  | 9cad27008b | ||
|  | 11e611a7fa | ||
|  | 72c1f8de06 | ||
|  | 6e99868e4c | ||
|  | 4d278fde64 | ||
|  | f21e915fb9 | ||
|  | 6f53c63df6 | ||
|  | 1def5f359e | ||
|  | 15ec669374 | ||
|  | a3fa5da496 | ||
|  | 30965ac66a | ||
|  | 09ab40b7d1 | ||
|  | edab9dbf4d | ||
|  | 9868ea4936 | ||
|  | 85920dd01d | ||
|  | fa15607773 | ||
|  | a91a2c1a83 | ||
|  | 16e7711e22 | ||
|  | 5cda4eda72 | ||
|  | 98f000409f | ||
|  | bd7fe0cf66 | ||
|  | 48246541da | ||
|  | 4a8d4a53b1 | ||
|  | 4cd95bcbc3 | ||
|  | be24c8697f | ||
|  | 0d93378887 | ||
|  | 4069766c52 | ||
|  | 7010577720 | ||
|  | 8ac27a68e6 | ||
|  | 46312e0b46 | ||
|  | f9216ed6ad | ||
|  | 65bf37ef83 | ||
|  | f740fae2a4 | ||
|  | fbc503d696 | ||
|  | 662435f728 | ||
|  | 163d966707 | ||
|  | 85729c51af | ||
|  | 360e1ca5cc | ||
|  | a1f2a06b34 | ||
|  | c84dd8a90d | ||
|  | 65469a7f8b | ||
|  | 6b597516c1 | ||
|  | b5857f62e2 | ||
|  | a504ced097 | ||
|  | 1db5fbcfe3 | ||
|  | 59b8ab5834 | ||
|  | a568180441 | ||
|  | 85e80f71cd | ||
|  | bfa6bdcd8b | ||
|  | 03cd72b007 | ||
|  | 5bfd430f81 | ||
|  | 73fac4e911 | ||
|  | 8fb474fb17 | ||
|  | f813928e4b | ||
|  | b9c7a97318 | ||
|  | 9fb2f1cd6d | ||
|  | 6ca7732d5e | ||
|  | b0ab0fac49 | ||
|  | a294bce82f | ||
|  | 76d1466b08 | ||
|  | 1888d3f7b3 | ||
|  | c2787701cc | ||
|  | 52e1d0ccc4 | ||
|  | 10e3c4c221 | ||
|  | 68f2d273bf | ||
|  | 7c86c21662 | ||
|  | ae1580d790 | ||
|  | 3215c50f25 | ||
|  | 36f73e8044 | ||
|  | a4f3d779db | ||
|  | d9aa2b784d | ||
|  | cffcbc02de | ||
|  | 9347fddbfc | ||
|  | 037e9437e4 | ||
|  | 36e7a4ca2e | ||
|  | ae6423d704 | ||
|  | 7105440cec | ||
|  | c80b9cd280 | ||
|  | 171ca612af | ||
|  | c3d64fc1b3 | ||
|  | 7c24ce225d | ||
|  | 08b38d5401 | ||
|  | 024c53694d | ||
|  | 7e6011101f | ||
|  | c40feaba77 | ||
|  | 5277f09dfc | ||
|  | 2d30521ab9 | ||
|  | 050fa43561 | ||
|  | f36f92f4da | ||
|  | 124f3bc67d | ||
|  | d304209a85 | ||
|  | 8367d3f3cb | ||
|  | c56d7d899d | ||
|  | ea5db8469e | ||
|  | 3811c567e7 | ||
|  | 8708d76425 | ||
|  | 054fe3cc40 | ||
|  | af0d11f244 | ||
|  | 9650885be9 | ||
|  | 596ac6e31f | ||
|  | 612ee37365 | ||
|  | 442c37b7a9 | ||
|  | 04bbe41330 | ||
|  | 8f84f57183 | ||
|  | 6a78740211 | ||
|  | c0e1a415fd | ||
|  | bf8f082a90 | ||
|  | 2f543a2142 | ||
|  | 7e5db8c930 | ||
|  | f7a211dcc8 | ||
|  | 845734773d | ||
|  | 347de4931c | ||
|  | 8829650513 | ||
|  | c73fae1e2e | ||
|  | 834bf069d2 | ||
|  | c06a9fa34f | ||
|  | 753fad4adc | ||
|  | 34814eb66e | ||
|  | 3a5bcd0326 | ||
|  | 99c2398bc6 | ||
|  | 28f1272870 | ||
|  | f18e3a2fc0 | ||
|  | c4c5dc27cb | ||
|  | 2caf182f37 | ||
|  | 43f244b6d5 | ||
|  | 1309b396d0 | ||
|  | ba61796458 | ||
|  | 3255fe7141 | ||
|  | e98b8e79ea | ||
|  | 196121c51b | ||
|  | 5269028951 | ||
|  | f7bc056b5a | ||
|  | a0f7198544 | ||
|  | dd8930684e | ||
|  | bdb186f3b0 | ||
|  | 64f9baa084 | ||
|  | b29231c040 | ||
|  | 6128bf07a9 | ||
|  | 2ec19e9558 | ||
|  | 9ddb6925bf | ||
|  | 12931e1c6e | ||
|  | 41c23b0da5 | ||
|  | 2578ab19e4 | ||
|  | d87ec897e9 | ||
|  | 3bd4bffb1c | ||
|  | c36b09a502 | ||
|  | 641eb10d34 | ||
|  | 955c5505e7 | ||
|  | 69319969de | ||
|  | a14292e848 | ||
|  | 5d678df64a | ||
|  | 8ca8cbe2bd | ||
|  | ba322d8209 | ||
|  | 2f38289b79 | ||
|  | f23a3ca699 | ||
|  | 77d2b106cc | ||
|  | c0e46412e9 | ||
|  | 0161353d7d | ||
|  | 2b4ecde2c8 | ||
|  | b3a286d69d | ||
|  | 467d3c9a0c | ||
|  | ad5747bad1 | ||
|  | d6eb66ed3c | ||
|  | 7f2a9f1b49 | ||
|  | 1e1896f2de | ||
|  | c831973366 | ||
|  | 1a2548d9e9 | ||
|  | 3900eec27c | ||
|  | a02d212638 | ||
|  | 9c91a8fa70 | ||
|  | 41469f335e | ||
|  | 67ce4f8820 | ||
|  | bc63d56cca | ||
|  | c893d70805 | ||
|  | 3ee6e02564 | ||
|  | e3aaace400 | ||
|  | 300753a069 | ||
|  | f13b88c616 | ||
|  | 60ca389c64 | ||
|  | 1b0f3919c1 | ||
|  | 6a348cf7d5 | ||
|  | 9e91449c8d | ||
|  | 25e5ebf382 | ||
|  | 7dfc356625 | ||
|  | 58ba6c0160 | ||
|  | f076b63821 | ||
|  | 12f0454cd6 | ||
|  | cd7342755f | ||
|  | 9bb8e0a3f9 | ||
|  | 1a6373ef39 | ||
|  | f6c24009be | ||
|  | d862042301 | ||
|  | 23d9ded655 | ||
|  | 4c1a017e69 | ||
|  | ee623d9247 | ||
|  | 330537d08a | ||
|  | 2cf0ecac7b | ||
|  | d200b11c7e | ||
|  | d0eca21021 | ||
|  | c1147c05e1 | ||
|  | 55898ad2cf | ||
|  | a465808592 | ||
|  | 5c4862bad4 | ||
|  | 995029a142 | ||
|  | a57b562cff | ||
|  | 531572578e | ||
|  | 3a4cca687f | ||
|  | 7d3d06a16c | ||
|  | c21b1fbeeb | ||
|  | f920ce295e | ||
|  | 7a7bd19c45 | ||
|  | 8f4b58d70e | ||
|  | 3fd45e03bf | ||
|  | 869b4aeff4 | ||
|  | cc9ca3ba6e | ||
|  | ea71034bd3 | ||
|  | 9fffd0469f | ||
|  | ae7773942e | ||
|  | 469a64cebf | ||
|  | aae3fdcfae | ||
|  | 6a66904f8e | ||
|  | 78271e3319 | ||
|  | 92bf0bcdf8 | ||
|  | 1283204917 | ||
|  | 6789defea9 | ||
|  | e77d2975af | 
							
								
								
									
										4
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -108,3 +108,7 @@ Enam Mijbah Noor | ||||
| David Luhmer | ||||
| Shaya Goldberg | ||||
| Paul Hartmann | ||||
| Frans de Jonge | ||||
| Robin de Rooij | ||||
| Ryan Schmidt | ||||
| Leslie P. Polzer | ||||
|   | ||||
| @@ -1,4 +1,6 @@ | ||||
| Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. | ||||
| **Please include the full output of youtube-dl when run with `-v`**. | ||||
|  | ||||
| The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. | ||||
|  | ||||
| Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist): | ||||
|  | ||||
| @@ -122,7 +124,7 @@ If you want to add support for a new site, you can follow this quick list (assum | ||||
| 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). | ||||
| 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. | ||||
| 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. | ||||
| 8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501). | ||||
| 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). | ||||
| 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: | ||||
|  | ||||
|         $ git add youtube_dl/extractor/__init__.py | ||||
|   | ||||
							
								
								
									
										7
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								Makefile
									
									
									
									
									
								
							| @@ -1,10 +1,7 @@ | ||||
| all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites | ||||
|  | ||||
| clean: | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp | ||||
|  | ||||
| cleanall: clean | ||||
| 	rm -f youtube-dl youtube-dl.exe | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe | ||||
|  | ||||
| PREFIX ?= /usr/local | ||||
| BINDIR ?= $(PREFIX)/bin | ||||
| @@ -46,7 +43,7 @@ test: | ||||
| ot: offlinetest | ||||
|  | ||||
| offlinetest: codetest | ||||
| 	nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists | ||||
| 	nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py | ||||
|  | ||||
| tar: youtube-dl.tar.gz | ||||
|  | ||||
|   | ||||
							
								
								
									
										83
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										83
									
								
								README.md
									
									
									
									
									
								
							| @@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      on Windows) | ||||
|     --flat-playlist                  Do not extract the videos of a playlist, | ||||
|                                      only list them. | ||||
|     --no-color                       Do not emit color codes in output. | ||||
|  | ||||
| ## Network Options: | ||||
|     --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in | ||||
| @@ -119,6 +120,23 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      COUNT views | ||||
|     --max-views COUNT                Do not download any videos with more than | ||||
|                                      COUNT views | ||||
|     --match-filter FILTER            (Experimental) Generic video filter. | ||||
|                                      Specify any key (see help for -o for a list | ||||
|                                      of available keys) to match if the key is | ||||
|                                      present, !key to check if the key is not | ||||
|                                      present, key > NUMBER (like "comment_count > | ||||
|                                      12", also works with >=, <, <=, !=, =) to | ||||
|                                      compare against a number, and & to require | ||||
|                                      multiple matches. Values which are not | ||||
|                                      known are excluded unless you put a | ||||
|                                      question mark (?) after the operator. For | ||||
|                                      example, to only match videos that have | ||||
|                                      been liked more than 100 times and disliked | ||||
|                                      less than 50 times (or the dislike | ||||
|                                      functionality is not available at the given | ||||
|                                      service), but who also have a description, | ||||
|                                      use  --match-filter "like_count > 100 & | ||||
|                                      dislike_count <? 50 & description" . | ||||
|     --no-playlist                    If the URL refers to a video and a | ||||
|                                      playlist, download only the video. | ||||
|     --age-limit YEARS                download only videos suitable for the given | ||||
| @@ -143,6 +161,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --playlist-reverse               Download playlist videos in reverse order | ||||
|     --xattr-set-filesize             (experimental) set file xattribute | ||||
|                                      ytdl.filesize with expected filesize | ||||
|     --hls-prefer-native              (experimental) Use the native HLS | ||||
|                                      downloader instead of ffmpeg. | ||||
|     --external-downloader COMMAND    (experimental) Use the specified external | ||||
|                                      downloader. Currently supports | ||||
|                                      aria2c,curl,wget | ||||
| @@ -292,18 +312,20 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      video results by putting a condition in | ||||
|                                      brackets, as in -f "best[height=720]" (or | ||||
|                                      -f "[filesize>10M]").  This works for | ||||
|                                      filesize, height, width, tbr, abr, vbr, and | ||||
|                                      fps and the comparisons <, <=, >, >=, =, != | ||||
|                                      . Formats for which the value is not known | ||||
|                                      are excluded unless you put a question mark | ||||
|                                      (?) after the operator. You can combine | ||||
|                                      format filters, so  -f "[height <=? | ||||
|                                      720][tbr>500]" selects up to 720p videos | ||||
|                                      (or videos where the height is not known) | ||||
|                                      with a bitrate of at least 500 KBit/s. By | ||||
|                                      default, youtube-dl will pick the best | ||||
|                                      quality. Use commas to download multiple | ||||
|                                      audio formats, such as -f | ||||
|                                      filesize, height, width, tbr, abr, vbr, | ||||
|                                      asr, and fps and the comparisons <, <=, >, | ||||
|                                      >=, =, != and for ext, acodec, vcodec, | ||||
|                                      container, and protocol and the comparisons | ||||
|                                      =, != . Formats for which the value is not | ||||
|                                      known are excluded unless you put a | ||||
|                                      question mark (?) after the operator. You | ||||
|                                      can combine format filters, so  -f "[height | ||||
|                                      <=? 720][tbr>500]" selects up to 720p | ||||
|                                      videos (or videos where the height is not | ||||
|                                      known) with a bitrate of at least 500 | ||||
|                                      KBit/s. By default, youtube-dl will pick | ||||
|                                      the best quality. Use commas to download | ||||
|                                      multiple audio formats, such as -f | ||||
|                                      136/137/mp4/bestvideo,140/m4a/bestaudio. | ||||
|                                      You can merge the video and audio of two | ||||
|                                      formats into a single file using -f <video- | ||||
| @@ -329,8 +351,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --all-subs                       downloads all the available subtitles of | ||||
|                                      the video | ||||
|     --list-subs                      lists all available subtitles for the video | ||||
|     --sub-format FORMAT              subtitle format (default=srt) ([sbv/vtt] | ||||
|                                      youtube only) | ||||
|     --sub-format FORMAT              subtitle format, accepts formats | ||||
|                                      preference, for example: "ass/srt/best" | ||||
|     --sub-lang LANGS                 languages of the subtitles to download | ||||
|                                      (optional) separated by commas, use IETF | ||||
|                                      language tags like 'en,pt' | ||||
| @@ -368,15 +390,18 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --add-metadata                   write metadata to the video file | ||||
|     --xattrs                         write metadata to the video file's xattrs | ||||
|                                      (using dublin core and xdg standards) | ||||
|     --fixup POLICY                   (experimental) Automatically correct known | ||||
|                                      faults of the file. One of never (do | ||||
|                                      nothing), warn (only emit a warning), | ||||
|                                      detect_or_warn(check whether we can do | ||||
|                                      anything about it, warn otherwise | ||||
|     --fixup POLICY                   Automatically correct known faults of the | ||||
|                                      file. One of never (do nothing), warn (only | ||||
|                                      emit a warning), detect_or_warn (the | ||||
|                                      default; fix file if we can, warn | ||||
|                                      otherwise) | ||||
|     --prefer-avconv                  Prefer avconv over ffmpeg for running the | ||||
|                                      postprocessors (default) | ||||
|     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the | ||||
|                                      postprocessors | ||||
|     --ffmpeg-location PATH           Location of the ffmpeg/avconv binary; | ||||
|                                      either the path to the binary or its | ||||
|                                      containing directory. | ||||
|     --exec CMD                       Execute a command on the file after | ||||
|                                      downloading, similar to find's -exec | ||||
|                                      syntax. Example: --exec 'adb push {} | ||||
| @@ -490,11 +515,15 @@ If you want to play the video on a machine that is not running youtube-dl, you c | ||||
|  | ||||
| ### ERROR: no fmt_url_map or conn information found in video info | ||||
|  | ||||
| youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. | ||||
| YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. | ||||
|  | ||||
| ### ERROR: unable to download video ### | ||||
|  | ||||
| youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. | ||||
| YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. | ||||
|  | ||||
| ### ExtractorError: Could not find JS function u'OF' | ||||
|  | ||||
| In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. | ||||
|  | ||||
| ### SyntaxError: Non-ASCII character ### | ||||
|  | ||||
| @@ -532,9 +561,17 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt | ||||
|     youtube-dl -- -wNyEUrxzFU | ||||
|     youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU" | ||||
|  | ||||
| ### Can you add support for this anime video site, or site which shows current movies for free? | ||||
|  | ||||
| As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion in youtube-dl. | ||||
|  | ||||
| A note on the service that they don't host the infringing content, but just link to those who do, is evidence that the service should **not** be included in youtube-dl. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization. | ||||
|  | ||||
| Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content. | ||||
|  | ||||
| ### How can I detect whether a given URL is supported by youtube-dl? | ||||
|  | ||||
| For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. | ||||
| For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug. | ||||
|  | ||||
| It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor. | ||||
|  | ||||
| @@ -728,7 +765,7 @@ In particular, every site support request issue should only pertain to services | ||||
|  | ||||
| ###  Is anyone going to need the feature? | ||||
|  | ||||
| Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. | ||||
| Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. | ||||
|  | ||||
| ###  Is your question about youtube-dl? | ||||
|  | ||||
|   | ||||
| @@ -45,12 +45,12 @@ for test in get_testcases(): | ||||
|  | ||||
|         RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST) | ||||
|  | ||||
|     if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] | ||||
|                    or test['info_dict']['age_limit'] != 18): | ||||
|     if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or | ||||
|                    test['info_dict']['age_limit'] != 18): | ||||
|         print('\nPotential missing age_limit check: {0}'.format(test['name'])) | ||||
|  | ||||
|     elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] | ||||
|                          and test['info_dict']['age_limit'] == 18): | ||||
|     elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and | ||||
|                          test['info_dict']['age_limit'] == 18): | ||||
|         print('\nPotential false negative: {0}'.format(test['name'])) | ||||
|  | ||||
|     else: | ||||
|   | ||||
| @@ -35,7 +35,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us | ||||
| if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi | ||||
|  | ||||
| /bin/echo -e "\n### First of all, testing..." | ||||
| make cleanall | ||||
| make clean | ||||
| if $skip_tests ; then | ||||
|     echo 'SKIPPING TESTS' | ||||
| else | ||||
| @@ -45,9 +45,9 @@ fi | ||||
| /bin/echo -e "\n### Changing version in version.py..." | ||||
| sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py | ||||
|  | ||||
| /bin/echo -e "\n### Committing README.md and youtube_dl/version.py..." | ||||
| make README.md | ||||
| git add README.md youtube_dl/version.py | ||||
| /bin/echo -e "\n### Committing documentation and youtube_dl/version.py..." | ||||
| make README.md CONTRIBUTING.md supportedsites | ||||
| git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py | ||||
| git commit -m "release $version" | ||||
|  | ||||
| /bin/echo -e "\n### Now tagging, signing and pushing..." | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| # Supported sites | ||||
|  - **1tv**: Первый канал | ||||
|  - **1up.com** | ||||
|  - **220.ro** | ||||
|  - **24video** | ||||
| @@ -9,16 +10,21 @@ | ||||
|  - **8tracks** | ||||
|  - **9gag** | ||||
|  - **abc.net.au** | ||||
|  - **Abc7News** | ||||
|  - **AcademicEarth:Course** | ||||
|  - **AddAnime** | ||||
|  - **AdobeTV** | ||||
|  - **AdultSwim** | ||||
|  - **Aftenposten** | ||||
|  - **Aftonbladet** | ||||
|  - **AlJazeera** | ||||
|  - **Allocine** | ||||
|  - **AlphaPorno** | ||||
|  - **anitube.se** | ||||
|  - **AnySex** | ||||
|  - **Aparat** | ||||
|  - **AppleDailyAnimationNews** | ||||
|  - **AppleDailyRealtimeNews** | ||||
|  - **AppleTrailers** | ||||
|  - **archive.org**: archive.org videos | ||||
|  - **ARD** | ||||
| @@ -30,8 +36,10 @@ | ||||
|  - **arte.tv:ddc** | ||||
|  - **arte.tv:embed** | ||||
|  - **arte.tv:future** | ||||
|  - **AtresPlayer** | ||||
|  - **ATTTechChannel** | ||||
|  - **audiomack** | ||||
|  - **AUEngine** | ||||
|  - **audiomack:album** | ||||
|  - **Azubu** | ||||
|  - **bambuser** | ||||
|  - **bambuser:channel** | ||||
| @@ -53,14 +61,19 @@ | ||||
|  - **Brightcove** | ||||
|  - **BuzzFeed** | ||||
|  - **BYUtv** | ||||
|  - **Camdemy** | ||||
|  - **CamdemyFolder** | ||||
|  - **Canal13cl** | ||||
|  - **canalc2.tv** | ||||
|  - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv | ||||
|  - **CBS** | ||||
|  - **CBSNews**: CBS News | ||||
|  - **CBSSports** | ||||
|  - **CeskaTelevize** | ||||
|  - **channel9**: Channel 9 | ||||
|  - **Chilloutzone** | ||||
|  - **chirbit** | ||||
|  - **chirbit:profile** | ||||
|  - **Cinchcast** | ||||
|  - **Cinemassacre** | ||||
|  - **clipfish** | ||||
| @@ -71,8 +84,10 @@ | ||||
|  - **cmt.com** | ||||
|  - **CNET** | ||||
|  - **CNN** | ||||
|  - **CNNArticle** | ||||
|  - **CNNBlogs** | ||||
|  - **CollegeHumor** | ||||
|  - **CollegeRama** | ||||
|  - **ComCarCoff** | ||||
|  - **ComedyCentral** | ||||
|  - **ComedyCentralShows**: The Daily Show / The Colbert Report | ||||
| @@ -82,32 +97,38 @@ | ||||
|  - **Crunchyroll** | ||||
|  - **crunchyroll:playlist** | ||||
|  - **CSpan**: C-SPAN | ||||
|  - **CtsNews** | ||||
|  - **culturebox.francetvinfo.fr** | ||||
|  - **dailymotion** | ||||
|  - **dailymotion:playlist** | ||||
|  - **dailymotion:user** | ||||
|  - **daum.net** | ||||
|  - **DBTV** | ||||
|  - **DctpTv** | ||||
|  - **DeezerPlaylist** | ||||
|  - **defense.gouv.fr** | ||||
|  - **Discovery** | ||||
|  - **divxstage**: DivxStage | ||||
|  - **Dotsub** | ||||
|  - **DRBonanza** | ||||
|  - **Dropbox** | ||||
|  - **DrTuber** | ||||
|  - **DRTV** | ||||
|  - **Dump** | ||||
|  - **dvtv**: http://video.aktualne.cz/ | ||||
|  - **EbaumsWorld** | ||||
|  - **EchoMsk** | ||||
|  - **eHow** | ||||
|  - **Einthusan** | ||||
|  - **eitb.tv** | ||||
|  - **EllenTV** | ||||
|  - **EllenTV:clips** | ||||
|  - **ElPais**: El País | ||||
|  - **Embedly** | ||||
|  - **EMPFlix** | ||||
|  - **Engadget** | ||||
|  - **Eporner** | ||||
|  - **EroProfile** | ||||
|  - **Escapist** | ||||
|  - **EveryonesMixtape** | ||||
|  - **exfm**: ex.fm | ||||
| @@ -120,7 +141,6 @@ | ||||
|  - **fernsehkritik.tv:postecke** | ||||
|  - **Firedrive** | ||||
|  - **Firstpost** | ||||
|  - **firsttv**: Видеоархив - Первый канал | ||||
|  - **Flickr** | ||||
|  - **Folketinget**: Folketinget (ft.dk; Danish parliament) | ||||
|  - **Foxgay** | ||||
| @@ -143,6 +163,7 @@ | ||||
|  - **GDCVault** | ||||
|  - **generic**: Generic downloader that works on some sites | ||||
|  - **GiantBomb** | ||||
|  - **Giga** | ||||
|  - **Glide**: Glide mobile video messages (glide.me) | ||||
|  - **Globo** | ||||
|  - **GodTube** | ||||
| @@ -153,9 +174,15 @@ | ||||
|  - **Grooveshark** | ||||
|  - **Groupon** | ||||
|  - **Hark** | ||||
|  - **HearThisAt** | ||||
|  - **Heise** | ||||
|  - **HellPorno** | ||||
|  - **Helsinki**: helsinki.fi | ||||
|  - **HentaiStigma** | ||||
|  - **HistoricFilms** | ||||
|  - **History** | ||||
|  - **hitbox** | ||||
|  - **hitbox:live** | ||||
|  - **HornBunny** | ||||
|  - **HostingBulk** | ||||
|  - **HotNewHipHop** | ||||
| @@ -167,6 +194,7 @@ | ||||
|  - **ign.com** | ||||
|  - **imdb**: Internet Movie Database trailers | ||||
|  - **imdb:list**: Internet Movie Database lists | ||||
|  - **Imgur** | ||||
|  - **Ina** | ||||
|  - **InfoQ** | ||||
|  - **Instagram** | ||||
| @@ -182,6 +210,7 @@ | ||||
|  - **jpopsuki.tv** | ||||
|  - **Jukebox** | ||||
|  - **Kankan** | ||||
|  - **Karaoketv** | ||||
|  - **keek** | ||||
|  - **KeezMovies** | ||||
|  - **KhanAcademy** | ||||
| @@ -195,6 +224,7 @@ | ||||
|  - **LiveLeak** | ||||
|  - **livestream** | ||||
|  - **livestream:original** | ||||
|  - **LnkGo** | ||||
|  - **lrt.lt** | ||||
|  - **lynda**: lynda.com videos | ||||
|  - **lynda:course**: lynda.com online courses | ||||
| @@ -203,6 +233,7 @@ | ||||
|  - **mailru**: Видео@Mail.Ru | ||||
|  - **Malemotion** | ||||
|  - **MDR** | ||||
|  - **media.ccc.de** | ||||
|  - **metacafe** | ||||
|  - **Metacritic** | ||||
|  - **Mgoon** | ||||
| @@ -235,6 +266,8 @@ | ||||
|  - **MySpass** | ||||
|  - **myvideo** | ||||
|  - **MyVidster** | ||||
|  - **n-tv.de** | ||||
|  - **NationalGeographic** | ||||
|  - **Naver** | ||||
|  - **NBA** | ||||
|  - **NBC** | ||||
| @@ -242,11 +275,16 @@ | ||||
|  - **ndr**: NDR.de - Mediathek | ||||
|  - **NDTV** | ||||
|  - **NerdCubedFeed** | ||||
|  - **Nerdist** | ||||
|  - **Netzkino** | ||||
|  - **Newgrounds** | ||||
|  - **Newstube** | ||||
|  - **NextMedia** | ||||
|  - **NextMediaActionNews** | ||||
|  - **nfb**: National Film Board of Canada | ||||
|  - **nfl.com** | ||||
|  - **nhl.com** | ||||
|  - **nhl.com:news**: NHL news | ||||
|  - **nhl.com:videocenter**: NHL videocenter category | ||||
|  - **niconico**: ニコニコ動画 | ||||
|  - **NiconicoPlaylist** | ||||
| @@ -257,18 +295,22 @@ | ||||
|  - **Nowness** | ||||
|  - **nowvideo**: NowVideo | ||||
|  - **npo.nl** | ||||
|  - **npo.nl:live** | ||||
|  - **npo.nl:radio** | ||||
|  - **npo.nl:radio:fragment** | ||||
|  - **NRK** | ||||
|  - **NRKTV** | ||||
|  - **NTV** | ||||
|  - **ntv.ru** | ||||
|  - **Nuvid** | ||||
|  - **NYTimes** | ||||
|  - **ocw.mit.edu** | ||||
|  - **OktoberfestTV** | ||||
|  - **on.aol.com** | ||||
|  - **Ooyala** | ||||
|  - **OpenFilm** | ||||
|  - **orf:fm4**: radio FM4 | ||||
|  - **orf:oe1**: Radio Österreich 1 | ||||
|  - **orf:tvthek**: ORF TVthek | ||||
|  - **ORFFM4**: radio FM4 | ||||
|  - **parliamentlive.tv**: UK parliament videos | ||||
|  - **Patreon** | ||||
|  - **PBS** | ||||
| @@ -283,13 +325,16 @@ | ||||
|  - **podomatic** | ||||
|  - **PornHd** | ||||
|  - **PornHub** | ||||
|  - **PornHubPlaylist** | ||||
|  - **Pornotube** | ||||
|  - **PornoXO** | ||||
|  - **PromptFile** | ||||
|  - **prosiebensat1**: ProSiebenSat.1 Digital | ||||
|  - **Pyvideo** | ||||
|  - **QuickVid** | ||||
|  - **R7** | ||||
|  - **radio.de** | ||||
|  - **radiobremen** | ||||
|  - **radiofrance** | ||||
|  - **Rai** | ||||
|  - **RBMARadio** | ||||
| @@ -300,8 +345,10 @@ | ||||
|  - **RottenTomatoes** | ||||
|  - **Roxwel** | ||||
|  - **RTBF** | ||||
|  - **Rte** | ||||
|  - **rtl.nl**: rtl.nl and rtlxl.nl | ||||
|  - **RTL2** | ||||
|  - **RTLnow** | ||||
|  - **rtlxl.nl** | ||||
|  - **RTP** | ||||
|  - **RTS**: RTS.ch | ||||
|  - **rtve.es:alacarta**: RTVE a la carta | ||||
| @@ -309,9 +356,11 @@ | ||||
|  - **RUHD** | ||||
|  - **rutube**: Rutube videos | ||||
|  - **rutube:channel**: Rutube channels | ||||
|  - **rutube:embed**: Rutube embedded videos | ||||
|  - **rutube:movie**: Rutube movies | ||||
|  - **rutube:person**: Rutube person videos | ||||
|  - **RUTV**: RUTV.RU | ||||
|  - **Sandia**: Sandia National Laboratories | ||||
|  - **Sapo**: SAPO Vídeos | ||||
|  - **savefrom.net** | ||||
|  - **SBS**: sbs.com.au | ||||
| @@ -339,7 +388,8 @@ | ||||
|  - **soundcloud:playlist** | ||||
|  - **soundcloud:set** | ||||
|  - **soundcloud:user** | ||||
|  - **Soundgasm** | ||||
|  - **soundgasm** | ||||
|  - **soundgasm:profile** | ||||
|  - **southpark.cc.com** | ||||
|  - **southpark.de** | ||||
|  - **Space** | ||||
| @@ -351,12 +401,14 @@ | ||||
|  - **Sport5** | ||||
|  - **SportBox** | ||||
|  - **SportDeutschland** | ||||
|  - **SRMediathek**: Süddeutscher Rundfunk | ||||
|  - **SRMediathek**: Saarländischer Rundfunk | ||||
|  - **stanfordoc**: Stanford Open ClassRoom | ||||
|  - **Steam** | ||||
|  - **streamcloud.eu** | ||||
|  - **StreamCZ** | ||||
|  - **StreetVoice** | ||||
|  - **SunPorno** | ||||
|  - **SVTPlay** | ||||
|  - **SWRMediathek** | ||||
|  - **Syfy** | ||||
|  - **SztvHu** | ||||
| @@ -375,7 +427,9 @@ | ||||
|  - **TeleBruxelles** | ||||
|  - **telecinco.es** | ||||
|  - **TeleMB** | ||||
|  - **TeleTask** | ||||
|  - **TenPlay** | ||||
|  - **TestTube** | ||||
|  - **TF1** | ||||
|  - **TheOnion** | ||||
|  - **ThePlatform** | ||||
| @@ -401,10 +455,19 @@ | ||||
|  - **Turbo** | ||||
|  - **Tutv** | ||||
|  - **tv.dfb.de** | ||||
|  - **TV4**: tv4.se and tv4play.se | ||||
|  - **tvigle**: Интернет-телевидение Tvigle.ru | ||||
|  - **tvp.pl** | ||||
|  - **tvp.pl:Series** | ||||
|  - **TVPlay**: TV3Play and related services | ||||
|  - **Twitch** | ||||
|  - **Tweakers** | ||||
|  - **twitch:bookmarks** | ||||
|  - **twitch:chapter** | ||||
|  - **twitch:past_broadcasts** | ||||
|  - **twitch:profile** | ||||
|  - **twitch:stream** | ||||
|  - **twitch:video** | ||||
|  - **twitch:vod** | ||||
|  - **Ubu** | ||||
|  - **udemy** | ||||
|  - **udemy:course** | ||||
| @@ -433,6 +496,8 @@ | ||||
|  - **videoweed**: VideoWeed | ||||
|  - **Vidme** | ||||
|  - **Vidzi** | ||||
|  - **vier** | ||||
|  - **vier:videos** | ||||
|  - **viki** | ||||
|  - **vimeo** | ||||
|  - **vimeo:album** | ||||
| @@ -460,11 +525,13 @@ | ||||
|  - **WDR** | ||||
|  - **wdr:mobile** | ||||
|  - **WDRMaus**: Sendung mit der Maus | ||||
|  - **WebOfStories** | ||||
|  - **Weibo** | ||||
|  - **Wimp** | ||||
|  - **Wistia** | ||||
|  - **WorldStarHipHop** | ||||
|  - **wrzuta.pl** | ||||
|  - **WSJ**: Wall Street Journal | ||||
|  - **XBef** | ||||
|  - **XboxClips** | ||||
|  - **XHamster** | ||||
| @@ -472,8 +539,11 @@ | ||||
|  - **XNXX** | ||||
|  - **XTube** | ||||
|  - **XTubeUser**: XTube user profile | ||||
|  - **Xuite** | ||||
|  - **XVideos** | ||||
|  - **XXXYMovies** | ||||
|  - **Yahoo**: Yahoo screen and movies | ||||
|  - **Yam** | ||||
|  - **YesJapan** | ||||
|  - **Ynet** | ||||
|  - **YouJizz** | ||||
| @@ -491,9 +561,9 @@ | ||||
|  - **youtube:search_url**: YouTube.com search URLs | ||||
|  - **youtube:show**: YouTube.com (multi-season) shows | ||||
|  - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) | ||||
|  - **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks") | ||||
|  - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) | ||||
|  - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) | ||||
|  - **Zapiks** | ||||
|  - **ZDF** | ||||
|  - **ZDFChannel** | ||||
|  - **zingmp3:album**: mp3.zing.vn albums | ||||
|   | ||||
| @@ -3,4 +3,4 @@ universal = True | ||||
|  | ||||
| [flake8] | ||||
| exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git | ||||
| ignore = E501 | ||||
| ignore = E402,E501,E731 | ||||
|   | ||||
| @@ -103,6 +103,26 @@ def expect_info_dict(self, got_dict, expected_dict): | ||||
|             self.assertTrue( | ||||
|                 match_rex.match(got), | ||||
|                 'field %s (value: %r) should match %r' % (info_field, got, match_str)) | ||||
|         elif isinstance(expected, compat_str) and expected.startswith('startswith:'): | ||||
|             got = got_dict.get(info_field) | ||||
|             start_str = expected[len('startswith:'):] | ||||
|             self.assertTrue( | ||||
|                 isinstance(got, compat_str), | ||||
|                 'Expected a %s object, but got %s for field %s' % ( | ||||
|                     compat_str.__name__, type(got).__name__, info_field)) | ||||
|             self.assertTrue( | ||||
|                 got.startswith(start_str), | ||||
|                 'field %s (value: %r) should start with %r' % (info_field, got, start_str)) | ||||
|         elif isinstance(expected, compat_str) and expected.startswith('contains:'): | ||||
|             got = got_dict.get(info_field) | ||||
|             contains_str = expected[len('contains:'):] | ||||
|             self.assertTrue( | ||||
|                 isinstance(got, compat_str), | ||||
|                 'Expected a %s object, but got %s for field %s' % ( | ||||
|                     compat_str.__name__, type(got).__name__, info_field)) | ||||
|             self.assertTrue( | ||||
|                 contains_str in got, | ||||
|                 'field %s (value: %r) should contain %r' % (info_field, got, contains_str)) | ||||
|         elif isinstance(expected, type): | ||||
|             got = got_dict.get(info_field) | ||||
|             self.assertTrue(isinstance(got, expected), | ||||
| @@ -153,12 +173,14 @@ def expect_info_dict(self, got_dict, expected_dict): | ||||
|             info_dict_str += ''.join( | ||||
|                 '    %s: %s,\n' % (_repr(k), _repr(v)) | ||||
|                 for k, v in test_info_dict.items() if k not in missing_keys) | ||||
|             info_dict_str += '\n' | ||||
|  | ||||
|             if info_dict_str: | ||||
|                 info_dict_str += '\n' | ||||
|         info_dict_str += ''.join( | ||||
|             '    %s: %s,\n' % (_repr(k), _repr(test_info_dict[k])) | ||||
|             for k in missing_keys) | ||||
|         write_string( | ||||
|             '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr) | ||||
|             '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr) | ||||
|         self.assertFalse( | ||||
|             missing_keys, | ||||
|             'Missing keys in test definition: %s' % ( | ||||
|   | ||||
| @@ -28,7 +28,7 @@ | ||||
|     "retries": 10,  | ||||
|     "simulate": false,  | ||||
|     "subtitleslang": null,  | ||||
|     "subtitlesformat": "srt", | ||||
|     "subtitlesformat": "best", | ||||
|     "test": true,  | ||||
|     "updatetime": true,  | ||||
|     "usenetrc": false,  | ||||
| @@ -39,5 +39,6 @@ | ||||
|     "writesubtitles": false, | ||||
|     "allsubtitles": false, | ||||
|     "listssubtitles": false, | ||||
|     "socket_timeout": 20 | ||||
|     "socket_timeout": 20, | ||||
|     "fixup": "never" | ||||
| } | ||||
|   | ||||
| @@ -13,6 +13,7 @@ import copy | ||||
| from test.helper import FakeYDL, assertRegexpMatches | ||||
| from youtube_dl import YoutubeDL | ||||
| from youtube_dl.extractor import YoutubeIE | ||||
| from youtube_dl.postprocessor.common import PostProcessor | ||||
|  | ||||
|  | ||||
| class YDL(FakeYDL): | ||||
| @@ -336,6 +337,65 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'G') | ||||
|  | ||||
|     def test_subtitles(self): | ||||
|         def s_formats(lang, autocaption=False): | ||||
|             return [{ | ||||
|                 'ext': ext, | ||||
|                 'url': 'http://localhost/video.%s.%s' % (lang, ext), | ||||
|                 '_auto': autocaption, | ||||
|             } for ext in ['vtt', 'srt', 'ass']] | ||||
|         subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es']) | ||||
|         auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es']) | ||||
|         info_dict = { | ||||
|             'id': 'test', | ||||
|             'title': 'Test', | ||||
|             'url': 'http://localhost/video.mp4', | ||||
|             'subtitles': subtitles, | ||||
|             'automatic_captions': auto_captions, | ||||
|             'extractor': 'TEST', | ||||
|         } | ||||
|  | ||||
|         def get_info(params={}): | ||||
|             params.setdefault('simulate', True) | ||||
|             ydl = YDL(params) | ||||
|             ydl.report_warning = lambda *args, **kargs: None | ||||
|             return ydl.process_video_result(info_dict, download=False) | ||||
|  | ||||
|         result = get_info() | ||||
|         self.assertFalse(result.get('requested_subtitles')) | ||||
|         self.assertEqual(result['subtitles'], subtitles) | ||||
|         self.assertEqual(result['automatic_captions'], auto_captions) | ||||
|  | ||||
|         result = get_info({'writesubtitles': True}) | ||||
|         subs = result['requested_subtitles'] | ||||
|         self.assertTrue(subs) | ||||
|         self.assertEqual(set(subs.keys()), set(['en'])) | ||||
|         self.assertTrue(subs['en'].get('data') is None) | ||||
|         self.assertEqual(subs['en']['ext'], 'ass') | ||||
|  | ||||
|         result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'}) | ||||
|         subs = result['requested_subtitles'] | ||||
|         self.assertEqual(subs['en']['ext'], 'srt') | ||||
|  | ||||
|         result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']}) | ||||
|         subs = result['requested_subtitles'] | ||||
|         self.assertTrue(subs) | ||||
|         self.assertEqual(set(subs.keys()), set(['es', 'fr'])) | ||||
|  | ||||
|         result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) | ||||
|         subs = result['requested_subtitles'] | ||||
|         self.assertTrue(subs) | ||||
|         self.assertEqual(set(subs.keys()), set(['es', 'pt'])) | ||||
|         self.assertFalse(subs['es']['_auto']) | ||||
|         self.assertTrue(subs['pt']['_auto']) | ||||
|  | ||||
|         result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) | ||||
|         subs = result['requested_subtitles'] | ||||
|         self.assertTrue(subs) | ||||
|         self.assertEqual(set(subs.keys()), set(['es', 'pt'])) | ||||
|         self.assertTrue(subs['es']['_auto']) | ||||
|         self.assertTrue(subs['pt']['_auto']) | ||||
|  | ||||
|     def test_add_extra_info(self): | ||||
|         test_dict = { | ||||
|             'extractor': 'Foo', | ||||
| @@ -370,5 +430,35 @@ class TestFormatSelection(unittest.TestCase): | ||||
|             'vbr': 10, | ||||
|         }), '^\s*10k$') | ||||
|  | ||||
|     def test_postprocessors(self): | ||||
|         filename = 'post-processor-testfile.mp4' | ||||
|         audiofile = filename + '.mp3' | ||||
|  | ||||
|         class SimplePP(PostProcessor): | ||||
|             def run(self, info): | ||||
|                 with open(audiofile, 'wt') as f: | ||||
|                     f.write('EXAMPLE') | ||||
|                 info['filepath'] | ||||
|                 return False, info | ||||
|  | ||||
|         def run_pp(params): | ||||
|             with open(filename, 'wt') as f: | ||||
|                 f.write('EXAMPLE') | ||||
|             ydl = YoutubeDL(params) | ||||
|             ydl.add_post_processor(SimplePP()) | ||||
|             ydl.post_process(filename, {'filepath': filename}) | ||||
|  | ||||
|         run_pp({'keepvideo': True}) | ||||
|         self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) | ||||
|         self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) | ||||
|         os.unlink(filename) | ||||
|         os.unlink(audiofile) | ||||
|  | ||||
|         run_pp({'keepvideo': False}) | ||||
|         self.assertFalse(os.path.exists(filename), '%s exists' % filename) | ||||
|         self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) | ||||
|         os.unlink(audiofile) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -70,6 +70,8 @@ class TestJSInterpreter(unittest.TestCase): | ||||
|         self.assertEqual(jsi.call_function('f'), -11) | ||||
|  | ||||
|     def test_comments(self): | ||||
|         'Skipping: Not yet fully implemented' | ||||
|         return | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { | ||||
|             var x = /* 1 + */ 2; | ||||
| @@ -80,6 +82,15 @@ class TestJSInterpreter(unittest.TestCase): | ||||
|         ''') | ||||
|         self.assertEqual(jsi.call_function('x'), 52) | ||||
|  | ||||
|         jsi = JSInterpreter(''' | ||||
|         function f() { | ||||
|             var x = "/*"; | ||||
|             var y = 1 /* comment */ + 2; | ||||
|             return y; | ||||
|         } | ||||
|         ''') | ||||
|         self.assertEqual(jsi.call_function('f'), 3) | ||||
|  | ||||
|     def test_precedence(self): | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { | ||||
|   | ||||
| @@ -18,6 +18,14 @@ from youtube_dl.extractor import ( | ||||
|     VimeoIE, | ||||
|     WallaIE, | ||||
|     CeskaTelevizeIE, | ||||
|     LyndaIE, | ||||
|     NPOIE, | ||||
|     ComedyCentralIE, | ||||
|     NRKTVIE, | ||||
|     RaiIE, | ||||
|     VikiIE, | ||||
|     ThePlatformIE, | ||||
|     RTVEALaCartaIE, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -27,42 +35,38 @@ class BaseTestSubtitles(unittest.TestCase): | ||||
|  | ||||
|     def setUp(self): | ||||
|         self.DL = FakeYDL() | ||||
|         self.ie = self.IE(self.DL) | ||||
|         self.ie = self.IE() | ||||
|         self.DL.add_info_extractor(self.ie) | ||||
|  | ||||
|     def getInfoDict(self): | ||||
|         info_dict = self.ie.extract(self.url) | ||||
|         info_dict = self.DL.extract_info(self.url, download=False) | ||||
|         return info_dict | ||||
|  | ||||
|     def getSubtitles(self): | ||||
|         info_dict = self.getInfoDict() | ||||
|         return info_dict['subtitles'] | ||||
|         subtitles = info_dict['requested_subtitles'] | ||||
|         if not subtitles: | ||||
|             return subtitles | ||||
|         for sub_info in subtitles.values(): | ||||
|             if sub_info.get('data') is None: | ||||
|                 uf = self.DL.urlopen(sub_info['url']) | ||||
|                 sub_info['data'] = uf.read().decode('utf-8') | ||||
|         return dict((l, sub_info['data']) for l, sub_info in subtitles.items()) | ||||
|  | ||||
|  | ||||
| class TestYoutubeSubtitles(BaseTestSubtitles): | ||||
|     url = 'QRS8MkLhQmM' | ||||
|     IE = YoutubeIE | ||||
|  | ||||
|     def test_youtube_no_writesubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = False | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|  | ||||
|     def test_youtube_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260') | ||||
|  | ||||
|     def test_youtube_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['it'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') | ||||
|  | ||||
|     def test_youtube_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles.keys()), 13) | ||||
|         self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260') | ||||
|         self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') | ||||
|         for lang in ['it', 'fr', 'de']: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
|     def test_youtube_subtitles_sbv_format(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
| @@ -76,12 +80,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') | ||||
|  | ||||
|     def test_youtube_list_subtitles(self): | ||||
|         self.DL.expect_warning('Video doesn\'t have automatic captions') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_youtube_automatic_captions(self): | ||||
|         self.url = '8YoUxe5ncPo' | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
| @@ -103,55 +101,22 @@ class TestYoutubeSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|  | ||||
|     def test_youtube_multiple_langs(self): | ||||
|         self.url = 'QRS8MkLhQmM' | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['it', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|         self.assertFalse(subtitles) | ||||
|  | ||||
|  | ||||
| class TestDailymotionSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.dailymotion.com/video/xczg00' | ||||
|     IE = DailymotionIE | ||||
|  | ||||
|     def test_no_writesubtitles(self): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|  | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') | ||||
|  | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['fr'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles.keys()), 5) | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_automatic_captions(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
|         self.DL.params['subtitleslang'] = ['en'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) == 0) | ||||
|         self.assertTrue(len(subtitles.keys()) >= 6) | ||||
|         self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') | ||||
|         self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') | ||||
|         for lang in ['es', 'fr', 'de']: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
|     def test_nosubtitles(self): | ||||
|         self.DL.expect_warning('video doesn\'t have subtitles') | ||||
| @@ -159,61 +124,21 @@ class TestDailymotionSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|  | ||||
|     def test_multiple_langs(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['es', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|         self.assertFalse(subtitles) | ||||
|  | ||||
|  | ||||
| class TestTedSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' | ||||
|     IE = TEDIE | ||||
|  | ||||
|     def test_no_writesubtitles(self): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|  | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') | ||||
|  | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['fr'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) >= 28) | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_automatic_captions(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
|         self.DL.params['subtitleslang'] = ['en'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) == 0) | ||||
|  | ||||
|     def test_multiple_langs(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['es', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|         self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') | ||||
|         self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') | ||||
|         for lang in ['es', 'fr', 'de']: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
|  | ||||
| @@ -221,14 +146,7 @@ class TestBlipTVSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://blip.tv/a/a-6603250' | ||||
|     IE = BlipTVIE | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
| @@ -240,39 +158,13 @@ class TestVimeoSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://vimeo.com/76979871' | ||||
|     IE = VimeoIE | ||||
|  | ||||
|     def test_no_writesubtitles(self): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|  | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4') | ||||
|  | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['fr'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr'])) | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_automatic_captions(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
|         self.DL.params['subtitleslang'] = ['en'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) == 0) | ||||
|         self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') | ||||
|         self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') | ||||
|  | ||||
|     def test_nosubtitles(self): | ||||
|         self.DL.expect_warning('video doesn\'t have subtitles') | ||||
| @@ -280,27 +172,13 @@ class TestVimeoSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|  | ||||
|     def test_multiple_langs(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['es', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|         self.assertFalse(subtitles) | ||||
|  | ||||
|  | ||||
| class TestWallaSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://vod.walla.co.il/movie/2705958/the-yes-men' | ||||
|     IE = WallaIE | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
| @@ -315,26 +193,20 @@ class TestWallaSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|         self.assertFalse(subtitles) | ||||
|  | ||||
|  | ||||
| class TestCeskaTelevizeSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' | ||||
|     IE = CeskaTelevizeIE | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['cs'])) | ||||
|         self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4') | ||||
|         self.assertTrue(len(subtitles['cs']) > 20000) | ||||
|  | ||||
|     def test_nosubtitles(self): | ||||
|         self.DL.expect_warning('video doesn\'t have subtitles') | ||||
| @@ -342,7 +214,110 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|         self.assertFalse(subtitles) | ||||
|  | ||||
|  | ||||
| class TestLyndaSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html' | ||||
|     IE = LyndaIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['en'])) | ||||
|         self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7') | ||||
|  | ||||
|  | ||||
| class TestNPOSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860' | ||||
|     IE = NPOIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['nl'])) | ||||
|         self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4') | ||||
|  | ||||
|  | ||||
| class TestMTVSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother' | ||||
|     IE = ComedyCentralIE | ||||
|  | ||||
|     def getInfoDict(self): | ||||
|         return super(TestMTVSubtitles, self).getInfoDict()['entries'][0] | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['en'])) | ||||
|         self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65') | ||||
|  | ||||
|  | ||||
| class TestNRKSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1' | ||||
|     IE = NRKTVIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['no'])) | ||||
|         self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a') | ||||
|  | ||||
|  | ||||
| class TestRaiSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html' | ||||
|     IE = RaiIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['it'])) | ||||
|         self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a') | ||||
|  | ||||
|  | ||||
| class TestVikiSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.viki.com/videos/1060846v-punch-episode-18' | ||||
|     IE = VikiIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['en'])) | ||||
|         self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a') | ||||
|  | ||||
|  | ||||
| class TestThePlatformSubtitles(BaseTestSubtitles): | ||||
|     # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/ | ||||
|     # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/) | ||||
|     url = 'theplatform:JFUjUE1_ehvq' | ||||
|     IE = ThePlatformIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['en'])) | ||||
|         self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b') | ||||
|  | ||||
|  | ||||
| class TestRtveSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/' | ||||
|     IE = RTVEALaCartaIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         print('Skipping, only available from Spain') | ||||
|         return | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['es'])) | ||||
|         self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca') | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|   | ||||
| @@ -34,8 +34,8 @@ def _make_testfunc(testfile): | ||||
|     def test_func(self): | ||||
|         as_file = os.path.join(TEST_DIR, testfile) | ||||
|         swf_file = os.path.join(TEST_DIR, test_id + '.swf') | ||||
|         if ((not os.path.exists(swf_file)) | ||||
|                 or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): | ||||
|         if ((not os.path.exists(swf_file)) or | ||||
|                 os.path.getmtime(swf_file) < os.path.getmtime(as_file)): | ||||
|             # Recompile | ||||
|             try: | ||||
|                 subprocess.check_call([ | ||||
|   | ||||
| @@ -53,6 +53,7 @@ from youtube_dl.utils import ( | ||||
|     version_tuple, | ||||
|     xpath_with_ns, | ||||
|     render_table, | ||||
|     match_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -84,6 +85,8 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual( | ||||
|             sanitize_filename('New World record at 0:12:34'), | ||||
|             'New World record at 0_12_34') | ||||
|         self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') | ||||
|         self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') | ||||
|  | ||||
|         forbidden = '"\0\\/' | ||||
|         for fc in forbidden: | ||||
| @@ -156,6 +159,9 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual( | ||||
|             unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False), | ||||
|             '20141126') | ||||
|         self.assertEqual( | ||||
|             unified_strdate('2/2/2015 6:47:40 PM', day_first=False), | ||||
|             '20150202') | ||||
|  | ||||
|     def test_find_xpath_attr(self): | ||||
|         testxml = '''<root> | ||||
| @@ -238,6 +244,8 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(parse_duration('5 s'), 5) | ||||
|         self.assertEqual(parse_duration('3 min'), 180) | ||||
|         self.assertEqual(parse_duration('2.5 hours'), 9000) | ||||
|         self.assertEqual(parse_duration('02:03:04'), 7384) | ||||
|         self.assertEqual(parse_duration('01:02:03:04'), 93784) | ||||
|  | ||||
|     def test_fix_xml_ampersands(self): | ||||
|         self.assertEqual( | ||||
| @@ -364,6 +372,10 @@ class TestUtil(unittest.TestCase): | ||||
|             "playlist":[{"controls":{"all":null}}] | ||||
|         }''') | ||||
|  | ||||
|         inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"' | ||||
|         json_code = js_to_json(inp) | ||||
|         self.assertEqual(json.loads(json_code), json.loads(inp)) | ||||
|  | ||||
|     def test_js_to_json_edgecases(self): | ||||
|         on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") | ||||
|         self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) | ||||
| @@ -371,6 +383,16 @@ class TestUtil(unittest.TestCase): | ||||
|         on = js_to_json('{"abc": true}') | ||||
|         self.assertEqual(json.loads(on), {'abc': True}) | ||||
|  | ||||
|         # Ignore JavaScript code as well | ||||
|         on = js_to_json('''{ | ||||
|             "x": 1, | ||||
|             y: "a", | ||||
|             z: some.code | ||||
|         }''') | ||||
|         d = json.loads(on) | ||||
|         self.assertEqual(d['x'], 1) | ||||
|         self.assertEqual(d['y'], 'a') | ||||
|  | ||||
|     def test_clean_html(self): | ||||
|         self.assertEqual(clean_html('a:\nb'), 'a: b') | ||||
|         self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"') | ||||
| @@ -444,6 +466,37 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') | ||||
|             '123  4\n' | ||||
|             '9999 51') | ||||
|  | ||||
|     def test_match_str(self): | ||||
|         self.assertRaises(ValueError, match_str, 'xy>foobar', {}) | ||||
|         self.assertFalse(match_str('xy', {'x': 1200})) | ||||
|         self.assertTrue(match_str('!xy', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x', {'x': 1200})) | ||||
|         self.assertFalse(match_str('!x', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x', {'x': 0})) | ||||
|         self.assertFalse(match_str('x>0', {'x': 0})) | ||||
|         self.assertFalse(match_str('x>0', {})) | ||||
|         self.assertTrue(match_str('x>?0', {})) | ||||
|         self.assertTrue(match_str('x>1K', {'x': 1200})) | ||||
|         self.assertFalse(match_str('x>2K', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) | ||||
|         self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) | ||||
|         self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) | ||||
|         self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) | ||||
|         self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) | ||||
|         self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 90, 'description': 'foo'})) | ||||
|         self.assertTrue(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'description': 'foo'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'dislike_count': 60, 'description': 'foo'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'dislike_count': 10})) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -8,11 +8,11 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
|  | ||||
| import io | ||||
| import re | ||||
| import string | ||||
|  | ||||
| from test.helper import FakeYDL | ||||
| from youtube_dl.extractor import YoutubeIE | ||||
| from youtube_dl.compat import compat_str, compat_urlretrieve | ||||
|  | ||||
| @@ -64,6 +64,12 @@ _TESTS = [ | ||||
|         'js', | ||||
|         '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', | ||||
|         '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' | ||||
|     ), | ||||
|     ( | ||||
|         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', | ||||
|         'js', | ||||
|         '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', | ||||
|         '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', | ||||
|     ) | ||||
| ] | ||||
|  | ||||
| @@ -88,7 +94,8 @@ def make_tfunc(url, stype, sig_input, expected_sig): | ||||
|         if not os.path.exists(fn): | ||||
|             compat_urlretrieve(url, fn) | ||||
|  | ||||
|         ie = YoutubeIE() | ||||
|         ydl = FakeYDL() | ||||
|         ie = YoutubeIE(ydl) | ||||
|         if stype == 'js': | ||||
|             with io.open(fn, encoding='utf-8') as testf: | ||||
|                 jscode = testf.read() | ||||
|   | ||||
| @@ -154,7 +154,7 @@ class YoutubeDL(object): | ||||
|     allsubtitles:      Downloads all the subtitles of the video | ||||
|                        (requires writesubtitles or writeautomaticsub) | ||||
|     listsubtitles:     Lists all available subtitles for the video | ||||
|     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt) | ||||
|     subtitlesformat:   The format code for subtitles | ||||
|     subtitleslangs:    List of languages of the subtitles to download | ||||
|     keepvideo:         Keep the video file after post-processing | ||||
|     daterange:         A DateRange object, download only if the upload_date is in the range. | ||||
| @@ -199,18 +199,25 @@ class YoutubeDL(object): | ||||
|                        postprocessor. | ||||
|     progress_hooks:    A list of functions that get called on download | ||||
|                        progress, with a dictionary with the entries | ||||
|                        * status: One of "downloading" and "finished". | ||||
|                        * status: One of "downloading", "error", or "finished". | ||||
|                                  Check this first and ignore unknown values. | ||||
|  | ||||
|                        If status is one of "downloading" or "finished", the | ||||
|                        If status is one of "downloading", or "finished", the | ||||
|                        following properties may also be present: | ||||
|                        * filename: The final filename (always present) | ||||
|                        * tmpfilename: The filename we're currently writing to | ||||
|                        * downloaded_bytes: Bytes on disk | ||||
|                        * total_bytes: Size of the whole file, None if unknown | ||||
|                        * tmpfilename: The filename we're currently writing to | ||||
|                        * total_bytes_estimate: Guess of the eventual file size, | ||||
|                                                None if unavailable. | ||||
|                        * elapsed: The number of seconds since download started. | ||||
|                        * eta: The estimated time in seconds, None if unknown | ||||
|                        * speed: The download speed in bytes/second, None if | ||||
|                                 unknown | ||||
|                        * fragment_index: The counter of the currently | ||||
|                                          downloaded video fragment. | ||||
|                        * fragment_count: The number of fragments (= individual | ||||
|                                          files that will be merged) | ||||
|  | ||||
|                        Progress hooks are guaranteed to be called at least once | ||||
|                        (with status "finished") if the download is successful. | ||||
| @@ -225,10 +232,19 @@ class YoutubeDL(object): | ||||
|     call_home:         Boolean, true iff we are allowed to contact the | ||||
|                        youtube-dl servers for debugging. | ||||
|     sleep_interval:    Number of seconds to sleep before each download. | ||||
|     external_downloader:  Executable of the external downloader to call. | ||||
|     listformats:       Print an overview of available video formats and exit. | ||||
|     list_thumbnails:   Print a table of all thumbnails and exit. | ||||
|     match_filter:      A function that gets called with the info_dict of | ||||
|                        every video. | ||||
|                        If it returns a message, the video is ignored. | ||||
|                        If it returns None, the video is downloaded. | ||||
|                        match_filter_func in utils.py is one example of this. | ||||
|     no_color:          Do not emit color codes in output. | ||||
|  | ||||
|     The following options determine which downloader is picked: | ||||
|     external_downloader: Executable of the external downloader to call. | ||||
|                        None or unset for standard (built-in) downloader. | ||||
|     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv. | ||||
|  | ||||
|     The following parameters are not used by YoutubeDL itself, they are used by | ||||
|     the FileDownloader: | ||||
| @@ -292,8 +308,8 @@ class YoutubeDL(object): | ||||
|                     raise | ||||
|  | ||||
|         if (sys.version_info >= (3,) and sys.platform != 'win32' and | ||||
|                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] | ||||
|                 and not params.get('restrictfilenames', False)): | ||||
|                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and | ||||
|                 not params.get('restrictfilenames', False)): | ||||
|             # On Python 3, the Unicode filesystem API will throw errors (#1474) | ||||
|             self.report_warning( | ||||
|                 'Assuming --restrict-filenames since file system encoding ' | ||||
| @@ -485,7 +501,7 @@ class YoutubeDL(object): | ||||
|         else: | ||||
|             if self.params.get('no_warnings'): | ||||
|                 return | ||||
|             if self._err_file.isatty() and os.name != 'nt': | ||||
|             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': | ||||
|                 _msg_header = '\033[0;33mWARNING:\033[0m' | ||||
|             else: | ||||
|                 _msg_header = 'WARNING:' | ||||
| @@ -497,7 +513,7 @@ class YoutubeDL(object): | ||||
|         Do the same as trouble, but prefixes the message with 'ERROR:', colored | ||||
|         in red if stderr is a tty file. | ||||
|         ''' | ||||
|         if self._err_file.isatty() and os.name != 'nt': | ||||
|         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': | ||||
|             _msg_header = '\033[0;31mERROR:\033[0m' | ||||
|         else: | ||||
|             _msg_header = 'ERROR:' | ||||
| @@ -554,7 +570,7 @@ class YoutubeDL(object): | ||||
|             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') | ||||
|             return None | ||||
|  | ||||
|     def _match_entry(self, info_dict): | ||||
|     def _match_entry(self, info_dict, incomplete): | ||||
|         """ Returns None iff the file should be downloaded """ | ||||
|  | ||||
|         video_title = info_dict.get('title', info_dict.get('id', 'video')) | ||||
| @@ -583,9 +599,17 @@ class YoutubeDL(object): | ||||
|             if max_views is not None and view_count > max_views: | ||||
|                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) | ||||
|         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): | ||||
|             return 'Skipping "%s" because it is age restricted' % title | ||||
|             return 'Skipping "%s" because it is age restricted' % video_title | ||||
|         if self.in_download_archive(info_dict): | ||||
|             return '%s has already been recorded in archive' % video_title | ||||
|  | ||||
|         if not incomplete: | ||||
|             match_filter = self.params.get('match_filter') | ||||
|             if match_filter is not None: | ||||
|                 ret = match_filter(info_dict) | ||||
|                 if ret is not None: | ||||
|                     return ret | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -779,7 +803,7 @@ class YoutubeDL(object): | ||||
|                     'extractor_key': ie_result['extractor_key'], | ||||
|                 } | ||||
|  | ||||
|                 reason = self._match_entry(entry) | ||||
|                 reason = self._match_entry(entry, incomplete=True) | ||||
|                 if reason is not None: | ||||
|                     self.to_screen('[download] ' + reason) | ||||
|                     continue | ||||
| @@ -826,27 +850,44 @@ class YoutubeDL(object): | ||||
|             '!=': operator.ne, | ||||
|         } | ||||
|         operator_rex = re.compile(r'''(?x)\s*\[ | ||||
|             (?P<key>width|height|tbr|abr|vbr|filesize|fps) | ||||
|             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps) | ||||
|             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* | ||||
|             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) | ||||
|             \]$ | ||||
|             ''' % '|'.join(map(re.escape, OPERATORS.keys()))) | ||||
|         m = operator_rex.search(format_spec) | ||||
|         if m: | ||||
|             try: | ||||
|                 comparison_value = int(m.group('value')) | ||||
|             except ValueError: | ||||
|                 comparison_value = parse_filesize(m.group('value')) | ||||
|                 if comparison_value is None: | ||||
|                     comparison_value = parse_filesize(m.group('value') + 'B') | ||||
|                 if comparison_value is None: | ||||
|                     raise ValueError( | ||||
|                         'Invalid value %r in format specification %r' % ( | ||||
|                             m.group('value'), format_spec)) | ||||
|             op = OPERATORS[m.group('op')] | ||||
|  | ||||
|         if not m: | ||||
|             STR_OPERATORS = { | ||||
|                 '=': operator.eq, | ||||
|                 '!=': operator.ne, | ||||
|             } | ||||
|             str_operator_rex = re.compile(r'''(?x)\s*\[ | ||||
|                 \s*(?P<key>ext|acodec|vcodec|container|protocol) | ||||
|                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)? | ||||
|                 \s*(?P<value>[a-zA-Z0-9_-]+) | ||||
|                 \s*\]$ | ||||
|                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) | ||||
|             m = str_operator_rex.search(format_spec) | ||||
|             if m: | ||||
|                 comparison_value = m.group('value') | ||||
|                 op = STR_OPERATORS[m.group('op')] | ||||
|  | ||||
|         if not m: | ||||
|             raise ValueError('Invalid format specification %r' % format_spec) | ||||
|  | ||||
|         try: | ||||
|             comparison_value = int(m.group('value')) | ||||
|         except ValueError: | ||||
|             comparison_value = parse_filesize(m.group('value')) | ||||
|             if comparison_value is None: | ||||
|                 comparison_value = parse_filesize(m.group('value') + 'B') | ||||
|             if comparison_value is None: | ||||
|                 raise ValueError( | ||||
|                     'Invalid value %r in format specification %r' % ( | ||||
|                         m.group('value'), format_spec)) | ||||
|         op = OPERATORS[m.group('op')] | ||||
|  | ||||
|         def _filter(f): | ||||
|             actual_value = f.get(m.group('key')) | ||||
|             if actual_value is None: | ||||
| @@ -920,27 +961,9 @@ class YoutubeDL(object): | ||||
|         return res | ||||
|  | ||||
|     def _calc_cookies(self, info_dict): | ||||
|         class _PseudoRequest(object): | ||||
|             def __init__(self, url): | ||||
|                 self.url = url | ||||
|                 self.headers = {} | ||||
|                 self.unverifiable = False | ||||
|  | ||||
|             def add_unredirected_header(self, k, v): | ||||
|                 self.headers[k] = v | ||||
|  | ||||
|             def get_full_url(self): | ||||
|                 return self.url | ||||
|  | ||||
|             def is_unverifiable(self): | ||||
|                 return self.unverifiable | ||||
|  | ||||
|             def has_header(self, h): | ||||
|                 return h in self.headers | ||||
|  | ||||
|         pr = _PseudoRequest(info_dict['url']) | ||||
|         pr = compat_urllib_request.Request(info_dict['url']) | ||||
|         self.cookiejar.add_cookie_header(pr) | ||||
|         return pr.headers.get('Cookie') | ||||
|         return pr.get_header('Cookie') | ||||
|  | ||||
|     def process_video_result(self, info_dict, download=True): | ||||
|         assert info_dict.get('_type', 'video') == 'video' | ||||
| @@ -964,9 +987,11 @@ class YoutubeDL(object): | ||||
|             thumbnails.sort(key=lambda t: ( | ||||
|                 t.get('preference'), t.get('width'), t.get('height'), | ||||
|                 t.get('id'), t.get('url'))) | ||||
|             for t in thumbnails: | ||||
|             for i, t in enumerate(thumbnails): | ||||
|                 if 'width' in t and 'height' in t: | ||||
|                     t['resolution'] = '%dx%d' % (t['width'], t['height']) | ||||
|                 if t.get('id') is None: | ||||
|                     t['id'] = '%d' % i | ||||
|  | ||||
|         if thumbnails and 'thumbnail' not in info_dict: | ||||
|             info_dict['thumbnail'] = thumbnails[-1]['url'] | ||||
| @@ -983,6 +1008,15 @@ class YoutubeDL(object): | ||||
|                 info_dict['timestamp']) | ||||
|             info_dict['upload_date'] = upload_date.strftime('%Y%m%d') | ||||
|  | ||||
|         if self.params.get('listsubtitles', False): | ||||
|             if 'automatic_captions' in info_dict: | ||||
|                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions') | ||||
|             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles') | ||||
|             return | ||||
|         info_dict['requested_subtitles'] = self.process_subtitles( | ||||
|             info_dict['id'], info_dict.get('subtitles'), | ||||
|             info_dict.get('automatic_captions')) | ||||
|  | ||||
|         # These extractors handle format selection themselves | ||||
|         if info_dict['extractor'] in ['Youku']: | ||||
|             if download: | ||||
| @@ -1074,7 +1108,8 @@ class YoutubeDL(object): | ||||
|                                 else self.params['merge_output_format']) | ||||
|                             selected_format = { | ||||
|                                 'requested_formats': formats_info, | ||||
|                                 'format': rf, | ||||
|                                 'format': '%s+%s' % (formats_info[0].get('format'), | ||||
|                                                      formats_info[1].get('format')), | ||||
|                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'), | ||||
|                                                         formats_info[1].get('format_id')), | ||||
|                                 'width': formats_info[0].get('width'), | ||||
| @@ -1110,6 +1145,55 @@ class YoutubeDL(object): | ||||
|         info_dict.update(formats_to_download[-1]) | ||||
|         return info_dict | ||||
|  | ||||
|     def process_subtitles(self, video_id, normal_subtitles, automatic_captions): | ||||
|         """Select the requested subtitles and their format""" | ||||
|         available_subs = {} | ||||
|         if normal_subtitles and self.params.get('writesubtitles'): | ||||
|             available_subs.update(normal_subtitles) | ||||
|         if automatic_captions and self.params.get('writeautomaticsub'): | ||||
|             for lang, cap_info in automatic_captions.items(): | ||||
|                 if lang not in available_subs: | ||||
|                     available_subs[lang] = cap_info | ||||
|  | ||||
|         if (not self.params.get('writesubtitles') and not | ||||
|                 self.params.get('writeautomaticsub') or not | ||||
|                 available_subs): | ||||
|             return None | ||||
|  | ||||
|         if self.params.get('allsubtitles', False): | ||||
|             requested_langs = available_subs.keys() | ||||
|         else: | ||||
|             if self.params.get('subtitleslangs', False): | ||||
|                 requested_langs = self.params.get('subtitleslangs') | ||||
|             elif 'en' in available_subs: | ||||
|                 requested_langs = ['en'] | ||||
|             else: | ||||
|                 requested_langs = [list(available_subs.keys())[0]] | ||||
|  | ||||
|         formats_query = self.params.get('subtitlesformat', 'best') | ||||
|         formats_preference = formats_query.split('/') if formats_query else [] | ||||
|         subs = {} | ||||
|         for lang in requested_langs: | ||||
|             formats = available_subs.get(lang) | ||||
|             if formats is None: | ||||
|                 self.report_warning('%s subtitles not available for %s' % (lang, video_id)) | ||||
|                 continue | ||||
|             for ext in formats_preference: | ||||
|                 if ext == 'best': | ||||
|                     f = formats[-1] | ||||
|                     break | ||||
|                 matches = list(filter(lambda f: f['ext'] == ext, formats)) | ||||
|                 if matches: | ||||
|                     f = matches[-1] | ||||
|                     break | ||||
|             else: | ||||
|                 f = formats[-1] | ||||
|                 self.report_warning( | ||||
|                     'No subtitle format found matching "%s" for language %s, ' | ||||
|                     'using %s' % (formats_query, lang, f['ext'])) | ||||
|             subs[lang] = f | ||||
|         return subs | ||||
|  | ||||
|     def process_info(self, info_dict): | ||||
|         """Process a single resolved IE result.""" | ||||
|  | ||||
| @@ -1130,7 +1214,7 @@ class YoutubeDL(object): | ||||
|         if 'format' not in info_dict: | ||||
|             info_dict['format'] = info_dict['ext'] | ||||
|  | ||||
|         reason = self._match_entry(info_dict) | ||||
|         reason = self._match_entry(info_dict, incomplete=False) | ||||
|         if reason is not None: | ||||
|             self.to_screen('[download] ' + reason) | ||||
|             return | ||||
| @@ -1212,15 +1296,22 @@ class YoutubeDL(object): | ||||
|         subtitles_are_requested = any([self.params.get('writesubtitles', False), | ||||
|                                        self.params.get('writeautomaticsub')]) | ||||
|  | ||||
|         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: | ||||
|         if subtitles_are_requested and info_dict.get('requested_subtitles'): | ||||
|             # subtitles download errors are already managed as troubles in relevant IE | ||||
|             # that way it will silently go on when used with unsupporting IE | ||||
|             subtitles = info_dict['subtitles'] | ||||
|             sub_format = self.params.get('subtitlesformat', 'srt') | ||||
|             for sub_lang in subtitles.keys(): | ||||
|                 sub = subtitles[sub_lang] | ||||
|                 if sub is None: | ||||
|                     continue | ||||
|             subtitles = info_dict['requested_subtitles'] | ||||
|             for sub_lang, sub_info in subtitles.items(): | ||||
|                 sub_format = sub_info['ext'] | ||||
|                 if sub_info.get('data') is not None: | ||||
|                     sub_data = sub_info['data'] | ||||
|                 else: | ||||
|                     try: | ||||
|                         uf = self.urlopen(sub_info['url']) | ||||
|                         sub_data = uf.read().decode('utf-8') | ||||
|                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                         self.report_warning('Unable to download subtitle for "%s": %s' % | ||||
|                                             (sub_lang, compat_str(err))) | ||||
|                         continue | ||||
|                 try: | ||||
|                     sub_filename = subtitles_filename(filename, sub_lang, sub_format) | ||||
|                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): | ||||
| @@ -1228,7 +1319,7 @@ class YoutubeDL(object): | ||||
|                     else: | ||||
|                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename) | ||||
|                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||
|                             subfile.write(sub) | ||||
|                             subfile.write(sub_data) | ||||
|                 except (OSError, IOError): | ||||
|                     self.report_error('Cannot write subtitles file ' + sub_filename) | ||||
|                     return | ||||
| @@ -1261,7 +1352,7 @@ class YoutubeDL(object): | ||||
|                     downloaded = [] | ||||
|                     success = True | ||||
|                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) | ||||
|                     if not merger._executable: | ||||
|                     if not merger.available: | ||||
|                         postprocessors = [] | ||||
|                         self.report_warning('You have requested multiple ' | ||||
|                                             'formats but ffmpeg or avconv are not installed.' | ||||
| @@ -1340,8 +1431,8 @@ class YoutubeDL(object): | ||||
|         """Download a given list of URLs.""" | ||||
|         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) | ||||
|         if (len(url_list) > 1 and | ||||
|                 '%' not in outtmpl | ||||
|                 and self.params.get('max_downloads') != 1): | ||||
|                 '%' not in outtmpl and | ||||
|                 self.params.get('max_downloads') != 1): | ||||
|             raise SameFileError(outtmpl) | ||||
|  | ||||
|         for url in url_list: | ||||
| @@ -1508,30 +1599,18 @@ class YoutubeDL(object): | ||||
|         return res | ||||
|  | ||||
|     def list_formats(self, info_dict): | ||||
|         def line(format, idlen=20): | ||||
|             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( | ||||
|                 format['format_id'], | ||||
|                 format['ext'], | ||||
|                 self.format_resolution(format), | ||||
|                 self._format_note(format), | ||||
|             )) | ||||
|  | ||||
|         formats = info_dict.get('formats', [info_dict]) | ||||
|         idlen = max(len('format code'), | ||||
|                     max(len(f['format_id']) for f in formats)) | ||||
|         formats_s = [ | ||||
|             line(f, idlen) for f in formats | ||||
|         table = [ | ||||
|             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] | ||||
|             for f in formats | ||||
|             if f.get('preference') is None or f['preference'] >= -1000] | ||||
|         if len(formats) > 1: | ||||
|             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' | ||||
|             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' | ||||
|             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' | ||||
|  | ||||
|         header_line = line({ | ||||
|             'format_id': 'format code', 'ext': 'extension', | ||||
|             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) | ||||
|         header_line = ['format code', 'extension', 'resolution', 'note'] | ||||
|         self.to_screen( | ||||
|             '[info] Available formats for %s:\n%s\n%s' % | ||||
|             (info_dict['id'], header_line, '\n'.join(formats_s))) | ||||
|             '[info] Available formats for %s:\n%s' % | ||||
|             (info_dict['id'], render_table(header_line, table))) | ||||
|  | ||||
|     def list_thumbnails(self, info_dict): | ||||
|         thumbnails = info_dict.get('thumbnails') | ||||
| @@ -1550,6 +1629,17 @@ class YoutubeDL(object): | ||||
|             ['ID', 'width', 'height', 'URL'], | ||||
|             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) | ||||
|  | ||||
|     def list_subtitles(self, video_id, subtitles, name='subtitles'): | ||||
|         if not subtitles: | ||||
|             self.to_screen('%s has no %s' % (video_id, name)) | ||||
|             return | ||||
|         self.to_screen( | ||||
|             'Available %s for %s:' % (name, video_id)) | ||||
|         self.to_screen(render_table( | ||||
|             ['Language', 'formats'], | ||||
|             [[lang, ', '.join(f['ext'] for f in reversed(formats))] | ||||
|                 for lang, formats in subtitles.items()])) | ||||
|  | ||||
|     def urlopen(self, req): | ||||
|         """ Start an HTTP download """ | ||||
|  | ||||
| @@ -1611,7 +1701,7 @@ class YoutubeDL(object): | ||||
|         self._write_string('[debug] Python version %s - %s\n' % ( | ||||
|             platform.python_version(), platform_name())) | ||||
|  | ||||
|         exe_versions = FFmpegPostProcessor.get_versions() | ||||
|         exe_versions = FFmpegPostProcessor.get_versions(self) | ||||
|         exe_versions['rtmpdump'] = rtmpdump_version() | ||||
|         exe_str = ', '.join( | ||||
|             '%s %s' % (exe, v) | ||||
|   | ||||
| @@ -23,9 +23,10 @@ from .compat import ( | ||||
| ) | ||||
| from .utils import ( | ||||
|     DateRange, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     decodeOption, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     DownloadError, | ||||
|     match_filter_func, | ||||
|     MaxDownloadsReached, | ||||
|     preferredencoding, | ||||
|     read_batch_urls, | ||||
| @@ -188,14 +189,14 @@ def _real_main(argv=None): | ||||
|         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) | ||||
|         if opts.outtmpl is not None: | ||||
|             opts.outtmpl = opts.outtmpl.decode(preferredencoding()) | ||||
|     outtmpl = ((opts.outtmpl is not None and opts.outtmpl) | ||||
|                or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') | ||||
|                or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') | ||||
|                or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') | ||||
|                or (opts.usetitle and '%(title)s-%(id)s.%(ext)s') | ||||
|                or (opts.useid and '%(id)s.%(ext)s') | ||||
|                or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') | ||||
|                or DEFAULT_OUTTMPL) | ||||
|     outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or | ||||
|                (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or | ||||
|                (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or | ||||
|                (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or | ||||
|                (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or | ||||
|                (opts.useid and '%(id)s.%(ext)s') or | ||||
|                (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or | ||||
|                DEFAULT_OUTTMPL) | ||||
|     if not os.path.splitext(outtmpl)[1] and opts.extractaudio: | ||||
|         parser.error('Cannot download a video and extract audio into the same' | ||||
|                      ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' | ||||
| @@ -225,7 +226,6 @@ def _real_main(argv=None): | ||||
|     if opts.embedsubtitles: | ||||
|         postprocessors.append({ | ||||
|             'key': 'FFmpegEmbedSubtitle', | ||||
|             'subtitlesformat': opts.subtitlesformat, | ||||
|         }) | ||||
|     if opts.xattrs: | ||||
|         postprocessors.append({'key': 'XAttrMetadata'}) | ||||
| @@ -247,6 +247,9 @@ def _real_main(argv=None): | ||||
|             xattr  # Confuse flake8 | ||||
|         except ImportError: | ||||
|             parser.error('setting filesize xattr requested but python-xattr is not available') | ||||
|     match_filter = ( | ||||
|         None if opts.match_filter is None | ||||
|         else match_filter_func(opts.match_filter)) | ||||
|  | ||||
|     ydl_opts = { | ||||
|         'usenetrc': opts.usenetrc, | ||||
| @@ -344,6 +347,10 @@ def _real_main(argv=None): | ||||
|         'list_thumbnails': opts.list_thumbnails, | ||||
|         'playlist_items': opts.playlist_items, | ||||
|         'xattr_set_filesize': opts.xattr_set_filesize, | ||||
|         'match_filter': match_filter, | ||||
|         'no_color': opts.no_color, | ||||
|         'ffmpeg_location': opts.ffmpeg_location, | ||||
|         'hls_prefer_native': opts.hls_prefer_native, | ||||
|     } | ||||
|  | ||||
|     with YoutubeDL(ydl_opts) as ydl: | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] | ||||
|  | ||||
| import base64 | ||||
| from math import ceil | ||||
|  | ||||
| @@ -329,3 +327,5 @@ def inc(data): | ||||
|             data[i] = data[i] + 1 | ||||
|             break | ||||
|     return data | ||||
|  | ||||
| __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] | ||||
|   | ||||
| @@ -34,6 +34,9 @@ def get_suitable_downloader(info_dict, params={}): | ||||
|         if ed.supports(info_dict): | ||||
|             return ed | ||||
|  | ||||
|     if protocol == 'm3u8' and params.get('hls_prefer_native'): | ||||
|         return NativeHlsFD | ||||
|  | ||||
|     return PROTOCOL_MAP.get(protocol, HttpFD) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division, unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| @@ -54,6 +54,7 @@ class FileDownloader(object): | ||||
|         self.ydl = ydl | ||||
|         self._progress_hooks = [] | ||||
|         self.params = params | ||||
|         self.add_progress_hook(self.report_progress) | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_seconds(seconds): | ||||
| @@ -226,42 +227,64 @@ class FileDownloader(object): | ||||
|             self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) | ||||
|         self.to_console_title('youtube-dl ' + msg) | ||||
|  | ||||
|     def report_progress(self, percent, data_len_str, speed, eta): | ||||
|         """Report download progress.""" | ||||
|         if self.params.get('noprogress', False): | ||||
|     def report_progress(self, s): | ||||
|         if s['status'] == 'finished': | ||||
|             if self.params.get('noprogress', False): | ||||
|                 self.to_screen('[download] Download completed') | ||||
|             else: | ||||
|                 s['_total_bytes_str'] = format_bytes(s['total_bytes']) | ||||
|                 if s.get('elapsed') is not None: | ||||
|                     s['_elapsed_str'] = self.format_seconds(s['elapsed']) | ||||
|                     msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s' | ||||
|                 else: | ||||
|                     msg_template = '100%% of %(_total_bytes_str)s' | ||||
|                 self._report_progress_status( | ||||
|                     msg_template % s, is_last_line=True) | ||||
|  | ||||
|         if self.params.get('noprogress'): | ||||
|             return | ||||
|         if eta is not None: | ||||
|             eta_str = self.format_eta(eta) | ||||
|         else: | ||||
|             eta_str = 'Unknown ETA' | ||||
|         if percent is not None: | ||||
|             percent_str = self.format_percent(percent) | ||||
|         else: | ||||
|             percent_str = 'Unknown %' | ||||
|         speed_str = self.format_speed(speed) | ||||
|  | ||||
|         msg = ('%s of %s at %s ETA %s' % | ||||
|                (percent_str, data_len_str, speed_str, eta_str)) | ||||
|         self._report_progress_status(msg) | ||||
|  | ||||
|     def report_progress_live_stream(self, downloaded_data_len, speed, elapsed): | ||||
|         if self.params.get('noprogress', False): | ||||
|         if s['status'] != 'downloading': | ||||
|             return | ||||
|         downloaded_str = format_bytes(downloaded_data_len) | ||||
|         speed_str = self.format_speed(speed) | ||||
|         elapsed_str = FileDownloader.format_seconds(elapsed) | ||||
|         msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str) | ||||
|         self._report_progress_status(msg) | ||||
|  | ||||
|     def report_finish(self, data_len_str, tot_time): | ||||
|         """Report download finished.""" | ||||
|         if self.params.get('noprogress', False): | ||||
|             self.to_screen('[download] Download completed') | ||||
|         if s.get('eta') is not None: | ||||
|             s['_eta_str'] = self.format_eta(s['eta']) | ||||
|         else: | ||||
|             self._report_progress_status( | ||||
|                 ('100%% of %s in %s' % | ||||
|                  (data_len_str, self.format_seconds(tot_time))), | ||||
|                 is_last_line=True) | ||||
|             s['_eta_str'] = 'Unknown ETA' | ||||
|  | ||||
|         if s.get('total_bytes') and s.get('downloaded_bytes') is not None: | ||||
|             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) | ||||
|         elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: | ||||
|             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) | ||||
|         else: | ||||
|             if s.get('downloaded_bytes') == 0: | ||||
|                 s['_percent_str'] = self.format_percent(0) | ||||
|             else: | ||||
|                 s['_percent_str'] = 'Unknown %' | ||||
|  | ||||
|         if s.get('speed') is not None: | ||||
|             s['_speed_str'] = self.format_speed(s['speed']) | ||||
|         else: | ||||
|             s['_speed_str'] = 'Unknown speed' | ||||
|  | ||||
|         if s.get('total_bytes') is not None: | ||||
|             s['_total_bytes_str'] = format_bytes(s['total_bytes']) | ||||
|             msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' | ||||
|         elif s.get('total_bytes_estimate') is not None: | ||||
|             s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) | ||||
|             msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' | ||||
|         else: | ||||
|             if s.get('downloaded_bytes') is not None: | ||||
|                 s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) | ||||
|                 if s.get('elapsed'): | ||||
|                     s['_elapsed_str'] = self.format_seconds(s['elapsed']) | ||||
|                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' | ||||
|                 else: | ||||
|                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' | ||||
|             else: | ||||
|                 msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' | ||||
|  | ||||
|         self._report_progress_status(msg_template % s) | ||||
|  | ||||
|     def report_resuming_byte(self, resume_len): | ||||
|         """Report attempt to resume at given byte.""" | ||||
| @@ -288,14 +311,14 @@ class FileDownloader(object): | ||||
|         """ | ||||
|  | ||||
|         nooverwrites_and_exists = ( | ||||
|             self.params.get('nooverwrites', False) | ||||
|             and os.path.exists(encodeFilename(filename)) | ||||
|             self.params.get('nooverwrites', False) and | ||||
|             os.path.exists(encodeFilename(filename)) | ||||
|         ) | ||||
|  | ||||
|         continuedl_and_exists = ( | ||||
|             self.params.get('continuedl', False) | ||||
|             and os.path.isfile(encodeFilename(filename)) | ||||
|             and not self.params.get('nopart', False) | ||||
|             self.params.get('continuedl', False) and | ||||
|             os.path.isfile(encodeFilename(filename)) and | ||||
|             not self.params.get('nopart', False) | ||||
|         ) | ||||
|  | ||||
|         # Check file already present | ||||
|   | ||||
| @@ -45,6 +45,12 @@ class ExternalFD(FileDownloader): | ||||
|     def supports(cls, info_dict): | ||||
|         return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') | ||||
|  | ||||
|     def _source_address(self, command_option): | ||||
|         source_address = self.params.get('source_address') | ||||
|         if source_address is None: | ||||
|             return [] | ||||
|         return [command_option, source_address] | ||||
|  | ||||
|     def _call_downloader(self, tmpfilename, info_dict): | ||||
|         """ Either overwrite this or implement _make_cmd """ | ||||
|         cmd = self._make_cmd(tmpfilename, info_dict) | ||||
| @@ -69,9 +75,10 @@ class ExternalFD(FileDownloader): | ||||
|  | ||||
| class CurlFD(ExternalFD): | ||||
|     def _make_cmd(self, tmpfilename, info_dict): | ||||
|         cmd = [self.exe, '-o', tmpfilename] | ||||
|         cmd = [self.exe, '--location', '-o', tmpfilename] | ||||
|         for key, val in info_dict['http_headers'].items(): | ||||
|             cmd += ['--header', '%s: %s' % (key, val)] | ||||
|         cmd += self._source_address('--interface') | ||||
|         cmd += ['--', info_dict['url']] | ||||
|         return cmd | ||||
|  | ||||
| @@ -81,6 +88,7 @@ class WgetFD(ExternalFD): | ||||
|         cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] | ||||
|         for key, val in info_dict['http_headers'].items(): | ||||
|             cmd += ['--header', '%s: %s' % (key, val)] | ||||
|         cmd += self._source_address('--bind-address') | ||||
|         cmd += ['--', info_dict['url']] | ||||
|         return cmd | ||||
|  | ||||
| @@ -96,6 +104,7 @@ class Aria2cFD(ExternalFD): | ||||
|         cmd += ['--out', os.path.basename(tmpfilename)] | ||||
|         for key, val in info_dict['http_headers'].items(): | ||||
|             cmd += ['--header', '%s: %s' % (key, val)] | ||||
|         cmd += self._source_address('--interface') | ||||
|         cmd += ['--', info_dict['url']] | ||||
|         return cmd | ||||
|  | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division, unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import io | ||||
| @@ -15,7 +15,6 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     struct_pack, | ||||
|     struct_unpack, | ||||
|     format_bytes, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     xpath_text, | ||||
| @@ -252,17 +251,6 @@ class F4mFD(FileDownloader): | ||||
|         requested_bitrate = info_dict.get('tbr') | ||||
|         self.to_screen('[download] Downloading f4m manifest') | ||||
|         manifest = self.ydl.urlopen(man_url).read() | ||||
|         self.report_destination(filename) | ||||
|         http_dl = HttpQuietDownloader( | ||||
|             self.ydl, | ||||
|             { | ||||
|                 'continuedl': True, | ||||
|                 'quiet': True, | ||||
|                 'noprogress': True, | ||||
|                 'ratelimit': self.params.get('ratelimit', None), | ||||
|                 'test': self.params.get('test', False), | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         doc = etree.fromstring(manifest) | ||||
|         formats = [(int(f.attrib.get('bitrate', -1)), f) | ||||
| @@ -298,39 +286,65 @@ class F4mFD(FileDownloader): | ||||
|         # For some akamai manifests we'll need to add a query to the fragment url | ||||
|         akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) | ||||
|  | ||||
|         self.report_destination(filename) | ||||
|         http_dl = HttpQuietDownloader( | ||||
|             self.ydl, | ||||
|             { | ||||
|                 'continuedl': True, | ||||
|                 'quiet': True, | ||||
|                 'noprogress': True, | ||||
|                 'ratelimit': self.params.get('ratelimit', None), | ||||
|                 'test': self.params.get('test', False), | ||||
|             } | ||||
|         ) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') | ||||
|  | ||||
|         write_flv_header(dest_stream) | ||||
|         write_metadata_tag(dest_stream, metadata) | ||||
|  | ||||
|         # This dict stores the download progress, it's updated by the progress | ||||
|         # hook | ||||
|         state = { | ||||
|             'status': 'downloading', | ||||
|             'downloaded_bytes': 0, | ||||
|             'frag_counter': 0, | ||||
|             'frag_index': 0, | ||||
|             'frag_count': total_frags, | ||||
|             'filename': filename, | ||||
|             'tmpfilename': tmpfilename, | ||||
|         } | ||||
|         start = time.time() | ||||
|  | ||||
|         def frag_progress_hook(status): | ||||
|             frag_total_bytes = status.get('total_bytes', 0) | ||||
|             estimated_size = (state['downloaded_bytes'] + | ||||
|                               (total_frags - state['frag_counter']) * frag_total_bytes) | ||||
|             if status['status'] == 'finished': | ||||
|         def frag_progress_hook(s): | ||||
|             if s['status'] not in ('downloading', 'finished'): | ||||
|                 return | ||||
|  | ||||
|             frag_total_bytes = s.get('total_bytes', 0) | ||||
|             if s['status'] == 'finished': | ||||
|                 state['downloaded_bytes'] += frag_total_bytes | ||||
|                 state['frag_counter'] += 1 | ||||
|                 progress = self.calc_percent(state['frag_counter'], total_frags) | ||||
|                 byte_counter = state['downloaded_bytes'] | ||||
|                 state['frag_index'] += 1 | ||||
|  | ||||
|             estimated_size = ( | ||||
|                 (state['downloaded_bytes'] + frag_total_bytes) / | ||||
|                 (state['frag_index'] + 1) * total_frags) | ||||
|             time_now = time.time() | ||||
|             state['total_bytes_estimate'] = estimated_size | ||||
|             state['elapsed'] = time_now - start | ||||
|  | ||||
|             if s['status'] == 'finished': | ||||
|                 progress = self.calc_percent(state['frag_index'], total_frags) | ||||
|             else: | ||||
|                 frag_downloaded_bytes = status['downloaded_bytes'] | ||||
|                 byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes | ||||
|                 frag_downloaded_bytes = s['downloaded_bytes'] | ||||
|                 frag_progress = self.calc_percent(frag_downloaded_bytes, | ||||
|                                                   frag_total_bytes) | ||||
|                 progress = self.calc_percent(state['frag_counter'], total_frags) | ||||
|                 progress = self.calc_percent(state['frag_index'], total_frags) | ||||
|                 progress += frag_progress / float(total_frags) | ||||
|  | ||||
|             eta = self.calc_eta(start, time.time(), estimated_size, byte_counter) | ||||
|             self.report_progress(progress, format_bytes(estimated_size), | ||||
|                                  status.get('speed'), eta) | ||||
|                 state['eta'] = self.calc_eta( | ||||
|                     start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) | ||||
|                 state['speed'] = s.get('speed') | ||||
|             self._hook_progress(state) | ||||
|  | ||||
|         http_dl.add_progress_hook(frag_progress_hook) | ||||
|  | ||||
|         frags_filenames = [] | ||||
| @@ -354,8 +368,8 @@ class F4mFD(FileDownloader): | ||||
|             frags_filenames.append(frag_filename) | ||||
|  | ||||
|         dest_stream.close() | ||||
|         self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) | ||||
|  | ||||
|         elapsed = time.time() - start | ||||
|         self.try_rename(tmpfilename, filename) | ||||
|         for frag_file in frags_filenames: | ||||
|             os.remove(frag_file) | ||||
| @@ -366,6 +380,7 @@ class F4mFD(FileDownloader): | ||||
|             'total_bytes': fsize, | ||||
|             'filename': filename, | ||||
|             'status': 'finished', | ||||
|             'elapsed': elapsed, | ||||
|         }) | ||||
|  | ||||
|         return True | ||||
|   | ||||
| @@ -23,15 +23,14 @@ class HlsFD(FileDownloader): | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|  | ||||
|         ffpp = FFmpegPostProcessor(downloader=self) | ||||
|         program = ffpp._executable | ||||
|         if program is None: | ||||
|         if not ffpp.available: | ||||
|             self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') | ||||
|             return False | ||||
|         ffpp.check_version() | ||||
|  | ||||
|         args = [ | ||||
|             encodeArgument(opt) | ||||
|             for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] | ||||
|             for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] | ||||
|         args.append(encodeFilename(tmpfilename, True)) | ||||
|  | ||||
|         retval = subprocess.call(args) | ||||
| @@ -48,7 +47,7 @@ class HlsFD(FileDownloader): | ||||
|             return True | ||||
|         else: | ||||
|             self.to_stderr('\n') | ||||
|             self.report_error('%s exited with code %d' % (program, retval)) | ||||
|             self.report_error('%s exited with code %d' % (ffpp.basename, retval)) | ||||
|             return False | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import errno | ||||
| import os | ||||
| import socket | ||||
| import time | ||||
|  | ||||
| from .common import FileDownloader | ||||
| @@ -12,7 +14,6 @@ from ..utils import ( | ||||
|     ContentTooShortError, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     format_bytes, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -99,6 +100,11 @@ class HttpFD(FileDownloader): | ||||
|                             resume_len = 0 | ||||
|                             open_mode = 'wb' | ||||
|                             break | ||||
|             except socket.error as e: | ||||
|                 if e.errno != errno.ECONNRESET: | ||||
|                     # Connection reset is no problem, just retry | ||||
|                     raise | ||||
|  | ||||
|             # Retry | ||||
|             count += 1 | ||||
|             if count <= retries: | ||||
| @@ -129,7 +135,6 @@ class HttpFD(FileDownloader): | ||||
|                 self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) | ||||
|                 return False | ||||
|  | ||||
|         data_len_str = format_bytes(data_len) | ||||
|         byte_counter = 0 + resume_len | ||||
|         block_size = self.params.get('buffersize', 1024) | ||||
|         start = time.time() | ||||
| @@ -188,20 +193,19 @@ class HttpFD(FileDownloader): | ||||
|             # Progress message | ||||
|             speed = self.calc_speed(start, now, byte_counter - resume_len) | ||||
|             if data_len is None: | ||||
|                 eta = percent = None | ||||
|                 eta = None | ||||
|             else: | ||||
|                 percent = self.calc_percent(byte_counter, data_len) | ||||
|                 eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) | ||||
|             self.report_progress(percent, data_len_str, speed, eta) | ||||
|  | ||||
|             self._hook_progress({ | ||||
|                 'status': 'downloading', | ||||
|                 'downloaded_bytes': byte_counter, | ||||
|                 'total_bytes': data_len, | ||||
|                 'tmpfilename': tmpfilename, | ||||
|                 'filename': filename, | ||||
|                 'status': 'downloading', | ||||
|                 'eta': eta, | ||||
|                 'speed': speed, | ||||
|                 'elapsed': now - start, | ||||
|             }) | ||||
|  | ||||
|             if is_test and byte_counter == data_len: | ||||
| @@ -213,7 +217,13 @@ class HttpFD(FileDownloader): | ||||
|             return False | ||||
|         if tmpfilename != '-': | ||||
|             stream.close() | ||||
|         self.report_finish(data_len_str, (time.time() - start)) | ||||
|  | ||||
|         self._hook_progress({ | ||||
|             'downloaded_bytes': byte_counter, | ||||
|             'total_bytes': data_len, | ||||
|             'tmpfilename': tmpfilename, | ||||
|             'status': 'error', | ||||
|         }) | ||||
|         if data_len is not None and byte_counter != data_len: | ||||
|             raise ContentTooShortError(byte_counter, int(data_len)) | ||||
|         self.try_rename(tmpfilename, filename) | ||||
| @@ -227,6 +237,7 @@ class HttpFD(FileDownloader): | ||||
|             'total_bytes': byte_counter, | ||||
|             'filename': filename, | ||||
|             'status': 'finished', | ||||
|             'elapsed': time.time() - start, | ||||
|         }) | ||||
|  | ||||
|         return True | ||||
|   | ||||
| @@ -11,7 +11,6 @@ from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     check_executable, | ||||
|     encodeFilename, | ||||
|     format_bytes, | ||||
|     get_exe_version, | ||||
| ) | ||||
|  | ||||
| @@ -51,23 +50,23 @@ class RtmpFD(FileDownloader): | ||||
|                     if not resume_percent: | ||||
|                         resume_percent = percent | ||||
|                         resume_downloaded_data_len = downloaded_data_len | ||||
|                     eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent) | ||||
|                     speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len) | ||||
|                     time_now = time.time() | ||||
|                     eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) | ||||
|                     speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) | ||||
|                     data_len = None | ||||
|                     if percent > 0: | ||||
|                         data_len = int(downloaded_data_len * 100 / percent) | ||||
|                     data_len_str = '~' + format_bytes(data_len) | ||||
|                     self.report_progress(percent, data_len_str, speed, eta) | ||||
|                     cursor_in_new_line = False | ||||
|                     self._hook_progress({ | ||||
|                         'status': 'downloading', | ||||
|                         'downloaded_bytes': downloaded_data_len, | ||||
|                         'total_bytes': data_len, | ||||
|                         'total_bytes_estimate': data_len, | ||||
|                         'tmpfilename': tmpfilename, | ||||
|                         'filename': filename, | ||||
|                         'status': 'downloading', | ||||
|                         'eta': eta, | ||||
|                         'elapsed': time_now - start, | ||||
|                         'speed': speed, | ||||
|                     }) | ||||
|                     cursor_in_new_line = False | ||||
|                 else: | ||||
|                     # no percent for live streams | ||||
|                     mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) | ||||
| @@ -75,15 +74,15 @@ class RtmpFD(FileDownloader): | ||||
|                         downloaded_data_len = int(float(mobj.group(1)) * 1024) | ||||
|                         time_now = time.time() | ||||
|                         speed = self.calc_speed(start, time_now, downloaded_data_len) | ||||
|                         self.report_progress_live_stream(downloaded_data_len, speed, time_now - start) | ||||
|                         cursor_in_new_line = False | ||||
|                         self._hook_progress({ | ||||
|                             'downloaded_bytes': downloaded_data_len, | ||||
|                             'tmpfilename': tmpfilename, | ||||
|                             'filename': filename, | ||||
|                             'status': 'downloading', | ||||
|                             'elapsed': time_now - start, | ||||
|                             'speed': speed, | ||||
|                         }) | ||||
|                         cursor_in_new_line = False | ||||
|                     elif self.params.get('verbose', False): | ||||
|                         if not cursor_in_new_line: | ||||
|                             self.to_screen('') | ||||
|   | ||||
| @@ -6,6 +6,7 @@ from .academicearth import AcademicEarthCourseIE | ||||
| from .addanime import AddAnimeIE | ||||
| from .adobetv import AdobeTVIE | ||||
| from .adultswim import AdultSwimIE | ||||
| from .aftenposten import AftenpostenIE | ||||
| from .aftonbladet import AftonbladetIE | ||||
| from .aljazeera import AlJazeeraIE | ||||
| from .alphaporno import AlphaPornoIE | ||||
| @@ -48,14 +49,24 @@ from .brightcove import BrightcoveIE | ||||
| from .buzzfeed import BuzzFeedIE | ||||
| from .byutv import BYUtvIE | ||||
| from .c56 import C56IE | ||||
| from .camdemy import ( | ||||
|     CamdemyIE, | ||||
|     CamdemyFolderIE | ||||
| ) | ||||
| from .canal13cl import Canal13clIE | ||||
| from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .cbs import CBSIE | ||||
| from .cbsnews import CBSNewsIE | ||||
| from .cbssports import CBSSportsIE | ||||
| from .ccc import CCCIE | ||||
| from .ceskatelevize import CeskaTelevizeIE | ||||
| from .channel9 import Channel9IE | ||||
| from .chilloutzone import ChilloutzoneIE | ||||
| from .chirbit import ( | ||||
|     ChirbitIE, | ||||
|     ChirbitProfileIE, | ||||
| ) | ||||
| from .cinchcast import CinchcastIE | ||||
| from .clipfish import ClipfishIE | ||||
| from .cliphunter import CliphunterIE | ||||
| @@ -73,7 +84,7 @@ from .collegehumor import CollegeHumorIE | ||||
| from .collegerama import CollegeRamaIE | ||||
| from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE | ||||
| from .comcarcoff import ComCarCoffIE | ||||
| from .commonmistakes import CommonMistakesIE | ||||
| from .commonmistakes import CommonMistakesIE, UnicodeBOMIE | ||||
| from .condenast import CondeNastIE | ||||
| from .cracked import CrackedIE | ||||
| from .criterion import CriterionIE | ||||
| @@ -115,6 +126,7 @@ from .ellentv import ( | ||||
|     EllenTVClipsIE, | ||||
| ) | ||||
| from .elpais import ElPaisIE | ||||
| from .embedly import EmbedlyIE | ||||
| from .empflix import EMPFlixIE | ||||
| from .engadget import EngadgetIE | ||||
| from .eporner import EpornerIE | ||||
| @@ -182,6 +194,8 @@ from .heise import HeiseIE | ||||
| from .hellporno import HellPornoIE | ||||
| from .helsinki import HelsinkiIE | ||||
| from .hentaistigma import HentaiStigmaIE | ||||
| from .historicfilms import HistoricFilmsIE | ||||
| from .history import HistoryIE | ||||
| from .hitbox import HitboxIE, HitboxLiveIE | ||||
| from .hornbunny import HornBunnyIE | ||||
| from .hostingbulk import HostingBulkIE | ||||
| @@ -196,6 +210,7 @@ from .imdb import ( | ||||
|     ImdbIE, | ||||
|     ImdbListIE | ||||
| ) | ||||
| from .imgur import ImgurIE | ||||
| from .ina import InaIE | ||||
| from .infoq import InfoQIE | ||||
| from .instagram import InstagramIE, InstagramUserIE | ||||
| @@ -274,6 +289,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE | ||||
| from .myspass import MySpassIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .myvidster import MyVidsterIE | ||||
| from .nationalgeographic import NationalGeographicIE | ||||
| from .naver import NaverIE | ||||
| from .nba import NBAIE | ||||
| from .nbc import ( | ||||
| @@ -284,6 +300,7 @@ from .ndr import NDRIE | ||||
| from .ndtv import NDTVIE | ||||
| from .netzkino import NetzkinoIE | ||||
| from .nerdcubed import NerdCubedFeedIE | ||||
| from .nerdist import NerdistIE | ||||
| from .newgrounds import NewgroundsIE | ||||
| from .newstube import NewstubeIE | ||||
| from .nextmedia import ( | ||||
| @@ -310,13 +327,16 @@ from .nowvideo import NowVideoIE | ||||
| from .npo import ( | ||||
|     NPOIE, | ||||
|     NPOLiveIE, | ||||
|     NPORadioIE, | ||||
|     NPORadioFragmentIE, | ||||
|     TegenlichtVproIE, | ||||
| ) | ||||
| from .nrk import ( | ||||
|     NRKIE, | ||||
|     NRKTVIE, | ||||
| ) | ||||
| from .ntv import NTVIE | ||||
| from .ntvde import NTVDeIE | ||||
| from .ntvru import NTVRuIE | ||||
| from .nytimes import NYTimesIE | ||||
| from .nuvid import NuvidIE | ||||
| from .oktoberfesttv import OktoberfestTVIE | ||||
| @@ -338,13 +358,17 @@ from .playfm import PlayFMIE | ||||
| from .playvid import PlayvidIE | ||||
| from .podomatic import PodomaticIE | ||||
| from .pornhd import PornHdIE | ||||
| from .pornhub import PornHubIE | ||||
| from .pornhub import ( | ||||
|     PornHubIE, | ||||
|     PornHubPlaylistIE, | ||||
| ) | ||||
| from .pornotube import PornotubeIE | ||||
| from .pornoxo import PornoXOIE | ||||
| from .promptfile import PromptFileIE | ||||
| from .prosiebensat1 import ProSiebenSat1IE | ||||
| from .pyvideo import PyvideoIE | ||||
| from .quickvid import QuickVidIE | ||||
| from .r7 import R7IE | ||||
| from .radiode import RadioDeIE | ||||
| from .radiobremen import RadioBremenIE | ||||
| from .radiofrance import RadioFranceIE | ||||
| @@ -359,7 +383,7 @@ from .rottentomatoes import RottenTomatoesIE | ||||
| from .roxwel import RoxwelIE | ||||
| from .rtbf import RTBFIE | ||||
| from .rte import RteIE | ||||
| from .rtlnl import RtlXlIE | ||||
| from .rtlnl import RtlNlIE | ||||
| from .rtlnow import RTLnowIE | ||||
| from .rtl2 import RTL2IE | ||||
| from .rtp import RTPIE | ||||
| @@ -374,6 +398,7 @@ from .rutube import ( | ||||
|     RutubePersonIE, | ||||
| ) | ||||
| from .rutv import RUTVIE | ||||
| from .sandia import SandiaIE | ||||
| from .sapo import SapoIE | ||||
| from .savefrom import SaveFromIE | ||||
| from .sbs import SBSIE | ||||
| @@ -404,7 +429,10 @@ from .soundcloud import ( | ||||
|     SoundcloudUserIE, | ||||
|     SoundcloudPlaylistIE | ||||
| ) | ||||
| from .soundgasm import SoundgasmIE | ||||
| from .soundgasm import ( | ||||
|     SoundgasmIE, | ||||
|     SoundgasmProfileIE | ||||
| ) | ||||
| from .southpark import ( | ||||
|     SouthParkIE, | ||||
|     SouthparkDeIE, | ||||
| @@ -424,6 +452,7 @@ from .streamcloud import StreamcloudIE | ||||
| from .streamcz import StreamCZIE | ||||
| from .streetvoice import StreetVoiceIE | ||||
| from .sunporno import SunPornoIE | ||||
| from .svtplay import SVTPlayIE | ||||
| from .swrmediathek import SWRMediathekIE | ||||
| from .syfy import SyfyIE | ||||
| from .sztvhu import SztvHuIE | ||||
| @@ -469,9 +498,11 @@ from .tumblr import TumblrIE | ||||
| from .tunein import TuneInIE | ||||
| from .turbo import TurboIE | ||||
| from .tutv import TutvIE | ||||
| from .tv4 import TV4IE | ||||
| from .tvigle import TvigleIE | ||||
| from .tvp import TvpIE, TvpSeriesIE | ||||
| from .tvplay import TVPlayIE | ||||
| from .tweakers import TweakersIE | ||||
| from .twentyfourvideo import TwentyFourVideoIE | ||||
| from .twitch import ( | ||||
|     TwitchVideoIE, | ||||
| @@ -551,6 +582,7 @@ from .wimp import WimpIE | ||||
| from .wistia import WistiaIE | ||||
| from .worldstarhiphop import WorldStarHipHopIE | ||||
| from .wrzuta import WrzutaIE | ||||
| from .wsj import WSJIE | ||||
| from .xbef import XBefIE | ||||
| from .xboxclips import XboxClipsIE | ||||
| from .xhamster import XHamsterIE | ||||
| @@ -564,6 +596,7 @@ from .yahoo import ( | ||||
|     YahooIE, | ||||
|     YahooSearchIE, | ||||
| ) | ||||
| from .yam import YamIE | ||||
| from .yesjapan import YesJapanIE | ||||
| from .ynet import YnetIE | ||||
| from .youjizz import YouJizzIE | ||||
| @@ -587,6 +620,7 @@ from .youtube import ( | ||||
|     YoutubeUserIE, | ||||
|     YoutubeWatchLaterIE, | ||||
| ) | ||||
| from .zapiks import ZapiksIE | ||||
| from .zdf import ZDFIE, ZDFChannelIE | ||||
| from .zingmp3 import ( | ||||
|     ZingMp3SongIE, | ||||
|   | ||||
| @@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         player = self._parse_json( | ||||
| @@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor): | ||||
|             self._html_search_meta('datepublished', webpage, 'upload date')) | ||||
|  | ||||
|         duration = parse_duration( | ||||
|             self._html_search_meta('duration', webpage, 'duration') | ||||
|             or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration')) | ||||
|             self._html_search_meta('duration', webpage, 'duration') or | ||||
|             self._search_regex( | ||||
|                 r'Runtime:\s*(\d{2}:\d{2}:\d{2})', | ||||
|                 webpage, 'duration', fatal=False)) | ||||
|  | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>', | ||||
|   | ||||
| @@ -38,6 +38,7 @@ class AdultSwimIE(InfoExtractor): | ||||
|             }, | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'id': 'rQxZvXQ4ROaSOqq-or2Mow', | ||||
|             'title': 'Rick and Morty - Pilot', | ||||
|             'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " | ||||
|         } | ||||
| @@ -55,6 +56,7 @@ class AdultSwimIE(InfoExtractor): | ||||
|             } | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'id': '-t8CamQlQ2aYZ49ItZCFog', | ||||
|             'title': 'American Dad - Putting Francine Out of Business', | ||||
|             'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' | ||||
|         }, | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/aftenposten.py (new file, 103 lines)
| @@ -0,0 +1,103 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     xpath_with_ns, | ||||
|     xpath_text, | ||||
|     find_xpath_attr, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AftenpostenIE(InfoExtractor):
    """Information extractor for aftenposten.no web TV pages.

    The web page carries a numeric video id in a ``data-xs-id`` attribute.
    That id is used to download an Atom feed from frontend.xstream.dk, from
    which the title, description, publication time, media URLs and splash
    images are read.
    """

    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'

    _TEST = {
        'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
        'md5': 'fd828cd29774a729bf4d4425fe192972',
        'info_dict': {
            'id': '21039',
            'ext': 'mov',
            'title': 'TRAILER: "Sweatshop" - I can´t take any more',
            'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
            'timestamp': 1416927969,
            'upload_date': '20141125',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``timestamp``, ``formats`` and ``thumbnails``.
        """
        # The URL slug is only used for progress messages while fetching
        # the page; the real id comes from the page markup below.
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        video_id = self._html_search_regex(
            r'data-xs-id="(\d+)"', webpage, 'video id')

        data = self._download_xml(
            'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)

        # Namespace prefixes used in the xpath expressions below.
        NS_MAP = {
            'atom': 'http://www.w3.org/2005/Atom',
            'xt': 'http://xstream.dk/',
            'media': 'http://search.yahoo.com/mrss/',
        }

        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))

        title = xpath_text(
            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
        description = xpath_text(
            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
        timestamp = parse_iso8601(xpath_text(
            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))

        formats = []
        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
        for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
            media_url = media_content.get('url')
            if not media_url:
                continue
            tbr = int_or_none(media_content.get('bitrate'))
            # RTMP URLs are split into connection URL, application name and
            # play path; anything else is used as a direct URL.
            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
            if mobj:
                formats.append({
                    'url': mobj.group('url'),
                    'play_path': 'mp4:%s' % mobj.group('playpath'),
                    'app': mobj.group('app'),
                    'ext': 'flv',
                    'tbr': tbr,
                    # tbr may be None when no usable bitrate is present;
                    # '%d' % None would raise TypeError, so fall back to a
                    # plain id in that case.
                    'format_id': 'rtmp-%d' % tbr if tbr is not None else 'rtmp',
                })
            else:
                formats.append({
                    'url': media_url,
                    'tbr': tbr,
                })
        self._sort_formats(formats)

        # NOTE(review): the 'original' link is appended after sorting, so it
        # always ends up last in the list - presumably deliberate so that it
        # is treated as the preferred format; confirm before reordering.
        link = find_xpath_attr(
            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
        if link is not None:
            formats.append({
                'url': link.get('href'),
                'format_id': link.get('rel'),
            })

        thumbnails = [{
            'url': splash.get('url'),
            'width': int_or_none(splash.get('width')),
            'height': int_or_none(splash.get('height')),
        } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }
| @@ -1,8 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -21,9 +19,7 @@ class AftonbladetIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.search(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('video_id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # find internal video meta data | ||||
|   | ||||
| @@ -20,6 +20,7 @@ class AparatIE(InfoExtractor): | ||||
|             'id': 'wP8On', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'تیم گلکسی 11 - زومیت', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         # 'skip': 'Extremely unreliable', | ||||
|     } | ||||
| @@ -34,7 +35,8 @@ class AparatIE(InfoExtractor): | ||||
|                      video_id + '/vt/frame') | ||||
|         webpage = self._download_webpage(embed_url, video_id) | ||||
|  | ||||
|         video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage) | ||||
|         video_urls = [video_url.replace('\\/', '/') for video_url in re.findall( | ||||
|             r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)] | ||||
|         for i, video_url in enumerate(video_urls): | ||||
|             req = HEADRequest(video_url) | ||||
|             res = self._request_webpage( | ||||
| @@ -46,7 +48,7 @@ class AparatIE(InfoExtractor): | ||||
|  | ||||
|         title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') | ||||
|         thumbnail = self._search_regex( | ||||
|             r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) | ||||
|             r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -54,4 +56,5 @@ class AparatIE(InfoExtractor): | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'thumbnail': thumbnail, | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|         } | ||||
|   | ||||
| @@ -11,9 +11,12 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class AppleTrailersIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         "url": "http://trailers.apple.com/trailers/wb/manofsteel/", | ||||
|         'info_dict': { | ||||
|             'id': 'manofsteel', | ||||
|         }, | ||||
|         "playlist": [ | ||||
|             { | ||||
|                 "md5": "d97a8e575432dbcb81b7c3acb741f8a8", | ||||
| @@ -60,7 +63,10 @@ class AppleTrailersIE(InfoExtractor): | ||||
|                 }, | ||||
|             }, | ||||
|         ] | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://trailers.apple.com/ca/metropole/autrui/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _JSON_RE = r'iTunes.playURL\((.*?)\);' | ||||
|  | ||||
|   | ||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
| import time | ||||
| import hmac | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
| @@ -17,7 +17,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AtresPlayerIE(SubtitlesInfoExtractor): | ||||
| class AtresPlayerIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html' | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -144,13 +144,12 @@ class AtresPlayerIE(SubtitlesInfoExtractor): | ||||
|         thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail') | ||||
|  | ||||
|         subtitles = {} | ||||
|         subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') | ||||
|         if subtitle: | ||||
|             subtitles['es'] = subtitle | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|         subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') | ||||
|         if subtitle_url: | ||||
|             subtitles['es'] = [{ | ||||
|                 'ext': 'srt', | ||||
|                 'url': subtitle_url, | ||||
|             }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -159,5 +158,5 @@ class AtresPlayerIE(SubtitlesInfoExtractor): | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': self.extract_subtitles(video_id, subtitles), | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|   | ||||
| @@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor): | ||||
|             'duration': int(info['length']), | ||||
|             'view_count': int(info['views_total']), | ||||
|             'uploader': info['username'], | ||||
|             'uploader_id': info['uid'], | ||||
|             'uploader_id': info['owner']['uid'], | ||||
|         } | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -72,26 +72,29 @@ class BandcampIE(InfoExtractor): | ||||
|  | ||||
|         download_link = m_download.group(1) | ||||
|         video_id = self._search_regex( | ||||
|             r'var TralbumData = {.*?id: (?P<id>\d+),?$', | ||||
|             webpage, 'video id', flags=re.MULTILINE | re.DOTALL) | ||||
|             r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$', | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') | ||||
|         # We get the dictionary of the track from some javascript code | ||||
|         info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1) | ||||
|         info = json.loads(info)[0] | ||||
|         all_info = self._parse_json(self._search_regex( | ||||
|             r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id) | ||||
|         info = all_info[0] | ||||
|         # We pick mp3-320 for now, until format selection can be easily implemented. | ||||
|         mp3_info = info['downloads']['mp3-320'] | ||||
|         # If we try to use this url it says the link has expired | ||||
|         initial_url = mp3_info['url'] | ||||
|         re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' | ||||
|         m_url = re.match(re_url, initial_url) | ||||
|         m_url = re.match( | ||||
|             r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$', | ||||
|             initial_url) | ||||
|         # We build the url we will use to get the final track url | ||||
|         # This url is build in Bandcamp in the script download_bunde_*.js | ||||
|         request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) | ||||
|         final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') | ||||
|         # If we could correctly generate the .rand field the url would be | ||||
|         # in the "download_url" key | ||||
|         final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) | ||||
|         final_url = self._search_regex( | ||||
|             r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -106,7 +109,7 @@ class BandcampIE(InfoExtractor): | ||||
|  | ||||
| class BandcampAlbumIE(InfoExtractor): | ||||
|     IE_NAME = 'Bandcamp:album' | ||||
|     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))' | ||||
|     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', | ||||
| @@ -130,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor): | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'title': 'Jazz Format Mixtape vol.1', | ||||
|             'id': 'jazz-format-mixtape-vol-1', | ||||
|             'uploader_id': 'blazo', | ||||
|         }, | ||||
|         'params': { | ||||
|             'playlistend': 2 | ||||
|         }, | ||||
|         'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' | ||||
|         'skip': 'Bandcamp imposes download limits.' | ||||
|     }, { | ||||
|         'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', | ||||
|         'info_dict': { | ||||
|             'title': 'Hierophany of the Open Grave', | ||||
|             'uploader_id': 'nightbringer', | ||||
|             'id': 'hierophany-of-the-open-grave', | ||||
|         }, | ||||
|         'playlist_mincount': 9, | ||||
|     }, { | ||||
|         'url': 'http://dotscale.bandcamp.com', | ||||
|         'info_dict': { | ||||
|             'title': 'Loom', | ||||
|             'id': 'dotscale', | ||||
|             'uploader_id': 'dotscale', | ||||
|         }, | ||||
|         'playlist_mincount': 7, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('subdomain') | ||||
|         title = mobj.group('title') | ||||
|         display_id = title or playlist_id | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         uploader_id = mobj.group('subdomain') | ||||
|         album_id = mobj.group('album_id') | ||||
|         playlist_id = album_id or uploader_id | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|         tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) | ||||
|         if not tracks_paths: | ||||
|             raise ExtractorError('The page doesn\'t contain any tracks') | ||||
| @@ -165,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor): | ||||
|             r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'uploader_id': uploader_id, | ||||
|             'id': playlist_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'entries': entries, | ||||
|         } | ||||
|   | ||||
| @@ -2,12 +2,12 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
| from ..compat import compat_HTTPError | ||||
|  | ||||
|  | ||||
| class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
| class BBCCoUkIE(InfoExtractor): | ||||
|     IE_NAME = 'bbc.co.uk' | ||||
|     IE_DESC = 'BBC iPlayer' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})' | ||||
| @@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|             formats.extend(conn_formats) | ||||
|         return formats | ||||
|  | ||||
|     def _extract_captions(self, media, programme_id): | ||||
|     def _get_subtitles(self, media, programme_id): | ||||
|         subtitles = {} | ||||
|         for connection in self._extract_connections(media): | ||||
|             captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions') | ||||
|             lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') | ||||
|             ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}')) | ||||
|             srt = '' | ||||
|  | ||||
|             def _extract_text(p): | ||||
|                 if p.text is not None: | ||||
|                     stripped_text = p.text.strip() | ||||
|                     if stripped_text: | ||||
|                         return stripped_text | ||||
|                 return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span')) | ||||
|             for pos, p in enumerate(ps): | ||||
|                 srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), | ||||
|                                                           p.text.strip() if p.text is not None else '') | ||||
|             subtitles[lang] = srt | ||||
|                 srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p)) | ||||
|             subtitles[lang] = [ | ||||
|                 { | ||||
|                     'url': connection.get('href'), | ||||
|                     'ext': 'ttml', | ||||
|                 }, | ||||
|                 { | ||||
|                     'data': srt, | ||||
|                     'ext': 'srt', | ||||
|                 }, | ||||
|             ] | ||||
|         return subtitles | ||||
|  | ||||
|     def _download_media_selector(self, programme_id): | ||||
| @@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|             elif kind == 'video': | ||||
|                 formats.extend(self._extract_video(media, programme_id)) | ||||
|             elif kind == 'captions': | ||||
|                 subtitles = self._extract_captions(media, programme_id) | ||||
|                 subtitles = self.extract_subtitles(media, programme_id) | ||||
|  | ||||
|         return formats, subtitles | ||||
|  | ||||
| @@ -273,7 +288,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|                     formats, subtitles = self._download_media_selector(programme_id) | ||||
|                 return programme_id, title, description, duration, formats, subtitles | ||||
|         except ExtractorError as ee: | ||||
|             if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: | ||||
|             if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404): | ||||
|                 raise | ||||
|  | ||||
|         # fallback to legacy playlist | ||||
| @@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|         else: | ||||
|             programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(programme_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -9,7 +9,7 @@ class BeegIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://beeg.com/5416503', | ||||
|         'md5': '634526ae978711f6b748fe0dd6c11f57', | ||||
|         'md5': '1bff67111adb785c51d1b42959ec10e5', | ||||
|         'info_dict': { | ||||
|             'id': '5416503', | ||||
|             'ext': 'mp4', | ||||
|   | ||||
| @@ -1,40 +1,35 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import remove_start | ||||
| from ..utils import ( | ||||
|     remove_start, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BlinkxIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' | ||||
|     _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' | ||||
|     IE_NAME = 'blinkx' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', | ||||
|         'md5': '2e9a07364af40163a908edbf10bb2492', | ||||
|         'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', | ||||
|         'md5': '337cf7a344663ec79bf93a526a2e06c7', | ||||
|         'info_dict': { | ||||
|             'id': '8aQUy7GV', | ||||
|             'id': 'Da0Gw3xc', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Police Car Rolls Away', | ||||
|             'uploader': 'stupidvideos.com', | ||||
|             'upload_date': '20131215', | ||||
|             'timestamp': 1387068000, | ||||
|             'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!', | ||||
|             'duration': 14.886, | ||||
|             'thumbnails': [{ | ||||
|                 'width': 100, | ||||
|                 'height': 76, | ||||
|                 'resolution': '100x76', | ||||
|                 'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg', | ||||
|             }], | ||||
|             'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', | ||||
|             'uploader': 'IGN News', | ||||
|             'upload_date': '20150217', | ||||
|             'timestamp': 1424215740, | ||||
|             'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', | ||||
|             'duration': 47.743333, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, rl): | ||||
|         m = re.match(self._VALID_URL, rl) | ||||
|         video_id = m.group('id') | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         display_id = video_id[:8] | ||||
|  | ||||
|         api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + | ||||
| @@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor): | ||||
|             elif m['type'] in ('flv', 'mp4'): | ||||
|                 vcodec = remove_start(m['vcodec'], 'ff') | ||||
|                 acodec = remove_start(m['acodec'], 'ff') | ||||
|                 tbr = (int(m['vbr']) + int(m['abr'])) // 1000 | ||||
|                 vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) | ||||
|                 abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) | ||||
|                 tbr = vbr + abr if vbr and abr else None | ||||
|                 format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'url': m['link'], | ||||
|                     'vcodec': vcodec, | ||||
|                     'acodec': acodec, | ||||
|                     'abr': int(m['abr']) // 1000, | ||||
|                     'vbr': int(m['vbr']) // 1000, | ||||
|                     'abr': abr, | ||||
|                     'vbr': vbr, | ||||
|                     'tbr': tbr, | ||||
|                     'width': int(m['w']), | ||||
|                     'height': int(m['h']), | ||||
|                     'width': int_or_none(m.get('w')), | ||||
|                     'height': int_or_none(m.get('h')), | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|   | ||||
| @@ -3,7 +3,6 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
| @@ -18,7 +17,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BlipTVIE(SubtitlesInfoExtractor): | ||||
| class BlipTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -143,7 +142,7 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|         categories = [category.text for category in item.findall('category')] | ||||
|  | ||||
|         formats = [] | ||||
|         subtitles = {} | ||||
|         subtitles_urls = {} | ||||
|  | ||||
|         media_group = item.find(media('group')) | ||||
|         for media_content in media_group.findall(media('content')): | ||||
| @@ -161,7 +160,7 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|                 } | ||||
|                 lang = role.rpartition('-')[-1].strip().lower() | ||||
|                 langcode = LANGS.get(lang, lang) | ||||
|                 subtitles[langcode] = url | ||||
|                 subtitles_urls[langcode] = url | ||||
|             elif media_type.startswith('video/'): | ||||
|                 formats.append({ | ||||
|                     'url': real_url, | ||||
| @@ -175,11 +174,7 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, subtitles) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|         subtitles = self.extract_subtitles(video_id, subtitles_urls) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -192,15 +187,22 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|             'thumbnail': thumbnail, | ||||
|             'categories': categories, | ||||
|             'formats': formats, | ||||
|             'subtitles': video_subtitles, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     def _download_subtitle_url(self, sub_lang, url): | ||||
|         # For some weird reason, blip.tv serves a video instead of subtitles | ||||
|         # when we request with a common UA | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('User-Agent', 'youtube-dl') | ||||
|         return self._download_webpage(req, None, note=False) | ||||
|     def _get_subtitles(self, video_id, subtitles_urls): | ||||
|         subtitles = {} | ||||
|         for lang, url in subtitles_urls.items(): | ||||
|             # For some weird reason, blip.tv serves a video instead of subtitles | ||||
|             # when we request with a common UA | ||||
|             req = compat_urllib_request.Request(url) | ||||
|             req.add_header('User-Agent', 'youtube-dl') | ||||
|             subtitles[lang] = [{ | ||||
|                 # The extension is 'srt' but it's actually an 'ass' file | ||||
|                 'ext': 'ass', | ||||
|                 'data': self._download_webpage(req, None, note=False), | ||||
|             }] | ||||
|         return subtitles | ||||
|  | ||||
|  | ||||
| class BlipTVUserIE(InfoExtractor): | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class BloombergIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html' | ||||
|     _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', | ||||
| @@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         name = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, name) | ||||
|  | ||||
|         f4m_url = self._search_regex( | ||||
|             r'<source src="(https?://[^"]+\.f4m.*?)"', webpage, | ||||
|             'f4m url') | ||||
|   | ||||
| @@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor): | ||||
|             'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', | ||||
|             'info_dict': { | ||||
|                 'title': 'Sealife', | ||||
|                 'id': '3550319591001', | ||||
|             }, | ||||
|             'playlist_mincount': 7, | ||||
|         }, | ||||
| @@ -108,7 +109,7 @@ class BrightcoveIE(InfoExtractor): | ||||
|         """ | ||||
|  | ||||
|         # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 | ||||
|         object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>', | ||||
|         object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>', | ||||
|                             lambda m: m.group(1) + '/>', object_str) | ||||
|         # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 | ||||
|         object_str = object_str.replace('<--', '<!--') | ||||
| @@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor): | ||||
|         playlist_info = json_data['videoList'] | ||||
|         videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] | ||||
|  | ||||
|         return self.playlist_result(videos, playlist_id=playlist_info['id'], | ||||
|         return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'], | ||||
|                                     playlist_title=playlist_info['mediaCollectionDTO']['displayName']) | ||||
|  | ||||
|     def _extract_video_info(self, video_info): | ||||
|   | ||||
| @@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor): | ||||
|             'skip_download': True,  # Got enough YouTube download tests | ||||
|         }, | ||||
|         'info_dict': { | ||||
|             'id': 'look-at-this-cute-dog-omg', | ||||
|             'description': 're:Munchkin the Teddy Bear is back ?!', | ||||
|             'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill', | ||||
|         }, | ||||
| @@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor): | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20141124', | ||||
|                 'uploader_id': 'CindysMunchkin', | ||||
|                 'description': 're:© 2014 Munchkin the Shih Tzu', | ||||
|                 'uploader': 'Munchkin the Shih Tzu', | ||||
|                 'description': 're:© 2014 Munchkin the', | ||||
|                 'uploader': 're:^Munchkin the', | ||||
|                 'title': 're:Munchkin the Teddy Bear gets her exercise', | ||||
|             }, | ||||
|         }] | ||||
|   | ||||
							
								
								
									
										153
									
								
								youtube_dl/extractor/camdemy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										153
									
								
								youtube_dl/extractor/camdemy.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,153 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CamdemyIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         # single file | ||||
|         'url': 'http://www.camdemy.com/media/5181/', | ||||
|         'md5': '5a5562b6a98b37873119102e052e311b', | ||||
|         'info_dict': { | ||||
|             'id': '5181', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ch1-1 Introduction, Signals (02-23-2012)', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': '', | ||||
|             'creator': 'ss11spring', | ||||
|             'upload_date': '20130114', | ||||
|             'timestamp': 1358154556, | ||||
|             'view_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         # With non-empty description | ||||
|         'url': 'http://www.camdemy.com/media/13885', | ||||
|         'md5': '4576a3bb2581f86c61044822adbd1249', | ||||
|         'info_dict': { | ||||
|             'id': '13885', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'EverCam + Camdemy QuickStart', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': 'md5:050b62f71ed62928f8a35f1a41e186c9', | ||||
|             'creator': 'evercam', | ||||
|             'upload_date': '20140620', | ||||
|             'timestamp': 1403271569, | ||||
|         } | ||||
|     }, { | ||||
|         # External source | ||||
|         'url': 'http://www.camdemy.com/media/14842', | ||||
|         'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7', | ||||
|         'info_dict': { | ||||
|             'id': '2vsYQzNIsJo', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20130211', | ||||
|             'uploader': 'Hun Kim', | ||||
|             'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection', | ||||
|             'uploader_id': 'hunkimtutorials', | ||||
|             'title': 'Excel 2013 Tutorial - How to add Password Protection', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         page = self._download_webpage(url, video_id) | ||||
|  | ||||
|         src_from = self._html_search_regex( | ||||
|             r"<div class='srcFrom'>Source: <a title='([^']+)'", page, | ||||
|             'external source', default=None) | ||||
|         if src_from: | ||||
|             return self.url_result(src_from) | ||||
|  | ||||
|         oembed_obj = self._download_json( | ||||
|             'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) | ||||
|  | ||||
|         thumb_url = oembed_obj['thumbnail_url'] | ||||
|         video_folder = compat_urlparse.urljoin(thumb_url, 'video/') | ||||
|         file_list_doc = self._download_xml( | ||||
|             compat_urlparse.urljoin(video_folder, 'fileList.xml'), | ||||
|             video_id, 'Filelist XML') | ||||
|         file_name = file_list_doc.find('./video/item/fileName').text | ||||
|         video_url = compat_urlparse.urljoin(video_folder, file_name) | ||||
|  | ||||
|         timestamp = parse_iso8601(self._html_search_regex( | ||||
|             r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||
|             page, 'creation time', fatal=False), | ||||
|             delimiter=' ', timezone=datetime.timedelta(hours=8)) | ||||
|         view_count = str_to_int(self._html_search_regex( | ||||
|             r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||
|             page, 'view count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': oembed_obj['title'], | ||||
|             'thumbnail': thumb_url, | ||||
|             'description': self._html_search_meta('description', page), | ||||
|             'creator': oembed_obj['author_name'], | ||||
|             'duration': oembed_obj['duration'], | ||||
|             'timestamp': timestamp, | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class CamdemyFolderIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         # links with trailing slash | ||||
|         'url': 'http://www.camdemy.com/folder/450', | ||||
|         'info_dict': { | ||||
|             'id': '450', | ||||
|             'title': '信號與系統 2012 & 2011 (Signals and Systems)', | ||||
|         }, | ||||
|         'playlist_mincount': 145 | ||||
|     }, { | ||||
|         # links without trailing slash | ||||
|         # and multi-page | ||||
|         'url': 'http://www.camdemy.com/folder/853', | ||||
|         'info_dict': { | ||||
|             'id': '853', | ||||
|             'title': '科學計算 - 使用 Matlab' | ||||
|         }, | ||||
|         'playlist_mincount': 20 | ||||
|     }, { | ||||
|         # with displayMode parameter. For testing the codes to add parameters | ||||
|         'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg', | ||||
|         'info_dict': { | ||||
|             'id': '853', | ||||
|             'title': '科學計算 - 使用 Matlab' | ||||
|         }, | ||||
|         'playlist_mincount': 20 | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         folder_id = self._match_id(url) | ||||
|  | ||||
|         # Add displayMode=list so that all links are displayed in a single page | ||||
|         parsed_url = list(compat_urlparse.urlparse(url)) | ||||
|         query = dict(compat_urlparse.parse_qsl(parsed_url[4])) | ||||
|         query.update({'displayMode': 'list'}) | ||||
|         parsed_url[4] = compat_urllib_parse.urlencode(query) | ||||
|         final_url = compat_urlparse.urlunparse(parsed_url) | ||||
|  | ||||
|         page = self._download_webpage(final_url, folder_id) | ||||
|         matches = re.findall(r"href='(/media/\d+/?)'", page) | ||||
|  | ||||
|         entries = [self.url_result('http://www.camdemy.com' + media_path) | ||||
|                    for media_path in matches] | ||||
|  | ||||
|         folder_title = self._html_search_meta('keywords', page) | ||||
|  | ||||
|         return self.playlist_result(entries, folder_id, folder_title) | ||||
| @@ -15,12 +15,13 @@ from ..utils import ( | ||||
|  | ||||
| class CanalplusIE(InfoExtractor): | ||||
|     IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv' | ||||
|     _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' | ||||
|     _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' | ||||
|     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s' | ||||
|     _SITE_ID_MAP = { | ||||
|         'canalplus.fr': 'cplus', | ||||
|         'piwiplus.fr': 'teletoon', | ||||
|         'd8.tv': 'd8', | ||||
|         'itele.fr': 'itele', | ||||
|     } | ||||
|  | ||||
|     _TESTS = [{ | ||||
| @@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor): | ||||
|             'upload_date': '20131108', | ||||
|         }, | ||||
|         'skip': 'videos get deleted after a while', | ||||
|     }, { | ||||
|         'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', | ||||
|         'md5': '65aa83ad62fe107ce29e564bb8712580', | ||||
|         'info_dict': { | ||||
|             'id': '1213714', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45', | ||||
|             'description': 'md5:8216206ec53426ea6321321f3b3c16db', | ||||
|             'upload_date': '20150211', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -39,8 +37,7 @@ class CBSIE(InfoExtractor): | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         real_id = self._search_regex( | ||||
|             r"video\.settings\.pid\s*=\s*'([^']+)';", | ||||
|   | ||||
							
								
								
									
										30
									
								
								youtube_dl/extractor/cbssports.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								youtube_dl/extractor/cbssports.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class CBSSportsIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', | ||||
|         'info_dict': { | ||||
|             'id': '_d5_GbO8p1sT', | ||||
|             'ext': 'flv', | ||||
|             'title': 'US Open flashbacks: 1990s', | ||||
|             'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         section = mobj.group('section') | ||||
|         video_id = mobj.group('id') | ||||
|         all_videos = self._download_json( | ||||
|             'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section, | ||||
|             video_id) | ||||
|         # The json file contains the info of all the videos in the section | ||||
|         video_info = next(v for v in all_videos if v['pcid'] == video_id) | ||||
|         return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform') | ||||
							
								
								
									
										99
									
								
								youtube_dl/extractor/ccc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								youtube_dl/extractor/ccc.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,99 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CCCIE(InfoExtractor): | ||||
|     IE_NAME = 'media.ccc.de' | ||||
|     _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video', | ||||
|         'md5': '205a365d0d57c0b1e43a12c9ffe8f9be', | ||||
|         'info_dict': { | ||||
|             'id': '20131228183', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Introduction to Processor Design', | ||||
|             'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'view_count': int, | ||||
|             'upload_date': '20131229', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         if self._downloader.params.get('prefer_free_formats'): | ||||
|             preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd']) | ||||
|         else: | ||||
|             preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd']) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'(?s)<h1>(.*?)</h1>', webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r"(?s)<p class='description'>(.*?)</p>", | ||||
|             webpage, 'description', fatal=False) | ||||
|         upload_date = unified_strdate(self._html_search_regex( | ||||
|             r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>", | ||||
|             webpage, 'upload date', fatal=False)) | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>", | ||||
|             webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         matches = re.finditer(r'''(?xs) | ||||
|             <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s* | ||||
|             <a\s+href='(?P<http_url>[^']+)'>\s* | ||||
|             (?: | ||||
|                 .*? | ||||
|                 <a\s+href='(?P<torrent_url>[^']+\.torrent)' | ||||
|             )?''', webpage) | ||||
|         formats = [] | ||||
|         for m in matches: | ||||
|             format = m.group('format') | ||||
|             format_id = self._search_regex( | ||||
|                 r'.*/([a-z0-9_-]+)/[^/]*$', | ||||
|                 m.group('http_url'), 'format id', default=None) | ||||
|             vcodec = 'h264' if 'h264' in format_id else ( | ||||
|                 'none' if format_id in ('mp3', 'opus') else None | ||||
|             ) | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'format': format, | ||||
|                 'url': m.group('http_url'), | ||||
|                 'vcodec': vcodec, | ||||
|                 'preference': preference(format_id), | ||||
|             }) | ||||
|  | ||||
|             if m.group('torrent_url'): | ||||
|                 formats.append({ | ||||
|                     'format_id': 'torrent-%s' % (format if format_id is None else format_id), | ||||
|                     'format': '%s (torrent)' % format, | ||||
|                     'proto': 'torrent', | ||||
|                     'format_note': '(unsupported; will just download the .torrent file)', | ||||
|                     'vcodec': vcodec, | ||||
|                     'preference': -100 + preference(format_id), | ||||
|                     'url': m.group('torrent_url'), | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'view_count': view_count, | ||||
|             'upload_date': upload_date, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
| @@ -15,7 +15,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CeskaTelevizeIE(SubtitlesInfoExtractor): | ||||
| class CeskaTelevizeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -107,13 +107,7 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor): | ||||
|         subtitles = {} | ||||
|         subs = item.get('subtitles') | ||||
|         if subs: | ||||
|             subtitles['cs'] = subs[0]['url'] | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles)) | ||||
|             subtitles = self.extract_subtitles(episode_id, subs) | ||||
|  | ||||
|         return { | ||||
|             'id': episode_id, | ||||
| @@ -125,11 +119,20 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor): | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     def _get_subtitles(self, episode_id, subs): | ||||
|         original_subtitles = self._download_webpage( | ||||
|             subs[0]['url'], episode_id, 'Downloading subtitles') | ||||
|         srt_subs = self._fix_subtitles(original_subtitles) | ||||
|         return { | ||||
|             'cs': [{ | ||||
|                 'ext': 'srt', | ||||
|                 'data': srt_subs, | ||||
|             }] | ||||
|         } | ||||
|  | ||||
|     @staticmethod | ||||
|     def _fix_subtitles(subtitles): | ||||
|         """ Convert millisecond-based subtitles to SRT """ | ||||
|         if subtitles is None: | ||||
|             return subtitles  # subtitles not requested | ||||
|  | ||||
|         def _msectotimecode(msec): | ||||
|             """ Helper utility to convert milliseconds to timecode """ | ||||
| @@ -149,7 +152,4 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor): | ||||
|                 else: | ||||
|                     yield line | ||||
|  | ||||
|         fixed_subtitles = {} | ||||
|         for k, v in subtitles.items(): | ||||
|             fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v)) | ||||
|         return fixed_subtitles | ||||
|         return "\r\n".join(_fix_subtitle(subtitles)) | ||||
|   | ||||
							
								
								
									
										84
									
								
								youtube_dl/extractor/chirbit.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/chirbit.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ChirbitIE(InfoExtractor): | ||||
|     IE_NAME = 'chirbit' | ||||
|     _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://chirb.it/PrIPv5', | ||||
|         'md5': '9847b0dad6ac3e074568bf2cfb197de8', | ||||
|         'info_dict': { | ||||
|             'id': 'PrIPv5', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Фасадстрой', | ||||
|             'duration': 52, | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         audio_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://chirb.it/%s' % audio_id, audio_id) | ||||
|  | ||||
|         audio_url = self._search_regex( | ||||
|             r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url') | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'itemprop="name">([^<]+)', webpage, 'title') | ||||
|         duration = parse_duration(self._html_search_meta( | ||||
|             'duration', webpage, 'duration', fatal=False)) | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'itemprop="playCount"\s*>(\d+)', webpage, | ||||
|             'listen count', fatal=False)) | ||||
|         comment_count = int_or_none(self._search_regex( | ||||
|             r'>(\d+) Comments?:', webpage, | ||||
|             'comment count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': audio_id, | ||||
|             'url': audio_url, | ||||
|             'title': title, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class ChirbitProfileIE(InfoExtractor): | ||||
|     IE_NAME = 'chirbit:profile' | ||||
|     _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://chirbit.com/ScarletBeauty', | ||||
|         'info_dict': { | ||||
|             'id': 'ScarletBeauty', | ||||
|             'title': 'Chirbits by ScarletBeauty', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         profile_id = self._match_id(url) | ||||
|  | ||||
|         rss = self._download_xml( | ||||
|             'http://chirbit.com/rss/%s' % profile_id, profile_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result(audio_url.text, 'Chirbit') | ||||
|             for audio_url in rss.findall('./channel/item/link')] | ||||
|  | ||||
|         title = rss.find('./channel/title').text | ||||
|  | ||||
|         return self.playlist_result(entries, profile_id, title) | ||||
| @@ -27,7 +27,6 @@ from ..utils import ( | ||||
|     compiled_regex_type, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     HEADRequest, | ||||
|     int_or_none, | ||||
|     RegexNotFoundError, | ||||
|     sanitize_filename, | ||||
| @@ -145,17 +144,25 @@ class InfoExtractor(object): | ||||
|     thumbnail:      Full URL to a video thumbnail image. | ||||
|     description:    Full video description. | ||||
|     uploader:       Full name of the video uploader. | ||||
|     creator:        The main artist who created the video. | ||||
|     timestamp:      UNIX timestamp of the moment the video became available. | ||||
|     upload_date:    Video upload date (YYYYMMDD). | ||||
|                     If not explicitly set, calculated from timestamp. | ||||
|     uploader_id:    Nickname or id of the video uploader. | ||||
|     location:       Physical location where the video was filmed. | ||||
|     subtitles:      The subtitle file contents as a dictionary in the format | ||||
|                     {language: subtitles}. | ||||
|     subtitles:      The available subtitles as a dictionary in the format | ||||
|                     {language: subformats}. "subformats" is a list sorted from | ||||
|                     lower to higher preference, each element is a dictionary | ||||
|                     with the "ext" entry and one of: | ||||
|                         * "data": The subtitles file contents | ||||
|                         * "url": A url pointing to the subtitles file | ||||
|     automatic_captions: Like 'subtitles', used by the YoutubeIE for | ||||
|                     automatically generated captions | ||||
|     duration:       Length of the video in seconds, as an integer. | ||||
|     view_count:     How many users have watched the video on the platform. | ||||
|     like_count:     Number of positive ratings of the video | ||||
|     dislike_count:  Number of negative ratings of the video | ||||
|     average_rating: Average rating give by users, the scale used depends on the webpage | ||||
|     comment_count:  Number of comments on the video | ||||
|     comments:       A list of comments, each with one or more of the following | ||||
|                     properties (all but one of text or html optional): | ||||
| @@ -263,8 +270,15 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def extract(self, url): | ||||
|         """Extracts URL information and returns it in list of dicts.""" | ||||
|         self.initialize() | ||||
|         return self._real_extract(url) | ||||
|         try: | ||||
|             self.initialize() | ||||
|             return self._real_extract(url) | ||||
|         except ExtractorError: | ||||
|             raise | ||||
|         except compat_http_client.IncompleteRead as e: | ||||
|             raise ExtractorError('A network error has occured.', cause=e, expected=True) | ||||
|         except (KeyError, StopIteration) as e: | ||||
|             raise ExtractorError('An extractor error has occured.', cause=e) | ||||
|  | ||||
|     def set_downloader(self, downloader): | ||||
|         """Sets the downloader for this IE.""" | ||||
| @@ -383,6 +397,16 @@ class InfoExtractor(object): | ||||
|             if blocked_iframe: | ||||
|                 msg += ' Visit %s for more details' % blocked_iframe | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|         if '<title>The URL you requested has been blocked</title>' in content[:512]: | ||||
|             msg = ( | ||||
|                 'Access to this webpage has been blocked by Indian censorship. ' | ||||
|                 'Use a VPN or proxy server (with --proxy) to route around it.') | ||||
|             block_msg = self._html_search_regex( | ||||
|                 r'</h1><p>(.*?)</p>', | ||||
|                 content, 'block message', default=None) | ||||
|             if block_msg: | ||||
|                 msg += ' (Message: "%s")' % block_msg.replace('\n', ' ') | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|         return content | ||||
|  | ||||
| @@ -506,7 +530,7 @@ class InfoExtractor(object): | ||||
|                 if mobj: | ||||
|                     break | ||||
|  | ||||
|         if os.name != 'nt' and sys.stderr.isatty(): | ||||
|         if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty(): | ||||
|             _name = '\033[0;34m%s\033[0m' % name | ||||
|         else: | ||||
|             _name = name | ||||
| @@ -655,6 +679,21 @@ class InfoExtractor(object): | ||||
|         } | ||||
|         return RATING_TABLE.get(rating.lower(), None) | ||||
|  | ||||
|     def _family_friendly_search(self, html): | ||||
|         # See http://schema.org/VideoObject | ||||
|         family_friendly = self._html_search_meta('isFamilyFriendly', html) | ||||
|  | ||||
|         if not family_friendly: | ||||
|             return None | ||||
|  | ||||
|         RATING_TABLE = { | ||||
|             '1': 0, | ||||
|             'true': 0, | ||||
|             '0': 18, | ||||
|             'false': 18, | ||||
|         } | ||||
|         return RATING_TABLE.get(family_friendly.lower(), None) | ||||
|  | ||||
|     def _twitter_search_player(self, html): | ||||
|         return self._html_search_meta('twitter:player', html, | ||||
|                                       'twitter card player') | ||||
| @@ -704,15 +743,15 @@ class InfoExtractor(object): | ||||
|                 preference, | ||||
|                 f.get('language_preference') if f.get('language_preference') is not None else -1, | ||||
|                 f.get('quality') if f.get('quality') is not None else -1, | ||||
|                 f.get('tbr') if f.get('tbr') is not None else -1, | ||||
|                 f.get('filesize') if f.get('filesize') is not None else -1, | ||||
|                 f.get('vbr') if f.get('vbr') is not None else -1, | ||||
|                 f.get('height') if f.get('height') is not None else -1, | ||||
|                 f.get('width') if f.get('width') is not None else -1, | ||||
|                 ext_preference, | ||||
|                 f.get('tbr') if f.get('tbr') is not None else -1, | ||||
|                 f.get('vbr') if f.get('vbr') is not None else -1, | ||||
|                 f.get('abr') if f.get('abr') is not None else -1, | ||||
|                 audio_ext_preference, | ||||
|                 f.get('fps') if f.get('fps') is not None else -1, | ||||
|                 f.get('filesize') if f.get('filesize') is not None else -1, | ||||
|                 f.get('filesize_approx') if f.get('filesize_approx') is not None else -1, | ||||
|                 f.get('source_preference') if f.get('source_preference') is not None else -1, | ||||
|                 f.get('format_id'), | ||||
| @@ -729,9 +768,7 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def _is_valid_url(self, url, video_id, item='video'): | ||||
|         try: | ||||
|             self._request_webpage( | ||||
|                 HEADRequest(url), video_id, | ||||
|                 'Checking %s URL' % item) | ||||
|             self._request_webpage(url, video_id, 'Checking %s URL' % item) | ||||
|             return True | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError): | ||||
| @@ -764,7 +801,7 @@ class InfoExtractor(object): | ||||
|         self.to_screen(msg) | ||||
|         time.sleep(timeout) | ||||
|  | ||||
|     def _extract_f4m_formats(self, manifest_url, video_id): | ||||
|     def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None): | ||||
|         manifest = self._download_xml( | ||||
|             manifest_url, video_id, 'Downloading f4m manifest', | ||||
|             'Unable to download f4m manifest') | ||||
| @@ -777,30 +814,32 @@ class InfoExtractor(object): | ||||
|             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') | ||||
|         for i, media_el in enumerate(media_nodes): | ||||
|             if manifest_version == '2.0': | ||||
|                 manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href') | ||||
|                 manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' + | ||||
|                                 (media_el.attrib.get('href') or media_el.attrib.get('url'))) | ||||
|             tbr = int_or_none(media_el.attrib.get('bitrate')) | ||||
|             format_id = 'f4m-%d' % (i if tbr is None else tbr) | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), | ||||
|                 'url': manifest_url, | ||||
|                 'ext': 'flv', | ||||
|                 'tbr': tbr, | ||||
|                 'width': int_or_none(media_el.attrib.get('width')), | ||||
|                 'height': int_or_none(media_el.attrib.get('height')), | ||||
|                 'preference': preference, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return formats | ||||
|  | ||||
|     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, | ||||
|                               entry_protocol='m3u8', preference=None): | ||||
|                               entry_protocol='m3u8', preference=None, | ||||
|                               m3u8_id=None): | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'm3u8-meta', | ||||
|             'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])), | ||||
|             'url': m3u8_url, | ||||
|             'ext': ext, | ||||
|             'protocol': 'm3u8', | ||||
|             'preference': -1, | ||||
|             'preference': preference - 1 if preference else -1, | ||||
|             'resolution': 'multiple', | ||||
|             'format_note': 'Quality selection URL', | ||||
|         }] | ||||
| @@ -815,6 +854,7 @@ class InfoExtractor(object): | ||||
|             note='Downloading m3u8 information', | ||||
|             errnote='Failed to download m3u8 information') | ||||
|         last_info = None | ||||
|         last_media = None | ||||
|         kv_rex = re.compile( | ||||
|             r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)') | ||||
|         for line in m3u8_doc.splitlines(): | ||||
| @@ -825,6 +865,13 @@ class InfoExtractor(object): | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_info[m.group('key')] = v | ||||
|             elif line.startswith('#EXT-X-MEDIA:'): | ||||
|                 last_media = {} | ||||
|                 for m in kv_rex.finditer(line): | ||||
|                     v = m.group('val') | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_media[m.group('key')] = v | ||||
|             elif line.startswith('#') or not line.strip(): | ||||
|                 continue | ||||
|             else: | ||||
| @@ -832,9 +879,8 @@ class InfoExtractor(object): | ||||
|                     formats.append({'url': format_url(line)}) | ||||
|                     continue | ||||
|                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) | ||||
|  | ||||
|                 f = { | ||||
|                     'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), | ||||
|                     'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])), | ||||
|                     'url': format_url(line.strip()), | ||||
|                     'tbr': tbr, | ||||
|                     'ext': ext, | ||||
| @@ -854,16 +900,22 @@ class InfoExtractor(object): | ||||
|                     width_str, height_str = resolution.split('x') | ||||
|                     f['width'] = int(width_str) | ||||
|                     f['height'] = int(height_str) | ||||
|                 if last_media is not None: | ||||
|                     f['m3u8_media'] = last_media | ||||
|                     last_media = None | ||||
|                 formats.append(f) | ||||
|                 last_info = {} | ||||
|         self._sort_formats(formats) | ||||
|         return formats | ||||
|  | ||||
|     # TODO: improve extraction | ||||
|     def _extract_smil_formats(self, smil_url, video_id): | ||||
|     def _extract_smil_formats(self, smil_url, video_id, fatal=True): | ||||
|         smil = self._download_xml( | ||||
|             smil_url, video_id, 'Downloading SMIL file', | ||||
|             'Unable to download SMIL file') | ||||
|             'Unable to download SMIL file', fatal=fatal) | ||||
|         if smil is False: | ||||
|             assert not fatal | ||||
|             return [] | ||||
|  | ||||
|         base = smil.find('./head/meta').get('base') | ||||
|  | ||||
| @@ -965,6 +1017,24 @@ class InfoExtractor(object): | ||||
|             any_restricted = any_restricted or is_restricted | ||||
|         return not any_restricted | ||||
|  | ||||
|     def extract_subtitles(self, *args, **kwargs): | ||||
|         if (self._downloader.params.get('writesubtitles', False) or | ||||
|                 self._downloader.params.get('listsubtitles')): | ||||
|             return self._get_subtitles(*args, **kwargs) | ||||
|         return {} | ||||
|  | ||||
|     def _get_subtitles(self, *args, **kwargs): | ||||
|         raise NotImplementedError("This method must be implemented by subclasses") | ||||
|  | ||||
|     def extract_automatic_captions(self, *args, **kwargs): | ||||
|         if (self._downloader.params.get('writeautomaticsub', False) or | ||||
|                 self._downloader.params.get('listsubtitles')): | ||||
|             return self._get_automatic_captions(*args, **kwargs) | ||||
|         return {} | ||||
|  | ||||
|     def _get_automatic_captions(self, *args, **kwargs): | ||||
|         raise NotImplementedError("This method must be implemented by subclasses") | ||||
|  | ||||
|  | ||||
| class SearchInfoExtractor(InfoExtractor): | ||||
|     """ | ||||
|   | ||||
| @@ -24,6 +24,23 @@ class CommonMistakesIE(InfoExtractor): | ||||
|             'That doesn\'t make any sense. ' | ||||
|             'Simply remove the parameter in your command or configuration.' | ||||
|         ) % url | ||||
|         if self._downloader.params.get('verbose'): | ||||
|         if not self._downloader.params.get('verbose'): | ||||
|             msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.' | ||||
|         raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|  | ||||
| class UnicodeBOMIE(InfoExtractor): | ||||
|         IE_DESC = False | ||||
|         _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$' | ||||
|  | ||||
|         _TESTS = [{ | ||||
|             'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', | ||||
|             'only_matching': True, | ||||
|         }] | ||||
|  | ||||
|         def _real_extract(self, url): | ||||
|             real_url = self._match_id(url) | ||||
|             self.report_warning( | ||||
|                 'Your URL starts with a Byte Order Mark (BOM). ' | ||||
|                 'Removing the BOM and looking for "%s" ...' % real_url) | ||||
|             return self.url_result(real_url) | ||||
|   | ||||
| @@ -9,7 +9,7 @@ import xml.etree.ElementTree | ||||
|  | ||||
| from hashlib import sha1 | ||||
| from math import pow, sqrt, floor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| @@ -25,10 +25,9 @@ from ..aes import ( | ||||
|     aes_cbc_decrypt, | ||||
|     inc, | ||||
| ) | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class CrunchyrollIE(SubtitlesInfoExtractor): | ||||
| class CrunchyrollIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', | ||||
| @@ -187,6 +186,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|  | ||||
|         return output | ||||
|  | ||||
|     def _get_subtitles(self, video_id, webpage): | ||||
|         subtitles = {} | ||||
|         for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): | ||||
|             sub_page = self._download_webpage( | ||||
|                 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id, | ||||
|                 video_id, note='Downloading subtitles for ' + sub_name) | ||||
|             id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) | ||||
|             iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False) | ||||
|             data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False) | ||||
|             if not id or not iv or not data: | ||||
|                 continue | ||||
|             id = int(id) | ||||
|             iv = base64.b64decode(iv) | ||||
|             data = base64.b64decode(data) | ||||
|  | ||||
|             subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') | ||||
|             lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) | ||||
|             if not lang_code: | ||||
|                 continue | ||||
|             sub_root = xml.etree.ElementTree.fromstring(subtitle) | ||||
|             subtitles[lang_code] = [ | ||||
|                 { | ||||
|                     'ext': 'srt', | ||||
|                     'data': self._convert_subtitles_to_srt(sub_root), | ||||
|                 }, | ||||
|                 { | ||||
|                     'ext': 'ass', | ||||
|                     'data': self._convert_subtitles_to_ass(sub_root), | ||||
|                 }, | ||||
|             ] | ||||
|         return subtitles | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('video_id') | ||||
| @@ -249,34 +280,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|                 'format_id': video_format, | ||||
|             }) | ||||
|  | ||||
|         subtitles = {} | ||||
|         sub_format = self._downloader.params.get('subtitlesformat', 'srt') | ||||
|         for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): | ||||
|             sub_page = self._download_webpage( | ||||
|                 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id, | ||||
|                 video_id, note='Downloading subtitles for ' + sub_name) | ||||
|             id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) | ||||
|             iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False) | ||||
|             data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False) | ||||
|             if not id or not iv or not data: | ||||
|                 continue | ||||
|             id = int(id) | ||||
|             iv = base64.b64decode(iv) | ||||
|             data = base64.b64decode(data) | ||||
|  | ||||
|             subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') | ||||
|             lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) | ||||
|             if not lang_code: | ||||
|                 continue | ||||
|             sub_root = xml.etree.ElementTree.fromstring(subtitle) | ||||
|             if sub_format == 'ass': | ||||
|                 subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root) | ||||
|             else: | ||||
|                 subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|         subtitles = self.extract_subtitles(video_id, webpage) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -6,7 +6,6 @@ import json | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
| @@ -31,7 +30,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): | ||||
|         return request | ||||
|  | ||||
|  | ||||
| class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
| class DailymotionIE(DailymotionBaseInfoExtractor): | ||||
|     """Information Extractor for Dailymotion""" | ||||
|  | ||||
|     _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)' | ||||
| @@ -143,9 +142,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, webpage) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, webpage) | ||||
|             return | ||||
|  | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'video_views_count[^>]+>\s+([\d\.,]+)', | ||||
| @@ -169,7 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|     def _get_subtitles(self, video_id, webpage): | ||||
|         try: | ||||
|             sub_list = self._download_webpage( | ||||
|                 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, | ||||
| @@ -179,7 +175,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             return {} | ||||
|         info = json.loads(sub_list) | ||||
|         if (info['total'] > 0): | ||||
|             sub_lang_list = dict((l['language'], l['url']) for l in info['list']) | ||||
|             sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list']) | ||||
|             return sub_lang_list | ||||
|         self._downloader.report_warning('video doesn\'t have subtitles') | ||||
|         return {} | ||||
| @@ -194,6 +190,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|         'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', | ||||
|         'info_dict': { | ||||
|             'title': 'SPORT', | ||||
|             'id': 'xv4bw_nqtv_sport', | ||||
|         }, | ||||
|         'playlist_mincount': 20, | ||||
|     }] | ||||
|   | ||||
| @@ -14,6 +14,10 @@ class DctpTvIE(InfoExtractor): | ||||
|             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Videoinstallation für eine Kaufhausfassade' | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor): | ||||
|             r"flashvars.pvg_id=\"(\d+)\";", | ||||
|             webpage, 'ID') | ||||
|  | ||||
|         json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' | ||||
|                     + video_id) | ||||
|         json_url = ( | ||||
|             'http://static.videos.gouv.fr/brightcovehub/export/json/%s' % | ||||
|             video_id) | ||||
|         info = self._download_json(json_url, title, 'Downloading JSON config') | ||||
|         video_url = info['renditions'][0]['url'] | ||||
|  | ||||
|   | ||||
| @@ -1,13 +1,14 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DotsubIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27', | ||||
|         'md5': '0914d4d69605090f623b7ac329fea66e', | ||||
| @@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor): | ||||
|             'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary', | ||||
|             'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074', | ||||
|             'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', | ||||
|             'duration': 3169, | ||||
|             'uploader': '4v4l0n42', | ||||
|             'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism  and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com', | ||||
|             'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', | ||||
|             'timestamp': 1292248482.625, | ||||
|             'upload_date': '20101213', | ||||
|             'view_count': int, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         info_url = "https://dotsub.com/api/media/%s/metadata" % video_id | ||||
|         info = self._download_json(info_url, video_id) | ||||
|         date = time.gmtime(info['dateCreated'] / 1000)  # The timestamp is in miliseconds | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         info = self._download_json( | ||||
|             'https://dotsub.com/api/media/%s/metadata' % video_id, video_id) | ||||
|         video_url = info.get('mediaURI') | ||||
|  | ||||
|         if not video_url: | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|             video_url = self._search_regex( | ||||
|                 r'"file"\s*:\s*\'([^\']+)', webpage, 'video url') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': info['mediaURI'], | ||||
|             'url': video_url, | ||||
|             'ext': 'flv', | ||||
|             'title': info['title'], | ||||
|             'thumbnail': info['screenshotURI'], | ||||
|             'description': info['description'], | ||||
|             'uploader': info['user'], | ||||
|             'view_count': info['numberOfViews'], | ||||
|             'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday), | ||||
|             'description': info.get('description'), | ||||
|             'thumbnail': info.get('screenshotURI'), | ||||
|             'duration': int_or_none(info.get('duration'), 1000), | ||||
|             'uploader': info.get('user'), | ||||
|             'timestamp': float_or_none(info.get('dateCreated'), 1000), | ||||
|             'view_count': int_or_none(info.get('numberOfViews')), | ||||
|         } | ||||
|   | ||||
| @@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor): | ||||
|             'id': '1740434', | ||||
|             'display_id': 'hot-perky-blonde-naked-golf', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Hot Perky Blonde Naked Golf', | ||||
|             'title': 'hot perky blonde naked golf', | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|             'comment_count': int, | ||||
| @@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor): | ||||
|             r'<source src="([^"]+)"', webpage, 'video URL') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>([^<]+)\s*-\s*Free', webpage, 'title') | ||||
|             [r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'], | ||||
|             webpage, 'title') | ||||
|  | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'poster="([^"]+)"', | ||||
|   | ||||
| @@ -1,11 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import ExtractorError | ||||
| from .common import InfoExtractor, ExtractorError | ||||
| from ..utils import parse_iso8601 | ||||
|  | ||||
|  | ||||
| class DRTVIE(SubtitlesInfoExtractor): | ||||
| class DRTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' | ||||
|  | ||||
|     _TEST = { | ||||
| @@ -25,9 +24,15 @@ class DRTVIE(SubtitlesInfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         programcard = self._download_json( | ||||
|             'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'data-(?:material-identifier|episode-slug)="([^"]+)"', | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         programcard = self._download_json( | ||||
|             'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, | ||||
|             video_id, 'Downloading video JSON') | ||||
|         data = programcard['Data'][0] | ||||
|  | ||||
|         title = data['Title'] | ||||
| @@ -70,7 +75,7 @@ class DRTVIE(SubtitlesInfoExtractor): | ||||
|                     } | ||||
|                     for subs in subtitles_list: | ||||
|                         lang = subs['Language'] | ||||
|                         subtitles[LANGS.get(lang, lang)] = subs['Uri'] | ||||
|                         subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}] | ||||
|  | ||||
|         if not formats and restricted_to_denmark: | ||||
|             raise ExtractorError( | ||||
| @@ -78,10 +83,6 @@ class DRTVIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
| @@ -90,5 +91,5 @@ class DRTVIE(SubtitlesInfoExtractor): | ||||
|             'timestamp': timestamp, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': self.extract_subtitles(video_id, subtitles), | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										16
									
								
								youtube_dl/extractor/embedly.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								youtube_dl/extractor/embedly.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
|  | ||||
|  | ||||
| class EmbedlyIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) | ||||
| @@ -1,18 +1,17 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     js_to_json, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EscapistIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-' | ||||
|     _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', | ||||
|         'md5': 'ab3a706c681efca53f0a35f1415cf0d1', | ||||
| @@ -20,31 +19,37 @@ class EscapistIE(InfoExtractor): | ||||
|             'id': '6618', | ||||
|             'ext': 'mp4', | ||||
|             'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", | ||||
|             'uploader': 'the-escapist-presents', | ||||
|             'uploader_id': 'the-escapist-presents', | ||||
|             'uploader': 'The Escapist Presents', | ||||
|             'title': "Breaking Down Baldur's Gate", | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         showName = mobj.group('showname') | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         videoDesc = self._html_search_regex( | ||||
|             r'<meta name="description" content="([^"]*)"', | ||||
|             webpage, 'description', fatal=False) | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'", | ||||
|             webpage, 'uploader ID', fatal=False) | ||||
|         uploader = self._html_search_regex( | ||||
|             r"<h1\s+class='headline'>(.*?)</a>", | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|  | ||||
|         playerUrl = self._og_search_video_url(webpage, name='player URL') | ||||
|         raw_title = self._html_search_meta('title', webpage, fatal=True) | ||||
|         title = raw_title.partition(' : ')[2] | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<meta name="title" content="([^"]*)"', | ||||
|             webpage, 'title').split(' : ')[-1] | ||||
|  | ||||
|         configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL') | ||||
|         configUrl = compat_urllib_parse.unquote(configUrl) | ||||
|         config_url = compat_urllib_parse.unquote(self._html_search_regex( | ||||
|             r'''(?x) | ||||
|             (?: | ||||
|                 <param\s+name="flashvars"\s+value="config=| | ||||
|                 flashvars="config= | ||||
|             ) | ||||
|             ([^"&]+) | ||||
|             ''', | ||||
|             webpage, 'config URL')) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
| @@ -53,18 +58,21 @@ class EscapistIE(InfoExtractor): | ||||
|                 cfgurl, video_id, | ||||
|                 'Downloading ' + name + ' configuration', | ||||
|                 'Unable to download ' + name + ' configuration', | ||||
|                 transform_source=lambda s: s.replace("'", '"')) | ||||
|                 transform_source=js_to_json) | ||||
|  | ||||
|             playlist = config['playlist'] | ||||
|             video_url = next( | ||||
|                 p['url'] for p in playlist | ||||
|                 if p.get('eventCategory') == 'Video') | ||||
|             formats.append({ | ||||
|                 'url': playlist[1]['url'], | ||||
|                 'url': video_url, | ||||
|                 'format_id': name, | ||||
|                 'quality': quality, | ||||
|             }) | ||||
|  | ||||
|         _add_format('normal', configUrl, quality=0) | ||||
|         hq_url = (configUrl + | ||||
|                   ('&hq=1' if '?' in configUrl else configUrl + '?hq=1')) | ||||
|         _add_format('normal', config_url, quality=0) | ||||
|         hq_url = (config_url + | ||||
|                   ('&hq=1' if '?' in config_url else config_url + '?hq=1')) | ||||
|         try: | ||||
|             _add_format('hq', hq_url, quality=1) | ||||
|         except ExtractorError: | ||||
| @@ -75,9 +83,9 @@ class EscapistIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'uploader': showName, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'title': title, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': videoDesc, | ||||
|             'player_url': playerUrl, | ||||
|             'description': description, | ||||
|         } | ||||
|   | ||||
| @@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor): | ||||
|         params_raw = compat_urllib_parse.unquote(data['params']) | ||||
|         params = json.loads(params_raw) | ||||
|         video_data = params['video_data'][0] | ||||
|         video_url = video_data.get('hd_src') | ||||
|         if not video_url: | ||||
|             video_url = video_data['sd_src'] | ||||
|         if not video_url: | ||||
|             raise ExtractorError('Cannot find video URL') | ||||
|  | ||||
|         formats = [] | ||||
|         for quality in ['sd', 'hd']: | ||||
|             src = video_data.get('%s_src' % quality) | ||||
|             if src is not None: | ||||
|                 formats.append({ | ||||
|                     'format_id': quality, | ||||
|                     'url': src, | ||||
|                 }) | ||||
|         if not formats: | ||||
|             raise ExtractorError('Cannot find video formats') | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title', | ||||
| @@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'url': video_url, | ||||
|             'formats': formats, | ||||
|             'duration': int_or_none(video_data.get('video_duration')), | ||||
|             'thumbnail': video_data.get('thumbnail_src'), | ||||
|         } | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         page = self._download_webpage(url, video_id) | ||||
|         title = self._html_search_meta('twitter:title', page, 'title') | ||||
|  | ||||
|         title = self._html_search_meta('twitter:title', page, 'title', fatal=True) | ||||
|         description = self._html_search_meta('twitter:description', page, 'title') | ||||
|  | ||||
|         data = self._download_xml( | ||||
| @@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor): | ||||
|                 'height': int(details.find('./height').text.strip()), | ||||
|             } for details in item.findall('./source/file_details') if details.find('./file').text | ||||
|         ] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -1,52 +1,71 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class FirstTVIE(InfoExtractor): | ||||
|     IE_NAME = 'firsttv' | ||||
|     IE_DESC = 'Видеоархив - Первый канал' | ||||
|     _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)' | ||||
|     IE_NAME = '1tv' | ||||
|     IE_DESC = 'Первый канал' | ||||
|     _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.1tv.ru/videoarchive/73390', | ||||
|         'md5': '3de6390cf0cca4a5eae1d1d83895e5ad', | ||||
|         'md5': '777f525feeec4806130f4f764bc18a4f', | ||||
|         'info_dict': { | ||||
|             'id': '73390', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Олимпийские канатные дороги', | ||||
|             'description': 'md5:cc730d2bf4215463e37fff6a1e277b13', | ||||
|             'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG', | ||||
|             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | ||||
|             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', | ||||
|             'duration': 149, | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|         }, | ||||
|         'skip': 'Only works from Russia', | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930', | ||||
|         'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', | ||||
|         'info_dict': { | ||||
|             'id': '35930', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Наедине со всеми. Людмила Сенчина', | ||||
|             'description': 'md5:89553aed1d641416001fe8d450f06cb9', | ||||
|             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', | ||||
|             'duration': 2694, | ||||
|         }, | ||||
|         'skip': 'Only works from Russia', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id, 'Downloading page') | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL') | ||||
|             r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''', | ||||
|             webpage, 'video URL') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title') | ||||
|             [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', | ||||
|              r"'title'\s*:\s*'([^']+)'"], webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False) | ||||
|             r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', | ||||
|             webpage, 'description', default=None) or self._html_search_meta( | ||||
|                 'description', webpage, 'description') | ||||
|  | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False) | ||||
|         duration = self._og_search_property( | ||||
|             'video:duration', webpage, | ||||
|             'video duration', fatal=False) | ||||
|  | ||||
|         like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]', | ||||
|                                              webpage, 'like count', fatal=False) | ||||
|         dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]', | ||||
|                                                 webpage, 'dislike count', fatal=False) | ||||
|         like_count = self._html_search_regex( | ||||
|             r'title="Понравилось".*?/></label> \[(\d+)\]', | ||||
|             webpage, 'like count', default=None) | ||||
|         dislike_count = self._html_search_regex( | ||||
|             r'title="Не понравилось".*?/></label> \[(\d+)\]', | ||||
|             webpage, 'dislike count', default=None) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor): | ||||
|     IE_NAME = '5min' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=| | ||||
|             https?://(?:(?:massively|www)\.)?joystiq\.com/video/| | ||||
|             5min:) | ||||
|         (?P<id>\d+) | ||||
|         ''' | ||||
|   | ||||
| @@ -1,77 +1,69 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FranceCultureIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.franceculture.fr/player/reecouter?play=4795174', | ||||
|         'info_dict': { | ||||
|             'id': '4795174', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Rendez-vous au pays des geeks', | ||||
|             'alt_title': 'Carnet nomade | 13-14', | ||||
|             'vcodec': 'none', | ||||
|             'uploader': 'Colette Fellous', | ||||
|             'upload_date': '20140301', | ||||
|             'duration': 3601, | ||||
|             'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$', | ||||
|             'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...', | ||||
|             'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats', | ||||
|             'timestamp': 1393700400, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         baseurl = mobj.group('baseurl') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         params_code = self._search_regex( | ||||
|             r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />", | ||||
|             webpage, 'parameter code') | ||||
|         params = compat_parse_qs(params_code) | ||||
|         video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0]) | ||||
|  | ||||
|         video_path = self._search_regex( | ||||
|             r'<a id="player".*?href="([^"]+)"', webpage, 'video path') | ||||
|         video_url = compat_urlparse.urljoin(url, video_path) | ||||
|         timestamp = int_or_none(self._search_regex( | ||||
|             r'<a id="player".*?data-date="([0-9]+)"', | ||||
|             webpage, 'upload date', fatal=False)) | ||||
|         thumbnail = self._search_regex( | ||||
|             r'<a id="player".*?>\s+<img src="([^"]+)"', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title') | ||||
|             r'<span class="title-diffusion">(.*?)</span>', webpage, 'title') | ||||
|         alt_title = self._html_search_regex( | ||||
|             r'<span class="title">(.*?)</span>', | ||||
|             webpage, 'alt_title', fatal=False) | ||||
|         description = self._html_search_regex( | ||||
|             r'<span class="description">(.*?)</span>', | ||||
|             webpage, 'description', fatal=False) | ||||
|  | ||||
|         uploader = self._html_search_regex( | ||||
|             r'(?s)<div id="emission".*?<span class="author">(.*?)</span>', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         thumbnail_part = self._html_search_regex( | ||||
|             r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage, | ||||
|             'thumbnail', fatal=False) | ||||
|         if thumbnail_part is None: | ||||
|             thumbnail = None | ||||
|         else: | ||||
|             thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part) | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<p class="desc">(.*?)</p>', webpage, 'description') | ||||
|  | ||||
|         info = json.loads(params['infoData'][0])[0] | ||||
|         duration = info.get('media_length') | ||||
|         upload_date_candidate = info.get('media_section5') | ||||
|         upload_date = ( | ||||
|             upload_date_candidate | ||||
|             if (upload_date_candidate is not None and | ||||
|                 re.match(r'[0-9]{8}$', upload_date_candidate)) | ||||
|             else None) | ||||
|             webpage, 'uploader', default=None) | ||||
|         vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'vcodec': 'none' if video_url.lower().endswith('.mp3') else None, | ||||
|             'duration': duration, | ||||
|             'vcodec': vcodec, | ||||
|             'uploader': uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'timestamp': timestamp, | ||||
|             'title': title, | ||||
|             'alt_title': alt_title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': description, | ||||
|         } | ||||
|   | ||||
| @@ -1,41 +1,67 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GamekingsIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', | ||||
|         # MD5 is flaky, seems to change regularly | ||||
|         # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', | ||||
|         'info_dict': { | ||||
|             'id': '20130811', | ||||
|             'id': 'phoenix-wright-ace-attorney-dual-destinies-review', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', | ||||
|             'description': 'md5:36fd701e57e8c15ac8682a2374c99731', | ||||
|         } | ||||
|     } | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     }, { | ||||
|         # vimeo video | ||||
|         'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/', | ||||
|         'md5': '12bf04dfd238e70058046937657ea68d', | ||||
|         'info_dict': { | ||||
|             'id': 'the-legend-of-zelda-majoras-mask', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The Legend of Zelda: Majora’s Mask', | ||||
|             'description': 'md5:9917825fe0e9f4057601fe1e38860de3', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         video_url = self._og_search_video_url(webpage) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video = re.search(r'[0-9]+', video_url) | ||||
|         video_id = video.group(0) | ||||
|         playlist_id = self._search_regex( | ||||
|             r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id') | ||||
|  | ||||
|         # Todo: add medium format | ||||
|         video_url = video_url.replace(video_id, 'large/' + video_id) | ||||
|         playlist = self._download_xml( | ||||
|             'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id, | ||||
|             video_id) | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'jwplayer': 'http://rss.jwpcdn.com/' | ||||
|         } | ||||
|  | ||||
|         item = playlist.find('./channel/item') | ||||
|  | ||||
|         thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail') | ||||
|         video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'ext': 'mp4', | ||||
|             'url': video_url, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|   | ||||
| @@ -7,6 +7,7 @@ from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from ..utils import remove_end | ||||
|  | ||||
|  | ||||
| class GDCVaultIE(InfoExtractor): | ||||
| @@ -65,10 +66,12 @@ class GDCVaultIE(InfoExtractor): | ||||
|  | ||||
|     def _parse_flv(self, xml_description): | ||||
|         video_formats = [] | ||||
|         akami_url = xml_description.find('./metadata/akamaiHost').text | ||||
|         akamai_url = xml_description.find('./metadata/akamaiHost').text | ||||
|         slide_video_path = xml_description.find('./metadata/slideVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + slide_video_path, | ||||
|             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, | ||||
|             'play_path': remove_end(slide_video_path, '.flv'), | ||||
|             'ext': 'flv', | ||||
|             'format_note': 'slide deck video', | ||||
|             'quality': -2, | ||||
|             'preference': -2, | ||||
| @@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor): | ||||
|         }) | ||||
|         speaker_video_path = xml_description.find('./metadata/speakerVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + speaker_video_path, | ||||
|             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, | ||||
|             'play_path': remove_end(speaker_video_path, '.flv'), | ||||
|             'ext': 'flv', | ||||
|             'format_note': 'speaker video', | ||||
|             'quality': -1, | ||||
|             'preference': -1, | ||||
|   | ||||
| @@ -140,6 +140,19 @@ class GenericIE(InfoExtractor): | ||||
|             }, | ||||
|             'add_ie': ['Ooyala'], | ||||
|         }, | ||||
|         # multiple ooyala embeds on SBN network websites | ||||
|         { | ||||
|             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', | ||||
|             'info_dict': { | ||||
|                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok', | ||||
|                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com', | ||||
|             }, | ||||
|             'playlist_mincount': 3, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'add_ie': ['Ooyala'], | ||||
|         }, | ||||
|         # google redirect | ||||
|         { | ||||
|             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', | ||||
| @@ -460,6 +473,7 @@ class GenericIE(InfoExtractor): | ||||
|         { | ||||
|             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', | ||||
|             'info_dict': { | ||||
|                 'id': '1986', | ||||
|                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', | ||||
|             }, | ||||
|             'playlist_mincount': 2, | ||||
| @@ -511,7 +525,38 @@ class GenericIE(InfoExtractor): | ||||
|                 'upload_date': '20150126', | ||||
|             }, | ||||
|             'add_ie': ['Viddler'], | ||||
|         } | ||||
|         }, | ||||
|         # jwplayer YouTube | ||||
|         { | ||||
|             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/', | ||||
|             'info_dict': { | ||||
|                 'id': 'Mrj4DVp2zeA', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20150212', | ||||
|                 'uploader': 'The National Archives UK', | ||||
|                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', | ||||
|                 'uploader_id': 'NationalArchives08', | ||||
|                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue', | ||||
|             }, | ||||
|         }, | ||||
|         # rtl.nl embed | ||||
|         { | ||||
|             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', | ||||
|             'playlist_mincount': 5, | ||||
|             'info_dict': { | ||||
|                 'id': 'aanslagen-kopenhagen', | ||||
|                 'title': 'Aanslagen Kopenhagen | RTL Nieuws', | ||||
|             } | ||||
|         }, | ||||
|         # Zapiks embed | ||||
|         { | ||||
|             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html', | ||||
|             'info_dict': { | ||||
|                 'id': '118046', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
| @@ -756,6 +801,13 @@ class GenericIE(InfoExtractor): | ||||
|                 'entries': entries, | ||||
|             } | ||||
|  | ||||
|         # Look for embedded rtl.nl player | ||||
|         matches = re.findall( | ||||
|             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"', | ||||
|             webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches(matches, ie='RtlNl') | ||||
|  | ||||
|         # Look for embedded (iframe) Vimeo player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) | ||||
| @@ -763,7 +815,6 @@ class GenericIE(InfoExtractor): | ||||
|             player_url = unescapeHTML(mobj.group('url')) | ||||
|             surl = smuggle_url(player_url, {'Referer': url}) | ||||
|             return self.url_result(surl) | ||||
|  | ||||
|         # Look for embedded (swf embed) Vimeo player | ||||
|         mobj = re.search( | ||||
|             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) | ||||
| @@ -882,10 +933,19 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for Ooyala videos | ||||
|         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or | ||||
|                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)) | ||||
|                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or | ||||
|                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)) | ||||
|         if mobj is not None: | ||||
|             return OoyalaIE._build_url_result(mobj.group('ec')) | ||||
|  | ||||
|         # Look for multiple Ooyala embeds on SBN network websites | ||||
|         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) | ||||
|         if mobj is not None: | ||||
|             embeds = self._parse_json(mobj.group(1), video_id, fatal=False) | ||||
|             if embeds: | ||||
|                 return _playlist_from_matches( | ||||
|                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala') | ||||
|  | ||||
|         # Look for Aparat videos | ||||
|         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage) | ||||
|         if mobj is not None: | ||||
| @@ -1012,7 +1072,12 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for embedded sbs.com.au player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1', | ||||
|             r'''(?x) | ||||
|             (?: | ||||
|                 <meta\s+property="og:video"\s+content=| | ||||
|                 <iframe[^>]+?src= | ||||
|             ) | ||||
|             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'SBS') | ||||
| @@ -1042,7 +1107,15 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Livestream') | ||||
|  | ||||
|         # Look for Zapiks embed | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Zapiks') | ||||
|  | ||||
|         def check_video(vurl): | ||||
|             if YoutubeIE.suitable(vurl): | ||||
|                 return True | ||||
|             vpath = compat_urlparse.urlparse(vurl).path | ||||
|             vext = determine_ext(vpath) | ||||
|             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml') | ||||
| @@ -1060,7 +1133,8 @@ class GenericIE(InfoExtractor): | ||||
|                     JWPlayerOptions| | ||||
|                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup | ||||
|                 ) | ||||
|                 .*?file\s*:\s*["\'](.*?)["\']''', webpage)) | ||||
|                 .*? | ||||
|                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage)) | ||||
|         if not found: | ||||
|             # Broaden the search a little bit | ||||
|             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) | ||||
|   | ||||
| @@ -34,8 +34,6 @@ class GoshgayIE(InfoExtractor): | ||||
|         duration = parse_duration(self._html_search_regex( | ||||
|             r'<span class="duration">\s*-?\s*(.*?)</span>', | ||||
|             webpage, 'duration', fatal=False)) | ||||
|         family_friendly = self._html_search_meta( | ||||
|             'isFamilyFriendly', webpage, default='false') | ||||
|  | ||||
|         flashvars = compat_parse_qs(self._html_search_regex( | ||||
|             r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"', | ||||
| @@ -49,5 +47,5 @@ class GoshgayIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'age_limit': 0 if family_friendly == 'true' else 18, | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|         } | ||||
|   | ||||
							
								
								
									
										46
									
								
								youtube_dl/extractor/historicfilms.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								youtube_dl/extractor/historicfilms.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,46 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import parse_duration | ||||
|  | ||||
|  | ||||
| class HistoricFilmsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.historicfilms.com/tapes/4728', | ||||
|         'md5': 'd4a437aec45d8d796a38a215db064e9a', | ||||
|         'info_dict': { | ||||
|             'id': '4728', | ||||
|             'ext': 'mov', | ||||
|             'title': 'Historic Films: GP-7', | ||||
|             'description': 'md5:1a86a0f3ac54024e419aba97210d959a', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'duration': 2096, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         tape_id = self._search_regex( | ||||
|             r'class="tapeId">([^<]+)<', webpage, 'tape id') | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|         thumbnail = self._html_search_meta( | ||||
|             'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage) | ||||
|         duration = parse_duration(self._html_search_meta( | ||||
|             'duration', webpage, 'duration')) | ||||
|  | ||||
|         video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|         } | ||||
							
								
								
									
										31
									
								
								youtube_dl/extractor/history.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								youtube_dl/extractor/history.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,31 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import smuggle_url | ||||
|  | ||||
|  | ||||
| class HistoryIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', | ||||
|         'md5': '6fe632d033c92aa10b8d4a9be047a7c5', | ||||
|         'info_dict': { | ||||
|             'id': 'bLx5Dv5Aka1G', | ||||
|             'ext': 'mp4', | ||||
|             'title': "Bet You Didn't Know: Valentine's Day", | ||||
|             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id, | ||||
|             webpage, 'video url') | ||||
|  | ||||
|         return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}})) | ||||
| @@ -34,6 +34,9 @@ class IGNIE(InfoExtractor): | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', | ||||
|             'info_dict': { | ||||
|                 'id': '100-little-things-in-gta-5-that-will-blow-your-mind', | ||||
|             }, | ||||
|             'playlist': [ | ||||
|                 { | ||||
|                     'info_dict': { | ||||
|   | ||||
							
								
								
									
										97
									
								
								youtube_dl/extractor/imgur.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								youtube_dl/extractor/imgur.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,97 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     mimetype2ext, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ImgurIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://i.imgur.com/A61SaA1.gifv', | ||||
|         'info_dict': { | ||||
|             'id': 'A61SaA1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', | ||||
|             'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://imgur.com/A61SaA1', | ||||
|         'info_dict': { | ||||
|             'id': 'A61SaA1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', | ||||
|             'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         width = int_or_none(self._search_regex( | ||||
|             r'<param name="width" value="([0-9]+)"', | ||||
|             webpage, 'width', fatal=False)) | ||||
|         height = int_or_none(self._search_regex( | ||||
|             r'<param name="height" value="([0-9]+)"', | ||||
|             webpage, 'height', fatal=False)) | ||||
|  | ||||
|         video_elements = self._search_regex( | ||||
|             r'(?s)<div class="video-elements">(.*?)</div>', | ||||
|             webpage, 'video elements', default=None) | ||||
|         if not video_elements: | ||||
|             raise ExtractorError( | ||||
|                 'No sources found for video %s. Maybe an image?' % video_id, | ||||
|                 expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements): | ||||
|             formats.append({ | ||||
|                 'format_id': m.group('type').partition('/')[2], | ||||
|                 'url': self._proto_relative_url(m.group('src')), | ||||
|                 'ext': mimetype2ext(m.group('type')), | ||||
|                 'acodec': 'none', | ||||
|                 'width': width, | ||||
|                 'height': height, | ||||
|                 'http_headers': { | ||||
|                     'User-Agent': 'youtube-dl (like wget)', | ||||
|                 }, | ||||
|             }) | ||||
|  | ||||
|         gif_json = self._search_regex( | ||||
|             r'(?s)var\s+videoItem\s*=\s*(\{.*?\})', | ||||
|             webpage, 'GIF code', fatal=False) | ||||
|         if gif_json: | ||||
|             gifd = self._parse_json( | ||||
|                 gif_json, video_id, transform_source=js_to_json) | ||||
|             formats.append({ | ||||
|                 'format_id': 'gif', | ||||
|                 'preference': -10, | ||||
|                 'width': width, | ||||
|                 'height': height, | ||||
|                 'ext': 'gif', | ||||
|                 'acodec': 'none', | ||||
|                 'vcodec': 'gif', | ||||
|                 'container': 'gif', | ||||
|                 'url': self._proto_relative_url(gifd['gifUrl']), | ||||
|                 'filesize': gifd.get('size'), | ||||
|                 'http_headers': { | ||||
|                     'User-Agent': 'youtube-dl (like wget)', | ||||
|                 }, | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'title': self._og_search_title(webpage), | ||||
|         } | ||||
| @@ -80,9 +80,6 @@ class IzleseneIE(InfoExtractor): | ||||
|             r'comment_count\s*=\s*\'([^\']+)\';', | ||||
|             webpage, 'comment_count', fatal=False) | ||||
|  | ||||
|         family_friendly = self._html_search_meta( | ||||
|             'isFamilyFriendly', webpage, 'age limit', fatal=False) | ||||
|  | ||||
|         content_url = self._html_search_meta( | ||||
|             'contentURL', webpage, 'content URL', fatal=False) | ||||
|         ext = determine_ext(content_url, 'mp4') | ||||
| @@ -120,6 +117,6 @@ class IzleseneIE(InfoExtractor): | ||||
|             'duration': duration, | ||||
|             'view_count': int_or_none(view_count), | ||||
|             'comment_count': int_or_none(comment_count), | ||||
|             'age_limit': 18 if family_friendly == 'False' else 0, | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -1,23 +1,26 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import random | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class Laola1TvIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html', | ||||
|         'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', | ||||
|         'info_dict': { | ||||
|             'id': '250019', | ||||
|             'id': '227883', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Bitburger Open Grand Prix Gold - Court 1', | ||||
|             'categories': ['Badminton'], | ||||
|             'uploader': 'BWF - Badminton World Federation', | ||||
|             'is_live': True, | ||||
|             'title': 'Straubing Tigers - Kölner Haie', | ||||
|             'categories': ['Eishockey'], | ||||
|             'is_live': False, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -43,15 +46,26 @@ class Laola1TvIE(InfoExtractor): | ||||
|             r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe) | ||||
|         flashvars = dict((m[0], m[1]) for m in flashvars_m) | ||||
|  | ||||
|         partner_id = self._search_regex( | ||||
|             r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id') | ||||
|  | ||||
|         xml_url = ('http://www.laola1.tv/server/hd_video.php?' + | ||||
|                    'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % ( | ||||
|                        video_id, portal, lang)) | ||||
|                    'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % ( | ||||
|                        video_id, partner_id, portal, lang)) | ||||
|         hd_doc = self._download_xml(xml_url, video_id) | ||||
|  | ||||
|         title = hd_doc.find('.//video/title').text | ||||
|         flash_url = hd_doc.find('.//video/url').text | ||||
|         categories = hd_doc.find('.//video/meta_sports').text.split(',') | ||||
|         uploader = hd_doc.find('.//video/meta_organistation').text | ||||
|         title = xpath_text(hd_doc, './/video/title', fatal=True) | ||||
|         flash_url = xpath_text(hd_doc, './/video/url', fatal=True) | ||||
|         uploader = xpath_text(hd_doc, './/video/meta_organistation') | ||||
|  | ||||
|         is_live = xpath_text(hd_doc, './/video/islive') == 'true' | ||||
|         if is_live: | ||||
|             raise ExtractorError( | ||||
|                 'Live streams are not supported by the f4m downloader.') | ||||
|  | ||||
|         categories = xpath_text(hd_doc, './/video/meta_sports') | ||||
|         if categories: | ||||
|             categories = categories.split(',') | ||||
|  | ||||
|         ident = random.randint(10000000, 99999999) | ||||
|         token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % ( | ||||
| @@ -60,15 +74,16 @@ class Laola1TvIE(InfoExtractor): | ||||
|         token_doc = self._download_xml( | ||||
|             token_url, video_id, note='Downloading token') | ||||
|         token_attrib = token_doc.find('.//token').attrib | ||||
|         if token_attrib.get('auth') == 'blocked': | ||||
|             raise ExtractorError('Token error: ' % token_attrib.get('comment')) | ||||
|         if token_attrib.get('auth') in ('blocked', 'restricted'): | ||||
|             raise ExtractorError( | ||||
|                 'Token error: %s' % token_attrib.get('comment'), expected=True) | ||||
|  | ||||
|         video_url = '%s?hdnea=%s&hdcore=3.2.0' % ( | ||||
|             token_attrib['url'], token_attrib['auth']) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'is_live': True, | ||||
|             'is_live': is_live, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|             'uploader': uploader, | ||||
|   | ||||
| @@ -37,6 +37,7 @@ class LivestreamIE(InfoExtractor): | ||||
|         'url': 'http://new.livestream.com/tedx/cityenglish', | ||||
|         'info_dict': { | ||||
|             'title': 'TEDCity2.0 (English)', | ||||
|             'id': '2245590', | ||||
|         }, | ||||
|         'playlist_mincount': 4, | ||||
|     }, { | ||||
| @@ -148,7 +149,8 @@ class LivestreamIE(InfoExtractor): | ||||
|                   if is_relevant(video_data, video_id)] | ||||
|         if video_id is None: | ||||
|             # This is an event page: | ||||
|             return self.playlist_result(videos, info['id'], info['full_name']) | ||||
|             return self.playlist_result( | ||||
|                 videos, '%s' % info['id'], info['full_name']) | ||||
|         else: | ||||
|             if not videos: | ||||
|                 raise ExtractorError('Cannot find video %s' % video_id) | ||||
|   | ||||
| @@ -3,7 +3,6 @@ from __future__ import unicode_literals | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
| @@ -16,7 +15,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class LyndaIE(SubtitlesInfoExtractor): | ||||
| class LyndaIE(InfoExtractor): | ||||
|     IE_NAME = 'lynda' | ||||
|     IE_DESC = 'lynda.com videos' | ||||
|     _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html' | ||||
| @@ -88,11 +87,7 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|         self._check_formats(formats, video_id) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, page) | ||||
|             return | ||||
|  | ||||
|         subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page)) | ||||
|         subtitles = self.extract_subtitles(video_id, page) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -144,38 +139,31 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|         if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: | ||||
|             raise ExtractorError('Unable to log in') | ||||
|  | ||||
|     def _fix_subtitles(self, subtitles): | ||||
|         if subtitles is None: | ||||
|             return subtitles  # subtitles not requested | ||||
|  | ||||
|         fixed_subtitles = {} | ||||
|         for k, v in subtitles.items(): | ||||
|             subs = json.loads(v) | ||||
|             if len(subs) == 0: | ||||
|     def _fix_subtitles(self, subs): | ||||
|         srt = '' | ||||
|         for pos in range(0, len(subs) - 1): | ||||
|             seq_current = subs[pos] | ||||
|             m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode']) | ||||
|             if m_current is None: | ||||
|                 continue | ||||
|             srt = '' | ||||
|             for pos in range(0, len(subs) - 1): | ||||
|                 seq_current = subs[pos] | ||||
|                 m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode']) | ||||
|                 if m_current is None: | ||||
|                     continue | ||||
|                 seq_next = subs[pos + 1] | ||||
|                 m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode']) | ||||
|                 if m_next is None: | ||||
|                     continue | ||||
|                 appear_time = m_current.group('timecode') | ||||
|                 disappear_time = m_next.group('timecode') | ||||
|                 text = seq_current['Caption'] | ||||
|                 srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text) | ||||
|             if srt: | ||||
|                 fixed_subtitles[k] = srt | ||||
|         return fixed_subtitles | ||||
|             seq_next = subs[pos + 1] | ||||
|             m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode']) | ||||
|             if m_next is None: | ||||
|                 continue | ||||
|             appear_time = m_current.group('timecode') | ||||
|             disappear_time = m_next.group('timecode') | ||||
|             text = seq_current['Caption'] | ||||
|             srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text) | ||||
|         if srt: | ||||
|             return srt | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|     def _get_subtitles(self, video_id, webpage): | ||||
|         url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id | ||||
|         sub = self._download_webpage(url, None, False) | ||||
|         sub_json = json.loads(sub) | ||||
|         return {'en': url} if len(sub_json) > 0 else {} | ||||
|         subs = self._download_json(url, None, False) | ||||
|         if subs: | ||||
|             return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} | ||||
|         else: | ||||
|             return {} | ||||
|  | ||||
|  | ||||
| class LyndaCourseIE(InfoExtractor): | ||||
|   | ||||
| @@ -5,9 +5,6 @@ import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
| @@ -108,7 +105,6 @@ class OCWMITIE(InfoExtractor): | ||||
|                 'upload_date': '20121109', | ||||
|                 'uploader_id': 'MIT', | ||||
|                 'uploader': 'MIT OpenCourseWare', | ||||
|                 # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt' | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
| @@ -121,7 +117,6 @@ class OCWMITIE(InfoExtractor): | ||||
|                 'uploader_id': 'MIT', | ||||
|                 'uploader': 'MIT OpenCourseWare', | ||||
|                 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.', | ||||
|                 # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT' | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
| @@ -140,7 +135,6 @@ class OCWMITIE(InfoExtractor): | ||||
|             metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1)) | ||||
|             metadata = re.split(r', ?', metadata) | ||||
|             yt = metadata[1] | ||||
|             subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7]) | ||||
|         else: | ||||
|             # search for call to ocw_embed_media(container_id, media_url, provider, page_url, image_url, captions_file) | ||||
|             embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage) | ||||
| @@ -148,7 +142,6 @@ class OCWMITIE(InfoExtractor): | ||||
|                 metadata = re.sub(r'[\'"]', '', embed_media.group(1)) | ||||
|                 metadata = re.split(r', ?', metadata) | ||||
|                 yt = metadata[1] | ||||
|                 subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5]) | ||||
|             else: | ||||
|                 raise ExtractorError('Unable to find embedded YouTube video.') | ||||
|         video_id = YoutubeIE.extract_id(yt) | ||||
| @@ -159,7 +152,5 @@ class OCWMITIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'url': yt, | ||||
|             'url_transparent' | ||||
|             'subtitles': subs, | ||||
|             'ie_key': 'Youtube', | ||||
|         } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ class MixcloudIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)' | ||||
|     IE_NAME = 'mixcloud' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', | ||||
|         'info_dict': { | ||||
|             'id': 'dholbach-cryptkeeper', | ||||
| @@ -33,7 +33,20 @@ class MixcloudIE(InfoExtractor): | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', | ||||
|         'info_dict': { | ||||
|             'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat', | ||||
|             'ext': 'm4a', | ||||
|             'title': 'Electric Relaxation vol. 3', | ||||
|             'description': 'md5:2b8aec6adce69f9d41724647c65875e8', | ||||
|             'uploader': 'Daniel Drumz', | ||||
|             'uploader_id': 'gillespeterson', | ||||
|             'thumbnail': 're:https?://.*\.jpg', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _get_url(self, track_id, template_url): | ||||
|         server_count = 30 | ||||
| @@ -60,7 +73,7 @@ class MixcloudIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, track_id) | ||||
|  | ||||
|         preview_url = self._search_regex( | ||||
|             r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url') | ||||
|             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url') | ||||
|         song_url = preview_url.replace('/previews/', '/c/originals/') | ||||
|         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) | ||||
|         final_song_url = self._get_url(track_id, template_url) | ||||
|   | ||||
| @@ -2,7 +2,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| @@ -23,7 +23,7 @@ def _media_xml_tag(tag): | ||||
|     return '{http://search.yahoo.com/mrss/}%s' % tag | ||||
|  | ||||
|  | ||||
| class MTVServicesInfoExtractor(SubtitlesInfoExtractor): | ||||
| class MTVServicesInfoExtractor(InfoExtractor): | ||||
|     _MOBILE_TEMPLATE = None | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -95,25 +95,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor): | ||||
|  | ||||
|     def _extract_subtitles(self, mdoc, mtvn_id): | ||||
|         subtitles = {} | ||||
|         FORMATS = { | ||||
|             'scc': 'cea-608', | ||||
|             'eia-608': 'cea-608', | ||||
|             'xml': 'ttml', | ||||
|         } | ||||
|         subtitles_format = FORMATS.get( | ||||
|             self._downloader.params.get('subtitlesformat'), 'ttml') | ||||
|         for transcript in mdoc.findall('.//transcript'): | ||||
|             if transcript.get('kind') != 'captions': | ||||
|                 continue | ||||
|             lang = transcript.get('srclang') | ||||
|             for typographic in transcript.findall('./typographic'): | ||||
|                 captions_format = typographic.get('format') | ||||
|                 if captions_format == subtitles_format: | ||||
|                     subtitles[lang] = compat_str(typographic.get('src')) | ||||
|                     break | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(mtvn_id, subtitles) | ||||
|         return self.extract_subtitles(mtvn_id, subtitles) | ||||
|             subtitles[lang] = [{ | ||||
|                 'url': compat_str(typographic.get('src')), | ||||
|                 'ext': typographic.get('format') | ||||
|             } for typographic in transcript.findall('./typographic')] | ||||
|         return subtitles | ||||
|  | ||||
|     def _get_video_info(self, itemdoc): | ||||
|         uri = itemdoc.find('guid').text | ||||
| @@ -196,8 +186,6 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor): | ||||
|                 webpage, 'mgid') | ||||
|  | ||||
|         videos_info = self._get_videos_info(mgid) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             return | ||||
|         return videos_info | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										38
									
								
								youtube_dl/extractor/nationalgeographic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								youtube_dl/extractor/nationalgeographic.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
|     url_basename, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NationalGeographicIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo', | ||||
|         'info_dict': { | ||||
|             'id': '4DmDACA6Qtk_', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Mating Crabs Busted by Sharks', | ||||
|             'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         name = url_basename(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url') | ||||
|         guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid') | ||||
|  | ||||
|         feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name) | ||||
|         content = feed.find('.//{http://search.yahoo.com/mrss/}content') | ||||
|         theplatform_id = url_basename(content.attrib.get('url')) | ||||
|  | ||||
|         return self.url_result(smuggle_url( | ||||
|             'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id, | ||||
|             # For some reason, the normal links don't work and we must force the use of f4m | ||||
|             {'force_smil_url': True})) | ||||
| @@ -1,7 +1,6 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
| @@ -19,13 +18,13 @@ class NBCIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', | ||||
|             'url': 'http://www.nbc.com/the-tonight-show/segments/112966', | ||||
|             # md5 checksum is not stable | ||||
|             'info_dict': { | ||||
|                 'id': 'bTmnLCvIbaaH', | ||||
|                 'id': 'c9xnCo0YPOPH', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'I Am a Firefighter', | ||||
|                 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', | ||||
|                 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', | ||||
|                 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -52,9 +51,9 @@ class NBCIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class NBCNewsIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/ | ||||
|         ((video/.+?/(?P<id>\d+))| | ||||
|         (feature/[^/]+/(?P<title>.+))) | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ | ||||
|         (?:video/.+?/(?P<id>\d+)| | ||||
|         (?:feature|nightly-news)/[^/]+/(?P<title>.+)) | ||||
|         ''' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -89,6 +88,16 @@ class NBCNewsIE(InfoExtractor): | ||||
|                 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', | ||||
|             'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d', | ||||
|             'info_dict': { | ||||
|                 'id': 'sekXqyTVnmN3', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', | ||||
|                 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -107,13 +116,13 @@ class NBCNewsIE(InfoExtractor): | ||||
|                 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, | ||||
|             } | ||||
|         else: | ||||
|             # "feature" pages use theplatform.com | ||||
|             # "feature" and "nightly-news" pages use theplatform.com | ||||
|             title = mobj.group('title') | ||||
|             webpage = self._download_webpage(url, title) | ||||
|             bootstrap_json = self._search_regex( | ||||
|                 r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json', | ||||
|                 flags=re.MULTILINE) | ||||
|             bootstrap = json.loads(bootstrap_json) | ||||
|                 r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$', | ||||
|                 webpage, 'bootstrap json', flags=re.MULTILINE) | ||||
|             bootstrap = self._parse_json(bootstrap_json, video_id) | ||||
|             info = bootstrap['results'][0]['video'] | ||||
|             mpxid = info['mpxId'] | ||||
|  | ||||
|   | ||||
							
								
								
									
										80
									
								
								youtube_dl/extractor/nerdist.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								youtube_dl/extractor/nerdist.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     parse_iso8601, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NerdistIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?nerdist\.com/vepisode/(?P<id>[^/?#]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.nerdist.com/vepisode/exclusive-which-dc-characters-w', | ||||
|         'md5': '3698ed582931b90d9e81e02e26e89f23', | ||||
|         'info_dict': { | ||||
|             'display_id': 'exclusive-which-dc-characters-w', | ||||
|             'id': 'RPHpvJyr', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Your TEEN TITANS Revealed! Who\'s on the show?', | ||||
|             'thumbnail': 're:^https?://.*/thumbs/.*\.jpg$', | ||||
|             'description': 'Exclusive: Find out which DC Comics superheroes will star in TEEN TITANS Live-Action TV Show on Nerdist News with Jessica Chobot!', | ||||
|             'uploader': 'Eric Diaz', | ||||
|             'upload_date': '20150202', | ||||
|             'timestamp': 1422892808, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'''(?x)<script\s+(?:type="text/javascript"\s+)? | ||||
|                 src="https?://content\.nerdist\.com/players/([a-zA-Z0-9_]+)-''', | ||||
|             webpage, 'video ID') | ||||
|         timestamp = parse_iso8601(self._html_search_meta( | ||||
|             'shareaholic:article_published_time', webpage, 'upload date')) | ||||
|         uploader = self._html_search_meta( | ||||
|             'shareaholic:article_author_name', webpage, 'article author') | ||||
|  | ||||
|         doc = self._download_xml( | ||||
|             'http://content.nerdist.com/jw6/%s.xml' % video_id, video_id) | ||||
|         video_info = doc.find('.//item') | ||||
|         title = xpath_text(video_info, './title', fatal=True) | ||||
|         description = xpath_text(video_info, './description') | ||||
|         thumbnail = xpath_text( | ||||
|             video_info, './{http://rss.jwpcdn.com/}image', 'thumbnail') | ||||
|  | ||||
|         formats = [] | ||||
|         for source in video_info.findall('./{http://rss.jwpcdn.com/}source'): | ||||
|             vurl = source.attrib['file'] | ||||
|             ext = determine_ext(vurl) | ||||
|             if ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     vurl, video_id, entry_protocol='m3u8_native', ext='mp4', | ||||
|                     preference=0)) | ||||
|             elif ext == 'smil': | ||||
|                 formats.extend(self._extract_smil_formats( | ||||
|                     vurl, video_id, fatal=False | ||||
|                 )) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'format_id': ext, | ||||
|                     'url': vurl, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|             'formats': formats, | ||||
|             'uploader': uploader, | ||||
|         } | ||||
| @@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor): | ||||
|             'timestamp': 1344858571, | ||||
|             'age_limit': 12, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'Download only works from Germany', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -46,7 +46,18 @@ class NFLIE(InfoExtractor): | ||||
|                 'timestamp': 1388354455, | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish', | ||||
|             'info_dict': { | ||||
|                 'id': '0ap3000000467607', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Frustrations flare on the field', | ||||
|                 'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.', | ||||
|                 'timestamp': 1422850320, | ||||
|                 'upload_date': '20150202', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -80,7 +91,11 @@ class NFLIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         config_url = NFLIE.prepend_host(host, self._search_regex( | ||||
|             r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL')) | ||||
|             r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL', | ||||
|             default='static/content/static/config/video/config.json')) | ||||
|         # For articles, the id in the url is not the video id | ||||
|         video_id = self._search_regex( | ||||
|             r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id) | ||||
|         config = self._download_json(config_url, video_id, | ||||
|                                      note='Downloading player config') | ||||
|         url_template = NFLIE.prepend_host( | ||||
|   | ||||
| @@ -1,8 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
| @@ -11,7 +9,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class NormalbootsIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$' | ||||
|     _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$' | ||||
|     _TEST = { | ||||
|         'url': 'http://normalboots.com/video/home-alone-games-jontron/', | ||||
|         'md5': '8bf6de238915dd501105b44ef5f1e0f6', | ||||
| @@ -30,19 +28,22 @@ class NormalbootsIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>', | ||||
|                                                  webpage, 'uploader') | ||||
|         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', | ||||
|                                                   webpage, 'date') | ||||
|         video_upload_date = unified_strdate(raw_upload_date) | ||||
|  | ||||
|         player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url') | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         video_upload_date = unified_strdate(self._html_search_regex( | ||||
|             r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', | ||||
|             webpage, 'date', fatal=False)) | ||||
|  | ||||
|         player_url = self._html_search_regex( | ||||
|             r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', | ||||
|             webpage, 'player url') | ||||
|         player_page = self._download_webpage(player_url, video_id) | ||||
|         video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file') | ||||
|         video_url = self._html_search_regex( | ||||
|             r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -22,7 +22,7 @@ class NPOBaseIE(InfoExtractor): | ||||
|  | ||||
| class NPOIE(NPOBaseIE): | ||||
|     IE_NAME = 'npo.nl' | ||||
|     _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -161,6 +161,13 @@ class NPOIE(NPOBaseIE): | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         if metadata.get('tt888') == 'ja': | ||||
|             subtitles['nl'] = [{ | ||||
|                 'ext': 'vtt', | ||||
|                 'url': 'http://e.omroep.nl/tt888/%s' % video_id, | ||||
|             }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': metadata['titel'], | ||||
| @@ -169,12 +176,13 @@ class NPOIE(NPOBaseIE): | ||||
|             'upload_date': unified_strdate(metadata.get('gidsdatum')), | ||||
|             'duration': parse_duration(metadata.get('tijdsduur')), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NPOLiveIE(NPOBaseIE): | ||||
|     IE_NAME = 'npo.nl:live' | ||||
|     _VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.npo.nl/live/npo-1', | ||||
| @@ -249,6 +257,84 @@ class NPOLiveIE(NPOBaseIE): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NPORadioIE(InfoExtractor): | ||||
|     IE_NAME = 'npo.nl:radio' | ||||
|     _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.npo.nl/radio/radio-1', | ||||
|         'info_dict': { | ||||
|             'id': 'radio-1', | ||||
|             'ext': 'mp3', | ||||
|             'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     @staticmethod | ||||
|     def _html_get_attribute_regex(attribute): | ||||
|         return r'{0}\s*=\s*\'([^\']+)\''.format(attribute) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             self._html_get_attribute_regex('data-channel'), webpage, 'title') | ||||
|  | ||||
|         stream = self._parse_json( | ||||
|             self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'), | ||||
|             video_id) | ||||
|  | ||||
|         codec = stream.get('codec') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': stream['url'], | ||||
|             'title': self._live_title(title), | ||||
|             'acodec': codec, | ||||
|             'ext': codec, | ||||
|             'is_live': True, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NPORadioFragmentIE(InfoExtractor): | ||||
|     IE_NAME = 'npo.nl:radio:fragment' | ||||
|     _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.npo.nl/radio/radio-5/fragment/174356', | ||||
|         'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2', | ||||
|         'info_dict': { | ||||
|             'id': '174356', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Jubileumconcert Willeke Alberti', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         audio_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, audio_id) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id, | ||||
|             webpage, 'title') | ||||
|  | ||||
|         audio_url = self._search_regex( | ||||
|             r"data-streams='([^']+)'", webpage, 'audio url') | ||||
|  | ||||
|         return { | ||||
|             'id': audio_id, | ||||
|             'url': audio_url, | ||||
|             'title': title, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class TegenlichtVproIE(NPOIE): | ||||
|     IE_NAME = 'tegenlicht.vpro.nl' | ||||
|     _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?' | ||||
|   | ||||
| @@ -10,7 +10,6 @@ from ..utils import ( | ||||
|     parse_duration, | ||||
|     unified_strdate, | ||||
| ) | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
|  | ||||
| class NRKIE(InfoExtractor): | ||||
| @@ -73,7 +72,7 @@ class NRKIE(InfoExtractor): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NRKTVIE(SubtitlesInfoExtractor): | ||||
| class NRKTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -156,7 +155,7 @@ class NRKTVIE(SubtitlesInfoExtractor): | ||||
|         if self._downloader.params.get('verbose', False): | ||||
|             self.to_screen('[debug] %s' % txt) | ||||
|  | ||||
|     def _extract_captions(self, subtitlesurl, video_id, baseurl): | ||||
|     def _get_subtitles(self, subtitlesurl, video_id, baseurl): | ||||
|         url = "%s%s" % (baseurl, subtitlesurl) | ||||
|         self._debug_print('%s: Subtitle url: %s' % (video_id, url)) | ||||
|         captions = self._download_xml(url, video_id, 'Downloading subtitles') | ||||
| @@ -170,7 +169,10 @@ class NRKTVIE(SubtitlesInfoExtractor): | ||||
|             endtime = self._seconds2str(begin + duration) | ||||
|             text = '\n'.join(p.itertext()) | ||||
|             srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text) | ||||
|         return {lang: srt} | ||||
|         return {lang: [ | ||||
|             {'ext': 'ttml', 'url': url}, | ||||
|             {'ext': 'srt', 'data': srt}, | ||||
|         ]} | ||||
|  | ||||
|     def _extract_f4m(self, manifest_url, video_id): | ||||
|         return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id) | ||||
| @@ -243,10 +245,7 @@ class NRKTVIE(SubtitlesInfoExtractor): | ||||
|             webpage, 'subtitle URL', default=None) | ||||
|         subtitles = None | ||||
|         if subtitles_url: | ||||
|             subtitles = self._extract_captions(subtitles_url, video_id, baseurl) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|             subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
							
								
								
									
										68
									
								
								youtube_dl/extractor/ntvde.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								youtube_dl/extractor/ntvde.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,68 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NTVDeIE(InfoExtractor): | ||||
|     IE_NAME = 'n-tv.de' | ||||
|     _VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/videos/[^/?#]+/[^/?#]+-article(?P<id>.+)\.html' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html', | ||||
|         'md5': '6ef2514d4b1e8e03ca24b49e2f167153', | ||||
|         'info_dict': { | ||||
|             'id': '14438086', | ||||
|             'ext': 'mp4', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus', | ||||
|             'alt_title': 'Winterchaos auf deutschen Straßen', | ||||
|             'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.', | ||||
|             'duration': 4020, | ||||
|             'timestamp': 1422892797, | ||||
|             'upload_date': '20150202', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         info = self._parse_json(self._search_regex( | ||||
|             r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'), | ||||
|             video_id, transform_source=js_to_json) | ||||
|         timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp')) | ||||
|         vdata = self._parse_json(self._search_regex( | ||||
|             r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);', | ||||
|             webpage, 'player data'), | ||||
|             video_id, transform_source=js_to_json) | ||||
|         duration = parse_duration(vdata.get('duration')) | ||||
|         formats = [{ | ||||
|             'format_id': 'flash', | ||||
|             'url': 'rtmp://fms.n-tv.de/' + vdata['video'], | ||||
|         }, { | ||||
|             'format_id': 'mobile', | ||||
|             'url': 'http://video.n-tv.de' + vdata['videoMp4'], | ||||
|             'tbr': 400,  # estimation | ||||
|         }] | ||||
|         m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8'] | ||||
|         formats.extend(self._extract_m3u8_formats( | ||||
|             m3u8_url, video_id, ext='mp4', | ||||
|             entry_protocol='m3u8_native', preference=0)) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info['headline'], | ||||
|             'description': info.get('intro'), | ||||
|             'alt_title': info.get('kicker'), | ||||
|             'timestamp': timestamp, | ||||
|             'thumbnail': vdata.get('html5VideoPoster'), | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -1,87 +1,78 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| import re | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unescapeHTML | ||||
|     clean_html, | ||||
|     xpath_text, | ||||
|     int_or_none, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| class NTVIE(InfoExtractor): | ||||
| class NTVRuIE(InfoExtractor): | ||||
|     IE_NAME = 'ntv.ru' | ||||
|     _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)' | ||||
| 
 | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/novosti/863142/', | ||||
|             'md5': 'ba7ea172a91cb83eb734cad18c10e723', | ||||
|             'info_dict': { | ||||
|                 'id': '746000', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', | ||||
|                 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 136, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/video/novosti/750370/', | ||||
|             'md5': 'adecff79691b4d71e25220a191477124', | ||||
|             'info_dict': { | ||||
|                 'id': '750370', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', | ||||
|                 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 172, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', | ||||
|             'md5': '82dbd49b38e3af1d00df16acbeab260c', | ||||
|             'info_dict': { | ||||
|                 'id': '747480', | ||||
|                 'ext': 'flv', | ||||
|                 'title': '«Сегодня». 21 марта 2014 года. 16:00 ', | ||||
|                 'description': '«Сегодня». 21 марта 2014 года. 16:00 ', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '«Сегодня». 21 марта 2014 года. 16:00', | ||||
|                 'description': '«Сегодня». 21 марта 2014 года. 16:00', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 1496, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/kino/Koma_film', | ||||
|             'md5': 'f825770930937aa7e5aca0dc0d29319a', | ||||
|             'info_dict': { | ||||
|                 'id': '758100', | ||||
|                 'ext': 'flv', | ||||
|                 'id': '1007609', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Остросюжетный фильм «Кома»', | ||||
|                 'description': 'Остросюжетный фильм «Кома»', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 5592, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/', | ||||
|             'md5': '9320cd0e23f3ea59c330dc744e06ff3b', | ||||
|             'info_dict': { | ||||
|                 'id': '751482', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '«Дело врачей»: «Деревце жизни»', | ||||
|                 'description': '«Дело врачей»: «Деревце жизни»', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 2590, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
| 
 | ||||
| @@ -92,48 +83,37 @@ class NTVIE(InfoExtractor): | ||||
|     ] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
| 
 | ||||
|         page = self._download_webpage(url, video_id) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
| 
 | ||||
|         video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id') | ||||
|         video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id') | ||||
| 
 | ||||
|         player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML') | ||||
|         title = unescapeHTML(player.find('./data/title').text) | ||||
|         description = unescapeHTML(player.find('./data/description').text) | ||||
|         player = self._download_xml( | ||||
|             'http://www.ntv.ru/vi%s/' % video_id, | ||||
|             video_id, 'Downloading video XML') | ||||
|         title = clean_html(xpath_text(player, './data/title', 'title', fatal=True)) | ||||
|         description = clean_html(xpath_text(player, './data/description', 'description')) | ||||
| 
 | ||||
|         video = player.find('./data/video') | ||||
|         video_id = video.find('./id').text | ||||
|         thumbnail = video.find('./splash').text | ||||
|         duration = int(video.find('./totaltime').text) | ||||
|         view_count = int(video.find('./views').text) | ||||
|         puid22 = video.find('./puid22').text | ||||
|         video_id = xpath_text(video, './id', 'video id') | ||||
|         thumbnail = xpath_text(video, './splash', 'thumbnail') | ||||
|         duration = int_or_none(xpath_text(video, './totaltime', 'duration')) | ||||
|         view_count = int_or_none(xpath_text(video, './views', 'view count')) | ||||
| 
 | ||||
|         apps = { | ||||
|             '4': 'video1', | ||||
|             '7': 'video2', | ||||
|         } | ||||
| 
 | ||||
|         app = apps.get(puid22, apps['4']) | ||||
|         token = self._download_webpage( | ||||
|             'http://stat.ntv.ru/services/access/token', | ||||
|             video_id, 'Downloading access token') | ||||
| 
 | ||||
|         formats = [] | ||||
|         for format_id in ['', 'hi', 'webm']: | ||||
|             file = video.find('./%sfile' % format_id) | ||||
|             if file is None: | ||||
|             file_ = video.find('./%sfile' % format_id) | ||||
|             if file_ is None: | ||||
|                 continue | ||||
|             size = video.find('./%ssize' % format_id) | ||||
|             formats.append({ | ||||
|                 'url': 'rtmp://media.ntv.ru/%s' % app, | ||||
|                 'app': app, | ||||
|                 'play_path': file.text, | ||||
|                 'rtmp_conn': 'B:1', | ||||
|                 'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128', | ||||
|                 'page_url': 'http://www.ntv.ru', | ||||
|                 'flash_version': 'LNX 11,2,202,341', | ||||
|                 'rtmp_live': True, | ||||
|                 'ext': 'flv', | ||||
|                 'filesize': int(size.text), | ||||
|                 'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token), | ||||
|                 'filesize': int_or_none(size.text if size is not None else None), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
| 
 | ||||
| @@ -1,9 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
| @@ -11,7 +8,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class PatreonIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.patreon.com/creation?hid=743933', | ||||
| @@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor): | ||||
|                 'thumbnail': 're:^https?://.*$', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://www.patreon.com/creation?hid=1682498', | ||||
|             'info_dict': { | ||||
|                 'id': 'SU4fj_aEMVw', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'I\'m on Patreon!', | ||||
|                 'uploader': 'TraciJHines', | ||||
|                 'thumbnail': 're:^https?://.*$', | ||||
|                 'upload_date': '20150211', | ||||
|                 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', | ||||
|                 'uploader_id': 'TraciJHines', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'noplaylist': True, | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     # Currently Patreon exposes download URL via hidden CSS, so login is not | ||||
| @@ -65,26 +79,29 @@ class PatreonIE(InfoExtractor): | ||||
|     ''' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._og_search_title(webpage).strip() | ||||
|  | ||||
|         attach_fn = self._html_search_regex( | ||||
|             r'<div class="attach"><a target="_blank" href="([^"]+)">', | ||||
|             webpage, 'attachment URL', default=None) | ||||
|         embed = self._html_search_regex( | ||||
|             r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"', | ||||
|             webpage, 'embedded URL', default=None) | ||||
|  | ||||
|         if attach_fn is not None: | ||||
|             video_url = 'http://www.patreon.com' + attach_fn | ||||
|             thumbnail = self._og_search_thumbnail(webpage) | ||||
|             uploader = self._html_search_regex( | ||||
|                 r'<strong>(.*?)</strong> is creating', webpage, 'uploader') | ||||
|         elif embed is not None: | ||||
|             return self.url_result(embed) | ||||
|         else: | ||||
|             playlist_js = self._search_regex( | ||||
|             playlist = self._parse_json(self._search_regex( | ||||
|                 r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])', | ||||
|                 webpage, 'playlist JSON') | ||||
|             playlist_json = js_to_json(playlist_js) | ||||
|             playlist = json.loads(playlist_json) | ||||
|                 webpage, 'playlist JSON'), | ||||
|                 video_id, transform_source=js_to_json) | ||||
|             data = playlist[0] | ||||
|             video_url = self._proto_relative_url(data['mp3']) | ||||
|             thumbnail = self._proto_relative_url(data.get('cover')) | ||||
|   | ||||
| @@ -46,16 +46,17 @@ class PornHdIE(InfoExtractor): | ||||
|  | ||||
|         quality = qualities(['sd', 'hd']) | ||||
|         sources = json.loads(js_to_json(self._search_regex( | ||||
|             r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources'))) | ||||
|             r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]", | ||||
|             webpage, 'sources'))) | ||||
|         formats = [] | ||||
|         for container, s in sources.items(): | ||||
|             for qname, video_url in s.items(): | ||||
|                 formats.append({ | ||||
|                     'url': video_url, | ||||
|                     'container': container, | ||||
|                     'format_id': '%s-%s' % (container, qname), | ||||
|                     'quality': quality(qname), | ||||
|                 }) | ||||
|         for qname, video_url in sources.items(): | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'format_id': qname, | ||||
|                 'quality': quality(qname), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor): | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<', | ||||
|             r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) | ||||
|         if thumbnail: | ||||
| @@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor): | ||||
|             'formats': formats, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class PornHubPlaylistIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.pornhub.com/playlist/6201671', | ||||
|         'info_dict': { | ||||
|             'id': '6201671', | ||||
|             'title': 'P0p4', | ||||
|         }, | ||||
|         'playlist_mincount': 35, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub') | ||||
|             for video_url in set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage)) | ||||
|         ] | ||||
|  | ||||
|         playlist = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'), | ||||
|             playlist_id) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, playlist_id, playlist.get('title'), playlist.get('description')) | ||||
|   | ||||
							
								
								
									
										88
									
								
								youtube_dl/extractor/r7.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								youtube_dl/extractor/r7.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
|     unescapeHTML, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class R7IE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|                         (?: | ||||
|                             (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| | ||||
|                             noticias\.r7\.com(?:/[^/]+)+/[^/]+-| | ||||
|                             player\.r7\.com/video/i/ | ||||
|                         ) | ||||
|                         (?P<id>[\da-f]{24}) | ||||
|                         ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', | ||||
|         'md5': '403c4e393617e8e8ddc748978ee8efde', | ||||
|         'info_dict': { | ||||
|             'id': '54e7050b0cf2ff57e0279389', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'duration': 98, | ||||
|             'like_count': int, | ||||
|             'view_count': int, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://player.r7.com/video/i/%s' % video_id, video_id) | ||||
|  | ||||
|         item = self._parse_json(js_to_json(self._search_regex( | ||||
|             r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id) | ||||
|  | ||||
|         title = unescapeHTML(item['title']) | ||||
|         thumbnail = item.get('init', {}).get('thumbUri') | ||||
|         duration = None | ||||
|  | ||||
|         statistics = item.get('statistics', {}) | ||||
|         like_count = int_or_none(statistics.get('likes')) | ||||
|         view_count = int_or_none(statistics.get('views')) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_key, format_dict in item['playlist'][0].items(): | ||||
|             src = format_dict.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             format_id = format_dict.get('format') or format_key | ||||
|             if duration is None: | ||||
|                 duration = format_dict.get('duration') | ||||
|             if '.f4m' in src: | ||||
|                 formats.extend(self._extract_f4m_formats(src, video_id, preference=-1)) | ||||
|             elif src.endswith('.m3u8'): | ||||
|                 formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': src, | ||||
|                     'format_id': format_id, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'like_count': like_count, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -10,13 +8,13 @@ class RadioDeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' | ||||
|     _TEST = { | ||||
|         'url': 'http://ndr2.radio.de/', | ||||
|         'md5': '3b4cdd011bc59174596b6145cda474a4', | ||||
|         'info_dict': { | ||||
|             'id': 'ndr2', | ||||
|             'ext': 'mp3', | ||||
|             'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'description': 'md5:591c49c702db1a33751625ebfb67f273', | ||||
|             'thumbnail': 're:^https?://.*\.png', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -25,16 +23,15 @@ class RadioDeIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         radio_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, radio_id) | ||||
|         jscode = self._search_regex( | ||||
|             r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n", | ||||
|             webpage, 'broadcast') | ||||
|  | ||||
|         broadcast = json.loads(self._search_regex( | ||||
|             r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}', | ||||
|             webpage, 'broadcast')) | ||||
|  | ||||
|         broadcast = self._parse_json(jscode, radio_id) | ||||
|         title = self._live_title(broadcast['name']) | ||||
|         description = broadcast.get('description') or broadcast.get('shortDescription') | ||||
|         thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') | ||||
|         thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100') | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': stream['streamUrl'], | ||||
|   | ||||
| @@ -2,7 +2,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
| @@ -12,7 +12,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RaiIE(SubtitlesInfoExtractor): | ||||
| class RaiIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -89,15 +89,7 @@ class RaiIE(SubtitlesInfoExtractor): | ||||
|                 'ext': 'mp4', | ||||
|             }) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             page = self._download_webpage(url, video_id) | ||||
|             self._list_available_subtitles(video_id, page) | ||||
|             return | ||||
|  | ||||
|         subtitles = {} | ||||
|         if self._have_to_download_any_subtitles: | ||||
|             page = self._download_webpage(url, video_id) | ||||
|             subtitles = self.extract_subtitles(video_id, page) | ||||
|         subtitles = self.extract_subtitles(video_id, url) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -111,7 +103,8 @@ class RaiIE(SubtitlesInfoExtractor): | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|     def _get_subtitles(self, video_id, url): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         subtitles = {} | ||||
|         m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage) | ||||
|         if m: | ||||
| @@ -120,5 +113,8 @@ class RaiIE(SubtitlesInfoExtractor): | ||||
|             SRT_EXT = '.srt' | ||||
|             if captions.endswith(STL_EXT): | ||||
|                 captions = captions[:-len(STL_EXT)] + SRT_EXT | ||||
|             subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions) | ||||
|             subtitles['it'] = [{ | ||||
|                 'ext': 'srt', | ||||
|                 'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions), | ||||
|             }] | ||||
|         return subtitles | ||||
|   | ||||
| @@ -1,16 +1,25 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import parse_duration | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RtlXlIE(InfoExtractor): | ||||
|     IE_NAME = 'rtlxl.nl' | ||||
|     _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' | ||||
| class RtlNlIE(InfoExtractor): | ||||
|     IE_NAME = 'rtl.nl' | ||||
|     IE_DESC = 'rtl.nl and rtlxl.nl' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         https?://(www\.)? | ||||
|         (?: | ||||
|             rtlxl\.nl/\#!/[^/]+/| | ||||
|             rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid= | ||||
|         ) | ||||
|         (?P<id>[0-9a-f-]+)''' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', | ||||
|         'md5': 'cc16baa36a6c169391f0764fa6b16654', | ||||
|         'info_dict': { | ||||
| @@ -22,21 +31,30 @@ class RtlXlIE(InfoExtractor): | ||||
|             'upload_date': '20140814', | ||||
|             'duration': 576.880, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', | ||||
|         'md5': 'dea7474214af1271d91ef332fb8be7ea', | ||||
|         'info_dict': { | ||||
|             'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed', | ||||
|             'ext': 'mp4', | ||||
|             'timestamp': 1424039400, | ||||
|             'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag', | ||||
|             'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$', | ||||
|             'upload_date': '20150215', | ||||
|             'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         uuid = mobj.group('uuid') | ||||
|  | ||||
|         uuid = self._match_id(url) | ||||
|         info = self._download_json( | ||||
|             'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid, | ||||
|             uuid) | ||||
|  | ||||
|         material = info['material'][0] | ||||
|         episode_info = info['episodes'][0] | ||||
|  | ||||
|         progname = info['abstracts'][0]['name'] | ||||
|         subtitle = material['title'] or info['episodes'][0]['name'] | ||||
|         description = material.get('synopsis') or info['episodes'][0]['synopsis'] | ||||
|  | ||||
|         # Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118) | ||||
|         videopath = material['videopath'].replace('.f4m', '.m3u8') | ||||
| @@ -58,14 +76,29 @@ class RtlXlIE(InfoExtractor): | ||||
|                 'quality': 0, | ||||
|             } | ||||
|         ]) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnails = [] | ||||
|         meta = info.get('meta', {}) | ||||
|         for p in ('poster_base_url', '"thumb_base_url"'): | ||||
|             if not meta.get(p): | ||||
|                 continue | ||||
|  | ||||
|             thumbnails.append({ | ||||
|                 'url': self._proto_relative_url(meta[p] + uuid), | ||||
|                 'width': int_or_none(self._search_regex( | ||||
|                     r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)), | ||||
|                 'height': int_or_none(self._search_regex( | ||||
|                     r'/sz=[0-9]+x([0-9]+)', | ||||
|                     meta[p], 'thumbnail height', fatal=False)) | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             'id': uuid, | ||||
|             'title': '%s - %s' % (progname, subtitle), | ||||
|             'formats': formats, | ||||
|             'timestamp': material['original_date'], | ||||
|             'description': episode_info['synopsis'], | ||||
|             'description': description, | ||||
|             'duration': parse_duration(material.get('duration')), | ||||
|             'thumbnails': thumbnails, | ||||
|         } | ||||
|   | ||||
| @@ -91,6 +91,15 @@ class RTLnowIE(InfoExtractor): | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5', | ||||
|             'info_dict': { | ||||
|                 'id': '188729', | ||||
|                 'ext': 'flv', | ||||
|                 'upload_date': '20150204', | ||||
|                 'description': 'md5:5e1ce23095e61a79c166d134b683cecc', | ||||
|                 'title': 'Der Bachelor - Folge 4', | ||||
|             } | ||||
|         }, { | ||||
|             'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
| @@ -134,9 +143,18 @@ class RTLnowIE(InfoExtractor): | ||||
|                     'player_url': video_page_url + 'includes/vodplayer.swf', | ||||
|                 } | ||||
|             else: | ||||
|                 fmt = { | ||||
|                     'url': filename.text, | ||||
|                 } | ||||
|                 mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text) | ||||
|                 if mobj: | ||||
|                     fmt = { | ||||
|                         'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'), | ||||
|                         'play_path': 'mp4:' + mobj.group('play_path'), | ||||
|                         'page_url': url, | ||||
|                         'player_url': video_page_url + 'includes/vodplayer.swf', | ||||
|                     } | ||||
|                 else: | ||||
|                     fmt = { | ||||
|                         'url': filename.text, | ||||
|                     } | ||||
|             fmt.update({ | ||||
|                 'width': int_or_none(filename.get('width')), | ||||
|                 'height': int_or_none(filename.get('height')), | ||||
|   | ||||
| @@ -1,16 +1,16 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import js_to_json | ||||
|  | ||||
|  | ||||
| class RTPIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', | ||||
|         'md5': 'e736ce0c665e459ddb818546220b4ef8', | ||||
|         'info_dict': { | ||||
|             'id': 'e174042', | ||||
|             'ext': 'mp3', | ||||
| @@ -18,9 +18,6 @@ class RTPIE(InfoExtractor): | ||||
|             'description': 'As paixões musicais de António Cartaxo e António Macedo', | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # RTMP download | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', | ||||
|         'only_matching': True, | ||||
| @@ -37,20 +34,48 @@ class RTPIE(InfoExtractor): | ||||
|  | ||||
|         player_config = self._search_regex( | ||||
|             r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config') | ||||
|         config = json.loads(js_to_json(player_config)) | ||||
|         config = self._parse_json(player_config, video_id) | ||||
|  | ||||
|         path, ext = config.get('file').rsplit('.', 1) | ||||
|         formats = [{ | ||||
|             'format_id': 'rtmp', | ||||
|             'ext': ext, | ||||
|             'vcodec': config.get('type') == 'audio' and 'none' or None, | ||||
|             'preference': -2, | ||||
|             'url': 'rtmp://{streamer:s}/{application:s}'.format(**config), | ||||
|             'app': config.get('application'), | ||||
|             'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path), | ||||
|             'page_url': url, | ||||
|             'url': 'rtmp://{streamer:s}/{application:s}'.format(**config), | ||||
|             'rtmp_live': config.get('live', False), | ||||
|             'ext': ext, | ||||
|             'vcodec': config.get('type') == 'audio' and 'none' or None, | ||||
|             'player_url': 'http://programas.rtp.pt/play/player.swf?v3', | ||||
|             'rtmp_real_time': True, | ||||
|         }] | ||||
|  | ||||
|         # Construct regular HTTP download URLs | ||||
|         replacements = { | ||||
|             'audio': { | ||||
|                 'format_id': 'mp3', | ||||
|                 'pattern': r'^nas2\.share/wavrss/', | ||||
|                 'repl': 'http://rsspod.rtp.pt/podcasts/', | ||||
|                 'vcodec': 'none', | ||||
|             }, | ||||
|             'video': { | ||||
|                 'format_id': 'mp4_h264', | ||||
|                 'pattern': r'^nas2\.share/h264/', | ||||
|                 'repl': 'http://rsspod.rtp.pt/videocasts/', | ||||
|                 'vcodec': 'h264', | ||||
|             }, | ||||
|         } | ||||
|         r = replacements[config['type']] | ||||
|         if re.match(r['pattern'], config['file']) is not None: | ||||
|             formats.append({ | ||||
|                 'format_id': r['format_id'], | ||||
|                 'url': re.sub(r['pattern'], r['repl'], config['file']), | ||||
|                 'vcodec': r['vcodec'], | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user