Compare commits
	
		
			308 Commits
		
	
	
		
			2014.02.19
			...
			2014.03.21
		
	
| Author | SHA1 | Date | |
|---|---|---|---|
|  | d77650525d | ||
|  | 3e50c29984 | ||
|  | 64e7ad6045 | ||
|  | 23f4a93bb4 | ||
|  | 6f13b055f1 | ||
|  | 1f91bd15c3 | ||
|  | 11a15be4ce | ||
|  | 14e17e18cb | ||
|  | 1b124d1942 | ||
|  | 747373d4ae | ||
|  | 18d367c0a5 | ||
|  | a1a530b067 | ||
|  | cb9722cb3f | ||
|  | 773c0b4bb8 | ||
|  | 23c322a531 | ||
|  | 7e8c0af004 | ||
|  | d2983ccb25 | ||
|  | f24e9833dc | ||
|  | bc2bdf5709 | ||
|  | 627a209f74 | ||
|  | 1a4895453a | ||
|  | aab74fa106 | ||
|  | 2bd9efd4c2 | ||
|  | 39a743fb9b | ||
|  | 4966a0b22d | ||
|  | fc26023120 | ||
|  | 8d7c0cca13 | ||
|  | f66ede4328 | ||
|  | cc88b90ec8 | ||
|  | b6c5fa9a0b | ||
|  | dff10eaa77 | ||
|  | 4e6f9aeca1 | ||
|  | e68301af21 | ||
|  | 17286a96f2 | ||
|  | 0892363e6d | ||
|  | f102372b5f | ||
|  | ecbe1ad207 | ||
|  | 9d840c43b5 | ||
|  | 6f50f63382 | ||
|  | ff14fc4964 | ||
|  | e125c21531 | ||
|  | 93d020dd65 | ||
|  | a7515ec265 | ||
|  | b6c1ceccc2 | ||
|  | 4056ad8f36 | ||
|  | 6563837ee1 | ||
|  | fd5e6f7ef2 | ||
|  | 15fd51b37c | ||
|  | f1cef7a9ff | ||
|  | 8264223511 | ||
|  | bc6d597828 | ||
|  | aba77bbfc2 | ||
|  | 955c451456 | ||
|  | e5de3f6c89 | ||
|  | 2a1db721d4 | ||
|  | 1e0eb60f1a | ||
|  | 87a29e6f25 | ||
|  | c3d36f134f | ||
|  | 84769e708c | ||
|  | 9d2ecdbc71 | ||
|  | 9b69af5342 | ||
|  | c21215b421 | ||
|  | cddcfd90b4 | ||
|  | f36aacba0f | ||
|  | 355271fb61 | ||
|  | 2a5b502364 | ||
|  | 98ff9d82d4 | ||
|  | b1ff87224c | ||
|  | b461641fb9 | ||
|  | b047de6f6e | ||
|  | 34ca5d9ba0 | ||
|  | 60cc4dc4b4 | ||
|  | db95dc13a1 | ||
|  | 777ac90791 | ||
|  | 04f9bebbcb | ||
|  | 4ea3137e41 | ||
|  | a0792b738e | ||
|  | 19a41fc613 | ||
|  | 3ee52157fb | ||
|  | c4d197ee2d | ||
|  | a33932cfe3 | ||
|  | bcf89ce62c | ||
|  | e3899d0e00 | ||
|  | dcb00da49c | ||
|  | aa51d20d19 | ||
|  | ae7ed92057 | ||
|  | e45b31d9bd | ||
|  | 5a25f39653 | ||
|  | 963d7ec412 | ||
|  | e712d94adf | ||
|  | 6a72423955 | ||
|  | 4126826b10 | ||
|  | b773ead7fd | ||
|  | 855e2750bc | ||
|  | 805ef3c60b | ||
|  | fbc2dcb40b | ||
|  | 5375d7ad84 | ||
|  | 90f3476180 | ||
|  | ee95c09333 | ||
|  | 75d06db9fc | ||
|  | 439a1fffcb | ||
|  | 9d9d70c462 | ||
|  | b4a186b7be | ||
|  | bdebf51c8f | ||
|  | 264b86f9b4 | ||
|  | 9e55e37a2e | ||
|  | 1471956573 | ||
|  | 27865b2169 | ||
|  | 6d07ce0162 | ||
|  | edb7fc5435 | ||
|  | 31f77343f2 | ||
|  | 63ad031583 | ||
|  | 957688cee6 | ||
|  | 806d6c2e8c | ||
|  | 0ef68e04d9 | ||
|  | a496524db2 | ||
|  | 935c7360cc | ||
|  | 340b046876 | ||
|  | cc1db7f9b7 | ||
|  | a4ff6c4762 | ||
|  | 1060425cbb | ||
|  | e9c092f125 | ||
|  | 22ff5d2105 | ||
|  | 136db7881b | ||
|  | dae313e725 | ||
|  | b74fa8cd2c | ||
|  | 94eae04c94 | ||
|  | 16ff7ebc77 | ||
|  | c361c505b0 | ||
|  | d37c07c575 | ||
|  | 9d6105c9f0 | ||
|  | 8dec03ecba | ||
|  | 826547870b | ||
|  | 52d6a9a61d | ||
|  | ad242b5fbc | ||
|  | 3524175625 | ||
|  | 7b9965ea93 | ||
|  | 0a5bce566f | ||
|  | 8012bd2424 | ||
|  | f55a1f0a88 | ||
|  | bacac173a9 | ||
|  | ca1fee34f2 | ||
|  | 6dadaa9930 | ||
|  | 553f6e4633 | ||
|  | 652bee05f0 | ||
|  | d63516e9cd | ||
|  | e477dcf649 | ||
|  | 9d3f7781f3 | ||
|  | c7095dada3 | ||
|  | 607dbbad76 | ||
|  | 17b75c0de1 | ||
|  | ab24f4f3be | ||
|  | e1a52d9e10 | ||
|  | d0ff838433 | ||
|  | b37b94501c | ||
|  | cb3bb2cfef | ||
|  | e2cc7983e9 | ||
|  | c9ae7b9565 | ||
|  | 86fb4347f7 | ||
|  | 2fcec131f5 | ||
|  | 9f62eaf4ef | ||
|  | f92259c026 | ||
|  | 0afef30b23 | ||
|  | dcdfd1c711 | ||
|  | 2acc1f8f50 | ||
|  | 2c39b0c695 | ||
|  | e77c5b4f63 | ||
|  | 409a16cb72 | ||
|  | 94d5e90b4f | ||
|  | 2d73b45805 | ||
|  | 271a2dbfa2 | ||
|  | bf4adcac66 | ||
|  | fb8b8fdd62 | ||
|  | 5a0b26252e | ||
|  | 7d78f0cc48 | ||
|  | f00fc78674 | ||
|  | 392017874c | ||
|  | c3cb92d1ab | ||
|  | aa5590fa07 | ||
|  | 8cfb5bbf92 | ||
|  | 69bb54ebf9 | ||
|  | ca97a56e4b | ||
|  | fc26f3b4c2 | ||
|  | f604c93c64 | ||
|  | dc3727b65c | ||
|  | aba3231de1 | ||
|  | 9193bab91d | ||
|  | fbcf3e416d | ||
|  | c0e5d85631 | ||
|  | ca7fa3dcb3 | ||
|  | 4ccfba28d9 | ||
|  | abb82f1ddc | ||
|  | cda008cff1 | ||
|  | 1877a14049 | ||
|  | 546582ec3e | ||
|  | 4534485586 | ||
|  | a9ab8855e4 | ||
|  | 8a44ef6868 | ||
|  | 0c7214c404 | ||
|  | 4cf9654693 | ||
|  | 50a138d95c | ||
|  | 1b86cc41cf | ||
|  | 91346358b0 | ||
|  | f3783d4b77 | ||
|  | 89ef304bed | ||
|  | 83cebb8b7a | ||
|  | 9e68f9fdf1 | ||
|  | 2acea5c03d | ||
|  | 978177527e | ||
|  | 2648c436f3 | ||
|  | 33f1f2c455 | ||
|  | 995befe0e9 | ||
|  | 1bb92aff55 | ||
|  | b8e1471d3a | ||
|  | 60daf7f0bb | ||
|  | a83a3139d1 | ||
|  | fdb7ca3b8d | ||
|  | 0d7caf5cdf | ||
|  | a339d7ba91 | ||
|  | 7216de55d6 | ||
|  | 2437fbca64 | ||
|  | 7d75d06b78 | ||
|  | 13ef5648c4 | ||
|  | 5b2478e2ba | ||
|  | 8b286571c3 | ||
|  | f3ac523794 | ||
|  | 020cf5ebfd | ||
|  | 54ab193970 | ||
|  | 8f563f32ab | ||
|  | 151bae3566 | ||
|  | 76df418cba | ||
|  | d0a72674c6 | ||
|  | 1d430674c7 | ||
|  | 70cb73922b | ||
|  | 344400951c | ||
|  | ea5a0be811 | ||
|  | 3c7fd0bdb2 | ||
|  | 6cadf8c858 | ||
|  | 27579b9e4c | ||
|  | 4d756a9cc0 | ||
|  | 3e668e05be | ||
|  | 60d3a2e0f8 | ||
|  | cc3a3b6b47 | ||
|  | eda1d49a62 | ||
|  | 62e609ab77 | ||
|  | 2bfe4ead4b | ||
|  | b1c6c32f78 | ||
|  | f6acbdecf4 | ||
|  | f1c9dfcc01 | ||
|  | ce78943ae1 | ||
|  | d6f0d86649 | ||
|  | 5bb67dbfea | ||
|  | 47610c4d3e | ||
|  | b732f3581f | ||
|  | 9e57ce716f | ||
|  | cd7ee7aa44 | ||
|  | 3cfe791473 | ||
|  | 973f2532f5 | ||
|  | bc3be21d59 | ||
|  | 0bf5cf9886 | ||
|  | 919052d094 | ||
|  | a2dafe2887 | ||
|  | 92661c994b | ||
|  | ffe8fe356a | ||
|  | bc2f773b4f | ||
|  | f919201ecc | ||
|  | 7ff5d5c2e2 | ||
|  | 9b77f951c7 | ||
|  | a25f2f990a | ||
|  | 78b373975d | ||
|  | 2fcc873c4c | ||
|  | 23c2baadb3 | ||
|  | 521ee82334 | ||
|  | 1df96e59ce | ||
|  | 3e123c1e28 | ||
|  | f38da66731 | ||
|  | 06aabfc422 | ||
|  | 1052d2bfec | ||
|  | 5e0b652344 | ||
|  | 0f8f097183 | ||
|  | 491ed3dda2 | ||
|  | af284c6d1b | ||
|  | 41d3ec5fba | ||
|  | 0568c352f3 | ||
|  | 2e7b4cb714 | ||
|  | 9767726b66 | ||
|  | 9ddfd84e41 | ||
|  | 1cf563d84b | ||
|  | 7928024f57 | ||
|  | 3eb38acb43 | ||
|  | f7300c5c90 | ||
|  | 3489b7d26c | ||
|  | acd2bcc384 | ||
|  | 43e77ca455 | ||
|  | da36297988 | ||
|  | dbb94fb044 | ||
|  | d68f0cdb23 | ||
|  | eae16eb67b | ||
|  | 4fc946b546 | ||
|  | 280bc5dad6 | ||
|  | f43770d8c9 | ||
|  | 98c4b8fa1b | ||
|  | ccb079ee67 | ||
|  | 2ea237472c | ||
|  | 0d4b4865cc | ||
|  | fe52f9f956 | ||
|  | 882907a818 | ||
|  | 572a89cc4e | ||
							
								
								
									
										23
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										23
									
								
								README.md
									
									
									
									
									
								
							| @@ -20,7 +20,7 @@ which means you can modify it, redistribute it or use it however you like. | |||||||
|                                      sure that you have sufficient permissions |                                      sure that you have sufficient permissions | ||||||
|                                      (run with sudo if needed) |                                      (run with sudo if needed) | ||||||
|     -i, --ignore-errors              continue on download errors, for example to |     -i, --ignore-errors              continue on download errors, for example to | ||||||
|                                      to skip unavailable videos in a playlist |                                      skip unavailable videos in a playlist | ||||||
|     --abort-on-error                 Abort downloading of further videos (in the |     --abort-on-error                 Abort downloading of further videos (in the | ||||||
|                                      playlist or the command line) if an error |                                      playlist or the command line) if an error | ||||||
|                                      occurs |                                      occurs | ||||||
| @@ -36,6 +36,9 @@ which means you can modify it, redistribute it or use it however you like. | |||||||
|                                      an empty string (--proxy "") for direct |                                      an empty string (--proxy "") for direct | ||||||
|                                      connection |                                      connection | ||||||
|     --no-check-certificate           Suppress HTTPS certificate validation. |     --no-check-certificate           Suppress HTTPS certificate validation. | ||||||
|  |     --prefer-insecure                Use an unencrypted connection to retrieve | ||||||
|  |                                      information about the video. (Currently | ||||||
|  |                                      supported only for YouTube) | ||||||
|     --cache-dir DIR                  Location in the filesystem where youtube-dl |     --cache-dir DIR                  Location in the filesystem where youtube-dl | ||||||
|                                      can store some downloaded information |                                      can store some downloaded information | ||||||
|                                      permanently. By default $XDG_CACHE_HOME |                                      permanently. By default $XDG_CACHE_HOME | ||||||
| @@ -124,8 +127,12 @@ which means you can modify it, redistribute it or use it however you like. | |||||||
|                                      video id, %(playlist)s for the playlist the |                                      video id, %(playlist)s for the playlist the | ||||||
|                                      video is in, %(playlist_index)s for the |                                      video is in, %(playlist_index)s for the | ||||||
|                                      position in the playlist and %% for a |                                      position in the playlist and %% for a | ||||||
|                                      literal percent. Use - to output to stdout. |                                      literal percent. %(height)s and %(width)s | ||||||
|                                      Can also be used to download to a different |                                      for the width and height of the video | ||||||
|  |                                      format. %(resolution)s for a textual | ||||||
|  |                                      description of the resolution of the video | ||||||
|  |                                      format. Use - to output to stdout. Can also | ||||||
|  |                                      be used to download to a different | ||||||
|                                      directory, for example with -o '/my/downloa |                                      directory, for example with -o '/my/downloa | ||||||
|                                      ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' . |                                      ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' . | ||||||
|     --autonumber-size NUMBER         Specifies the number of digits in |     --autonumber-size NUMBER         Specifies the number of digits in | ||||||
| @@ -187,9 +194,9 @@ which means you can modify it, redistribute it or use it however you like. | |||||||
|                                      preference using slashes: "-f 22/17/18". |                                      preference using slashes: "-f 22/17/18". | ||||||
|                                      "-f mp4" and "-f flv" are also supported. |                                      "-f mp4" and "-f flv" are also supported. | ||||||
|                                      You can also use the special names "best", |                                      You can also use the special names "best", | ||||||
|                                      "bestaudio", "worst", and "worstaudio". By |                                      "bestvideo", "bestaudio", "worst", | ||||||
|                                      default, youtube-dl will pick the best |                                      "worstvideo" and "worstaudio". By default, | ||||||
|                                      quality. |                                      youtube-dl will pick the best quality. | ||||||
|     --all-formats                    download all available video formats |     --all-formats                    download all available video formats | ||||||
|     --prefer-free-formats            prefer free video formats unless a specific |     --prefer-free-formats            prefer free video formats unless a specific | ||||||
|                                      one is requested |                                      one is requested | ||||||
| @@ -246,7 +253,7 @@ which means you can modify it, redistribute it or use it however you like. | |||||||
|  |  | ||||||
| # CONFIGURATION | # CONFIGURATION | ||||||
|  |  | ||||||
| You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`. | You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`. | ||||||
|  |  | ||||||
| # OUTPUT TEMPLATE | # OUTPUT TEMPLATE | ||||||
|  |  | ||||||
| @@ -357,7 +364,7 @@ If you want to create a build of youtube-dl yourself, you'll need | |||||||
|  |  | ||||||
| ### Adding support for a new site | ### Adding support for a new site | ||||||
|  |  | ||||||
| If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/). | If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/). | ||||||
|  |  | ||||||
| # BUGS | # BUGS | ||||||
|  |  | ||||||
|   | |||||||
| @@ -14,9 +14,9 @@ | |||||||
|  |  | ||||||
| set -e | set -e | ||||||
|  |  | ||||||
| skip_tests=false | skip_tests=true | ||||||
| if [ "$1" = '--skip-test' ]; then | if [ "$1" = '--run-tests' ]; then | ||||||
|     skip_tests=true |     skip_tests=false | ||||||
|     shift |     shift | ||||||
| fi | fi | ||||||
|  |  | ||||||
| @@ -70,7 +70,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" | |||||||
| git checkout HEAD -- youtube-dl youtube-dl.exe | git checkout HEAD -- youtube-dl youtube-dl.exe | ||||||
|  |  | ||||||
| /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." | /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." | ||||||
| for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done | for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done | ||||||
| scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ | scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ | ||||||
| ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" | ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" | ||||||
| ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" | ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" | ||||||
| @@ -97,7 +97,7 @@ rm -rf build | |||||||
|  |  | ||||||
| make pypi-files | make pypi-files | ||||||
| echo "Uploading to PyPi ..." | echo "Uploading to PyPi ..." | ||||||
| python setup.py sdist upload | python setup.py sdist bdist_wheel upload | ||||||
| make clean | make clean | ||||||
|  |  | ||||||
| /bin/echo -e "\n### DONE!" | /bin/echo -e "\n### DONE!" | ||||||
|   | |||||||
| @@ -71,7 +71,7 @@ class FakeYDL(YoutubeDL): | |||||||
|             old_report_warning(message) |             old_report_warning(message) | ||||||
|         self.report_warning = types.MethodType(report_warning, self) |         self.report_warning = types.MethodType(report_warning, self) | ||||||
|  |  | ||||||
| def get_testcases(): | def gettestcases(): | ||||||
|     for ie in youtube_dl.extractor.gen_extractors(): |     for ie in youtube_dl.extractor.gen_extractors(): | ||||||
|         t = getattr(ie, '_TEST', None) |         t = getattr(ie, '_TEST', None) | ||||||
|         if t: |         if t: | ||||||
|   | |||||||
							
								
								
									
										44
									
								
								test/test_InfoExtractor.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								test/test_InfoExtractor.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,44 @@ | |||||||
|  | #!/usr/bin/env python | ||||||
|  |  | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | # Allow direct execution | ||||||
|  | import os | ||||||
|  | import sys | ||||||
|  | import unittest | ||||||
|  | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||||
|  |  | ||||||
|  | from test.helper import FakeYDL | ||||||
|  | from youtube_dl.extractor.common import InfoExtractor | ||||||
|  | from youtube_dl.extractor import YoutubeIE, get_info_extractor | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class TestIE(InfoExtractor): | ||||||
|  |     pass | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class TestInfoExtractor(unittest.TestCase): | ||||||
|  |     def setUp(self): | ||||||
|  |         self.ie = TestIE(FakeYDL()) | ||||||
|  |  | ||||||
|  |     def test_ie_key(self): | ||||||
|  |         self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) | ||||||
|  |  | ||||||
|  |     def test_html_search_regex(self): | ||||||
|  |         html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>' | ||||||
|  |         search = lambda re, *args: self.ie._html_search_regex(re, html, *args) | ||||||
|  |         self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video') | ||||||
|  |  | ||||||
|  |     def test_opengraph(self): | ||||||
|  |         ie = self.ie | ||||||
|  |         html = ''' | ||||||
|  |             <meta name="og:title" content='Foo'/> | ||||||
|  |             <meta content="Some video's description " name="og:description"/> | ||||||
|  |             <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/> | ||||||
|  |             ''' | ||||||
|  |         self.assertEqual(ie._og_search_title(html), 'Foo') | ||||||
|  |         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') | ||||||
|  |         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') | ||||||
|  |  | ||||||
|  | if __name__ == '__main__': | ||||||
|  |     unittest.main() | ||||||
| @@ -182,6 +182,24 @@ class TestFormatSelection(unittest.TestCase): | |||||||
|         downloaded = ydl.downloaded_info_dicts[0] |         downloaded = ydl.downloaded_info_dicts[0] | ||||||
|         self.assertEqual(downloaded['format_id'], 'vid-high') |         self.assertEqual(downloaded['format_id'], 'vid-high') | ||||||
|  |  | ||||||
|  |     def test_format_selection_video(self): | ||||||
|  |         formats = [ | ||||||
|  |             {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'}, | ||||||
|  |             {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'}, | ||||||
|  |             {'format_id': 'vid', 'ext': 'mp4', 'preference': 3}, | ||||||
|  |         ] | ||||||
|  |         info_dict = {'formats': formats, 'extractor': 'test'} | ||||||
|  |  | ||||||
|  |         ydl = YDL({'format': 'bestvideo'}) | ||||||
|  |         ydl.process_ie_result(info_dict.copy()) | ||||||
|  |         downloaded = ydl.downloaded_info_dicts[0] | ||||||
|  |         self.assertEqual(downloaded['format_id'], 'dash-video-high') | ||||||
|  |  | ||||||
|  |         ydl = YDL({'format': 'worstvideo'}) | ||||||
|  |         ydl.process_ie_result(info_dict.copy()) | ||||||
|  |         downloaded = ydl.downloaded_info_dicts[0] | ||||||
|  |         self.assertEqual(downloaded['format_id'], 'dash-video-low') | ||||||
|  |  | ||||||
|     def test_youtube_format_selection(self): |     def test_youtube_format_selection(self): | ||||||
|         order = [ |         order = [ | ||||||
|             '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13', |             '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13', | ||||||
|   | |||||||
| @@ -9,7 +9,7 @@ import unittest | |||||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||||
|  |  | ||||||
|  |  | ||||||
| from test.helper import get_testcases | from test.helper import gettestcases | ||||||
|  |  | ||||||
| from youtube_dl.extractor import ( | from youtube_dl.extractor import ( | ||||||
|     FacebookIE, |     FacebookIE, | ||||||
| @@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase): | |||||||
|     def test_youtube_truncated(self): |     def test_youtube_truncated(self): | ||||||
|         self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) |         self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) | ||||||
|  |  | ||||||
|  |     def test_youtube_search_matching(self): | ||||||
|  |         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) | ||||||
|  |         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) | ||||||
|  |  | ||||||
|     def test_justin_tv_channelid_matching(self): |     def test_justin_tv_channelid_matching(self): | ||||||
|         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) |         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) | ||||||
|         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) |         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) | ||||||
| @@ -101,7 +105,7 @@ class TestAllURLsMatching(unittest.TestCase): | |||||||
|  |  | ||||||
|     def test_no_duplicates(self): |     def test_no_duplicates(self): | ||||||
|         ies = gen_extractors() |         ies = gen_extractors() | ||||||
|         for tc in get_testcases(): |         for tc in gettestcases(): | ||||||
|             url = tc['url'] |             url = tc['url'] | ||||||
|             for ie in ies: |             for ie in ies: | ||||||
|                 if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): |                 if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): | ||||||
| @@ -120,6 +124,8 @@ class TestAllURLsMatching(unittest.TestCase): | |||||||
|  |  | ||||||
|     def test_vimeo_matching(self): |     def test_vimeo_matching(self): | ||||||
|         self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel']) |         self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel']) | ||||||
|  |         self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel']) | ||||||
|  |         self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo']) | ||||||
|         self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user']) |         self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user']) | ||||||
|         self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user']) |         self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user']) | ||||||
|         self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) |         self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) | ||||||
| @@ -135,6 +141,7 @@ class TestAllURLsMatching(unittest.TestCase): | |||||||
|     def test_pbs(self): |     def test_pbs(self): | ||||||
|         # https://github.com/rg3/youtube-dl/issues/2350 |         # https://github.com/rg3/youtube-dl/issues/2350 | ||||||
|         self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) |         self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) | ||||||
|  |         self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS']) | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |||||||
|  |  | ||||||
| from test.helper import ( | from test.helper import ( | ||||||
|     get_params, |     get_params, | ||||||
|     get_testcases, |     gettestcases, | ||||||
|     try_rm, |     try_rm, | ||||||
|     md5, |     md5, | ||||||
|     report_warning |     report_warning | ||||||
| @@ -18,6 +18,7 @@ from test.helper import ( | |||||||
| import hashlib | import hashlib | ||||||
| import io | import io | ||||||
| import json | import json | ||||||
|  | import re | ||||||
| import socket | import socket | ||||||
|  |  | ||||||
| import youtube_dl.YoutubeDL | import youtube_dl.YoutubeDL | ||||||
| @@ -50,7 +51,7 @@ def _file_md5(fn): | |||||||
|     with open(fn, 'rb') as f: |     with open(fn, 'rb') as f: | ||||||
|         return hashlib.md5(f.read()).hexdigest() |         return hashlib.md5(f.read()).hexdigest() | ||||||
|  |  | ||||||
| defs = get_testcases() | defs = gettestcases() | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestDownload(unittest.TestCase): | class TestDownload(unittest.TestCase): | ||||||
| @@ -72,9 +73,7 @@ def generator(test_case): | |||||||
|         if 'playlist' not in test_case: |         if 'playlist' not in test_case: | ||||||
|             info_dict = test_case.get('info_dict', {}) |             info_dict = test_case.get('info_dict', {}) | ||||||
|             if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')): |             if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')): | ||||||
|                 print_skipping('The output file cannot be know, the "file" ' |                 raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?') | ||||||
|                     'key is missing or the info_dict is incomplete') |  | ||||||
|                 return |  | ||||||
|         if 'skip' in test_case: |         if 'skip' in test_case: | ||||||
|             print_skipping(test_case['skip']) |             print_skipping(test_case['skip']) | ||||||
|             return |             return | ||||||
| @@ -137,19 +136,25 @@ def generator(test_case): | |||||||
|                 with io.open(info_json_fn, encoding='utf-8') as infof: |                 with io.open(info_json_fn, encoding='utf-8') as infof: | ||||||
|                     info_dict = json.load(infof) |                     info_dict = json.load(infof) | ||||||
|                 for (info_field, expected) in tc.get('info_dict', {}).items(): |                 for (info_field, expected) in tc.get('info_dict', {}).items(): | ||||||
|                     if isinstance(expected, compat_str) and expected.startswith('md5:'): |                     if isinstance(expected, compat_str) and expected.startswith('re:'): | ||||||
|                         got = 'md5:' + md5(info_dict.get(info_field)) |  | ||||||
|                     else: |  | ||||||
|                         got = info_dict.get(info_field) |                         got = info_dict.get(info_field) | ||||||
|                     self.assertEqual(expected, got, |                         match_str = expected[len('re:'):] | ||||||
|                         u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) |                         match_rex = re.compile(match_str) | ||||||
|  |  | ||||||
|                 # If checkable fields are missing from the test case, print the info_dict |                         self.assertTrue( | ||||||
|                 test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) |                             isinstance(got, compat_str) and match_rex.match(got), | ||||||
|                     for key, value in info_dict.items() |                             u'field %s (value: %r) should match %r' % (info_field, got, match_str)) | ||||||
|                     if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location')) |                     elif isinstance(expected, type): | ||||||
|                 if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): |                         got = info_dict.get(info_field) | ||||||
|                     sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') |                         self.assertTrue(isinstance(got, expected), | ||||||
|  |                             u'Expected type %r, but got value %r of type %r' % (expected, got, type(got))) | ||||||
|  |                     else: | ||||||
|  |                         if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||||
|  |                             got = 'md5:' + md5(info_dict.get(info_field)) | ||||||
|  |                         else: | ||||||
|  |                             got = info_dict.get(info_field) | ||||||
|  |                         self.assertEqual(expected, got, | ||||||
|  |                             u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) | ||||||
|  |  | ||||||
|                 # Check for the presence of mandatory fields |                 # Check for the presence of mandatory fields | ||||||
|                 for key in ('id', 'url', 'title', 'ext'): |                 for key in ('id', 'url', 'title', 'ext'): | ||||||
| @@ -157,6 +162,13 @@ def generator(test_case): | |||||||
|                 # Check for mandatory fields that are automatically set by YoutubeDL |                 # Check for mandatory fields that are automatically set by YoutubeDL | ||||||
|                 for key in ['webpage_url', 'extractor', 'extractor_key']: |                 for key in ['webpage_url', 'extractor', 'extractor_key']: | ||||||
|                     self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) |                     self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) | ||||||
|  |  | ||||||
|  |                 # If checkable fields are missing from the test case, print the info_dict | ||||||
|  |                 test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) | ||||||
|  |                     for key, value in info_dict.items() | ||||||
|  |                     if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) | ||||||
|  |                 if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): | ||||||
|  |                     sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') | ||||||
|         finally: |         finally: | ||||||
|             try_rm_tcs_files() |             try_rm_tcs_files() | ||||||
|  |  | ||||||
|   | |||||||
| @@ -36,6 +36,7 @@ from youtube_dl.extractor import ( | |||||||
|     RutubeChannelIE, |     RutubeChannelIE, | ||||||
|     GoogleSearchIE, |     GoogleSearchIE, | ||||||
|     GenericIE, |     GenericIE, | ||||||
|  |     TEDIE, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -98,7 +99,7 @@ class TestPlaylists(unittest.TestCase): | |||||||
|         result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') |         result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') | ||||||
|         self.assertIsPlaylist(result) |         self.assertIsPlaylist(result) | ||||||
|         self.assertEqual(result['id'], '5124905') |         self.assertEqual(result['id'], '5124905') | ||||||
|         self.assertTrue(len(result['entries']) >= 11) |         self.assertTrue(len(result['entries']) >= 6) | ||||||
|  |  | ||||||
|     def test_soundcloud_set(self): |     def test_soundcloud_set(self): | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
| @@ -170,12 +171,12 @@ class TestPlaylists(unittest.TestCase): | |||||||
|     def test_AcademicEarthCourse(self): |     def test_AcademicEarthCourse(self): | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = AcademicEarthCourseIE(dl) |         ie = AcademicEarthCourseIE(dl) | ||||||
|         result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/') |         result = ie.extract('http://academicearth.org/playlists/laws-of-nature/') | ||||||
|         self.assertIsPlaylist(result) |         self.assertIsPlaylist(result) | ||||||
|         self.assertEqual(result['id'], 'building-dynamic-websites') |         self.assertEqual(result['id'], 'laws-of-nature') | ||||||
|         self.assertEqual(result['title'], 'Building Dynamic Websites') |         self.assertEqual(result['title'], 'Laws of Nature') | ||||||
|         self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.") |         self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. 
The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.") | ||||||
|         self.assertEqual(len(result['entries']), 10) |         self.assertEqual(len(result['entries']), 4) | ||||||
|          |          | ||||||
|     def test_ivi_compilation(self): |     def test_ivi_compilation(self): | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
| @@ -248,7 +249,25 @@ class TestPlaylists(unittest.TestCase): | |||||||
|         self.assertIsPlaylist(result) |         self.assertIsPlaylist(result) | ||||||
|         self.assertEqual(result['id'], 'python language') |         self.assertEqual(result['id'], 'python language') | ||||||
|         self.assertEqual(result['title'], 'python language') |         self.assertEqual(result['title'], 'python language') | ||||||
|         self.assertTrue(len(result['entries']) == 15) |         self.assertEqual(len(result['entries']), 15) | ||||||
|  |  | ||||||
|  |     def test_generic_rss_feed(self): | ||||||
|  |         dl = FakeYDL() | ||||||
|  |         ie = GenericIE(dl) | ||||||
|  |         result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml') | ||||||
|  |         self.assertIsPlaylist(result) | ||||||
|  |         self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml') | ||||||
|  |         self.assertEqual(result['title'], 'Zero Punctuation') | ||||||
|  |         self.assertTrue(len(result['entries']) > 10) | ||||||
|  |  | ||||||
|  |     def test_ted_playlist(self): | ||||||
|  |         dl = FakeYDL() | ||||||
|  |         ie = TEDIE(dl) | ||||||
|  |         result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers') | ||||||
|  |         self.assertIsPlaylist(result) | ||||||
|  |         self.assertEqual(result['id'], '10') | ||||||
|  |         self.assertEqual(result['title'], 'Who are the hackers?') | ||||||
|  |         self.assertTrue(len(result['entries']) >= 6) | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |||||||
|  |  | ||||||
|  |  | ||||||
| # Various small unit tests | # Various small unit tests | ||||||
|  | import io | ||||||
| import xml.etree.ElementTree | import xml.etree.ElementTree | ||||||
|  |  | ||||||
| #from youtube_dl.utils import htmlentity_transform | #from youtube_dl.utils import htmlentity_transform | ||||||
| @@ -21,6 +22,7 @@ from youtube_dl.utils import ( | |||||||
|     orderedSet, |     orderedSet, | ||||||
|     PagedList, |     PagedList, | ||||||
|     parse_duration, |     parse_duration, | ||||||
|  |     read_batch_urls, | ||||||
|     sanitize_filename, |     sanitize_filename, | ||||||
|     shell_quote, |     shell_quote, | ||||||
|     smuggle_url, |     smuggle_url, | ||||||
| @@ -31,6 +33,7 @@ from youtube_dl.utils import ( | |||||||
|     unified_strdate, |     unified_strdate, | ||||||
|     unsmuggle_url, |     unsmuggle_url, | ||||||
|     url_basename, |     url_basename, | ||||||
|  |     urlencode_postdata, | ||||||
|     xpath_with_ns, |     xpath_with_ns, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -250,5 +253,18 @@ class TestUtil(unittest.TestCase): | |||||||
|     def test_struct_unpack(self): |     def test_struct_unpack(self): | ||||||
|         self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) |         self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) | ||||||
|  |  | ||||||
|  |     def test_read_batch_urls(self): | ||||||
|  |         f = io.StringIO(u'''\xef\xbb\xbf foo | ||||||
|  |             bar\r | ||||||
|  |             baz | ||||||
|  |             # More after this line\r | ||||||
|  |             ; or after this | ||||||
|  |             bam''') | ||||||
|  |         self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam']) | ||||||
|  |  | ||||||
|  |     def test_urlencode_postdata(self): | ||||||
|  |         data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) | ||||||
|  |         self.assertTrue(isinstance(data, bytes)) | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -16,6 +16,7 @@ from youtube_dl.extractor import ( | |||||||
|     YoutubeChannelIE, |     YoutubeChannelIE, | ||||||
|     YoutubeShowIE, |     YoutubeShowIE, | ||||||
|     YoutubeTopListIE, |     YoutubeTopListIE, | ||||||
|  |     YoutubeSearchURLIE, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -118,6 +119,8 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|         self.assertEqual(original_video['id'], 'rjFaenf1T-Y') |         self.assertEqual(original_video['id'], 'rjFaenf1T-Y') | ||||||
|  |  | ||||||
|     def test_youtube_toptracks(self): |     def test_youtube_toptracks(self): | ||||||
|  |         print('Skipping: The playlist page gives error 500') | ||||||
|  |         return | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = YoutubePlaylistIE(dl) |         ie = YoutubePlaylistIE(dl) | ||||||
|         result = ie.extract('https://www.youtube.com/playlist?list=MCUS') |         result = ie.extract('https://www.youtube.com/playlist?list=MCUS') | ||||||
| @@ -131,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|         entries = result['entries'] |         entries = result['entries'] | ||||||
|         self.assertTrue(len(entries) >= 5) |         self.assertTrue(len(entries) >= 5) | ||||||
|  |  | ||||||
|  |     def test_youtube_search_url(self): | ||||||
|  |         dl = FakeYDL() | ||||||
|  |         ie = YoutubeSearchURLIE(dl) | ||||||
|  |         result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video') | ||||||
|  |         entries = result['entries'] | ||||||
|  |         self.assertIsPlaylist(result) | ||||||
|  |         self.assertEqual(result['title'], 'youtube-dl test video') | ||||||
|  |         self.assertTrue(len(entries) >= 5) | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -1,4 +0,0 @@ | |||||||
| # Legacy file for backwards compatibility, use youtube_dl.extractor instead! |  | ||||||
|  |  | ||||||
| from .extractor.common import InfoExtractor, SearchInfoExtractor |  | ||||||
| from .extractor import gen_extractors, get_info_extractor |  | ||||||
| @@ -4,6 +4,7 @@ | |||||||
| from __future__ import absolute_import, unicode_literals | from __future__ import absolute_import, unicode_literals | ||||||
|  |  | ||||||
| import collections | import collections | ||||||
|  | import datetime | ||||||
| import errno | import errno | ||||||
| import io | import io | ||||||
| import json | import json | ||||||
| @@ -147,6 +148,8 @@ class YoutubeDL(object): | |||||||
|                        again. |                        again. | ||||||
|     cookiefile:        File name where cookies should be read from and dumped to. |     cookiefile:        File name where cookies should be read from and dumped to. | ||||||
|     nocheckcertificate:Do not verify SSL certificates |     nocheckcertificate:Do not verify SSL certificates | ||||||
|  |     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information. | ||||||
|  |                        At the moment, this is only supported by YouTube. | ||||||
|     proxy:             URL of the proxy server to use |     proxy:             URL of the proxy server to use | ||||||
|     socket_timeout:    Time to wait for unresponsive hosts, in seconds |     socket_timeout:    Time to wait for unresponsive hosts, in seconds | ||||||
|     bidi_workaround:   Work around buggy terminals without bidirectional text |     bidi_workaround:   Work around buggy terminals without bidirectional text | ||||||
| @@ -370,12 +373,15 @@ class YoutubeDL(object): | |||||||
|         Print the message to stderr, it will be prefixed with 'WARNING:' |         Print the message to stderr, it will be prefixed with 'WARNING:' | ||||||
|         If stderr is a tty file the 'WARNING:' will be colored |         If stderr is a tty file the 'WARNING:' will be colored | ||||||
|         ''' |         ''' | ||||||
|         if self._err_file.isatty() and os.name != 'nt': |         if self.params.get('logger') is not None: | ||||||
|             _msg_header = '\033[0;33mWARNING:\033[0m' |             self.params['logger'].warning(message) | ||||||
|         else: |         else: | ||||||
|             _msg_header = 'WARNING:' |             if self._err_file.isatty() and os.name != 'nt': | ||||||
|         warning_message = '%s %s' % (_msg_header, message) |                 _msg_header = '\033[0;33mWARNING:\033[0m' | ||||||
|         self.to_stderr(warning_message) |             else: | ||||||
|  |                 _msg_header = 'WARNING:' | ||||||
|  |             warning_message = '%s %s' % (_msg_header, message) | ||||||
|  |             self.to_stderr(warning_message) | ||||||
|  |  | ||||||
|     def report_error(self, message, tb=None): |     def report_error(self, message, tb=None): | ||||||
|         ''' |         ''' | ||||||
| @@ -409,6 +415,13 @@ class YoutubeDL(object): | |||||||
|             template_dict['autonumber'] = autonumber_templ % self._num_downloads |             template_dict['autonumber'] = autonumber_templ % self._num_downloads | ||||||
|             if template_dict.get('playlist_index') is not None: |             if template_dict.get('playlist_index') is not None: | ||||||
|                 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index'] |                 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index'] | ||||||
|  |             if template_dict.get('resolution') is None: | ||||||
|  |                 if template_dict.get('width') and template_dict.get('height'): | ||||||
|  |                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) | ||||||
|  |                 elif template_dict.get('height'): | ||||||
|  |                     template_dict['resolution'] = '%sp' % template_dict['height'] | ||||||
|  |                 elif template_dict.get('width'): | ||||||
|  |                     template_dict['resolution'] = '?x%d' % template_dict['width'] | ||||||
|  |  | ||||||
|             sanitize = lambda k, v: sanitize_filename( |             sanitize = lambda k, v: sanitize_filename( | ||||||
|                 compat_str(v), |                 compat_str(v), | ||||||
| @@ -522,7 +535,7 @@ class YoutubeDL(object): | |||||||
|                 else: |                 else: | ||||||
|                     raise |                     raise | ||||||
|         else: |         else: | ||||||
|             self.report_error('no suitable InfoExtractor: %s' % url) |             self.report_error('no suitable InfoExtractor for URL %s' % url) | ||||||
|  |  | ||||||
|     def process_ie_result(self, ie_result, download=True, extra_info={}): |     def process_ie_result(self, ie_result, download=True, extra_info={}): | ||||||
|         """ |         """ | ||||||
| @@ -656,6 +669,18 @@ class YoutubeDL(object): | |||||||
|                 if f.get('vcodec') == 'none'] |                 if f.get('vcodec') == 'none'] | ||||||
|             if audio_formats: |             if audio_formats: | ||||||
|                 return audio_formats[0] |                 return audio_formats[0] | ||||||
|  |         elif format_spec == 'bestvideo': | ||||||
|  |             video_formats = [ | ||||||
|  |                 f for f in available_formats | ||||||
|  |                 if f.get('acodec') == 'none'] | ||||||
|  |             if video_formats: | ||||||
|  |                 return video_formats[-1] | ||||||
|  |         elif format_spec == 'worstvideo': | ||||||
|  |             video_formats = [ | ||||||
|  |                 f for f in available_formats | ||||||
|  |                 if f.get('acodec') == 'none'] | ||||||
|  |             if video_formats: | ||||||
|  |                 return video_formats[0] | ||||||
|         else: |         else: | ||||||
|             extensions = ['mp4', 'flv', 'webm', '3gp'] |             extensions = ['mp4', 'flv', 'webm', '3gp'] | ||||||
|             if format_spec in extensions: |             if format_spec in extensions: | ||||||
| @@ -675,6 +700,14 @@ class YoutubeDL(object): | |||||||
|             info_dict['playlist'] = None |             info_dict['playlist'] = None | ||||||
|             info_dict['playlist_index'] = None |             info_dict['playlist_index'] = None | ||||||
|  |  | ||||||
|  |         if 'display_id' not in info_dict and 'id' in info_dict: | ||||||
|  |             info_dict['display_id'] = info_dict['id'] | ||||||
|  |  | ||||||
|  |         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None: | ||||||
|  |             upload_date = datetime.datetime.utcfromtimestamp( | ||||||
|  |                 info_dict['timestamp']) | ||||||
|  |             info_dict['upload_date'] = upload_date.strftime('%Y%m%d') | ||||||
|  |  | ||||||
|         # This extractors handle format selection themselves |         # This extractors handle format selection themselves | ||||||
|         if info_dict['extractor'] in ['Youku']: |         if info_dict['extractor'] in ['Youku']: | ||||||
|             if download: |             if download: | ||||||
| @@ -688,8 +721,11 @@ class YoutubeDL(object): | |||||||
|         else: |         else: | ||||||
|             formats = info_dict['formats'] |             formats = info_dict['formats'] | ||||||
|  |  | ||||||
|  |         if not formats: | ||||||
|  |             raise ExtractorError('No video formats found!') | ||||||
|  |  | ||||||
|         # We check that all the formats have the format and format_id fields |         # We check that all the formats have the format and format_id fields | ||||||
|         for (i, format) in enumerate(formats): |         for i, format in enumerate(formats): | ||||||
|             if format.get('format_id') is None: |             if format.get('format_id') is None: | ||||||
|                 format['format_id'] = compat_str(i) |                 format['format_id'] = compat_str(i) | ||||||
|             if format.get('format') is None: |             if format.get('format') is None: | ||||||
| @@ -908,7 +944,7 @@ class YoutubeDL(object): | |||||||
|                     self.to_screen('[%s] %s: Downloading thumbnail ...' % |                     self.to_screen('[%s] %s: Downloading thumbnail ...' % | ||||||
|                                    (info_dict['extractor'], info_dict['id'])) |                                    (info_dict['extractor'], info_dict['id'])) | ||||||
|                     try: |                     try: | ||||||
|                         uf = compat_urllib_request.urlopen(info_dict['thumbnail']) |                         uf = self.urlopen(info_dict['thumbnail']) | ||||||
|                         with open(thumb_filename, 'wb') as thumbf: |                         with open(thumb_filename, 'wb') as thumbf: | ||||||
|                             shutil.copyfileobj(uf, thumbf) |                             shutil.copyfileobj(uf, thumbf) | ||||||
|                         self.to_screen('[%s] %s: Writing thumbnail to: %s' % |                         self.to_screen('[%s] %s: Writing thumbnail to: %s' % | ||||||
| @@ -1154,7 +1190,7 @@ class YoutubeDL(object): | |||||||
|  |  | ||||||
|     def urlopen(self, req): |     def urlopen(self, req): | ||||||
|         """ Start an HTTP download """ |         """ Start an HTTP download """ | ||||||
|         return self._opener.open(req) |         return self._opener.open(req, timeout=self._socket_timeout) | ||||||
|  |  | ||||||
|     def print_debug_header(self): |     def print_debug_header(self): | ||||||
|         if not self.params.get('verbose'): |         if not self.params.get('verbose'): | ||||||
| @@ -1185,7 +1221,7 @@ class YoutubeDL(object): | |||||||
|  |  | ||||||
|     def _setup_opener(self): |     def _setup_opener(self): | ||||||
|         timeout_val = self.params.get('socket_timeout') |         timeout_val = self.params.get('socket_timeout') | ||||||
|         timeout = 600 if timeout_val is None else float(timeout_val) |         self._socket_timeout = 600 if timeout_val is None else float(timeout_val) | ||||||
|  |  | ||||||
|         opts_cookiefile = self.params.get('cookiefile') |         opts_cookiefile = self.params.get('cookiefile') | ||||||
|         opts_proxy = self.params.get('proxy') |         opts_proxy = self.params.get('proxy') | ||||||
| @@ -1223,7 +1259,3 @@ class YoutubeDL(object): | |||||||
|         # (See https://github.com/rg3/youtube-dl/issues/1309 for details) |         # (See https://github.com/rg3/youtube-dl/issues/1309 for details) | ||||||
|         opener.addheaders = [] |         opener.addheaders = [] | ||||||
|         self._opener = opener |         self._opener = opener | ||||||
|  |  | ||||||
|         # TODO remove this global modification |  | ||||||
|         compat_urllib_request.install_opener(opener) |  | ||||||
|         socket.setdefaulttimeout(timeout) |  | ||||||
|   | |||||||
| @@ -46,12 +46,17 @@ __authors__  = ( | |||||||
|     'Andreas Schmitz', |     'Andreas Schmitz', | ||||||
|     'Michael Kaiser', |     'Michael Kaiser', | ||||||
|     'Niklas Laxström', |     'Niklas Laxström', | ||||||
|  |     'David Triendl', | ||||||
|  |     'Anthony Weems', | ||||||
|  |     'David Wagner', | ||||||
|  |     'Juan C. Olivares', | ||||||
|  |     'Mattias Harrysson', | ||||||
| ) | ) | ||||||
|  |  | ||||||
| __license__ = 'Public Domain' | __license__ = 'Public Domain' | ||||||
|  |  | ||||||
| import codecs | import codecs | ||||||
| import getpass | import io | ||||||
| import locale | import locale | ||||||
| import optparse | import optparse | ||||||
| import os | import os | ||||||
| @@ -62,6 +67,7 @@ import sys | |||||||
|  |  | ||||||
|  |  | ||||||
| from .utils import ( | from .utils import ( | ||||||
|  |     compat_getpass, | ||||||
|     compat_print, |     compat_print, | ||||||
|     DateRange, |     DateRange, | ||||||
|     decodeOption, |     decodeOption, | ||||||
| @@ -70,6 +76,7 @@ from .utils import ( | |||||||
|     get_cachedir, |     get_cachedir, | ||||||
|     MaxDownloadsReached, |     MaxDownloadsReached, | ||||||
|     preferredencoding, |     preferredencoding, | ||||||
|  |     read_batch_urls, | ||||||
|     SameFileError, |     SameFileError, | ||||||
|     setproctitle, |     setproctitle, | ||||||
|     std_headers, |     std_headers, | ||||||
| @@ -208,7 +215,7 @@ def parseOpts(overrideArguments=None): | |||||||
|     general.add_option('-U', '--update', |     general.add_option('-U', '--update', | ||||||
|             action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') |             action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') | ||||||
|     general.add_option('-i', '--ignore-errors', |     general.add_option('-i', '--ignore-errors', | ||||||
|             action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False) |             action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False) | ||||||
|     general.add_option('--abort-on-error', |     general.add_option('--abort-on-error', | ||||||
|             action='store_false', dest='ignoreerrors', |             action='store_false', dest='ignoreerrors', | ||||||
|             help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') |             help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') | ||||||
| @@ -230,6 +237,9 @@ def parseOpts(overrideArguments=None): | |||||||
|         '--proxy', dest='proxy', default=None, metavar='URL', |         '--proxy', dest='proxy', default=None, metavar='URL', | ||||||
|         help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') |         help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') | ||||||
|     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') |     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') | ||||||
|  |     general.add_option( | ||||||
|  |         '--prefer-insecure', action='store_true', dest='prefer_insecure', | ||||||
|  |         help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') | ||||||
|     general.add_option( |     general.add_option( | ||||||
|         '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', |         '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', | ||||||
|         help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') |         help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') | ||||||
| @@ -250,7 +260,6 @@ def parseOpts(overrideArguments=None): | |||||||
|         action='store_true', |         action='store_true', | ||||||
|         help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') |         help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') | ||||||
|  |  | ||||||
|  |  | ||||||
|     selection.add_option( |     selection.add_option( | ||||||
|         '--playlist-start', |         '--playlist-start', | ||||||
|         dest='playliststart', metavar='NUMBER', default=1, type=int, |         dest='playliststart', metavar='NUMBER', default=1, type=int, | ||||||
| @@ -309,7 +318,7 @@ def parseOpts(overrideArguments=None): | |||||||
|  |  | ||||||
|     video_format.add_option('-f', '--format', |     video_format.add_option('-f', '--format', | ||||||
|             action='store', dest='format', metavar='FORMAT', default=None, |             action='store', dest='format', metavar='FORMAT', default=None, | ||||||
|             help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestaudio", "worst", and "worstaudio". By default, youtube-dl will pick the best quality.') |             help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') | ||||||
|     video_format.add_option('--all-formats', |     video_format.add_option('--all-formats', | ||||||
|             action='store_const', dest='format', help='download all available video formats', const='all') |             action='store_const', dest='format', help='download all available video formats', const='all') | ||||||
|     video_format.add_option('--prefer-free-formats', |     video_format.add_option('--prefer-free-formats', | ||||||
| @@ -424,6 +433,8 @@ def parseOpts(overrideArguments=None): | |||||||
|                   '%(extractor)s for the provider (youtube, metacafe, etc), ' |                   '%(extractor)s for the provider (youtube, metacafe, etc), ' | ||||||
|                   '%(id)s for the video id, %(playlist)s for the playlist the video is in, ' |                   '%(id)s for the video id, %(playlist)s for the playlist the video is in, ' | ||||||
|                   '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' |                   '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' | ||||||
|  |                   '%(height)s and %(width)s for the width and height of the video format. ' | ||||||
|  |                   '%(resolution)s for a textual description of the resolution of the video format. ' | ||||||
|                   'Use - to output to stdout. Can also be used to download to a different directory, ' |                   'Use - to output to stdout. Can also be used to download to a different directory, ' | ||||||
|                   'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) |                   'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) | ||||||
|     filesystem.add_option('--autonumber-size', |     filesystem.add_option('--autonumber-size', | ||||||
| @@ -551,21 +562,19 @@ def _real_main(argv=None): | |||||||
|         sys.exit(0) |         sys.exit(0) | ||||||
|  |  | ||||||
|     # Batch file verification |     # Batch file verification | ||||||
|     batchurls = [] |     batch_urls = [] | ||||||
|     if opts.batchfile is not None: |     if opts.batchfile is not None: | ||||||
|         try: |         try: | ||||||
|             if opts.batchfile == '-': |             if opts.batchfile == '-': | ||||||
|                 batchfd = sys.stdin |                 batchfd = sys.stdin | ||||||
|             else: |             else: | ||||||
|                 batchfd = open(opts.batchfile, 'r') |                 batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore') | ||||||
|             batchurls = batchfd.readlines() |             batch_urls = read_batch_urls(batchfd) | ||||||
|             batchurls = [x.strip() for x in batchurls] |  | ||||||
|             batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] |  | ||||||
|             if opts.verbose: |             if opts.verbose: | ||||||
|                 write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') |                 write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n') | ||||||
|         except IOError: |         except IOError: | ||||||
|             sys.exit(u'ERROR: batch file could not be read') |             sys.exit(u'ERROR: batch file could not be read') | ||||||
|     all_urls = batchurls + args |     all_urls = batch_urls + args | ||||||
|     all_urls = [url.strip() for url in all_urls] |     all_urls = [url.strip() for url in all_urls] | ||||||
|     _enc = preferredencoding() |     _enc = preferredencoding() | ||||||
|     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] |     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] | ||||||
| @@ -604,7 +613,7 @@ def _real_main(argv=None): | |||||||
|     if opts.usetitle and opts.useid: |     if opts.usetitle and opts.useid: | ||||||
|         parser.error(u'using title conflicts with using video ID') |         parser.error(u'using title conflicts with using video ID') | ||||||
|     if opts.username is not None and opts.password is None: |     if opts.username is not None and opts.password is None: | ||||||
|         opts.password = getpass.getpass(u'Type account password and press return:') |         opts.password = compat_getpass(u'Type account password and press [Return]: ') | ||||||
|     if opts.ratelimit is not None: |     if opts.ratelimit is not None: | ||||||
|         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) |         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) | ||||||
|         if numeric_limit is None: |         if numeric_limit is None: | ||||||
| @@ -749,6 +758,7 @@ def _real_main(argv=None): | |||||||
|         'download_archive': download_archive_fn, |         'download_archive': download_archive_fn, | ||||||
|         'cookiefile': opts.cookiefile, |         'cookiefile': opts.cookiefile, | ||||||
|         'nocheckcertificate': opts.no_check_certificate, |         'nocheckcertificate': opts.no_check_certificate, | ||||||
|  |         'prefer_insecure': opts.prefer_insecure, | ||||||
|         'proxy': opts.proxy, |         'proxy': opts.proxy, | ||||||
|         'socket_timeout': opts.socket_timeout, |         'socket_timeout': opts.socket_timeout, | ||||||
|         'bidi_workaround': opts.bidi_workaround, |         'bidi_workaround': opts.bidi_workaround, | ||||||
|   | |||||||
| @@ -12,7 +12,6 @@ from .http import HttpFD | |||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     struct_pack, |     struct_pack, | ||||||
|     struct_unpack, |     struct_unpack, | ||||||
|     compat_urllib_request, |  | ||||||
|     compat_urlparse, |     compat_urlparse, | ||||||
|     format_bytes, |     format_bytes, | ||||||
|     encodeFilename, |     encodeFilename, | ||||||
| @@ -117,8 +116,8 @@ class FlvReader(io.BytesIO): | |||||||
|         self.read_unsigned_char() |         self.read_unsigned_char() | ||||||
|         # flags |         # flags | ||||||
|         self.read(3) |         self.read(3) | ||||||
|         # BootstrapinfoVersion |  | ||||||
|         bootstrap_info_version = self.read_unsigned_int() |         self.read_unsigned_int()  # BootstrapinfoVersion | ||||||
|         # Profile,Live,Update,Reserved |         # Profile,Live,Update,Reserved | ||||||
|         self.read(1) |         self.read(1) | ||||||
|         # time scale |         # time scale | ||||||
| @@ -127,15 +126,15 @@ class FlvReader(io.BytesIO): | |||||||
|         self.read_unsigned_long_long() |         self.read_unsigned_long_long() | ||||||
|         # SmpteTimeCodeOffset |         # SmpteTimeCodeOffset | ||||||
|         self.read_unsigned_long_long() |         self.read_unsigned_long_long() | ||||||
|         # MovieIdentifier |  | ||||||
|         movie_identifier = self.read_string() |         self.read_string()  # MovieIdentifier | ||||||
|         server_count = self.read_unsigned_char() |         server_count = self.read_unsigned_char() | ||||||
|         # ServerEntryTable |         # ServerEntryTable | ||||||
|         for i in range(server_count): |         for i in range(server_count): | ||||||
|             self.read_string() |             self.read_string() | ||||||
|         quality_count = self.read_unsigned_char() |         quality_count = self.read_unsigned_char() | ||||||
|         # QualityEntryTable |         # QualityEntryTable | ||||||
|         for i in range(server_count): |         for i in range(quality_count): | ||||||
|             self.read_string() |             self.read_string() | ||||||
|         # DrmData |         # DrmData | ||||||
|         self.read_string() |         self.read_string() | ||||||
|   | |||||||
| @@ -49,7 +49,7 @@ class HttpFD(FileDownloader): | |||||||
|         while count <= retries: |         while count <= retries: | ||||||
|             # Establish connection |             # Establish connection | ||||||
|             try: |             try: | ||||||
|                 data = compat_urllib_request.urlopen(request) |                 data = self.ydl.urlopen(request) | ||||||
|                 break |                 break | ||||||
|             except (compat_urllib_error.HTTPError, ) as err: |             except (compat_urllib_error.HTTPError, ) as err: | ||||||
|                 if (err.code < 500 or err.code >= 600) and err.code != 416: |                 if (err.code < 500 or err.code >= 600) and err.code != 416: | ||||||
| @@ -59,7 +59,7 @@ class HttpFD(FileDownloader): | |||||||
|                     # Unable to resume (requested range not satisfiable) |                     # Unable to resume (requested range not satisfiable) | ||||||
|                     try: |                     try: | ||||||
|                         # Open the connection again without the range header |                         # Open the connection again without the range header | ||||||
|                         data = compat_urllib_request.urlopen(basic_request) |                         data = self.ydl.urlopen(basic_request) | ||||||
|                         content_length = data.info()['Content-Length'] |                         content_length = data.info()['Content-Length'] | ||||||
|                     except (compat_urllib_error.HTTPError, ) as err: |                     except (compat_urllib_error.HTTPError, ) as err: | ||||||
|                         if err.code < 500 or err.code >= 600: |                         if err.code < 500 or err.code >= 600: | ||||||
| @@ -85,6 +85,7 @@ class HttpFD(FileDownloader): | |||||||
|                         else: |                         else: | ||||||
|                             # The length does not match, we start the download over |                             # The length does not match, we start the download over | ||||||
|                             self.report_unable_to_resume() |                             self.report_unable_to_resume() | ||||||
|  |                             resume_len = 0 | ||||||
|                             open_mode = 'wb' |                             open_mode = 'wb' | ||||||
|                             break |                             break | ||||||
|             # Retry |             # Retry | ||||||
|   | |||||||
| @@ -1,3 +1,5 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
| import subprocess | import subprocess | ||||||
| @@ -22,7 +24,7 @@ class RtmpFD(FileDownloader): | |||||||
|             proc_stderr_closed = False |             proc_stderr_closed = False | ||||||
|             while not proc_stderr_closed: |             while not proc_stderr_closed: | ||||||
|                 # read line from stderr |                 # read line from stderr | ||||||
|                 line = u'' |                 line = '' | ||||||
|                 while True: |                 while True: | ||||||
|                     char = proc.stderr.read(1) |                     char = proc.stderr.read(1) | ||||||
|                     if not char: |                     if not char: | ||||||
| @@ -46,7 +48,7 @@ class RtmpFD(FileDownloader): | |||||||
|                     data_len = None |                     data_len = None | ||||||
|                     if percent > 0: |                     if percent > 0: | ||||||
|                         data_len = int(downloaded_data_len * 100 / percent) |                         data_len = int(downloaded_data_len * 100 / percent) | ||||||
|                     data_len_str = u'~' + format_bytes(data_len) |                     data_len_str = '~' + format_bytes(data_len) | ||||||
|                     self.report_progress(percent, data_len_str, speed, eta) |                     self.report_progress(percent, data_len_str, speed, eta) | ||||||
|                     cursor_in_new_line = False |                     cursor_in_new_line = False | ||||||
|                     self._hook_progress({ |                     self._hook_progress({ | ||||||
| @@ -76,12 +78,12 @@ class RtmpFD(FileDownloader): | |||||||
|                         }) |                         }) | ||||||
|                     elif self.params.get('verbose', False): |                     elif self.params.get('verbose', False): | ||||||
|                         if not cursor_in_new_line: |                         if not cursor_in_new_line: | ||||||
|                             self.to_screen(u'') |                             self.to_screen('') | ||||||
|                         cursor_in_new_line = True |                         cursor_in_new_line = True | ||||||
|                         self.to_screen(u'[rtmpdump] '+line) |                         self.to_screen('[rtmpdump] '+line) | ||||||
|             proc.wait() |             proc.wait() | ||||||
|             if not cursor_in_new_line: |             if not cursor_in_new_line: | ||||||
|                 self.to_screen(u'') |                 self.to_screen('') | ||||||
|             return proc.returncode |             return proc.returncode | ||||||
|  |  | ||||||
|         url = info_dict['url'] |         url = info_dict['url'] | ||||||
| @@ -102,7 +104,7 @@ class RtmpFD(FileDownloader): | |||||||
|         try: |         try: | ||||||
|             subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) |             subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) | ||||||
|         except (OSError, IOError): |         except (OSError, IOError): | ||||||
|             self.report_error(u'RTMP download detected but "rtmpdump" could not be run') |             self.report_error('RTMP download detected but "rtmpdump" could not be run') | ||||||
|             return False |             return False | ||||||
|  |  | ||||||
|         # Download using rtmpdump. rtmpdump returns exit code 2 when |         # Download using rtmpdump. rtmpdump returns exit code 2 when | ||||||
| @@ -127,7 +129,7 @@ class RtmpFD(FileDownloader): | |||||||
|             basic_args += ['--live'] |             basic_args += ['--live'] | ||||||
|         if conn: |         if conn: | ||||||
|             basic_args += ['--conn', conn] |             basic_args += ['--conn', conn] | ||||||
|         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] |         args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)] | ||||||
|  |  | ||||||
|         if sys.platform == 'win32' and sys.version_info < (3, 0): |         if sys.platform == 'win32' and sys.version_info < (3, 0): | ||||||
|             # Windows subprocess module does not actually support Unicode |             # Windows subprocess module does not actually support Unicode | ||||||
| @@ -150,26 +152,35 @@ class RtmpFD(FileDownloader): | |||||||
|                 shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) |                 shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) | ||||||
|             except ImportError: |             except ImportError: | ||||||
|                 shell_quote = repr |                 shell_quote = repr | ||||||
|             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args)) |             self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args)) | ||||||
|  |  | ||||||
|  |         RD_SUCCESS = 0 | ||||||
|  |         RD_FAILED = 1 | ||||||
|  |         RD_INCOMPLETE = 2 | ||||||
|  |         RD_NO_CONNECT = 3 | ||||||
|  |  | ||||||
|         retval = run_rtmpdump(args) |         retval = run_rtmpdump(args) | ||||||
|  |  | ||||||
|         while (retval == 2 or retval == 1) and not test: |         if retval == RD_NO_CONNECT: | ||||||
|  |             self.report_error('[rtmpdump] Could not connect to RTMP server.') | ||||||
|  |             return False | ||||||
|  |  | ||||||
|  |         while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live: | ||||||
|             prevsize = os.path.getsize(encodeFilename(tmpfilename)) |             prevsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||||
|             self.to_screen(u'[rtmpdump] %s bytes' % prevsize) |             self.to_screen('[rtmpdump] %s bytes' % prevsize) | ||||||
|             time.sleep(5.0) # This seems to be needed |             time.sleep(5.0) # This seems to be needed | ||||||
|             retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) |             retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED]) | ||||||
|             cursize = os.path.getsize(encodeFilename(tmpfilename)) |             cursize = os.path.getsize(encodeFilename(tmpfilename)) | ||||||
|             if prevsize == cursize and retval == 1: |             if prevsize == cursize and retval == RD_FAILED: | ||||||
|                 break |                 break | ||||||
|              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those |              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those | ||||||
|             if prevsize == cursize and retval == 2 and cursize > 1024: |             if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024: | ||||||
|                 self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.') |                 self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.') | ||||||
|                 retval = 0 |                 retval = RD_SUCCESS | ||||||
|                 break |                 break | ||||||
|         if retval == 0 or (test and retval == 2): |         if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): | ||||||
|             fsize = os.path.getsize(encodeFilename(tmpfilename)) |             fsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||||
|             self.to_screen(u'[rtmpdump] %s bytes' % fsize) |             self.to_screen('[rtmpdump] %s bytes' % fsize) | ||||||
|             self.try_rename(tmpfilename, filename) |             self.try_rename(tmpfilename, filename) | ||||||
|             self._hook_progress({ |             self._hook_progress({ | ||||||
|                 'downloaded_bytes': fsize, |                 'downloaded_bytes': fsize, | ||||||
| @@ -179,6 +190,6 @@ class RtmpFD(FileDownloader): | |||||||
|             }) |             }) | ||||||
|             return True |             return True | ||||||
|         else: |         else: | ||||||
|             self.to_stderr(u"\n") |             self.to_stderr('\n') | ||||||
|             self.report_error(u'rtmpdump exited with code %d' % retval) |             self.report_error('rtmpdump exited with code %d' % retval) | ||||||
|             return False |             return False | ||||||
|   | |||||||
| @@ -1,5 +1,6 @@ | |||||||
| from .academicearth import AcademicEarthCourseIE | from .academicearth import AcademicEarthCourseIE | ||||||
| from .addanime import AddAnimeIE | from .addanime import AddAnimeIE | ||||||
|  | from .aftonbladet import AftonbladetIE | ||||||
| from .anitube import AnitubeIE | from .anitube import AnitubeIE | ||||||
| from .aparat import AparatIE | from .aparat import AparatIE | ||||||
| from .appletrailers import AppleTrailersIE | from .appletrailers import AppleTrailersIE | ||||||
| @@ -9,6 +10,7 @@ from .arte import ( | |||||||
|     ArteTvIE, |     ArteTvIE, | ||||||
|     ArteTVPlus7IE, |     ArteTVPlus7IE, | ||||||
|     ArteTVCreativeIE, |     ArteTVCreativeIE, | ||||||
|  |     ArteTVConcertIE, | ||||||
|     ArteTVFutureIE, |     ArteTVFutureIE, | ||||||
|     ArteTVDDCIE, |     ArteTVDDCIE, | ||||||
| ) | ) | ||||||
| @@ -19,12 +21,15 @@ from .bbccouk import BBCCoUkIE | |||||||
| from .blinkx import BlinkxIE | from .blinkx import BlinkxIE | ||||||
| from .bliptv import BlipTVIE, BlipTVUserIE | from .bliptv import BlipTVIE, BlipTVUserIE | ||||||
| from .bloomberg import BloombergIE | from .bloomberg import BloombergIE | ||||||
|  | from .br import BRIE | ||||||
| from .breakcom import BreakIE | from .breakcom import BreakIE | ||||||
| from .brightcove import BrightcoveIE | from .brightcove import BrightcoveIE | ||||||
| from .c56 import C56IE | from .c56 import C56IE | ||||||
|  | from .canal13cl import Canal13clIE | ||||||
| from .canalplus import CanalplusIE | from .canalplus import CanalplusIE | ||||||
| from .canalc2 import Canalc2IE | from .canalc2 import Canalc2IE | ||||||
| from .cbs import CBSIE | from .cbs import CBSIE | ||||||
|  | from .ceskatelevize import CeskaTelevizeIE | ||||||
| from .channel9 import Channel9IE | from .channel9 import Channel9IE | ||||||
| from .chilloutzone import ChilloutzoneIE | from .chilloutzone import ChilloutzoneIE | ||||||
| from .cinemassacre import CinemassacreIE | from .cinemassacre import CinemassacreIE | ||||||
| @@ -49,7 +54,6 @@ from .dailymotion import ( | |||||||
|     DailymotionUserIE, |     DailymotionUserIE, | ||||||
| ) | ) | ||||||
| from .daum import DaumIE | from .daum import DaumIE | ||||||
| from .depositfiles import DepositFilesIE |  | ||||||
| from .dotsub import DotsubIE | from .dotsub import DotsubIE | ||||||
| from .dreisat import DreiSatIE | from .dreisat import DreiSatIE | ||||||
| from .defense import DefenseGouvFrIE | from .defense import DefenseGouvFrIE | ||||||
| @@ -88,6 +92,7 @@ from .funnyordie import FunnyOrDieIE | |||||||
| from .gamekings import GamekingsIE | from .gamekings import GamekingsIE | ||||||
| from .gamespot import GameSpotIE | from .gamespot import GameSpotIE | ||||||
| from .gametrailers import GametrailersIE | from .gametrailers import GametrailersIE | ||||||
|  | from .gdcvault import GDCVaultIE | ||||||
| from .generic import GenericIE | from .generic import GenericIE | ||||||
| from .googleplus import GooglePlusIE | from .googleplus import GooglePlusIE | ||||||
| from .googlesearch import GoogleSearchIE | from .googlesearch import GoogleSearchIE | ||||||
| @@ -132,11 +137,12 @@ from .lynda import ( | |||||||
| ) | ) | ||||||
| from .m6 import M6IE | from .m6 import M6IE | ||||||
| from .macgamestore import MacGameStoreIE | from .macgamestore import MacGameStoreIE | ||||||
|  | from .mailru import MailRuIE | ||||||
| from .malemotion import MalemotionIE | from .malemotion import MalemotionIE | ||||||
| from .mdr import MDRIE | from .mdr import MDRIE | ||||||
| from .metacafe import MetacafeIE | from .metacafe import MetacafeIE | ||||||
| from .metacritic import MetacriticIE | from .metacritic import MetacriticIE | ||||||
| from .mit import TechTVMITIE, MITIE | from .mit import TechTVMITIE, MITIE, OCWMITIE | ||||||
| from .mixcloud import MixcloudIE | from .mixcloud import MixcloudIE | ||||||
| from .mpora import MporaIE | from .mpora import MporaIE | ||||||
| from .mofosex import MofosexIE | from .mofosex import MofosexIE | ||||||
| @@ -151,7 +157,10 @@ from .myspass import MySpassIE | |||||||
| from .myvideo import MyVideoIE | from .myvideo import MyVideoIE | ||||||
| from .naver import NaverIE | from .naver import NaverIE | ||||||
| from .nba import NBAIE | from .nba import NBAIE | ||||||
| from .nbc import NBCNewsIE | from .nbc import ( | ||||||
|  |     NBCIE, | ||||||
|  |     NBCNewsIE, | ||||||
|  | ) | ||||||
| from .ndr import NDRIE | from .ndr import NDRIE | ||||||
| from .ndtv import NDTVIE | from .ndtv import NDTVIE | ||||||
| from .newgrounds import NewgroundsIE | from .newgrounds import NewgroundsIE | ||||||
| @@ -160,17 +169,20 @@ from .nhl import NHLIE, NHLVideocenterIE | |||||||
| from .niconico import NiconicoIE | from .niconico import NiconicoIE | ||||||
| from .ninegag import NineGagIE | from .ninegag import NineGagIE | ||||||
| from .normalboots import NormalbootsIE | from .normalboots import NormalbootsIE | ||||||
| from .novamov import NovamovIE | from .novamov import NovaMovIE | ||||||
| from .nowness import NownessIE | from .nowness import NownessIE | ||||||
| from .nowvideo import NowVideoIE | from .nowvideo import NowVideoIE | ||||||
| from .ooyala import OoyalaIE | from .ooyala import OoyalaIE | ||||||
| from .orf import ORFIE | from .orf import ORFIE | ||||||
|  | from .parliamentliveuk import ParliamentLiveUKIE | ||||||
| from .pbs import PBSIE | from .pbs import PBSIE | ||||||
| from .photobucket import PhotobucketIE | from .photobucket import PhotobucketIE | ||||||
|  | from .playvid import PlayvidIE | ||||||
| from .podomatic import PodomaticIE | from .podomatic import PodomaticIE | ||||||
| from .pornhd import PornHdIE | from .pornhd import PornHdIE | ||||||
| from .pornhub import PornHubIE | from .pornhub import PornHubIE | ||||||
| from .pornotube import PornotubeIE | from .pornotube import PornotubeIE | ||||||
|  | from .prosiebensat1 import ProSiebenSat1IE | ||||||
| from .pyvideo import PyvideoIE | from .pyvideo import PyvideoIE | ||||||
| from .radiofrance import RadioFranceIE | from .radiofrance import RadioFranceIE | ||||||
| from .rbmaradio import RBMARadioIE | from .rbmaradio import RBMARadioIE | ||||||
| @@ -186,6 +198,8 @@ from .rutube import ( | |||||||
|     RutubeMovieIE, |     RutubeMovieIE, | ||||||
|     RutubePersonIE, |     RutubePersonIE, | ||||||
| ) | ) | ||||||
|  | from .rutv import RUTVIE | ||||||
|  | from .savefrom import SaveFromIE | ||||||
| from .servingsys import ServingSysIE | from .servingsys import ServingSysIE | ||||||
| from .sina import SinaIE | from .sina import SinaIE | ||||||
| from .slashdot import SlashdotIE | from .slashdot import SlashdotIE | ||||||
| @@ -224,11 +238,17 @@ from .tinypic import TinyPicIE | |||||||
| from .toutv import TouTvIE | from .toutv import TouTvIE | ||||||
| from .traileraddict import TrailerAddictIE | from .traileraddict import TrailerAddictIE | ||||||
| from .trilulilu import TriluliluIE | from .trilulilu import TriluliluIE | ||||||
|  | from .trutube import TruTubeIE | ||||||
| from .tube8 import Tube8IE | from .tube8 import Tube8IE | ||||||
| from .tudou import TudouIE | from .tudou import TudouIE | ||||||
| from .tumblr import TumblrIE | from .tumblr import TumblrIE | ||||||
| from .tutv import TutvIE | from .tutv import TutvIE | ||||||
|  | from .tvigle import TvigleIE | ||||||
| from .tvp import TvpIE | from .tvp import TvpIE | ||||||
|  | from .udemy import ( | ||||||
|  |     UdemyIE, | ||||||
|  |     UdemyCourseIE | ||||||
|  | ) | ||||||
| from .unistra import UnistraIE | from .unistra import UnistraIE | ||||||
| from .ustream import UstreamIE, UstreamChannelIE | from .ustream import UstreamIE, UstreamChannelIE | ||||||
| from .vbox7 import Vbox7IE | from .vbox7 import Vbox7IE | ||||||
| @@ -238,7 +258,9 @@ from .vesti import VestiIE | |||||||
| from .vevo import VevoIE | from .vevo import VevoIE | ||||||
| from .vice import ViceIE | from .vice import ViceIE | ||||||
| from .viddler import ViddlerIE | from .viddler import ViddlerIE | ||||||
|  | from .videobam import VideoBamIE | ||||||
| from .videodetective import VideoDetectiveIE | from .videodetective import VideoDetectiveIE | ||||||
|  | from .videolecturesnet import VideoLecturesNetIE | ||||||
| from .videofyme import VideofyMeIE | from .videofyme import VideofyMeIE | ||||||
| from .videopremium import VideoPremiumIE | from .videopremium import VideoPremiumIE | ||||||
| from .vimeo import ( | from .vimeo import ( | ||||||
| @@ -254,6 +276,7 @@ from .viki import VikiIE | |||||||
| from .vk import VKIE | from .vk import VKIE | ||||||
| from .vube import VubeIE | from .vube import VubeIE | ||||||
| from .wat import WatIE | from .wat import WatIE | ||||||
|  | from .wdr import WDRIE | ||||||
| from .weibo import WeiboIE | from .weibo import WeiboIE | ||||||
| from .wimp import WimpIE | from .wimp import WimpIE | ||||||
| from .wistia import WistiaIE | from .wistia import WistiaIE | ||||||
| @@ -272,19 +295,20 @@ from .youku import YoukuIE | |||||||
| from .youporn import YouPornIE | from .youporn import YouPornIE | ||||||
| from .youtube import ( | from .youtube import ( | ||||||
|     YoutubeIE, |     YoutubeIE, | ||||||
|     YoutubePlaylistIE, |  | ||||||
|     YoutubeSearchIE, |  | ||||||
|     YoutubeSearchDateIE, |  | ||||||
|     YoutubeUserIE, |  | ||||||
|     YoutubeChannelIE, |     YoutubeChannelIE, | ||||||
|     YoutubeShowIE, |  | ||||||
|     YoutubeSubscriptionsIE, |  | ||||||
|     YoutubeRecommendedIE, |  | ||||||
|     YoutubeTruncatedURLIE, |  | ||||||
|     YoutubeWatchLaterIE, |  | ||||||
|     YoutubeFavouritesIE, |     YoutubeFavouritesIE, | ||||||
|     YoutubeHistoryIE, |     YoutubeHistoryIE, | ||||||
|  |     YoutubePlaylistIE, | ||||||
|  |     YoutubeRecommendedIE, | ||||||
|  |     YoutubeSearchDateIE, | ||||||
|  |     YoutubeSearchIE, | ||||||
|  |     YoutubeSearchURLIE, | ||||||
|  |     YoutubeShowIE, | ||||||
|  |     YoutubeSubscriptionsIE, | ||||||
|     YoutubeTopListIE, |     YoutubeTopListIE, | ||||||
|  |     YoutubeTruncatedURLIE, | ||||||
|  |     YoutubeUserIE, | ||||||
|  |     YoutubeWatchLaterIE, | ||||||
| ) | ) | ||||||
| from .zdf import ZDFIE | from .zdf import ZDFIE | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5,7 +5,7 @@ from .common import InfoExtractor | |||||||
|  |  | ||||||
|  |  | ||||||
| class AcademicEarthCourseIE(InfoExtractor): | class AcademicEarthCourseIE(InfoExtractor): | ||||||
|     _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)' |     _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)' | ||||||
|     IE_NAME = 'AcademicEarth:Course' |     IE_NAME = 'AcademicEarth:Course' | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -14,12 +14,12 @@ class AcademicEarthCourseIE(InfoExtractor): | |||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, playlist_id) |         webpage = self._download_webpage(url, playlist_id) | ||||||
|         title = self._html_search_regex( |         title = self._html_search_regex( | ||||||
|             r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title') |             r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title') | ||||||
|         description = self._html_search_regex( |         description = self._html_search_regex( | ||||||
|             r'<p class="excerpt">(.*?)</p>', |             r'<p class="excerpt"[^>]*?>(.*?)</p>', | ||||||
|             webpage, u'description', fatal=False) |             webpage, u'description', fatal=False) | ||||||
|         urls = re.findall( |         urls = re.findall( | ||||||
|             r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">', |             r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">', | ||||||
|             webpage) |             webpage) | ||||||
|         entries = [self.url_result(u) for u in urls] |         entries = [self.url_result(u) for u in urls] | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										69
									
								
								youtube_dl/extractor/aftonbladet.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								youtube_dl/extractor/aftonbladet.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,69 @@ | |||||||
|  | # encoding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import datetime | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class AftonbladetIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])' | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'article36015', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna', | ||||||
|  |             'description': 'Jupiters måne mest aktiv av alla himlakroppar', | ||||||
|  |             'upload_date': '20140306', | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.search(self._VALID_URL, url) | ||||||
|  |  | ||||||
|  |         video_id = mobj.group('video_id') | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|  |         # find internal video meta data | ||||||
|  |         META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' | ||||||
|  |         internal_meta_id = self._html_search_regex( | ||||||
|  |             r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id') | ||||||
|  |         internal_meta_url = META_URL % internal_meta_id | ||||||
|  |         internal_meta_json = self._download_json( | ||||||
|  |             internal_meta_url, video_id, 'Downloading video meta data') | ||||||
|  |  | ||||||
|  |         # find internal video formats | ||||||
|  |         FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s' | ||||||
|  |         internal_video_id = internal_meta_json['videoId'] | ||||||
|  |         internal_formats_url = FORMATS_URL % internal_video_id | ||||||
|  |         internal_formats_json = self._download_json( | ||||||
|  |             internal_formats_url, video_id, 'Downloading video formats') | ||||||
|  |  | ||||||
|  |         formats = [] | ||||||
|  |         for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']: | ||||||
|  |             p = fmt['paths'][0] | ||||||
|  |             formats.append({ | ||||||
|  |                 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']), | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'width': fmt['width'], | ||||||
|  |                 'height': fmt['height'], | ||||||
|  |                 'tbr': fmt['bitrate'], | ||||||
|  |                 'protocol': 'http', | ||||||
|  |             }) | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished']) | ||||||
|  |         upload_date = timestamp.strftime('%Y%m%d') | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': internal_meta_json['title'], | ||||||
|  |             'formats': formats, | ||||||
|  |             'thumbnail': internal_meta_json['imageUrl'], | ||||||
|  |             'description': internal_meta_json['shortPreamble'], | ||||||
|  |             'upload_date': upload_date, | ||||||
|  |             'duration': internal_meta_json['duration'], | ||||||
|  |             'view_count': internal_meta_json['views'], | ||||||
|  |         } | ||||||
| @@ -72,18 +72,22 @@ class ArteTvIE(InfoExtractor): | |||||||
|             return self._extract_liveweb(url, name, lang) |             return self._extract_liveweb(url, name, lang) | ||||||
|  |  | ||||||
|         if re.search(self._LIVE_URL, url) is not None: |         if re.search(self._LIVE_URL, url) is not None: | ||||||
|             raise ExtractorError(u'Arte live streams are not yet supported, sorry') |             raise ExtractorError('Arte live streams are not yet supported, sorry') | ||||||
|             # self.extractLiveStream(url) |             # self.extractLiveStream(url) | ||||||
|             # return |             # return | ||||||
|  |  | ||||||
|  |         raise ExtractorError('No video found') | ||||||
|  |  | ||||||
|     def _extract_video(self, url, video_id, lang): |     def _extract_video(self, url, video_id, lang): | ||||||
|         """Extract from videos.arte.tv""" |         """Extract from videos.arte.tv""" | ||||||
|         ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') |         ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') | ||||||
|         ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') |         ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') | ||||||
|         ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata') |         ref_xml_doc = self._download_xml( | ||||||
|  |             ref_xml_url, video_id, note='Downloading metadata') | ||||||
|         config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) |         config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) | ||||||
|         config_xml_url = config_node.attrib['ref'] |         config_xml_url = config_node.attrib['ref'] | ||||||
|         config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration') |         config_xml = self._download_webpage( | ||||||
|  |             config_xml_url, video_id, note='Downloading configuration') | ||||||
|  |  | ||||||
|         video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml)) |         video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml)) | ||||||
|         def _key(m): |         def _key(m): | ||||||
| @@ -127,7 +131,7 @@ class ArteTvIE(InfoExtractor): | |||||||
|  |  | ||||||
| class ArteTVPlus7IE(InfoExtractor): | class ArteTVPlus7IE(InfoExtractor): | ||||||
|     IE_NAME = 'arte.tv:+7' |     IE_NAME = 'arte.tv:+7' | ||||||
|     _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' |     _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' | ||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
|     def _extract_url_info(cls, url): |     def _extract_url_info(cls, url): | ||||||
| @@ -198,6 +202,8 @@ class ArteTVPlus7IE(InfoExtractor): | |||||||
|                     re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None, |                     re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None, | ||||||
|                     # The version with sourds/mal subtitles has also lower relevance |                     # The version with sourds/mal subtitles has also lower relevance | ||||||
|                     re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None, |                     re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None, | ||||||
|  |                     # Prefer http downloads over m3u8 | ||||||
|  |                     0 if f['url'].endswith('m3u8') else 1, | ||||||
|                 ) |                 ) | ||||||
|         formats = sorted(formats, key=sort_key) |         formats = sorted(formats, key=sort_key) | ||||||
|         def _format(format_info): |         def _format(format_info): | ||||||
| @@ -238,8 +244,9 @@ class ArteTVCreativeIE(ArteTVPlus7IE): | |||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', |         'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', | ||||||
|         'file': '050489-002.mp4', |  | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|  |             'id': '050489-002', | ||||||
|  |             'ext': 'mp4', | ||||||
|             'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design', |             'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design', | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
| @@ -251,8 +258,9 @@ class ArteTVFutureIE(ArteTVPlus7IE): | |||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', |         'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', | ||||||
|         'file': '050940-003.mp4', |  | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|  |             'id': '050940-003', | ||||||
|  |             'ext': 'mp4', | ||||||
|             'title': 'Les champignons au secours de la planète', |             'title': 'Les champignons au secours de la planète', | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
| @@ -266,7 +274,7 @@ class ArteTVFutureIE(ArteTVPlus7IE): | |||||||
|  |  | ||||||
| class ArteTVDDCIE(ArteTVPlus7IE): | class ArteTVDDCIE(ArteTVPlus7IE): | ||||||
|     IE_NAME = 'arte.tv:ddc' |     IE_NAME = 'arte.tv:ddc' | ||||||
|     _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)' |     _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)' | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id, lang = self._extract_url_info(url) |         video_id, lang = self._extract_url_info(url) | ||||||
| @@ -280,3 +288,19 @@ class ArteTVDDCIE(ArteTVPlus7IE): | |||||||
|         javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator') |         javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator') | ||||||
|         json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url') |         json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url') | ||||||
|         return self._extract_from_json_url(json_url, video_id, lang) |         return self._extract_from_json_url(json_url, video_id, lang) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Extractor for concert.arte.tv pages. The class defines no methods of its | ||||||
|  | # own, so all extraction behaviour is inherited from ArteTVPlus7IE. | ||||||
|  | class ArteTVConcertIE(ArteTVPlus7IE): | ||||||
|  |     IE_NAME = 'arte.tv:concert' | ||||||
|  |     # "lang" is the de/fr path segment; "id" captures the rest of the URL. | ||||||
|  |     _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)' | ||||||
|  |  | ||||||
|  |     # Single test case: a German concert page with its expected metadata. | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde', | ||||||
|  |         'md5': '9ea035b7bd69696b67aa2ccaaa218161', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '186', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"', | ||||||
|  |             'upload_date': '20140128', | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|   | |||||||
| @@ -162,6 +162,11 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | |||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         group_id = mobj.group('id') |         group_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage(url, group_id, 'Downloading video page') | ||||||
|  |         if re.search(r'id="emp-error" class="notinuk">', webpage): | ||||||
|  |             raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only', | ||||||
|  |                 expected=True) | ||||||
|  |  | ||||||
|         playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id, |         playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id, | ||||||
|             'Downloading playlist XML') |             'Downloading playlist XML') | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										99
									
								
								youtube_dl/extractor/br.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								youtube_dl/extractor/br.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,99 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ExtractorError | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class BRIE(InfoExtractor): | ||||||
|  |     IE_DESC = "Bayerischer Rundfunk Mediathek" | ||||||
|  |     _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$" | ||||||
|  |     _BASE_URL = "http://www.br.de" | ||||||
|  |  | ||||||
|  |     _TESTS = [ | ||||||
|  |         { | ||||||
|  |             "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", | ||||||
|  |             "md5": "c4f83cf0f023ba5875aba0bf46860df2", | ||||||
|  |             "info_dict": { | ||||||
|  |                 "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", | ||||||
|  |                 "ext": "mp4", | ||||||
|  |                 "title": "Feiern und Verzichten", | ||||||
|  |                 "description": "Anselm Grün: Feiern und Verzichten", | ||||||
|  |                 "uploader": "BR/Birgit Baier", | ||||||
|  |                 "upload_date": "20140301" | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             "url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html", | ||||||
|  |             "md5": "ab451b09d861dbed7d7cc9ab0be19ebe", | ||||||
|  |             "info_dict": { | ||||||
|  |                 "id": "2c060e69-3a27-4e13-b0f0-668fac17d812", | ||||||
|  |                 "ext": "mp4", | ||||||
|  |                 "title": "Über den Pass", | ||||||
|  |                 "description": "Die Eroberung der Alpen: Über den Pass", | ||||||
|  |                 "uploader": None, | ||||||
|  |                 "upload_date": None | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     ] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         display_id = mobj.group('id') | ||||||
|  |         page = self._download_webpage(url, display_id) | ||||||
|  |         xml_url = self._search_regex( | ||||||
|  |             r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL") | ||||||
|  |         xml = self._download_xml(self._BASE_URL + xml_url, None) | ||||||
|  |  | ||||||
|  |         videos = [] | ||||||
|  |         for xml_video in xml.findall("video"): | ||||||
|  |             video = { | ||||||
|  |                 "id": xml_video.get("externalId"), | ||||||
|  |                 "title": xml_video.find("title").text, | ||||||
|  |                 "formats": self._extract_formats(xml_video.find("assets")), | ||||||
|  |                 "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")), | ||||||
|  |                 "description": " ".join(xml_video.find("shareTitle").text.splitlines()), | ||||||
|  |                 "webpage_url": xml_video.find("permalink").text | ||||||
|  |             } | ||||||
|  |             if xml_video.find("author").text: | ||||||
|  |                 video["uploader"] = xml_video.find("author").text | ||||||
|  |             if xml_video.find("broadcastDate").text: | ||||||
|  |                 video["upload_date"] =  "".join(reversed(xml_video.find("broadcastDate").text.split("."))) | ||||||
|  |             videos.append(video) | ||||||
|  |  | ||||||
|  |         if len(videos) > 1: | ||||||
|  |             self._downloader.report_warning( | ||||||
|  |                 'found multiple videos; please ' | ||||||
|  |                 'report this with the video URL to http://yt-dl.org/bug') | ||||||
|  |         if not videos: | ||||||
|  |             raise ExtractorError('No video entries found') | ||||||
|  |         return videos[0] | ||||||
|  |  | ||||||
|  |     def _extract_formats(self, assets): | ||||||
|  |         formats = [{ | ||||||
|  |             "url": asset.find("downloadUrl").text, | ||||||
|  |             "ext": asset.find("mediaType").text, | ||||||
|  |             "format_id": asset.get("type"), | ||||||
|  |             "width": int(asset.find("frameWidth").text), | ||||||
|  |             "height": int(asset.find("frameHeight").text), | ||||||
|  |             "tbr": int(asset.find("bitrateVideo").text), | ||||||
|  |             "abr": int(asset.find("bitrateAudio").text), | ||||||
|  |             "vcodec": asset.find("codecVideo").text, | ||||||
|  |             "container": asset.find("mediaType").text, | ||||||
|  |             "filesize": int(asset.find("size").text), | ||||||
|  |         } for asset in assets.findall("asset") | ||||||
|  |             if asset.find("downloadUrl") is not None] | ||||||
|  |  | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |         return formats | ||||||
|  |  | ||||||
|  |     def _extract_thumbnails(self, variants): | ||||||
|  |         thumbnails = [{ | ||||||
|  |             "url": self._BASE_URL + variant.find("url").text, | ||||||
|  |             "width": int(variant.find("width").text), | ||||||
|  |             "height": int(variant.find("height").text), | ||||||
|  |         } for variant in variants.findall("variant")] | ||||||
|  |         thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True) | ||||||
|  |         return thumbnails | ||||||
| @@ -23,8 +23,8 @@ class BreakIE(InfoExtractor): | |||||||
|         video_id = mobj.group(1).split("-")[-1] |         video_id = mobj.group(1).split("-")[-1] | ||||||
|         embed_url = 'http://www.break.com/embed/%s' % video_id |         embed_url = 'http://www.break.com/embed/%s' % video_id | ||||||
|         webpage = self._download_webpage(embed_url, video_id) |         webpage = self._download_webpage(embed_url, video_id) | ||||||
|         info_json = self._search_regex(r'var embedVars = ({.*?});', webpage, |         info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>', | ||||||
|                                        'info json', flags=re.DOTALL) |             webpage, 'info json', flags=re.DOTALL) | ||||||
|         info = json.loads(info_json) |         info = json.loads(info_json) | ||||||
|         video_url = info['videoUri'] |         video_url = info['videoUri'] | ||||||
|         m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url) |         m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url) | ||||||
|   | |||||||
							
								
								
									
										48
									
								
								youtube_dl/extractor/canal13cl.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								youtube_dl/extractor/canal13cl.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Canal13clIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)' | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', | ||||||
|  |         'md5': '4cb1fa38adcad8fea88487a078831755', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1403022125', | ||||||
|  |             'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda', | ||||||
|  |             'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.', | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         display_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage(url, display_id) | ||||||
|  |  | ||||||
|  |         title = self._html_search_meta( | ||||||
|  |             'twitter:title', webpage, 'title', fatal=True) | ||||||
|  |         description = self._html_search_meta( | ||||||
|  |             'twitter:description', webpage, 'description') | ||||||
|  |         url = self._html_search_regex( | ||||||
|  |             r'articuloVideo = \"(.*?)\"', webpage, 'url') | ||||||
|  |         real_id = self._search_regex( | ||||||
|  |             r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id) | ||||||
|  |         thumbnail = self._html_search_regex( | ||||||
|  |             r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail') | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': real_id, | ||||||
|  |             'display_id': display_id, | ||||||
|  |             'url': url, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |         } | ||||||
| @@ -1,4 +1,6 @@ | |||||||
| # coding: utf-8 | # coding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| @@ -9,11 +11,12 @@ class Canalc2IE(InfoExtractor): | |||||||
|     _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)' |     _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui', |         'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui', | ||||||
|         u'file': u'12163.mp4', |         'md5': '060158428b650f896c542dfbb3d6487f', | ||||||
|         u'md5': u'060158428b650f896c542dfbb3d6487f', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '12163', | ||||||
|             u'title': u'Terrasses du Numérique' |             'ext': 'mp4', | ||||||
|  |             'title': 'Terrasses du Numérique' | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -28,10 +31,11 @@ class Canalc2IE(InfoExtractor): | |||||||
|         video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name |         video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name | ||||||
|  |  | ||||||
|         title = self._html_search_regex( |         title = self._html_search_regex( | ||||||
|             r'class="evenement8">(.*?)</a>', webpage, u'title') |             r'class="evenement8">(.*?)</a>', webpage, 'title') | ||||||
|          |  | ||||||
|         return {'id': video_id, |         return { | ||||||
|                 'ext': 'mp4', |             'id': video_id, | ||||||
|                 'url': video_url, |             'ext': 'mp4', | ||||||
|                 'title': title, |             'url': video_url, | ||||||
|                 } |             'title': title, | ||||||
|  |         } | ||||||
|   | |||||||
							
								
								
									
										126
									
								
								youtube_dl/extractor/ceskatelevize.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										126
									
								
								youtube_dl/extractor/ceskatelevize.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,126 @@ | |||||||
|  | # -*- coding: utf-8 -*- | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     compat_urllib_request, | ||||||
|  |     compat_urllib_parse, | ||||||
|  |     compat_urllib_parse_urlparse, | ||||||
|  |     ExtractorError, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class CeskaTelevizeIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)' | ||||||
|  |  | ||||||
|  |     _TESTS = [ | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '213512120230004', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'První republika: Španělská chřipka', | ||||||
|  |                 'duration': 3107.4, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True,  # requires rtmpdump | ||||||
|  |             }, | ||||||
|  |             'skip': 'Works only from Czech Republic.', | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '20138143440', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'Tsatsiki, maminka a policajt', | ||||||
|  |                 'duration': 6754.1, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True,  # requires rtmpdump | ||||||
|  |             }, | ||||||
|  |             'skip': 'Works only from Czech Republic.', | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '14716', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'První republika: Zpěvačka z Dupárny Bobina', | ||||||
|  |                 'duration': 90, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True,  # requires rtmpdump | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |     ] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         url = url.replace('/porady/', '/ivysilani/').replace('/video/', '') | ||||||
|  |  | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|  |         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' | ||||||
|  |         if '%s</p>' % NOT_AVAILABLE_STRING in webpage: | ||||||
|  |             raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) | ||||||
|  |  | ||||||
|  |         typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type') | ||||||
|  |         episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id') | ||||||
|  |  | ||||||
|  |         data = { | ||||||
|  |             'playlist[0][type]': typ, | ||||||
|  |             'playlist[0][id]': episode_id, | ||||||
|  |             'requestUrl': compat_urllib_parse_urlparse(url).path, | ||||||
|  |             'requestSource': 'iVysilani', | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url', | ||||||
|  |                                             data=compat_urllib_parse.urlencode(data)) | ||||||
|  |  | ||||||
|  |         req.add_header('Content-type', 'application/x-www-form-urlencoded') | ||||||
|  |         req.add_header('x-addr', '127.0.0.1') | ||||||
|  |         req.add_header('X-Requested-With', 'XMLHttpRequest') | ||||||
|  |         req.add_header('Referer', url) | ||||||
|  |  | ||||||
|  |         playlistpage = self._download_json(req, video_id) | ||||||
|  |  | ||||||
|  |         req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url'])) | ||||||
|  |         req.add_header('Referer', url) | ||||||
|  |  | ||||||
|  |         playlist = self._download_xml(req, video_id) | ||||||
|  |          | ||||||
|  |         formats = [] | ||||||
|  |         for i in playlist.find('smilRoot/body'): | ||||||
|  |             if 'AD' not in i.attrib['id']: | ||||||
|  |                 base_url = i.attrib['base'] | ||||||
|  |                 parsedurl = compat_urllib_parse_urlparse(base_url) | ||||||
|  |                 duration = i.attrib['duration'] | ||||||
|  |  | ||||||
|  |                 for video in i.findall('video'): | ||||||
|  |                     if video.attrib['label'] != 'AD': | ||||||
|  |                         format_id = video.attrib['label'] | ||||||
|  |                         play_path = video.attrib['src'] | ||||||
|  |                         vbr = int(video.attrib['system-bitrate']) | ||||||
|  |  | ||||||
|  |                         formats.append({ | ||||||
|  |                             'format_id': format_id, | ||||||
|  |                             'url': base_url, | ||||||
|  |                             'vbr': vbr, | ||||||
|  |                             'play_path': play_path, | ||||||
|  |                             'app': parsedurl.path[1:] + '?' + parsedurl.query, | ||||||
|  |                             'rtmp_live': True, | ||||||
|  |                             'ext': 'flv', | ||||||
|  |                         }) | ||||||
|  |  | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': episode_id, | ||||||
|  |             'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'), | ||||||
|  |             'duration': float(duration), | ||||||
|  |             'formats': formats, | ||||||
|  |         } | ||||||
| @@ -1,4 +1,5 @@ | |||||||
| # encoding: utf-8 | # encoding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| @@ -8,73 +9,63 @@ from ..utils import ( | |||||||
|  |  | ||||||
|  |  | ||||||
| class CinemassacreIE(InfoExtractor): | class CinemassacreIE(InfoExtractor): | ||||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?' |     _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?' | ||||||
|     _TESTS = [{ |     _TESTS = [ | ||||||
|         u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', |         { | ||||||
|         u'file': u'19911.flv', |             'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', | ||||||
|         u'info_dict': { |             'file': '19911.mp4', | ||||||
|             u'upload_date': u'20121110', |             'md5': 'fde81fbafaee331785f58cd6c0d46190', | ||||||
|             u'title': u'“Angry Video Game Nerd: The Movie” – Trailer', |             'info_dict': { | ||||||
|             u'description': u'md5:fb87405fcb42a331742a0dce2708560b', |                 'upload_date': '20121110', | ||||||
|  |                 'title': '“Angry Video Game Nerd: The Movie” – Trailer', | ||||||
|  |                 'description': 'md5:fb87405fcb42a331742a0dce2708560b', | ||||||
|  |             }, | ||||||
|         }, |         }, | ||||||
|         u'params': { |         { | ||||||
|             # rtmp download |             'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', | ||||||
|             u'skip_download': True, |             'file': '521be8ef82b16.mp4', | ||||||
|         }, |             'md5': 'd72f10cd39eac4215048f62ab477a511', | ||||||
|     }, |             'info_dict': { | ||||||
|     { |                 'upload_date': '20131002', | ||||||
|         u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', |                 'title': 'The Mummy’s Hand (1940)', | ||||||
|         u'file': u'521be8ef82b16.flv', |             }, | ||||||
|         u'info_dict': { |         } | ||||||
|             u'upload_date': u'20131002', |     ] | ||||||
|             u'title': u'The Mummy’s Hand (1940)', |  | ||||||
|         }, |  | ||||||
|         u'params': { |  | ||||||
|             # rtmp download |  | ||||||
|             u'skip_download': True, |  | ||||||
|         }, |  | ||||||
|     }] |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |  | ||||||
|         webpage_url = u'http://' + mobj.group('url') |         webpage = self._download_webpage(url, None)  # Don't know video id yet | ||||||
|         webpage = self._download_webpage(webpage_url, None) # Don't know video id yet |  | ||||||
|         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') |         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') | ||||||
|         mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) |         mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) | ||||||
|         if not mobj: |         if not mobj: | ||||||
|             raise ExtractorError(u'Can\'t extract embed url and video id') |             raise ExtractorError('Can\'t extract embed url and video id') | ||||||
|         playerdata_url = mobj.group(u'embed_url') |         playerdata_url = mobj.group('embed_url') | ||||||
|         video_id = mobj.group(u'video_id') |         video_id = mobj.group('video_id') | ||||||
|  |  | ||||||
|         video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|', |         video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|', | ||||||
|             webpage, u'title') |             webpage, 'title') | ||||||
|         video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>', |         video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>', | ||||||
|             webpage, u'description', flags=re.DOTALL, fatal=False) |             webpage, 'description', flags=re.DOTALL, fatal=False) | ||||||
|         if len(video_description) == 0: |         if len(video_description) == 0: | ||||||
|             video_description = None |             video_description = None | ||||||
|  |  | ||||||
|         playerdata = self._download_webpage(playerdata_url, video_id) |         playerdata = self._download_webpage(playerdata_url, video_id) | ||||||
|         url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url') |  | ||||||
|  |  | ||||||
|         sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file') |         sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file') | ||||||
|         hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file') |         hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file') | ||||||
|         video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False) |         video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False) | ||||||
|  |  | ||||||
|         formats = [ |         formats = [ | ||||||
|             { |             { | ||||||
|                 'url': url, |                 'url': sd_url, | ||||||
|                 'play_path': 'mp4:' + sd_file, |                 'ext': 'mp4', | ||||||
|                 'rtmp_live': True, # workaround |  | ||||||
|                 'ext': 'flv', |  | ||||||
|                 'format': 'sd', |                 'format': 'sd', | ||||||
|                 'format_id': 'sd', |                 'format_id': 'sd', | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 'url': url, |                 'url': hd_url, | ||||||
|                 'play_path': 'mp4:' + hd_file, |                 'ext': 'mp4', | ||||||
|                 'rtmp_live': True, # workaround |  | ||||||
|                 'ext': 'flv', |  | ||||||
|                 'format': 'hd', |                 'format': 'hd', | ||||||
|                 'format_id': 'hd', |                 'format_id': 'hd', | ||||||
|             }, |             }, | ||||||
|   | |||||||
| @@ -17,8 +17,9 @@ class CollegeHumorIE(InfoExtractor): | |||||||
|             'id': '6902724', |             'id': '6902724', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Comic-Con Cosplay Catastrophe', |             'title': 'Comic-Con Cosplay Catastrophe', | ||||||
|             'description': 'Fans get creative this year', |             'description': "Fans get creative this year at San Diego.  Too creative.  And yes, that's really Joss Whedon.", | ||||||
|             'age_limit': 13, |             'age_limit': 13, | ||||||
|  |             'duration': 187, | ||||||
|         }, |         }, | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
| @@ -28,22 +29,22 @@ class CollegeHumorIE(InfoExtractor): | |||||||
|             'id': '3505939', |             'id': '3505939', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Font Conference', |             'title': 'Font Conference', | ||||||
|             'description': 'This video wasn\'t long enough,', |             'description': "This video wasn't long enough, so we made it double-spaced.", | ||||||
|             'age_limit': 10, |             'age_limit': 10, | ||||||
|             'duration': 179, |             'duration': 179, | ||||||
|         }, |         }, | ||||||
|     }, |     }, | ||||||
|     # embedded youtube video |     # embedded youtube video | ||||||
|     { |     { | ||||||
|         'url': 'http://www.collegehumor.com/embed/6950457', |         'url': 'http://www.collegehumor.com/embed/6950306', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'W5gMp3ZjYg4', |             'id': 'Z-bao9fg6Yc', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', |             'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!', | ||||||
|             'uploader': 'Funnyplox TV', |             'uploader': 'Mark Dice', | ||||||
|             'uploader_id': 'funnyploxtv', |             'uploader_id': 'MarkDice', | ||||||
|             'description': 'md5:7ded37421526d54afdf005e25bc2b7a3', |             'description': 'md5:62c3dab9351fac7bb44b53b69511d87f', | ||||||
|             'upload_date': '20140128', |             'upload_date': '20140127', | ||||||
|         }, |         }, | ||||||
|         'params': { |         'params': { | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
| @@ -87,6 +88,7 @@ class CollegeHumorIE(InfoExtractor): | |||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|         duration = int_or_none(vdata.get('duration'), 1000) |         duration = int_or_none(vdata.get('duration'), 1000) | ||||||
|  |         like_count = int_or_none(vdata.get('likes')) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
| @@ -96,4 +98,5 @@ class CollegeHumorIE(InfoExtractor): | |||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'age_limit': age_limit, |             'age_limit': age_limit, | ||||||
|             'duration': duration, |             'duration': duration, | ||||||
|  |             'like_count': like_count, | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -14,7 +14,7 @@ from ..utils import ( | |||||||
|  |  | ||||||
|  |  | ||||||
| class ComedyCentralIE(MTVServicesInfoExtractor): | class ComedyCentralIE(MTVServicesInfoExtractor): | ||||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/ |     _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/ | ||||||
|         (video-clips|episodes|cc-studios|video-collections) |         (video-clips|episodes|cc-studios|video-collections) | ||||||
|         /(?P<title>.*)''' |         /(?P<title>.*)''' | ||||||
|     _FEED_URL = 'http://comedycentral.com/feeds/mrss/' |     _FEED_URL = 'http://comedycentral.com/feeds/mrss/' | ||||||
|   | |||||||
| @@ -88,12 +88,18 @@ class InfoExtractor(object): | |||||||
|  |  | ||||||
|     The following fields are optional: |     The following fields are optional: | ||||||
|  |  | ||||||
|  |     display_id      An alternative identifier for the video, not necessarily | ||||||
|  |                     unique, but available before title. Typically, id is | ||||||
|  |                     something like "4234987", title "Dancing naked mole rats", | ||||||
|  |                     and display_id "dancing-naked-mole-rats" | ||||||
|     thumbnails:     A list of dictionaries (with the entries "resolution" and |     thumbnails:     A list of dictionaries (with the entries "resolution" and | ||||||
|                     "url") for the varying thumbnails |                     "url") for the varying thumbnails | ||||||
|     thumbnail:      Full URL to a video thumbnail image. |     thumbnail:      Full URL to a video thumbnail image. | ||||||
|     description:    One-line video description. |     description:    One-line video description. | ||||||
|     uploader:       Full name of the video uploader. |     uploader:       Full name of the video uploader. | ||||||
|  |     timestamp:      UNIX timestamp of the moment the video became available. | ||||||
|     upload_date:    Video upload date (YYYYMMDD). |     upload_date:    Video upload date (YYYYMMDD). | ||||||
|  |                     If not explicitly set, calculated from timestamp. | ||||||
|     uploader_id:    Nickname or id of the video uploader. |     uploader_id:    Nickname or id of the video uploader. | ||||||
|     location:       Physical location of the video. |     location:       Physical location of the video. | ||||||
|     subtitles:      The subtitle file contents as a dictionary in the format |     subtitles:      The subtitle file contents as a dictionary in the format | ||||||
| @@ -114,9 +120,6 @@ class InfoExtractor(object): | |||||||
|     _real_extract() methods and define a _VALID_URL regexp. |     _real_extract() methods and define a _VALID_URL regexp. | ||||||
|     Probably, they should also be added to the list of extractors. |     Probably, they should also be added to the list of extractors. | ||||||
|  |  | ||||||
|     _real_extract() must return a *list* of information dictionaries as |  | ||||||
|     described above. |  | ||||||
|  |  | ||||||
|     Finally, the _WORKING attribute should be set to False for broken IEs |     Finally, the _WORKING attribute should be set to False for broken IEs | ||||||
|     in order to warn the users and skip the tests. |     in order to warn the users and skip the tests. | ||||||
|     """ |     """ | ||||||
| @@ -432,14 +435,14 @@ class InfoExtractor(object): | |||||||
|         if secure: regexes = self._og_regexes('video:secure_url') + regexes |         if secure: regexes = self._og_regexes('video:secure_url') + regexes | ||||||
|         return self._html_search_regex(regexes, html, name, **kargs) |         return self._html_search_regex(regexes, html, name, **kargs) | ||||||
|  |  | ||||||
|     def _html_search_meta(self, name, html, display_name=None): |     def _html_search_meta(self, name, html, display_name=None, fatal=False): | ||||||
|         if display_name is None: |         if display_name is None: | ||||||
|             display_name = name |             display_name = name | ||||||
|         return self._html_search_regex( |         return self._html_search_regex( | ||||||
|             r'''(?ix)<meta |             r'''(?ix)<meta | ||||||
|                     (?=[^>]+(?:itemprop|name|property)=["\']%s["\']) |                     (?=[^>]+(?:itemprop|name|property)=["\']%s["\']) | ||||||
|                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), |                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), | ||||||
|             html, display_name, fatal=False) |             html, display_name, fatal=fatal) | ||||||
|  |  | ||||||
|     def _dc_search_uploader(self, html): |     def _dc_search_uploader(self, html): | ||||||
|         return self._html_search_meta('dc.creator', html, 'uploader') |         return self._html_search_meta('dc.creator', html, 'uploader') | ||||||
|   | |||||||
| @@ -1,7 +1,11 @@ | |||||||
| # encoding: utf-8 | # encoding: utf-8 | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re, base64, zlib | import re | ||||||
|  | import json | ||||||
|  | import base64 | ||||||
|  | import zlib | ||||||
|  |  | ||||||
| from hashlib import sha1 | from hashlib import sha1 | ||||||
| from math import pow, sqrt, floor | from math import pow, sqrt, floor | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| @@ -19,13 +23,15 @@ from ..aes import ( | |||||||
|     inc, |     inc, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class CrunchyrollIE(InfoExtractor): | class CrunchyrollIE(InfoExtractor): | ||||||
|     _VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' |     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' | ||||||
|     _TESTS = [{ |     _TEST = { | ||||||
|         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', |         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', | ||||||
|         'file': '645513.flv', |  | ||||||
|         #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412', |         #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|  |             'id': '645513', | ||||||
|  |             'ext': 'flv', | ||||||
|             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', |             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', | ||||||
|             'description': 'md5:2d17137920c64f2f49981a7797d275ef', |             'description': 'md5:2d17137920c64f2f49981a7797d275ef', | ||||||
|             'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', |             'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', | ||||||
| @@ -36,7 +42,7 @@ class CrunchyrollIE(InfoExtractor): | |||||||
|             # rtmp |             # rtmp | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         }, |         }, | ||||||
|     }] |     } | ||||||
|  |  | ||||||
|     _FORMAT_IDS = { |     _FORMAT_IDS = { | ||||||
|         '360': ('60', '106'), |         '360': ('60', '106'), | ||||||
| @@ -68,7 +74,7 @@ class CrunchyrollIE(InfoExtractor): | |||||||
|             shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest()) |             shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest()) | ||||||
|             # Extend 160 Bit hash to 256 Bit |             # Extend 160 Bit hash to 256 Bit | ||||||
|             return shaHash + [0] * 12 |             return shaHash + [0] * 12 | ||||||
|          |  | ||||||
|         key = obfuscate_key(id) |         key = obfuscate_key(id) | ||||||
|         class Counter: |         class Counter: | ||||||
|             __value = iv |             __value = iv | ||||||
| @@ -80,9 +86,8 @@ class CrunchyrollIE(InfoExtractor): | |||||||
|         return zlib.decompress(decrypted_data) |         return zlib.decompress(decrypted_data) | ||||||
|  |  | ||||||
|     def _convert_subtitles_to_srt(self, subtitles): |     def _convert_subtitles_to_srt(self, subtitles): | ||||||
|         i=1 |  | ||||||
|         output = '' |         output = '' | ||||||
|         for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles): |         for i, (start, end, text) in enumerate(re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles), 1): | ||||||
|             start = start.replace('.', ',') |             start = start.replace('.', ',') | ||||||
|             end = end.replace('.', ',') |             end = end.replace('.', ',') | ||||||
|             text = clean_html(text) |             text = clean_html(text) | ||||||
| @@ -90,7 +95,6 @@ class CrunchyrollIE(InfoExtractor): | |||||||
|             if not text: |             if not text: | ||||||
|                 continue |                 continue | ||||||
|             output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) |             output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) | ||||||
|             i+=1 |  | ||||||
|         return output |         return output | ||||||
|  |  | ||||||
|     def _real_extract(self,url): |     def _real_extract(self,url): | ||||||
| @@ -108,6 +112,12 @@ class CrunchyrollIE(InfoExtractor): | |||||||
|         if note_m: |         if note_m: | ||||||
|             raise ExtractorError(note_m) |             raise ExtractorError(note_m) | ||||||
|  |  | ||||||
|  |         mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage) | ||||||
|  |         if mobj: | ||||||
|  |             msg = json.loads(mobj.group('msg')) | ||||||
|  |             if msg.get('type') == 'error': | ||||||
|  |                 raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) | ||||||
|  |  | ||||||
|         video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL) |         video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL) | ||||||
|         video_title = re.sub(r' {2,}', ' ', video_title) |         video_title = re.sub(r' {2,}', ' ', video_title) | ||||||
|         video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') |         video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') | ||||||
| @@ -123,7 +133,7 @@ class CrunchyrollIE(InfoExtractor): | |||||||
|         playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) |         playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) | ||||||
|         playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') |         playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||||
|         playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') |         playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') | ||||||
|          |  | ||||||
|         stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id') |         stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id') | ||||||
|         video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False) |         video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False) | ||||||
|  |  | ||||||
| @@ -161,7 +171,7 @@ class CrunchyrollIE(InfoExtractor): | |||||||
|             data = base64.b64decode(data) |             data = base64.b64decode(data) | ||||||
|  |  | ||||||
|             subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') |             subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') | ||||||
|             lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False) |             lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) | ||||||
|             if not lang_code: |             if not lang_code: | ||||||
|                 continue |                 continue | ||||||
|             subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) |             subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) | ||||||
|   | |||||||
| @@ -10,9 +10,9 @@ from ..utils import ( | |||||||
|  |  | ||||||
|  |  | ||||||
| class CSpanIE(InfoExtractor): | class CSpanIE(InfoExtractor): | ||||||
|     _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)' |     _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)' | ||||||
|     IE_DESC = 'C-SPAN' |     IE_DESC = 'C-SPAN' | ||||||
|     _TEST = { |     _TESTS = [{ | ||||||
|         'url': 'http://www.c-span.org/video/?313572-1/HolderonV', |         'url': 'http://www.c-span.org/video/?313572-1/HolderonV', | ||||||
|         'md5': '8e44ce11f0f725527daccc453f553eb0', |         'md5': '8e44ce11f0f725527daccc453f553eb0', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -22,13 +22,24 @@ class CSpanIE(InfoExtractor): | |||||||
|             'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.', |             'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.', | ||||||
|         }, |         }, | ||||||
|         'skip': 'Regularly fails on travis, for unknown reasons', |         'skip': 'Regularly fails on travis, for unknown reasons', | ||||||
|     } |     }, { | ||||||
|  |         'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models', | ||||||
|  |         # For whatever reason, the served video alternates between | ||||||
|  |         # two different ones | ||||||
|  |         #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '340723', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'International Health Care Models', | ||||||
|  |             'description': 'md5:7a985a2d595dba00af3d9c9f0783c967', | ||||||
|  |         } | ||||||
|  |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         page_id = mobj.group('id') |         page_id = mobj.group('id') | ||||||
|         webpage = self._download_webpage(url, page_id) |         webpage = self._download_webpage(url, page_id) | ||||||
|         video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id') |         video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id') | ||||||
|  |  | ||||||
|         description = self._html_search_regex( |         description = self._html_search_regex( | ||||||
|             [ |             [ | ||||||
|   | |||||||
| @@ -12,6 +12,7 @@ from ..utils import ( | |||||||
|     get_element_by_id, |     get_element_by_id, | ||||||
|     orderedSet, |     orderedSet, | ||||||
|     str_to_int, |     str_to_int, | ||||||
|  |     int_or_none, | ||||||
|  |  | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
| ) | ) | ||||||
| @@ -124,7 +125,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|             if video_url is not None: |             if video_url is not None: | ||||||
|                 m_size = re.search(r'H264-(\d+)x(\d+)', video_url) |                 m_size = re.search(r'H264-(\d+)x(\d+)', video_url) | ||||||
|                 if m_size is not None: |                 if m_size is not None: | ||||||
|                     width, height = m_size.group(1), m_size.group(2) |                     width, height = map(int_or_none, (m_size.group(1), m_size.group(2))) | ||||||
|                 else: |                 else: | ||||||
|                     width, height = None, None |                     width, height = None, None | ||||||
|                 formats.append({ |                 formats.append({ | ||||||
|   | |||||||
| @@ -1,25 +1,28 @@ | |||||||
| # encoding: utf-8 | # encoding: utf-8 | ||||||
|  |  | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|     determine_ext, |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class DaumIE(InfoExtractor): | class DaumIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)' |     _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)' | ||||||
|     IE_NAME = u'daum.net' |     IE_NAME = 'daum.net' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', |         'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', | ||||||
|         u'file': u'52554690.mp4', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '52554690', | ||||||
|             u'title': u'DOTA 2GETHER 시즌2 6회 - 2부', |             'ext': 'mp4', | ||||||
|             u'description': u'DOTA 2GETHER 시즌2 6회 - 2부', |             'title': 'DOTA 2GETHER 시즌2 6회 - 2부', | ||||||
|             u'upload_date': u'20130831', |             'description': 'DOTA 2GETHER 시즌2 6회 - 2부', | ||||||
|             u'duration': 3868, |             'upload_date': '20130831', | ||||||
|  |             'duration': 3868, | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -30,14 +33,14 @@ class DaumIE(InfoExtractor): | |||||||
|         webpage = self._download_webpage(canonical_url, video_id) |         webpage = self._download_webpage(canonical_url, video_id) | ||||||
|         full_id = self._search_regex( |         full_id = self._search_regex( | ||||||
|             r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]', |             r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]', | ||||||
|             webpage, u'full id') |             webpage, 'full id') | ||||||
|         query = compat_urllib_parse.urlencode({'vid': full_id}) |         query = compat_urllib_parse.urlencode({'vid': full_id}) | ||||||
|         info = self._download_xml( |         info = self._download_xml( | ||||||
|             'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, |             'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, | ||||||
|             u'Downloading video info') |             'Downloading video info') | ||||||
|         urls = self._download_xml( |         urls = self._download_xml( | ||||||
|             'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query, |             'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query, | ||||||
|             video_id, u'Downloading video formats info') |             video_id, 'Downloading video formats info') | ||||||
|  |  | ||||||
|         self.to_screen(u'%s: Getting video urls' % video_id) |         self.to_screen(u'%s: Getting video urls' % video_id) | ||||||
|         formats = [] |         formats = [] | ||||||
| @@ -53,7 +56,6 @@ class DaumIE(InfoExtractor): | |||||||
|             format_url = url_doc.find('result/url').text |             format_url = url_doc.find('result/url').text | ||||||
|             formats.append({ |             formats.append({ | ||||||
|                 'url': format_url, |                 'url': format_url, | ||||||
|                 'ext': determine_ext(format_url), |  | ||||||
|                 'format_id': profile, |                 'format_id': profile, | ||||||
|             }) |             }) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,60 +0,0 @@ | |||||||
| import re |  | ||||||
| import os |  | ||||||
| import socket |  | ||||||
|  |  | ||||||
| from .common import InfoExtractor |  | ||||||
| from ..utils import ( |  | ||||||
|     compat_http_client, |  | ||||||
|     compat_str, |  | ||||||
|     compat_urllib_error, |  | ||||||
|     compat_urllib_parse, |  | ||||||
|     compat_urllib_request, |  | ||||||
|  |  | ||||||
|     ExtractorError, |  | ||||||
| ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class DepositFilesIE(InfoExtractor): |  | ||||||
|     """Information extractor for depositfiles.com""" |  | ||||||
|  |  | ||||||
|     _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         file_id = url.split('/')[-1] |  | ||||||
|         # Rebuild url in english locale |  | ||||||
|         url = 'http://depositfiles.com/en/files/' + file_id |  | ||||||
|  |  | ||||||
|         # Retrieve file webpage with 'Free download' button pressed |  | ||||||
|         free_download_indication = {'gateway_result' : '1'} |  | ||||||
|         request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication)) |  | ||||||
|         try: |  | ||||||
|             self.report_download_webpage(file_id) |  | ||||||
|             webpage = compat_urllib_request.urlopen(request).read() |  | ||||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |  | ||||||
|             raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err)) |  | ||||||
|  |  | ||||||
|         # Search for the real file URL |  | ||||||
|         mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage) |  | ||||||
|         if (mobj is None) or (mobj.group(1) is None): |  | ||||||
|             # Try to figure out reason of the error. |  | ||||||
|             mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) |  | ||||||
|             if (mobj is not None) and (mobj.group(1) is not None): |  | ||||||
|                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() |  | ||||||
|                 raise ExtractorError(u'%s' % restriction_message) |  | ||||||
|             else: |  | ||||||
|                 raise ExtractorError(u'Unable to extract download URL from: %s' % url) |  | ||||||
|  |  | ||||||
|         file_url = mobj.group(1) |  | ||||||
|         file_extension = os.path.splitext(file_url)[1][1:] |  | ||||||
|  |  | ||||||
|         # Search for file title |  | ||||||
|         file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title') |  | ||||||
|  |  | ||||||
|         return [{ |  | ||||||
|             'id':       file_id.decode('utf-8'), |  | ||||||
|             'url':      file_url.decode('utf-8'), |  | ||||||
|             'uploader': None, |  | ||||||
|             'upload_date':  None, |  | ||||||
|             'title':    file_title, |  | ||||||
|             'ext':      file_extension.decode('utf-8'), |  | ||||||
|         }] |  | ||||||
| @@ -1,3 +1,5 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import json | import json | ||||||
| import re | import re | ||||||
| import socket | import socket | ||||||
| @@ -9,16 +11,15 @@ from ..utils import ( | |||||||
|     compat_urllib_error, |     compat_urllib_error, | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|     compat_urllib_request, |     compat_urllib_request, | ||||||
|  |     urlencode_postdata, | ||||||
|  |  | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class FacebookIE(InfoExtractor): | class FacebookIE(InfoExtractor): | ||||||
|     """Information Extractor for Facebook""" |  | ||||||
|  |  | ||||||
|     _VALID_URL = r'''(?x) |     _VALID_URL = r'''(?x) | ||||||
|         (?:https?://)?(?:\w+\.)?facebook\.com/ |         https?://(?:\w+\.)?facebook\.com/ | ||||||
|         (?:[^#?]*\#!/)? |         (?:[^#?]*\#!/)? | ||||||
|         (?:video/video\.php|photo\.php|video/embed)\?(?:.*?) |         (?:video/video\.php|photo\.php|video/embed)\?(?:.*?) | ||||||
|         (?:v|video_id)=(?P<id>[0-9]+) |         (?:v|video_id)=(?P<id>[0-9]+) | ||||||
| @@ -26,21 +27,18 @@ class FacebookIE(InfoExtractor): | |||||||
|     _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' |     _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' | ||||||
|     _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' |     _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' | ||||||
|     _NETRC_MACHINE = 'facebook' |     _NETRC_MACHINE = 'facebook' | ||||||
|     IE_NAME = u'facebook' |     IE_NAME = 'facebook' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'https://www.facebook.com/photo.php?v=120708114770723', |         'url': 'https://www.facebook.com/photo.php?v=120708114770723', | ||||||
|         u'file': u'120708114770723.mp4', |         'md5': '48975a41ccc4b7a581abd68651c1a5a8', | ||||||
|         u'md5': u'48975a41ccc4b7a581abd68651c1a5a8', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '120708114770723', | ||||||
|             u"duration": 279, |             'ext': 'mp4', | ||||||
|             u"title": u"PEOPLE ARE AWESOME 2013" |             'duration': 279, | ||||||
|  |             'title': 'PEOPLE ARE AWESOME 2013', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def report_login(self): |  | ||||||
|         """Report attempt to log in.""" |  | ||||||
|         self.to_screen(u'Logging in') |  | ||||||
|  |  | ||||||
|     def _login(self): |     def _login(self): | ||||||
|         (useremail, password) = self._get_login_info() |         (useremail, password) = self._get_login_info() | ||||||
|         if useremail is None: |         if useremail is None: | ||||||
| @@ -48,11 +46,13 @@ class FacebookIE(InfoExtractor): | |||||||
|  |  | ||||||
|         login_page_req = compat_urllib_request.Request(self._LOGIN_URL) |         login_page_req = compat_urllib_request.Request(self._LOGIN_URL) | ||||||
|         login_page_req.add_header('Cookie', 'locale=en_US') |         login_page_req.add_header('Cookie', 'locale=en_US') | ||||||
|         self.report_login() |         login_page = self._download_webpage(login_page_req, None, | ||||||
|         login_page = self._download_webpage(login_page_req, None, note=False, |             note='Downloading login page', | ||||||
|             errnote=u'Unable to download login page') |             errnote='Unable to download login page') | ||||||
|         lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd') |         lsd = self._search_regex( | ||||||
|         lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd') |             r'<input type="hidden" name="lsd" value="([^"]*)"', | ||||||
|  |             login_page, 'lsd') | ||||||
|  |         lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd') | ||||||
|  |  | ||||||
|         login_form = { |         login_form = { | ||||||
|             'email': useremail, |             'email': useremail, | ||||||
| @@ -65,27 +65,29 @@ class FacebookIE(InfoExtractor): | |||||||
|             'timezone': '-60', |             'timezone': '-60', | ||||||
|             'trynum': '1', |             'trynum': '1', | ||||||
|             } |             } | ||||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) |         request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form)) | ||||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') |         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||||
|         try: |         try: | ||||||
|             login_results = compat_urllib_request.urlopen(request).read() |             login_results = self._download_webpage(request, None, | ||||||
|  |                 note='Logging in', errnote='unable to fetch login page') | ||||||
|             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: |             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: | ||||||
|                 self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') |                 self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') | ||||||
|                 return |                 return | ||||||
|  |  | ||||||
|             check_form = { |             check_form = { | ||||||
|                 'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'), |                 'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'), | ||||||
|                 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'), |                 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'), | ||||||
|                 'name_action_selected': 'dont_save', |                 'name_action_selected': 'dont_save', | ||||||
|                 'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'), |                 'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'), | ||||||
|             } |             } | ||||||
|             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form)) |             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) | ||||||
|             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') |             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||||
|             check_response = compat_urllib_request.urlopen(check_req).read() |             check_response = self._download_webpage(check_req, None, | ||||||
|  |                 note='Confirming login') | ||||||
|             if re.search(r'id="checkpointSubmitButton"', check_response) is not None: |             if re.search(r'id="checkpointSubmitButton"', check_response) is not None: | ||||||
|                 self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.') |                 self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.') | ||||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||||
|             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) |             self._downloader.report_warning('unable to log in: %s' % compat_str(err)) | ||||||
|             return |             return | ||||||
|  |  | ||||||
|     def _real_initialize(self): |     def _real_initialize(self): | ||||||
| @@ -93,8 +95,6 @@ class FacebookIE(InfoExtractor): | |||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         if mobj is None: |  | ||||||
|             raise ExtractorError(u'Invalid URL: %s' % url) |  | ||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id |         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id | ||||||
| @@ -107,10 +107,10 @@ class FacebookIE(InfoExtractor): | |||||||
|             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) |             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) | ||||||
|             if m_msg is not None: |             if m_msg is not None: | ||||||
|                 raise ExtractorError( |                 raise ExtractorError( | ||||||
|                     u'The video is not available, Facebook said: "%s"' % m_msg.group(1), |                     'The video is not available, Facebook said: "%s"' % m_msg.group(1), | ||||||
|                     expected=True) |                     expected=True) | ||||||
|             else: |             else: | ||||||
|                 raise ExtractorError(u'Cannot parse data') |                 raise ExtractorError('Cannot parse data') | ||||||
|         data = dict(json.loads(m.group(1))) |         data = dict(json.loads(m.group(1))) | ||||||
|         params_raw = compat_urllib_parse.unquote(data['params']) |         params_raw = compat_urllib_parse.unquote(data['params']) | ||||||
|         params = json.loads(params_raw) |         params = json.loads(params_raw) | ||||||
| @@ -119,19 +119,15 @@ class FacebookIE(InfoExtractor): | |||||||
|         if not video_url: |         if not video_url: | ||||||
|             video_url = video_data['sd_src'] |             video_url = video_data['sd_src'] | ||||||
|         if not video_url: |         if not video_url: | ||||||
|             raise ExtractorError(u'Cannot find video URL') |             raise ExtractorError('Cannot find video URL') | ||||||
|         video_duration = int(video_data['video_duration']) |  | ||||||
|         thumbnail = video_data['thumbnail_src'] |  | ||||||
|  |  | ||||||
|         video_title = self._html_search_regex( |         video_title = self._html_search_regex( | ||||||
|             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title') |             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title') | ||||||
|  |  | ||||||
|         info = { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': video_title, |             'title': video_title, | ||||||
|             'url': video_url, |             'url': video_url, | ||||||
|             'ext': 'mp4', |             'duration': int(video_data['video_duration']), | ||||||
|             'duration': video_duration, |             'thumbnail': video_data['thumbnail_src'], | ||||||
|             'thumbnail': thumbnail, |  | ||||||
|         } |         } | ||||||
|         return [info] |  | ||||||
|   | |||||||
| @@ -8,8 +8,8 @@ from ..utils import ( | |||||||
|     unified_strdate, |     unified_strdate, | ||||||
|     str_to_int, |     str_to_int, | ||||||
|     parse_duration, |     parse_duration, | ||||||
|  |     clean_html, | ||||||
| ) | ) | ||||||
| from youtube_dl.utils import clean_html |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class FourTubeIE(InfoExtractor): | class FourTubeIE(InfoExtractor): | ||||||
|   | |||||||
| @@ -1,12 +1,13 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import json | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  |  | ||||||
|  |  | ||||||
| class FunnyOrDieIE(InfoExtractor): | class FunnyOrDieIE(InfoExtractor): | ||||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' |     _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', |         'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', | ||||||
|         'file': '0732f586d7.mp4', |         'file': '0732f586d7.mp4', | ||||||
| @@ -30,10 +31,23 @@ class FunnyOrDieIE(InfoExtractor): | |||||||
|             [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''], |             [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''], | ||||||
|             webpage, 'video URL', flags=re.DOTALL) |             webpage, 'video URL', flags=re.DOTALL) | ||||||
|  |  | ||||||
|  |         if mobj.group('type') == 'embed': | ||||||
|  |             post_json = self._search_regex( | ||||||
|  |                 r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details') | ||||||
|  |             post = json.loads(post_json) | ||||||
|  |             title = post['name'] | ||||||
|  |             description = post.get('description') | ||||||
|  |             thumbnail = post.get('picture') | ||||||
|  |         else: | ||||||
|  |             title = self._og_search_title(webpage) | ||||||
|  |             description = self._og_search_description(webpage) | ||||||
|  |             thumbnail = None | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'url': video_url, |             'url': video_url, | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': self._og_search_title(webpage), |             'title': title, | ||||||
|             'description': self._og_search_description(webpage), |             'description': description, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -1,3 +1,5 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| @@ -6,13 +8,14 @@ from .common import InfoExtractor | |||||||
| class GamekingsIE(InfoExtractor): | class GamekingsIE(InfoExtractor): | ||||||
|     _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' |     _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", |         'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', | ||||||
|         u'file': u'20130811.mp4', |  | ||||||
|         # MD5 is flaky, seems to change regularly |         # MD5 is flaky, seems to change regularly | ||||||
|         #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3', |         # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', | ||||||
|         u'info_dict': { |         u'info_dict': { | ||||||
|             u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review", |             'id': '20130811', | ||||||
|             u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.", |             'ext': 'mp4', | ||||||
|  |             'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', | ||||||
|  |             'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										134
									
								
								youtube_dl/extractor/gdcvault.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								youtube_dl/extractor/gdcvault.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,134 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     compat_urllib_parse, | ||||||
|  |     compat_urllib_request, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | class GDCVaultIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)' | ||||||
|  |     _TESTS = [ | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', | ||||||
|  |             'md5': '7ce8388f544c88b7ac11c7ab1b593704', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '1019721', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '1015683', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True,  # Requires rtmpdump | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |     ] | ||||||
|  |  | ||||||
|  |     def _parse_mp4(self, xml_description): | ||||||
|  |         video_formats = [] | ||||||
|  |         mp4_video = xml_description.find('./metadata/mp4video') | ||||||
|  |         if mp4_video is None: | ||||||
|  |             return None | ||||||
|  |  | ||||||
|  |         mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text) | ||||||
|  |         video_root = mobj.group('root') | ||||||
|  |         formats = xml_description.findall('./metadata/MBRVideos/MBRVideo') | ||||||
|  |         for format in formats: | ||||||
|  |             mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text) | ||||||
|  |             url = video_root + mobj.group('path') | ||||||
|  |             vbr = format.find('bitrate').text | ||||||
|  |             video_formats.append({ | ||||||
|  |                 'url': url, | ||||||
|  |                 'vbr': int(vbr), | ||||||
|  |             }) | ||||||
|  |         return video_formats | ||||||
|  |  | ||||||
|  |     def _parse_flv(self, xml_description): | ||||||
|  |         video_formats = [] | ||||||
|  |         akami_url = xml_description.find('./metadata/akamaiHost').text | ||||||
|  |         slide_video_path = xml_description.find('./metadata/slideVideo').text | ||||||
|  |         video_formats.append({ | ||||||
|  |             'url': 'rtmp://' + akami_url + '/' + slide_video_path, | ||||||
|  |             'format_note': 'slide deck video', | ||||||
|  |             'quality': -2, | ||||||
|  |             'preference': -2, | ||||||
|  |             'format_id': 'slides', | ||||||
|  |         }) | ||||||
|  |         speaker_video_path = xml_description.find('./metadata/speakerVideo').text | ||||||
|  |         video_formats.append({ | ||||||
|  |             'url': 'rtmp://' + akami_url + '/' + speaker_video_path, | ||||||
|  |             'format_note': 'speaker video', | ||||||
|  |             'quality': -1, | ||||||
|  |             'preference': -1, | ||||||
|  |             'format_id': 'speaker', | ||||||
|  |         }) | ||||||
|  |         return video_formats | ||||||
|  |  | ||||||
|  |     def _login(self, webpage_url, video_id): | ||||||
|  |         (username, password) = self._get_login_info() | ||||||
|  |         if username is None or password is None: | ||||||
|  |             self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.') | ||||||
|  |             return None | ||||||
|  |  | ||||||
|  |         mobj = re.match(r'(?P<root_url>https?://.*?/).*', webpage_url) | ||||||
|  |         login_url = mobj.group('root_url') + 'api/login.php' | ||||||
|  |         logout_url = mobj.group('root_url') + 'logout' | ||||||
|  |  | ||||||
|  |         login_form = { | ||||||
|  |             'email': username, | ||||||
|  |             'password': password, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form)) | ||||||
|  |         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||||
|  |         self._download_webpage(request, video_id, 'Logging in') | ||||||
|  |         start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page') | ||||||
|  |         self._download_webpage(logout_url, video_id, 'Logging out') | ||||||
|  |  | ||||||
|  |         return start_page | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |  | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |         webpage_url = 'http://www.gdcvault.com/play/' + video_id | ||||||
|  |         start_page = self._download_webpage(webpage_url, video_id) | ||||||
|  |  | ||||||
|  |         xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False) | ||||||
|  |  | ||||||
|  |         if xml_root is None: | ||||||
|  |             # Probably need to authenticate | ||||||
|  |             start_page = self._login(webpage_url, video_id) | ||||||
|  |             if start_page is None: | ||||||
|  |                 self.report_warning('Could not login.') | ||||||
|  |             else: | ||||||
|  |                 # Grab the url from the authenticated page | ||||||
|  |                 xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root') | ||||||
|  |  | ||||||
|  |         xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False) | ||||||
|  |         if xml_name is None: | ||||||
|  |             # Fallback to the older format | ||||||
|  |             xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename') | ||||||
|  |  | ||||||
|  |         xml_decription_url = xml_root + 'xml/' + xml_name | ||||||
|  |         xml_description = self._download_xml(xml_decription_url, video_id) | ||||||
|  |  | ||||||
|  |         video_title = xml_description.find('./metadata/title').text | ||||||
|  |         video_formats = self._parse_mp4(xml_description) | ||||||
|  |         if video_formats is None: | ||||||
|  |             video_formats = self._parse_flv(xml_description) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': video_title, | ||||||
|  |             'formats': video_formats, | ||||||
|  |         } | ||||||
| @@ -12,9 +12,11 @@ from ..utils import ( | |||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|     compat_urllib_request, |     compat_urllib_request, | ||||||
|     compat_urlparse, |     compat_urlparse, | ||||||
|  |     compat_xml_parse_error, | ||||||
|  |  | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     HEADRequest, |     HEADRequest, | ||||||
|  |     parse_xml, | ||||||
|     smuggle_url, |     smuggle_url, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
| @@ -22,6 +24,7 @@ from ..utils import ( | |||||||
| ) | ) | ||||||
| from .brightcove import BrightcoveIE | from .brightcove import BrightcoveIE | ||||||
| from .ooyala import OoyalaIE | from .ooyala import OoyalaIE | ||||||
|  | from .rutv import RUTVIE | ||||||
|  |  | ||||||
|  |  | ||||||
| class GenericIE(InfoExtractor): | class GenericIE(InfoExtractor): | ||||||
| @@ -81,10 +84,10 @@ class GenericIE(InfoExtractor): | |||||||
|         # Direct link to a video |         # Direct link to a video | ||||||
|         { |         { | ||||||
|             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', |             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', | ||||||
|             'file': 'trailer.mp4', |  | ||||||
|             'md5': '67d406c2bcb6af27fa886f31aa934bbe', |             'md5': '67d406c2bcb6af27fa886f31aa934bbe', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
|                 'id': 'trailer', |                 'id': 'trailer', | ||||||
|  |                 'ext': 'mp4', | ||||||
|                 'title': 'trailer', |                 'title': 'trailer', | ||||||
|                 'upload_date': '20100513', |                 'upload_date': '20100513', | ||||||
|             } |             } | ||||||
| @@ -92,7 +95,6 @@ class GenericIE(InfoExtractor): | |||||||
|         # ooyala video |         # ooyala video | ||||||
|         { |         { | ||||||
|             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', |             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', | ||||||
|             'file': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ.mp4', |  | ||||||
|             'md5': '5644c6ca5d5782c1d0d350dad9bd840c', |             'md5': '5644c6ca5d5782c1d0d350dad9bd840c', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
|                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', |                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', | ||||||
| @@ -100,6 +102,76 @@ class GenericIE(InfoExtractor): | |||||||
|                 'title': '2cc213299525360.mov',  # that's what we get |                 'title': '2cc213299525360.mov',  # that's what we get | ||||||
|             }, |             }, | ||||||
|         }, |         }, | ||||||
|  |         # google redirect | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': 'cmQHVoWB5FY', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'upload_date': '20130224', | ||||||
|  |                 'uploader_id': 'TheVerge', | ||||||
|  |                 'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.', | ||||||
|  |                 'uploader': 'The Verge', | ||||||
|  |                 'title': 'First Firefox OS phones side-by-side', | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': False, | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |         # embed.ly video | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '9ODmcdjQcHQ', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second', | ||||||
|  |                 'upload_date': '20140225', | ||||||
|  |                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff', | ||||||
|  |                 'uploader': 'Tested', | ||||||
|  |                 'uploader_id': 'testedcom', | ||||||
|  |             }, | ||||||
|  |             # No need to test YoutubeIE here | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         # funnyordie embed | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns', | ||||||
|  |             'md5': '7cf780be104d40fea7bae52eed4a470e', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '18e820ec3f', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama', | ||||||
|  |                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         # RUTV embed | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '776940', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Охотское море стало целиком российским', | ||||||
|  |                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43', | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # m3u8 download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         # Embedded TED video | ||||||
|  |         { | ||||||
|  |             'url': 'http://en.support.wordpress.com/videos/ted-talks/', | ||||||
|  |             'md5': 'deeeabcc1085eb2ba205474e7235a3d5', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '981', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'My web playroom', | ||||||
|  |                 'uploader': 'Ze Frank', | ||||||
|  |                 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b', | ||||||
|  |             } | ||||||
|  |         } | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|     def report_download_webpage(self, video_id): |     def report_download_webpage(self, video_id): | ||||||
| @@ -125,9 +197,14 @@ class GenericIE(InfoExtractor): | |||||||
|                     newurl = newurl.replace(' ', '%20') |                     newurl = newurl.replace(' ', '%20') | ||||||
|                     newheaders = dict((k,v) for k,v in req.headers.items() |                     newheaders = dict((k,v) for k,v in req.headers.items() | ||||||
|                                       if k.lower() not in ("content-length", "content-type")) |                                       if k.lower() not in ("content-length", "content-type")) | ||||||
|  |                     try: | ||||||
|  |                         # This function was deprecated in python 3.3 and removed in 3.4 | ||||||
|  |                         origin_req_host = req.get_origin_req_host() | ||||||
|  |                     except AttributeError: | ||||||
|  |                         origin_req_host = req.origin_req_host | ||||||
|                     return HEADRequest(newurl, |                     return HEADRequest(newurl, | ||||||
|                                        headers=newheaders, |                                        headers=newheaders, | ||||||
|                                        origin_req_host=req.get_origin_req_host(), |                                        origin_req_host=origin_req_host, | ||||||
|                                        unverifiable=True) |                                        unverifiable=True) | ||||||
|                 else: |                 else: | ||||||
|                     raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp) |                     raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp) | ||||||
| @@ -159,6 +236,25 @@ class GenericIE(InfoExtractor): | |||||||
|             raise ExtractorError('Invalid URL protocol') |             raise ExtractorError('Invalid URL protocol') | ||||||
|         return response |         return response | ||||||
|  |  | ||||||
|  |     def _extract_rss(self, url, video_id, doc): | ||||||
|  |         playlist_title = doc.find('./channel/title').text | ||||||
|  |         playlist_desc_el = doc.find('./channel/description') | ||||||
|  |         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text | ||||||
|  |  | ||||||
|  |         entries = [{ | ||||||
|  |             '_type': 'url', | ||||||
|  |             'url': e.find('link').text, | ||||||
|  |             'title': e.find('title').text, | ||||||
|  |         } for e in doc.findall('./channel/item')] | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'id': url, | ||||||
|  |             'title': playlist_title, | ||||||
|  |             'description': playlist_desc, | ||||||
|  |             'entries': entries, | ||||||
|  |         } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         parsed_url = compat_urlparse.urlparse(url) |         parsed_url = compat_urlparse.urlparse(url) | ||||||
|         if not parsed_url.scheme: |         if not parsed_url.scheme: | ||||||
| @@ -175,7 +271,7 @@ class GenericIE(InfoExtractor): | |||||||
|             else: |             else: | ||||||
|                 assert ':' in default_search |                 assert ':' in default_search | ||||||
|                 return self.url_result(default_search + url) |                 return self.url_result(default_search + url) | ||||||
|         video_id = os.path.splitext(url.split('/')[-1])[0] |         video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0] | ||||||
|  |  | ||||||
|         self.to_screen('%s: Requesting header' % video_id) |         self.to_screen('%s: Requesting header' % video_id) | ||||||
|  |  | ||||||
| @@ -219,6 +315,14 @@ class GenericIE(InfoExtractor): | |||||||
|  |  | ||||||
|         self.report_extraction(video_id) |         self.report_extraction(video_id) | ||||||
|  |  | ||||||
|  |         # Is it an RSS feed? | ||||||
|  |         try: | ||||||
|  |             doc = parse_xml(webpage) | ||||||
|  |             if doc.tag == 'rss': | ||||||
|  |                 return self._extract_rss(url, video_id, doc) | ||||||
|  |         except compat_xml_parse_error: | ||||||
|  |             pass | ||||||
|  |  | ||||||
|         # it's tempting to parse this further, but you would |         # it's tempting to parse this further, but you would | ||||||
|         # have to take into account all the variations like |         # have to take into account all the variations like | ||||||
|         #   Video Title - Site Name |         #   Video Title - Site Name | ||||||
| @@ -252,9 +356,9 @@ class GenericIE(InfoExtractor): | |||||||
|  |  | ||||||
|         # Look for embedded (iframe) Vimeo player |         # Look for embedded (iframe) Vimeo player | ||||||
|         mobj = re.search( |         mobj = re.search( | ||||||
|             r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage) |             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) | ||||||
|         if mobj: |         if mobj: | ||||||
|             player_url = unescapeHTML(mobj.group(1)) |             player_url = unescapeHTML(mobj.group('url')) | ||||||
|             surl = smuggle_url(player_url, {'Referer': url}) |             surl = smuggle_url(player_url, {'Referer': url}) | ||||||
|             return self.url_result(surl, 'Vimeo') |             return self.url_result(surl, 'Vimeo') | ||||||
|  |  | ||||||
| @@ -334,11 +438,17 @@ class GenericIE(InfoExtractor): | |||||||
|         if mobj is not None: |         if mobj is not None: | ||||||
|             return self.url_result(mobj.group(1), 'Mpora') |             return self.url_result(mobj.group(1), 'Mpora') | ||||||
|  |  | ||||||
|         # Look for embedded Novamov player |         # Look for embedded NovaMov player | ||||||
|         mobj = re.search( |         mobj = re.search( | ||||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage) |             r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage) | ||||||
|         if mobj is not None: |         if mobj is not None: | ||||||
|             return self.url_result(mobj.group('url'), 'Novamov') |             return self.url_result(mobj.group('url'), 'NovaMov') | ||||||
|  |  | ||||||
|  |         # Look for embedded NowVideo player | ||||||
|  |         mobj = re.search( | ||||||
|  |             r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage) | ||||||
|  |         if mobj is not None: | ||||||
|  |             return self.url_result(mobj.group('url'), 'NowVideo') | ||||||
|  |  | ||||||
|         # Look for embedded Facebook player |         # Look for embedded Facebook player | ||||||
|         mobj = re.search( |         mobj = re.search( | ||||||
| @@ -346,12 +456,38 @@ class GenericIE(InfoExtractor): | |||||||
|         if mobj is not None: |         if mobj is not None: | ||||||
|             return self.url_result(mobj.group('url'), 'Facebook') |             return self.url_result(mobj.group('url'), 'Facebook') | ||||||
|  |  | ||||||
|  |         # Look for embedded VK player | ||||||
|  |         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage) | ||||||
|  |         if mobj is not None: | ||||||
|  |             return self.url_result(mobj.group('url'), 'VK') | ||||||
|  |  | ||||||
|         # Look for embedded Huffington Post player |         # Look for embedded Huffington Post player | ||||||
|         mobj = re.search( |         mobj = re.search( | ||||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) |             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) | ||||||
|         if mobj is not None: |         if mobj is not None: | ||||||
|             return self.url_result(mobj.group('url'), 'HuffPost') |             return self.url_result(mobj.group('url'), 'HuffPost') | ||||||
|  |  | ||||||
|  |         # Look for embed.ly | ||||||
|  |         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage) | ||||||
|  |         if mobj is not None: | ||||||
|  |             return self.url_result(mobj.group('url')) | ||||||
|  |         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage) | ||||||
|  |         if mobj is not None: | ||||||
|  |             return self.url_result(compat_urllib_parse.unquote(mobj.group('url'))) | ||||||
|  |  | ||||||
|  |         # Look for funnyordie embed | ||||||
|  |         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) | ||||||
|  |         if matches: | ||||||
|  |             urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie') | ||||||
|  |                      for eurl in matches] | ||||||
|  |             return self.playlist_result( | ||||||
|  |                 urlrs, playlist_id=video_id, playlist_title=video_title) | ||||||
|  |  | ||||||
|  |         # Look for embedded RUTV player | ||||||
|  |         rutv_url = RUTVIE._extract_url(webpage) | ||||||
|  |         if rutv_url: | ||||||
|  |             return self.url_result(rutv_url, 'RUTV') | ||||||
|  |  | ||||||
|         # Start with something easy: JW Player in SWFObject |         # Start with something easy: JW Player in SWFObject | ||||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) |         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||||
|         if mobj is None: |         if mobj is None: | ||||||
| @@ -363,6 +499,13 @@ class GenericIE(InfoExtractor): | |||||||
|         if mobj is None: |         if mobj is None: | ||||||
|             # Broaden the search a little bit: JWPlayer JS loader |             # Broaden the search a little bit: JWPlayer JS loader | ||||||
|             mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) |             mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) | ||||||
|  |  | ||||||
|  |         # Look for embedded TED player | ||||||
|  |         mobj = re.search( | ||||||
|  |             r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage) | ||||||
|  |         if mobj is not None: | ||||||
|  |             return self.url_result(mobj.group('url'), 'TED') | ||||||
|  |  | ||||||
|         if mobj is None: |         if mobj is None: | ||||||
|             # Try to find twitter cards info |             # Try to find twitter cards info | ||||||
|             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) |             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) | ||||||
| @@ -376,6 +519,18 @@ class GenericIE(InfoExtractor): | |||||||
|         if mobj is None: |         if mobj is None: | ||||||
|             # HTML5 video |             # HTML5 video | ||||||
|             mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL) |             mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL) | ||||||
|  |         if mobj is None: | ||||||
|  |             mobj = re.search( | ||||||
|  |                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' | ||||||
|  |                 r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"', | ||||||
|  |                 webpage) | ||||||
|  |             if mobj: | ||||||
|  |                 new_url = mobj.group(1) | ||||||
|  |                 self.report_following_redirect(new_url) | ||||||
|  |                 return { | ||||||
|  |                     '_type': 'url', | ||||||
|  |                     'url': new_url, | ||||||
|  |                 } | ||||||
|         if mobj is None: |         if mobj is None: | ||||||
|             raise ExtractorError('Unsupported URL: %s' % url) |             raise ExtractorError('Unsupported URL: %s' % url) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -46,6 +46,6 @@ class GoogleSearchIE(SearchInfoExtractor): | |||||||
|                     'url': mobj.group(1) |                     'url': mobj.group(1) | ||||||
|                 }) |                 }) | ||||||
|  |  | ||||||
|             if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage): |             if (len(entries) >= n) or not re.search(r'id="pnnext"', webpage): | ||||||
|                 res['entries'] = entries[:n] |                 res['entries'] = entries[:n] | ||||||
|                 return res |                 return res | ||||||
|   | |||||||
| @@ -6,11 +6,14 @@ from random import random | |||||||
| from math import floor | from math import floor | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import compat_urllib_request | from ..utils import ( | ||||||
|  |     compat_urllib_request, | ||||||
|  |     ExtractorError, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class IPrimaIE(InfoExtractor): | class IPrimaIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)' |     _VALID_URL = r'https?://play\.iprima\.cz/[^?#]+/(?P<id>[^?#]+)' | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://play.iprima.cz/particka/particka-92', |         'url': 'http://play.iprima.cz/particka/particka-92', | ||||||
| @@ -22,20 +25,37 @@ class IPrimaIE(InfoExtractor): | |||||||
|             'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg', |             'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg', | ||||||
|         }, |         }, | ||||||
|         'params': { |         'params': { | ||||||
|             'skip_download': True, |             'skip_download': True,  # requires rtmpdump | ||||||
|         }, |         }, | ||||||
|     }, |     }, { | ||||||
|     ] |         'url': 'http://play.iprima.cz/particka/tchibo-particka-jarni-moda', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '9718337', | ||||||
|  |             'ext': 'flv', | ||||||
|  |             'title': 'Tchibo Partička - Jarní móda', | ||||||
|  |             'description': 'md5:589f8f59f414220621ff8882eb3ce7be', | ||||||
|  |             'thumbnail': 're:^http:.*\.jpg$', | ||||||
|  |         }, | ||||||
|  |         'params': { | ||||||
|  |             'skip_download': True,  # requires rtmpdump | ||||||
|  |         }, | ||||||
|  |         'skip': 'Do not have permission to access this page', | ||||||
|  |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         video_id = mobj.group('videoid') |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|         player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % ( |         if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage): | ||||||
|                          floor(random()*1073741824), |             raise ExtractorError( | ||||||
|                          floor(random()*1073741824)) |                 '%s said: You do not have permission to access this page' % self.IE_NAME, expected=True) | ||||||
|  |  | ||||||
|  |         player_url = ( | ||||||
|  |             'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % | ||||||
|  |             (floor(random()*1073741824), floor(random()*1073741824)) | ||||||
|  |         ) | ||||||
|  |  | ||||||
|         req = compat_urllib_request.Request(player_url) |         req = compat_urllib_request.Request(player_url) | ||||||
|         req.add_header('Referer', url) |         req.add_header('Referer', url) | ||||||
| @@ -44,18 +64,20 @@ class IPrimaIE(InfoExtractor): | |||||||
|         base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1]) |         base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1]) | ||||||
|  |  | ||||||
|         zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO') |         zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO') | ||||||
|  |  | ||||||
|         if zoneGEO != '0': |         if zoneGEO != '0': | ||||||
|             base_url = base_url.replace('token', 'token_'+zoneGEO) |             base_url = base_url.replace('token', 'token_' + zoneGEO) | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|         for format_id in ['lq', 'hq', 'hd']: |         for format_id in ['lq', 'hq', 'hd']: | ||||||
|             filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename') |             filename = self._html_search_regex( | ||||||
|  |                 r'"%s_id":(.+?),' % format_id, webpage, 'filename') | ||||||
|  |  | ||||||
|             if filename == 'null': |             if filename == 'null': | ||||||
|                 continue |                 continue | ||||||
|  |  | ||||||
|             real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id') |             real_id = self._search_regex( | ||||||
|  |                 r'Prima-(?:[0-9]{10}|WEB)-([0-9]+)[-_]', | ||||||
|  |                 filename, 'real video id') | ||||||
|  |  | ||||||
|             if format_id == 'lq': |             if format_id == 'lq': | ||||||
|                 quality = 0 |                 quality = 0 | ||||||
| @@ -63,13 +85,13 @@ class IPrimaIE(InfoExtractor): | |||||||
|                 quality = 1 |                 quality = 1 | ||||||
|             elif format_id == 'hd': |             elif format_id == 'hd': | ||||||
|                 quality = 2 |                 quality = 2 | ||||||
|                 filename = 'hq/'+filename |                 filename = 'hq/' + filename | ||||||
|  |  | ||||||
|             formats.append({ |             formats.append({ | ||||||
|                 'format_id': format_id, |                 'format_id': format_id, | ||||||
|                 'url': base_url, |                 'url': base_url, | ||||||
|                 'quality': quality, |                 'quality': quality, | ||||||
|                 'play_path': 'mp4:'+filename.replace('"', '')[:-4], |                 'play_path': 'mp4:' + filename.replace('"', '')[:-4], | ||||||
|                 'rtmp_live': True, |                 'rtmp_live': True, | ||||||
|                 'ext': 'flv', |                 'ext': 'flv', | ||||||
|             }) |             }) | ||||||
|   | |||||||
| @@ -1,56 +1,61 @@ | |||||||
| # coding: utf-8 | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|  |     RegexNotFoundError, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class JukeboxIE(InfoExtractor): | class JukeboxIE(InfoExtractor): | ||||||
|     _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html' |     _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html' | ||||||
|     _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>' |     _TEST = { | ||||||
|     _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"' |         'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html', | ||||||
|     _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>' |         'md5': '5dc6477e74b1e37042ac5acedd8413e5', | ||||||
|     _IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"' |         'info_dict': { | ||||||
|  |             'id': 'r303r', | ||||||
|  |             'ext': 'flv', | ||||||
|  |             'title': 'Kosheen-En Vivo Pride', | ||||||
|  |             'uploader': 'Kosheen', | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         video_id = mobj.group('video_id') |         video_id = mobj.group('video_id') | ||||||
|  |  | ||||||
|         html = self._download_webpage(url, video_id) |         html = self._download_webpage(url, video_id) | ||||||
|  |         iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url')) | ||||||
|         mobj = re.search(self._IFRAME, html) |  | ||||||
|         if mobj is None: |  | ||||||
|             raise ExtractorError(u'Cannot extract iframe url') |  | ||||||
|         iframe_url = unescapeHTML(mobj.group('iframe')) |  | ||||||
|  |  | ||||||
|         iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe') |         iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe') | ||||||
|         mobj = re.search(r'class="jkb_waiting"', iframe_html) |         if re.search(r'class="jkb_waiting"', iframe_html) is not None: | ||||||
|         if mobj is not None: |             raise ExtractorError('Video is not available(in your country?)!') | ||||||
|             raise ExtractorError(u'Video is not available(in your country?)!') |  | ||||||
|  |  | ||||||
|         self.report_extraction(video_id) |         self.report_extraction(video_id) | ||||||
|  |  | ||||||
|         mobj = re.search(self._VIDEO_URL, iframe_html) |         try: | ||||||
|         if mobj is None: |             video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"', | ||||||
|             mobj = re.search(self._IS_YOUTUBE, iframe_html) |                 iframe_html, 'video url') | ||||||
|             if mobj is None: |             video_url = unescapeHTML(video_url).replace('\/', '/') | ||||||
|                 raise ExtractorError(u'Cannot extract video url') |         except RegexNotFoundError: | ||||||
|             youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/') |             youtube_url = self._search_regex( | ||||||
|             self.to_screen(u'Youtube video detected') |                 r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"', | ||||||
|             return self.url_result(youtube_url,ie='Youtube') |                 iframe_html, 'youtube url') | ||||||
|         video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/') |             youtube_url = unescapeHTML(youtube_url).replace('\/', '/') | ||||||
|         video_ext = unescapeHTML(mobj.group('video_ext')) |             self.to_screen('Youtube video detected') | ||||||
|  |             return self.url_result(youtube_url, ie='Youtube') | ||||||
|  |  | ||||||
|         mobj = re.search(self._TITLE, html) |         title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>', | ||||||
|         if mobj is None: |             html, 'title') | ||||||
|             raise ExtractorError(u'Cannot extract title') |         artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>', | ||||||
|         title = unescapeHTML(mobj.group('title')) |             html, 'artist') | ||||||
|         artist = unescapeHTML(mobj.group('artist')) |  | ||||||
|  |  | ||||||
|         return [{'id': video_id, |         return { | ||||||
|                  'url': video_url, |             'id': video_id, | ||||||
|                  'title': artist + '-' + title, |             'url': video_url, | ||||||
|                  'ext': video_ext |             'title': artist + '-' + title, | ||||||
|                  }] |             'uploader': artist, | ||||||
|  |         } | ||||||
|   | |||||||
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | |||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import int_or_none | ||||||
|  |  | ||||||
|  |  | ||||||
| class KontrTubeIE(InfoExtractor): | class KontrTubeIE(InfoExtractor): | ||||||
| @@ -32,27 +33,26 @@ class KontrTubeIE(InfoExtractor): | |||||||
|  |  | ||||||
|         video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL') |         video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL') | ||||||
|         thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False) |         thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False) | ||||||
|         title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, |         title = self._html_search_regex( | ||||||
|             'video title') |             r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title') | ||||||
|         description = self._html_search_meta('description', webpage, 'video description') |         description = self._html_search_meta('description', webpage, 'video description') | ||||||
|  |  | ||||||
|         mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', |         mobj = re.search( | ||||||
|             webpage) |             r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage) | ||||||
|         duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None |         duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None | ||||||
|  |  | ||||||
|         view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, |         view_count = self._html_search_regex( | ||||||
|             'view count', fatal=False) |             r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False) | ||||||
|         view_count = int(view_count) if view_count is not None else None |  | ||||||
|  |  | ||||||
|         comment_count = None |         comment_count = None | ||||||
|         comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', |         comment_str = self._html_search_regex( | ||||||
|             fatal=False) |             r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False) | ||||||
|         if comment_str.startswith('комментариев нет'): |         if comment_str.startswith('комментариев нет'): | ||||||
|             comment_count = 0 |             comment_count = 0 | ||||||
|         else: |         else: | ||||||
|             mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str) |             mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str) | ||||||
|             if mobj: |             if mobj: | ||||||
|                 comment_count = int(mobj.group('total')) |                 comment_count = mobj.group('total') | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
| @@ -61,6 +61,6 @@ class KontrTubeIE(InfoExtractor): | |||||||
|             'title': title, |             'title': title, | ||||||
|             'description': description, |             'description': description, | ||||||
|             'duration': duration, |             'duration': duration, | ||||||
|             'view_count': view_count, |             'view_count': int_or_none(view_count), | ||||||
|             'comment_count': comment_count, |             'comment_count': int_or_none(comment_count), | ||||||
|         } |         } | ||||||
| @@ -6,7 +6,8 @@ import re | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     unified_strdate |     unified_strdate, | ||||||
|  |     ExtractorError, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -32,13 +33,11 @@ class LifeNewsIE(InfoExtractor): | |||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|         webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page') |         webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page') | ||||||
|  |  | ||||||
|         video_url = self._html_search_regex( |         videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage) | ||||||
|             r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL') |         if not videos: | ||||||
|  |             raise ExtractorError('No media links available for %s' % video_id) | ||||||
|         thumbnail = self._html_search_regex( |  | ||||||
|             r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail') |  | ||||||
|  |  | ||||||
|         title = self._og_search_title(webpage) |         title = self._og_search_title(webpage) | ||||||
|         TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS' |         TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS' | ||||||
| @@ -50,20 +49,26 @@ class LifeNewsIE(InfoExtractor): | |||||||
|         view_count = self._html_search_regex( |         view_count = self._html_search_regex( | ||||||
|             r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False) |             r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False) | ||||||
|         comment_count = self._html_search_regex( |         comment_count = self._html_search_regex( | ||||||
|             r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count', fatal=False) |             r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False) | ||||||
|  |  | ||||||
|         upload_date = self._html_search_regex( |         upload_date = self._html_search_regex( | ||||||
|             r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False) |             r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False) | ||||||
|         if upload_date is not None: |         if upload_date is not None: | ||||||
|             upload_date = unified_strdate(upload_date) |             upload_date = unified_strdate(upload_date) | ||||||
|  |  | ||||||
|         return { |         def make_entry(video_id, media, video_number=None): | ||||||
|             'id': video_id, |             return { | ||||||
|             'url': video_url, |                 'id': video_id, | ||||||
|             'thumbnail': thumbnail, |                 'url': media[1], | ||||||
|             'title': title, |                 'thumbnail': media[0], | ||||||
|             'description': description, |                 'title': title if video_number is None else '%s-video%s' % (title, video_number), | ||||||
|             'view_count': int_or_none(view_count), |                 'description': description, | ||||||
|             'comment_count': int_or_none(comment_count), |                 'view_count': int_or_none(view_count), | ||||||
|             'upload_date': upload_date, |                 'comment_count': int_or_none(comment_count), | ||||||
|         } |                 'upload_date': upload_date, | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |         if len(videos) == 1: | ||||||
|  |             return make_entry(video_id, videos[0]) | ||||||
|  |         else: | ||||||
|  |             return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)] | ||||||
| @@ -4,15 +4,17 @@ import json | |||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import int_or_none | ||||||
|  |  | ||||||
|  |  | ||||||
| class LiveLeakIE(InfoExtractor): | class LiveLeakIE(InfoExtractor): | ||||||
|     _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)' |     _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://www.liveleak.com/view?i=757_1364311680', |         'url': 'http://www.liveleak.com/view?i=757_1364311680', | ||||||
|         'file': '757_1364311680.mp4', |  | ||||||
|         'md5': '0813c2430bea7a46bf13acf3406992f4', |         'md5': '0813c2430bea7a46bf13acf3406992f4', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|  |             'id': '757_1364311680', | ||||||
|  |             'ext': 'mp4', | ||||||
|             'description': 'extremely bad day for this guy..!', |             'description': 'extremely bad day for this guy..!', | ||||||
|             'uploader': 'ljfriel2', |             'uploader': 'ljfriel2', | ||||||
|             'title': 'Most unlucky car accident' |             'title': 'Most unlucky car accident' | ||||||
| @@ -20,25 +22,62 @@ class LiveLeakIE(InfoExtractor): | |||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|         'url': 'http://www.liveleak.com/view?i=f93_1390833151', |         'url': 'http://www.liveleak.com/view?i=f93_1390833151', | ||||||
|         'file': 'f93_1390833151.mp4', |  | ||||||
|         'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf', |         'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|  |             'id': 'f93_1390833151', | ||||||
|  |             'ext': 'mp4', | ||||||
|             'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.', |             'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.', | ||||||
|             'uploader': 'ARD_Stinkt', |             'uploader': 'ARD_Stinkt', | ||||||
|             'title': 'German Television does first Edward Snowden Interview (ENGLISH)', |             'title': 'German Television does first Edward Snowden Interview (ENGLISH)', | ||||||
|         } |         } | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |         'url': 'http://www.liveleak.com/view?i=4f7_1392687779', | ||||||
|  |         'md5': '42c6d97d54f1db107958760788c5f48f', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '4f7_1392687779', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing...  I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.", | ||||||
|  |             'uploader': 'CapObveus', | ||||||
|  |             'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck', | ||||||
|  |             'age_limit': 18, | ||||||
|  |         } | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |  | ||||||
|         video_id = mobj.group('video_id') |         video_id = mobj.group('video_id') | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|  |         video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip() | ||||||
|  |         video_description = self._og_search_description(webpage) | ||||||
|  |         video_uploader = self._html_search_regex( | ||||||
|  |             r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False) | ||||||
|  |         age_limit = int_or_none(self._search_regex( | ||||||
|  |             r'you confirm that you are ([0-9]+) years and over.', | ||||||
|  |             webpage, 'age limit', default=None)) | ||||||
|  |  | ||||||
|         sources_raw = self._search_regex( |         sources_raw = self._search_regex( | ||||||
|             r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None) |             r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None) | ||||||
|         if sources_raw is None: |         if sources_raw is None: | ||||||
|             sources_raw = '[{ %s}]' % ( |             alt_source = self._search_regex( | ||||||
|                 self._search_regex(r'(file: ".*?"),', webpage, 'video URL')) |                 r'(file: ".*?"),', webpage, 'video URL', default=None) | ||||||
|  |             if alt_source: | ||||||
|  |                 sources_raw = '[{ %s}]' % alt_source | ||||||
|  |             else: | ||||||
|  |                 # Maybe an embed? | ||||||
|  |                 embed_url = self._search_regex( | ||||||
|  |                     r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"', | ||||||
|  |                     webpage, 'embed URL') | ||||||
|  |                 return { | ||||||
|  |                     '_type': 'url_transparent', | ||||||
|  |                     'url': embed_url, | ||||||
|  |                     'id': video_id, | ||||||
|  |                     'title': video_title, | ||||||
|  |                     'description': video_description, | ||||||
|  |                     'uploader': video_uploader, | ||||||
|  |                     'age_limit': age_limit, | ||||||
|  |                 } | ||||||
|  |  | ||||||
|         sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw) |         sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw) | ||||||
|         sources = json.loads(sources_json) |         sources = json.loads(sources_json) | ||||||
| @@ -49,15 +88,11 @@ class LiveLeakIE(InfoExtractor): | |||||||
|         } for s in sources] |         } for s in sources] | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|         video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip() |  | ||||||
|         video_description = self._og_search_description(webpage) |  | ||||||
|         video_uploader = self._html_search_regex( |  | ||||||
|             r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False) |  | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': video_title, |             'title': video_title, | ||||||
|             'description': video_description, |             'description': video_description, | ||||||
|             'uploader': video_uploader, |             'uploader': video_uploader, | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|  |             'age_limit': age_limit, | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -8,7 +8,9 @@ from .common import InfoExtractor | |||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|     compat_urllib_request, |     compat_urllib_request, | ||||||
|     ExtractorError |     ExtractorError, | ||||||
|  |     int_or_none, | ||||||
|  |     compat_str, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -19,16 +21,17 @@ class LyndaIE(SubtitlesInfoExtractor): | |||||||
|     _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' |     _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' | ||||||
|     _NETRC_MACHINE = 'lynda' |     _NETRC_MACHINE = 'lynda' | ||||||
|  |  | ||||||
|     _SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account' |     _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true' | ||||||
|     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' |     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' | ||||||
|  |  | ||||||
|     ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' |     ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', |         'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', | ||||||
|         'file': '114408.mp4', |  | ||||||
|         'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', |         'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|  |             'id': '114408', | ||||||
|  |             'ext': 'mp4', | ||||||
|             'title': 'Using the exercise files', |             'title': 'Using the exercise files', | ||||||
|             'duration': 68 |             'duration': 68 | ||||||
|         } |         } | ||||||
| @@ -41,27 +44,44 @@ class LyndaIE(SubtitlesInfoExtractor): | |||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         video_id = mobj.group(1) |         video_id = mobj.group(1) | ||||||
|  |  | ||||||
|         page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, |         page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id, | ||||||
|                                       video_id, 'Downloading video JSON') |             'Downloading video JSON') | ||||||
|         video_json = json.loads(page) |         video_json = json.loads(page) | ||||||
|  |  | ||||||
|         if 'Status' in video_json: |         if 'Status' in video_json: | ||||||
|             raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True) |             raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True) | ||||||
|  |  | ||||||
|         if video_json['HasAccess'] is False: |         if video_json['HasAccess'] is False: | ||||||
|             raise ExtractorError('Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True) |             raise ExtractorError( | ||||||
|  |                 'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True) | ||||||
|  |  | ||||||
|         video_id = video_json['ID'] |         video_id = compat_str(video_json['ID']) | ||||||
|         duration = video_json['DurationInSeconds'] |         duration = video_json['DurationInSeconds'] | ||||||
|         title = video_json['Title'] |         title = video_json['Title'] | ||||||
|  |  | ||||||
|         formats = [{'url': fmt['Url'], |         formats = [] | ||||||
|  |  | ||||||
|  |         fmts = video_json.get('Formats') | ||||||
|  |         if fmts: | ||||||
|  |             formats.extend([ | ||||||
|  |                 { | ||||||
|  |                     'url': fmt['Url'], | ||||||
|                     'ext': fmt['Extension'], |                     'ext': fmt['Extension'], | ||||||
|                     'width': fmt['Width'], |                     'width': fmt['Width'], | ||||||
|                     'height': fmt['Height'], |                     'height': fmt['Height'], | ||||||
|                     'filesize': fmt['FileSize'], |                     'filesize': fmt['FileSize'], | ||||||
|                     'format_id': str(fmt['Resolution']) |                     'format_id': str(fmt['Resolution']) | ||||||
|                     } for fmt in video_json['Formats']] |                 } for fmt in fmts]) | ||||||
|  |  | ||||||
|  |         prioritized_streams = video_json.get('PrioritizedStreams') | ||||||
|  |         if prioritized_streams: | ||||||
|  |             formats.extend([ | ||||||
|  |                 { | ||||||
|  |                     'url': video_url, | ||||||
|  |                     'width': int_or_none(format_id), | ||||||
|  |                     'format_id': format_id, | ||||||
|  |                 } for format_id, video_url in prioritized_streams['0'].items() | ||||||
|  |             ]) | ||||||
|  |  | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|  |  | ||||||
| @@ -91,7 +111,7 @@ class LyndaIE(SubtitlesInfoExtractor): | |||||||
|             'stayPut': 'false' |             'stayPut': 'false' | ||||||
|         }         |         }         | ||||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) |         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) | ||||||
|         login_page = self._download_webpage(request, None, note='Logging in as %s' % username) |         login_page = self._download_webpage(request, None, 'Logging in as %s' % username) | ||||||
|  |  | ||||||
|         # Not (yet) logged in |         # Not (yet) logged in | ||||||
|         m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page) |         m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page) | ||||||
| @@ -116,7 +136,7 @@ class LyndaIE(SubtitlesInfoExtractor): | |||||||
|                     'stayPut': 'false', |                     'stayPut': 'false', | ||||||
|                 } |                 } | ||||||
|                 request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form)) |                 request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form)) | ||||||
|                 login_page = self._download_webpage(request, None, note='Confirming log in and log out from another device') |                 login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device') | ||||||
|  |  | ||||||
|         if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: |         if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: | ||||||
|             raise ExtractorError('Unable to log in') |             raise ExtractorError('Unable to log in') | ||||||
| @@ -150,7 +170,7 @@ class LyndaIE(SubtitlesInfoExtractor): | |||||||
|  |  | ||||||
|     def _get_available_subtitles(self, video_id, webpage): |     def _get_available_subtitles(self, video_id, webpage): | ||||||
|         url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id |         url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id | ||||||
|         sub = self._download_webpage(url, None, note=False) |         sub = self._download_webpage(url, None, False) | ||||||
|         sub_json = json.loads(sub) |         sub_json = json.loads(sub) | ||||||
|         return {'en': url} if len(sub_json) > 0 else {} |         return {'en': url} if len(sub_json) > 0 else {} | ||||||
|  |  | ||||||
| @@ -179,6 +199,9 @@ class LyndaCourseIE(InfoExtractor): | |||||||
|         videos = [] |         videos = [] | ||||||
|         (username, _) = self._get_login_info() |         (username, _) = self._get_login_info() | ||||||
|  |  | ||||||
|  |         # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided | ||||||
|  |         # by single video API anymore | ||||||
|  |  | ||||||
|         for chapter in course_json['Chapters']: |         for chapter in course_json['Chapters']: | ||||||
|             for video in chapter['Videos']: |             for video in chapter['Videos']: | ||||||
|                 if username is None and video['HasAccess'] is False: |                 if username is None and video['HasAccess'] is False: | ||||||
|   | |||||||
							
								
								
									
										66
									
								
								youtube_dl/extractor/mailru.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								youtube_dl/extractor/mailru.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | |||||||
|  | # encoding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  | import datetime | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class MailRuIE(InfoExtractor): | ||||||
|  |     IE_NAME = 'mailru' | ||||||
|  |     IE_DESC = 'Видео@Mail.Ru' | ||||||
|  |     _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)' | ||||||
|  |  | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', | ||||||
|  |         'md5': 'dea205f03120046894db4ebb6159879a', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '46301138', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', | ||||||
|  |             'upload_date': '20140224', | ||||||
|  |             'uploader': 'sonypicturesrus', | ||||||
|  |             'uploader_id': 'sonypicturesrus@mail.ru', | ||||||
|  |             'duration': 184, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         video_data = self._download_json( | ||||||
|  |             'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON') | ||||||
|  |  | ||||||
|  |         author = video_data['author'] | ||||||
|  |         uploader = author['name'] | ||||||
|  |         uploader_id = author['id'] | ||||||
|  |  | ||||||
|  |         movie = video_data['movie'] | ||||||
|  |         content_id = str(movie['contentId']) | ||||||
|  |         title = movie['title'] | ||||||
|  |         thumbnail = movie['poster'] | ||||||
|  |         duration = movie['duration'] | ||||||
|  |  | ||||||
|  |         upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d') | ||||||
|  |         view_count = video_data['views_count'] | ||||||
|  |  | ||||||
|  |         formats = [ | ||||||
|  |             { | ||||||
|  |                 'url': video['url'], | ||||||
|  |                 'format_id': video['name'], | ||||||
|  |             } for video in video_data['videos'] | ||||||
|  |         ] | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': content_id, | ||||||
|  |             'title': title, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |             'upload_date': upload_date, | ||||||
|  |             'uploader': uploader, | ||||||
|  |             'uploader_id': uploader_id, | ||||||
|  |             'duration': duration, | ||||||
|  |             'view_count': view_count, | ||||||
|  |             'formats': formats, | ||||||
|  |         } | ||||||
| @@ -166,6 +166,7 @@ class MetacafeIE(InfoExtractor): | |||||||
|  |  | ||||||
|         video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title') |         video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title') | ||||||
|         description = self._og_search_description(webpage) |         description = self._og_search_description(webpage) | ||||||
|  |         thumbnail = self._og_search_thumbnail(webpage) | ||||||
|         video_uploader = self._html_search_regex( |         video_uploader = self._html_search_regex( | ||||||
|                 r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);', |                 r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);', | ||||||
|                 webpage, u'uploader nickname', fatal=False) |                 webpage, u'uploader nickname', fatal=False) | ||||||
| @@ -183,6 +184,7 @@ class MetacafeIE(InfoExtractor): | |||||||
|             'uploader': video_uploader, |             'uploader': video_uploader, | ||||||
|             'upload_date':  None, |             'upload_date':  None, | ||||||
|             'title':    video_title, |             'title':    video_title, | ||||||
|  |             'thumbnail':thumbnail, | ||||||
|             'ext':      video_ext, |             'ext':      video_ext, | ||||||
|             'age_limit': age_limit, |             'age_limit': age_limit, | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -1,24 +1,30 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
| import json | import json | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from .youtube import YoutubeIE | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|  |     compat_urlparse, | ||||||
|     clean_html, |     clean_html, | ||||||
|  |     ExtractorError, | ||||||
|     get_element_by_id, |     get_element_by_id, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class TechTVMITIE(InfoExtractor): | class TechTVMITIE(InfoExtractor): | ||||||
|     IE_NAME = u'techtv.mit.edu' |     IE_NAME = 'techtv.mit.edu' | ||||||
|     _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)' |     _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', |         'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', | ||||||
|         u'file': u'25418.mp4', |         'md5': '1f8cb3e170d41fd74add04d3c9330e5f', | ||||||
|         u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '25418', | ||||||
|             u'title': u'MIT DNA Learning Center Set', |             'ext': 'mp4', | ||||||
|             u'description': u'md5:82313335e8a8a3f243351ba55bc1b474', |             'title': 'MIT DNA Learning Center Set', | ||||||
|  |             'description': 'md5:82313335e8a8a3f243351ba55bc1b474', | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -27,12 +33,12 @@ class TechTVMITIE(InfoExtractor): | |||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|         raw_page = self._download_webpage( |         raw_page = self._download_webpage( | ||||||
|             'http://techtv.mit.edu/videos/%s' % video_id, video_id) |             'http://techtv.mit.edu/videos/%s' % video_id, video_id) | ||||||
|         clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page) |         clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page) | ||||||
|  |  | ||||||
|         base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)', |         base_url = self._search_regex( | ||||||
|             raw_page, u'base url') |             r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url') | ||||||
|         formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page, |         formats_json = self._search_regex( | ||||||
|             u'video formats') |             r'bitrates: (\[.+?\])', raw_page, 'video formats') | ||||||
|         formats_mit = json.loads(formats_json) |         formats_mit = json.loads(formats_json) | ||||||
|         formats = [ |         formats = [ | ||||||
|             { |             { | ||||||
| @@ -48,28 +54,31 @@ class TechTVMITIE(InfoExtractor): | |||||||
|  |  | ||||||
|         title = get_element_by_id('edit-title', clean_page) |         title = get_element_by_id('edit-title', clean_page) | ||||||
|         description = clean_html(get_element_by_id('edit-description', clean_page)) |         description = clean_html(get_element_by_id('edit-description', clean_page)) | ||||||
|         thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'', |         thumbnail = self._search_regex( | ||||||
|             raw_page, u'thumbnail', flags=re.DOTALL) |             r'playlist:.*?url: \'(.+?)\'', | ||||||
|  |             raw_page, 'thumbnail', flags=re.DOTALL) | ||||||
|  |  | ||||||
|         return {'id': video_id, |         return { | ||||||
|                 'title': title, |             'id': video_id, | ||||||
|                 'formats': formats, |             'title': title, | ||||||
|                 'description': description, |             'formats': formats, | ||||||
|                 'thumbnail': thumbnail, |             'description': description, | ||||||
|                 } |             'thumbnail': thumbnail, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
| class MITIE(TechTVMITIE): | class MITIE(TechTVMITIE): | ||||||
|     IE_NAME = u'video.mit.edu' |     IE_NAME = 'video.mit.edu' | ||||||
|     _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)' |     _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', |         'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', | ||||||
|         u'file': u'21783.mp4', |         'md5': '7db01d5ccc1895fc5010e9c9e13648da', | ||||||
|         u'md5': u'7db01d5ccc1895fc5010e9c9e13648da', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '21783', | ||||||
|             u'title': u'The Government is Profiling You', |             'ext': 'mp4', | ||||||
|             u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd', |             'title': 'The Government is Profiling You', | ||||||
|  |             'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd', | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -77,7 +86,73 @@ class MITIE(TechTVMITIE): | |||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         page_title = mobj.group('title') |         page_title = mobj.group('title') | ||||||
|         webpage = self._download_webpage(url, page_title) |         webpage = self._download_webpage(url, page_title) | ||||||
|         self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME)) |         embed_url = self._search_regex( | ||||||
|         embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage, |             r'<iframe .*?src="(.+?)"', webpage, 'embed url') | ||||||
|             u'embed url') |  | ||||||
|         return self.url_result(embed_url, ie='TechTVMIT') |         return self.url_result(embed_url, ie='TechTVMIT') | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class OCWMITIE(InfoExtractor): | ||||||
|  |     IE_NAME = 'ocw.mit.edu' | ||||||
|  |     _VALID_URL = r'^http://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)' | ||||||
|  |     _BASE_URL = 'http://ocw.mit.edu/' | ||||||
|  |  | ||||||
|  |     _TESTS = [ | ||||||
|  |         { | ||||||
|  |             'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': 'EObHWIEKGjA', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence', | ||||||
|  |                 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.', | ||||||
|  |                 #'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt' | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://ocw.mit.edu/courses/mathematics/18-01sc-single-variable-calculus-fall-2010/1.-differentiation/part-a-definition-and-basic-rules/session-1-introduction-to-derivatives/', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '7K1sB05pE0A', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Session 1: Introduction to Derivatives', | ||||||
|  |                 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.', | ||||||
|  |                 #'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT' | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     ] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         topic = mobj.group('topic') | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage(url, topic) | ||||||
|  |         title = self._html_search_meta('WT.cg_s', webpage) | ||||||
|  |         description = self._html_search_meta('Description', webpage) | ||||||
|  |  | ||||||
|  |         # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file) | ||||||
|  |         embed_chapter_media = re.search(r'ocw_embed_chapter_media\((.+?)\)', webpage) | ||||||
|  |         if embed_chapter_media: | ||||||
|  |             metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1)) | ||||||
|  |             metadata = re.split(r', ?', metadata) | ||||||
|  |             yt = metadata[1] | ||||||
|  |             subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7]) | ||||||
|  |         else: | ||||||
|  |             # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file) | ||||||
|  |             embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage) | ||||||
|  |             if embed_media: | ||||||
|  |                 metadata = re.sub(r'[\'"]', '', embed_media.group(1)) | ||||||
|  |                 metadata = re.split(r', ?', metadata) | ||||||
|  |                 yt = metadata[1] | ||||||
|  |                 subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5]) | ||||||
|  |             else: | ||||||
|  |                 raise ExtractorError('Unable to find embedded YouTube video.') | ||||||
|  |         video_id = YoutubeIE.extract_id(yt) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'url_transparent', | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |             'url': yt, | ||||||
|  |             'url_transparent' | ||||||
|  |             'subtitles': subs, | ||||||
|  |             'ie_key': 'Youtube', | ||||||
|  |         } | ||||||
|   | |||||||
| @@ -5,18 +5,20 @@ import re | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
|  |     compat_urllib_parse, | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class MixcloudIE(InfoExtractor): | class MixcloudIE(InfoExtractor): | ||||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' |     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)' | ||||||
|     IE_NAME = 'mixcloud' |     IE_NAME = 'mixcloud' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', |         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', | ||||||
|         'file': 'dholbach-cryptkeeper.mp3', |  | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|  |             'id': 'dholbach-cryptkeeper', | ||||||
|  |             'ext': 'mp3', | ||||||
|             'title': 'Cryptkeeper', |             'title': 'Cryptkeeper', | ||||||
|             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', |             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', | ||||||
|             'uploader': 'Daniel Holbach', |             'uploader': 'Daniel Holbach', | ||||||
| @@ -45,7 +47,7 @@ class MixcloudIE(InfoExtractor): | |||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         uploader = mobj.group(1) |         uploader = mobj.group(1) | ||||||
|         cloudcast_name = mobj.group(2) |         cloudcast_name = mobj.group(2) | ||||||
|         track_id = '-'.join((uploader, cloudcast_name)) |         track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name))) | ||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, track_id) |         webpage = self._download_webpage(url, track_id) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5,9 +5,12 @@ import re | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|  |     compat_urllib_request, | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     find_xpath_attr, |     find_xpath_attr, | ||||||
|     fix_xml_ampersands, |     fix_xml_ampersands, | ||||||
|  |     HEADRequest, | ||||||
|  |     unescapeHTML, | ||||||
|     url_basename, |     url_basename, | ||||||
|     RegexNotFoundError, |     RegexNotFoundError, | ||||||
| ) | ) | ||||||
| @@ -18,6 +21,7 @@ def _media_xml_tag(tag): | |||||||
|  |  | ||||||
|  |  | ||||||
| class MTVServicesInfoExtractor(InfoExtractor): | class MTVServicesInfoExtractor(InfoExtractor): | ||||||
|  |     _MOBILE_TEMPLATE = None | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def _id_from_uri(uri): |     def _id_from_uri(uri): | ||||||
|         return uri.split(':')[-1] |         return uri.split(':')[-1] | ||||||
| @@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|         else: |         else: | ||||||
|             return thumb_node.attrib['url'] |             return thumb_node.attrib['url'] | ||||||
|  |  | ||||||
|     def _extract_video_formats(self, mdoc): |     def _extract_mobile_video_formats(self, mtvn_id): | ||||||
|         if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None: |         webpage_url = self._MOBILE_TEMPLATE % mtvn_id | ||||||
|             raise ExtractorError('This video is not available from your country.', expected=True) |         req = compat_urllib_request.Request(webpage_url) | ||||||
|  |         # Otherwise we get a webpage that would execute some javascript | ||||||
|  |         req.add_header('Youtubedl-user-agent', 'curl/7') | ||||||
|  |         webpage = self._download_webpage(req, mtvn_id, | ||||||
|  |             'Downloading mobile page') | ||||||
|  |         metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url')) | ||||||
|  |         req = HEADRequest(metrics_url) | ||||||
|  |         response = self._request_webpage(req, mtvn_id, 'Resolving url') | ||||||
|  |         url = response.geturl() | ||||||
|  |         # Transform the url to get the best quality: | ||||||
|  |         url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1) | ||||||
|  |         return [{'url': url,'ext': 'mp4'}] | ||||||
|  |  | ||||||
|  |     def _extract_video_formats(self, mdoc, mtvn_id): | ||||||
|  |         if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None: | ||||||
|  |             if mtvn_id is not None and self._MOBILE_TEMPLATE is not None: | ||||||
|  |                 self.to_screen('The normal version is not available from your ' | ||||||
|  |                     'country, trying with the mobile version') | ||||||
|  |                 return self._extract_mobile_video_formats(mtvn_id) | ||||||
|  |             raise ExtractorError('This video is not available from your country.', | ||||||
|  |                 expected=True) | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|         for rendition in mdoc.findall('.//rendition'): |         for rendition in mdoc.findall('.//rendition'): | ||||||
| @@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|             raise ExtractorError('Could not find video title') |             raise ExtractorError('Could not find video title') | ||||||
|         title = title.strip() |         title = title.strip() | ||||||
|  |  | ||||||
|  |         # This a short id that's used in the webpage urls | ||||||
|  |         mtvn_id = None | ||||||
|  |         mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category', | ||||||
|  |                 'scheme', 'urn:mtvn:id') | ||||||
|  |         if mtvn_id_node is not None: | ||||||
|  |             mtvn_id = mtvn_id_node.text | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'title': title, |             'title': title, | ||||||
|             'formats': self._extract_video_formats(mediagen_doc), |             'formats': self._extract_video_formats(mediagen_doc, mtvn_id), | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'thumbnail': self._get_thumbnail_url(uri, itemdoc), |             'thumbnail': self._get_thumbnail_url(uri, itemdoc), | ||||||
|             'description': description, |             'description': description, | ||||||
|   | |||||||
| @@ -1,3 +1,5 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import binascii | import binascii | ||||||
| import base64 | import base64 | ||||||
| import hashlib | import hashlib | ||||||
| @@ -14,18 +16,16 @@ from ..utils import ( | |||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class MyVideoIE(InfoExtractor): | class MyVideoIE(InfoExtractor): | ||||||
|     """Information Extractor for myvideo.de.""" |     _VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*' | ||||||
|  |     IE_NAME = 'myvideo' | ||||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*' |  | ||||||
|     IE_NAME = u'myvideo' |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', |         'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', | ||||||
|         u'file': u'8229274.flv', |         'md5': '2d2753e8130479ba2cb7e0a37002053e', | ||||||
|         u'md5': u'2d2753e8130479ba2cb7e0a37002053e', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '8229274', | ||||||
|             u"title": u"bowling-fail-or-win" |             'ext': 'flv', | ||||||
|  |             'title': 'bowling-fail-or-win', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -53,10 +53,7 @@ class MyVideoIE(InfoExtractor): | |||||||
|  |  | ||||||
|     def _real_extract(self,url): |     def _real_extract(self,url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         if mobj is None: |         video_id = mobj.group('id') | ||||||
|             raise ExtractorError(u'invalid URL: %s' % url) |  | ||||||
|  |  | ||||||
|         video_id = mobj.group(1) |  | ||||||
|  |  | ||||||
|         GK = ( |         GK = ( | ||||||
|           b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt' |           b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt' | ||||||
| @@ -74,37 +71,33 @@ class MyVideoIE(InfoExtractor): | |||||||
|             video_url = mobj.group(1) + '.flv' |             video_url = mobj.group(1) + '.flv' | ||||||
|  |  | ||||||
|             video_title = self._html_search_regex('<title>([^<]+)</title>', |             video_title = self._html_search_regex('<title>([^<]+)</title>', | ||||||
|                 webpage, u'title') |                 webpage, 'title') | ||||||
|  |  | ||||||
|             video_ext = self._search_regex('[.](.+?)$', video_url, u'extension') |             return { | ||||||
|  |                 'id': video_id, | ||||||
|             return [{ |                 'url': video_url, | ||||||
|                 'id':       video_id, |                 'title': video_title, | ||||||
|                 'url':      video_url, |             } | ||||||
|                 'uploader': None, |  | ||||||
|                 'upload_date':  None, |  | ||||||
|                 'title':    video_title, |  | ||||||
|                 'ext':      video_ext, |  | ||||||
|             }] |  | ||||||
|  |  | ||||||
|         mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage) |         mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage) | ||||||
|         if mobj is not None: |         if mobj is not None: | ||||||
|             request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '') |             request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '') | ||||||
|             response = self._download_webpage(request, video_id, |             response = self._download_webpage(request, video_id, | ||||||
|                                               u'Downloading video info') |                                               'Downloading video info') | ||||||
|             info = json.loads(base64.b64decode(response).decode('utf-8')) |             info = json.loads(base64.b64decode(response).decode('utf-8')) | ||||||
|             return {'id': video_id, |             return { | ||||||
|                     'title': info['title'], |                 'id': video_id, | ||||||
|                     'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), |                 'title': info['title'], | ||||||
|                     'play_path': info['filename'], |                 'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), | ||||||
|                     'ext': 'flv', |                 'play_path': info['filename'], | ||||||
|                     'thumbnail': info['thumbnail'][0]['url'], |                 'ext': 'flv', | ||||||
|                     } |                 'thumbnail': info['thumbnail'][0]['url'], | ||||||
|  |             } | ||||||
|  |  | ||||||
|         # try encxml |         # try encxml | ||||||
|         mobj = re.search('var flashvars={(.+?)}', webpage) |         mobj = re.search('var flashvars={(.+?)}', webpage) | ||||||
|         if mobj is None: |         if mobj is None: | ||||||
|             raise ExtractorError(u'Unable to extract video') |             raise ExtractorError('Unable to extract video') | ||||||
|  |  | ||||||
|         params = {} |         params = {} | ||||||
|         encxml = '' |         encxml = '' | ||||||
| @@ -118,7 +111,7 @@ class MyVideoIE(InfoExtractor): | |||||||
|             params['domain'] = 'www.myvideo.de' |             params['domain'] = 'www.myvideo.de' | ||||||
|         xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params)) |         xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params)) | ||||||
|         if 'flash_playertype=MTV' in xmldata_url: |         if 'flash_playertype=MTV' in xmldata_url: | ||||||
|             self._downloader.report_warning(u'avoiding MTV player') |             self._downloader.report_warning('avoiding MTV player') | ||||||
|             xmldata_url = ( |             xmldata_url = ( | ||||||
|                 'http://www.myvideo.de/dynamic/get_player_video_xml.php' |                 'http://www.myvideo.de/dynamic/get_player_video_xml.php' | ||||||
|                 '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes' |                 '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes' | ||||||
| @@ -144,7 +137,7 @@ class MyVideoIE(InfoExtractor): | |||||||
|             video_url = compat_urllib_parse.unquote(mobj.group(1)) |             video_url = compat_urllib_parse.unquote(mobj.group(1)) | ||||||
|             if 'myvideo2flash' in video_url: |             if 'myvideo2flash' in video_url: | ||||||
|                 self.report_warning( |                 self.report_warning( | ||||||
|                     u'Rewriting URL to use unencrypted rtmp:// ...', |                     'Rewriting URL to use unencrypted rtmp:// ...', | ||||||
|                     video_id) |                     video_id) | ||||||
|                 video_url = video_url.replace('rtmpe://', 'rtmp://') |                 video_url = video_url.replace('rtmpe://', 'rtmp://') | ||||||
|  |  | ||||||
| @@ -152,39 +145,31 @@ class MyVideoIE(InfoExtractor): | |||||||
|             # extract non rtmp videos |             # extract non rtmp videos | ||||||
|             mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data) |             mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data) | ||||||
|             if mobj is None: |             if mobj is None: | ||||||
|                 raise ExtractorError(u'unable to extract url') |                 raise ExtractorError('unable to extract url') | ||||||
|             video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2)) |             video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2)) | ||||||
|  |  | ||||||
|         video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file') |         video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file') | ||||||
|         video_file = compat_urllib_parse.unquote(video_file) |         video_file = compat_urllib_parse.unquote(video_file) | ||||||
|  |  | ||||||
|         if not video_file.endswith('f4m'): |         if not video_file.endswith('f4m'): | ||||||
|             ppath, prefix = video_file.split('.') |             ppath, prefix = video_file.split('.') | ||||||
|             video_playpath = '%s:%s' % (prefix, ppath) |             video_playpath = '%s:%s' % (prefix, ppath) | ||||||
|             video_hls_playlist = '' |  | ||||||
|         else: |         else: | ||||||
|             video_playpath = '' |             video_playpath = '' | ||||||
|             video_hls_playlist = ( |  | ||||||
|                 video_file |  | ||||||
|             ).replace('.f4m', '.m3u8') |  | ||||||
|  |  | ||||||
|         video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj') |         video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj') | ||||||
|         video_swfobj = compat_urllib_parse.unquote(video_swfobj) |         video_swfobj = compat_urllib_parse.unquote(video_swfobj) | ||||||
|  |  | ||||||
|         video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>", |         video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>", | ||||||
|             webpage, u'title') |             webpage, 'title') | ||||||
|  |  | ||||||
|         return [{ |         return { | ||||||
|             'id':                 video_id, |             'id': video_id, | ||||||
|             'url':                video_url, |             'url': video_url, | ||||||
|             'tc_url':             video_url, |             'tc_url': video_url, | ||||||
|             'uploader':           None, |             'title': video_title, | ||||||
|             'upload_date':        None, |             'ext': 'flv', | ||||||
|             'title':              video_title, |             'play_path': video_playpath, | ||||||
|             'ext':                u'flv', |             'player_url': video_swfobj, | ||||||
|             'play_path':          video_playpath, |         } | ||||||
|             'video_file':         video_file, |  | ||||||
|             'video_hls_playlist': video_hls_playlist, |  | ||||||
|             'player_url':         video_swfobj, |  | ||||||
|         }] |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,19 +1,46 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import find_xpath_attr, compat_str | from ..utils import find_xpath_attr, compat_str | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class NBCIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'http://www\.nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)' | ||||||
|  |  | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', | ||||||
|  |         'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'u1RInQZRN7QJ', | ||||||
|  |             'ext': 'flv', | ||||||
|  |             'title': 'I Am a Firefighter', | ||||||
|  |             'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |         theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url') | ||||||
|  |         if theplatform_url.startswith('//'): | ||||||
|  |             theplatform_url = 'http:' + theplatform_url | ||||||
|  |         return self.url_result(theplatform_url) | ||||||
|  |  | ||||||
|  |  | ||||||
| class NBCNewsIE(InfoExtractor): | class NBCNewsIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)' |     _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://www.nbcnews.com/video/nbc-news/52753292', |         'url': 'http://www.nbcnews.com/video/nbc-news/52753292', | ||||||
|         u'file': u'52753292.flv', |         'md5': '47abaac93c6eaf9ad37ee6c4463a5179', | ||||||
|         u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '52753292', | ||||||
|             u'title': u'Crew emerges after four-month Mars food study', |             'ext': 'flv', | ||||||
|             u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1', |             'title': 'Crew emerges after four-month Mars food study', | ||||||
|  |             'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -23,10 +50,11 @@ class NBCNewsIE(InfoExtractor): | |||||||
|         all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) |         all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) | ||||||
|         info = all_info.find('video') |         info = all_info.find('video') | ||||||
|  |  | ||||||
|         return {'id': video_id, |         return { | ||||||
|                 'title': info.find('headline').text, |             'id': video_id, | ||||||
|                 'ext': 'flv', |             'title': info.find('headline').text, | ||||||
|                 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, |             'ext': 'flv', | ||||||
|                 'description': compat_str(info.find('caption').text), |             'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, | ||||||
|                 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, |             'description': compat_str(info.find('caption').text), | ||||||
|                 } |             'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, | ||||||
|  |         } | ||||||
|   | |||||||
| @@ -1,6 +1,5 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import json |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| @@ -12,10 +11,13 @@ class NineGagIE(InfoExtractor): | |||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         "url": "http://9gag.tv/v/1912", |         "url": "http://9gag.tv/v/1912", | ||||||
|         "file": "1912.mp4", |  | ||||||
|         "info_dict": { |         "info_dict": { | ||||||
|  |             "id": "1912", | ||||||
|  |             "ext": "mp4", | ||||||
|             "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)", |             "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)", | ||||||
|             "title": "\"People Are Awesome 2013\" Is Absolutely Awesome" |             "title": "\"People Are Awesome 2013\" Is Absolutely Awesome", | ||||||
|  |             "view_count": int, | ||||||
|  |             "thumbnail": "re:^https?://", | ||||||
|         }, |         }, | ||||||
|         'add_ie': ['Youtube'] |         'add_ie': ['Youtube'] | ||||||
|     } |     } | ||||||
| @@ -25,21 +27,27 @@ class NineGagIE(InfoExtractor): | |||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|         data_json = self._html_search_regex(r'''(?x) |  | ||||||
|             <div\s*id="tv-video"\s*data-video-source="youtube"\s* |  | ||||||
|                 data-video-meta="([^"]+)"''', webpage, 'video metadata') |  | ||||||
|  |  | ||||||
|         data = json.loads(data_json) |         youtube_id = self._html_search_regex( | ||||||
|  |             r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"', | ||||||
|  |             webpage, 'video ID') | ||||||
|  |         description = self._html_search_regex( | ||||||
|  |             r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage, | ||||||
|  |             'description', fatal=False) | ||||||
|  |         view_count_str = self._html_search_regex( | ||||||
|  |             r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count', | ||||||
|  |             fatal=False) | ||||||
|  |         view_count = ( | ||||||
|  |             None if view_count_str is None | ||||||
|  |             else int(view_count_str.replace(',', ''))) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             '_type': 'url_transparent', |             '_type': 'url_transparent', | ||||||
|             'url': data['youtubeVideoId'], |             'url': youtube_id, | ||||||
|             'ie_key': 'Youtube', |             'ie_key': 'Youtube', | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': data['title'], |             'title': self._og_search_title(webpage), | ||||||
|             'description': data['description'], |             'description': description, | ||||||
|             'view_count': int(data['view_count']), |             'view_count': view_count, | ||||||
|             'like_count': int(data['statistic']['like']), |             'thumbnail': self._og_search_thumbnail(webpage), | ||||||
|             'dislike_count': int(data['statistic']['dislike']), |  | ||||||
|             'thumbnail': data['thumbnail_url'], |  | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -1,61 +1,51 @@ | |||||||
|  | # encoding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ExtractorError, |  | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class NormalbootsIE(InfoExtractor): | class NormalbootsIE(InfoExtractor): | ||||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$' |     _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://normalboots.com/video/home-alone-games-jontron/', |         'url': 'http://normalboots.com/video/home-alone-games-jontron/', | ||||||
|         u'file': u'home-alone-games-jontron.mp4', |         'md5': '8bf6de238915dd501105b44ef5f1e0f6', | ||||||
|         u'md5': u'8bf6de238915dd501105b44ef5f1e0f6', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': 'home-alone-games-jontron', | ||||||
|             u'title': u'Home Alone Games - JonTron - NormalBoots', |             'ext': 'mp4', | ||||||
|             u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/', |             'title': 'Home Alone Games - JonTron - NormalBoots', | ||||||
|             u'uploader': u'JonTron', |             'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/', | ||||||
|             u'upload_date': u'20140125', |             'uploader': 'JonTron', | ||||||
|  |             'upload_date': '20140125', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|      |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         if mobj is None: |  | ||||||
|             raise ExtractorError(u'Invalid URL: %s' % url) |  | ||||||
|         video_id = mobj.group('videoid') |         video_id = mobj.group('videoid') | ||||||
|          |  | ||||||
|         info = { |  | ||||||
|             'id': video_id, |  | ||||||
|             'uploader': None, |  | ||||||
|             'upload_date': None, |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         if url[:4] != 'http': |  | ||||||
|             url = 'http://' + url |  | ||||||
|          |  | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|         video_title = self._og_search_title(webpage) |  | ||||||
|         video_description = self._og_search_description(webpage) |  | ||||||
|         video_thumbnail = self._og_search_thumbnail(webpage) |  | ||||||
|         video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>', |         video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>', | ||||||
|             webpage, 'uploader') |             webpage, 'uploader') | ||||||
|         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',  |         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', | ||||||
|             webpage, 'date') |             webpage, 'date') | ||||||
|         video_upload_date = unified_strdate(raw_upload_date) |         video_upload_date = unified_strdate(raw_upload_date) | ||||||
|         video_upload_date = unified_strdate(raw_upload_date) |  | ||||||
|              |  | ||||||
|         player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url') |         player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url') | ||||||
|         player_page = self._download_webpage(player_url, video_id) |         player_page = self._download_webpage(player_url, video_id) | ||||||
|         video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file') |         video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file') | ||||||
|          |  | ||||||
|         info['url'] = video_url |         return { | ||||||
|         info['title'] = video_title |             'id': video_id, | ||||||
|         info['description'] = video_description |             'url': video_url, | ||||||
|         info['thumbnail'] = video_thumbnail |             'title': self._og_search_title(webpage), | ||||||
|         info['uploader'] = video_uploader |             'description': self._og_search_description(webpage), | ||||||
|         info['upload_date'] = video_upload_date |             'thumbnail': self._og_search_thumbnail(webpage), | ||||||
|          |             'uploader': video_uploader, | ||||||
|         return info |             'upload_date': video_upload_date, | ||||||
|  |         } | ||||||
|   | |||||||
| @@ -9,14 +9,25 @@ from ..utils import ( | |||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class NovamovIE(InfoExtractor): | class NovaMovIE(InfoExtractor): | ||||||
|     _VALID_URL = r'http://(?:(?:www\.)?novamov\.com/video/|(?:(?:embed|www)\.)novamov\.com/embed\.php\?v=)(?P<videoid>[a-z\d]{13})' |     IE_NAME = 'novamov' | ||||||
|  |     IE_DESC = 'NovaMov' | ||||||
|  |  | ||||||
|  |     _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'} | ||||||
|  |  | ||||||
|  |     _HOST = 'www.novamov.com' | ||||||
|  |  | ||||||
|  |     _FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>' | ||||||
|  |     _FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";' | ||||||
|  |     _TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>' | ||||||
|  |     _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.novamov.com/video/4rurhn9x446jj', |         'url': 'http://www.novamov.com/video/4rurhn9x446jj', | ||||||
|         'file': '4rurhn9x446jj.flv', |  | ||||||
|         'md5': '7205f346a52bbeba427603ba10d4b935', |         'md5': '7205f346a52bbeba427603ba10d4b935', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|  |             'id': '4rurhn9x446jj', | ||||||
|  |             'ext': 'flv', | ||||||
|             'title': 'search engine optimization', |             'title': 'search engine optimization', | ||||||
|             'description': 'search engine optimization is used to rank the web page in the google search engine' |             'description': 'search engine optimization is used to rank the web page in the google search engine' | ||||||
|         }, |         }, | ||||||
| @@ -27,31 +38,26 @@ class NovamovIE(InfoExtractor): | |||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         video_id = mobj.group('videoid') |         video_id = mobj.group('videoid') | ||||||
|  |  | ||||||
|         page = self._download_webpage('http://www.novamov.com/video/%s' % video_id, |         page = self._download_webpage( | ||||||
|                                       video_id, 'Downloading video page') |             'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page') | ||||||
|  |  | ||||||
|         if re.search(r'This file no longer exists on our servers!</h2>', page) is not None: |         if re.search(self._FILE_DELETED_REGEX, page) is not None: | ||||||
|             raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) |             raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) | ||||||
|  |  | ||||||
|         filekey = self._search_regex( |         filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey') | ||||||
|             r'flashvars\.filekey="(?P<filekey>[^"]+)";', page, 'filekey') |  | ||||||
|  |  | ||||||
|         title = self._html_search_regex( |         title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False) | ||||||
|             r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>', |  | ||||||
|             page, 'title', fatal=False) |  | ||||||
|  |  | ||||||
|         description = self._html_search_regex( |         description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False) | ||||||
|             r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>', |  | ||||||
|             page, 'description', fatal=False) |  | ||||||
|  |  | ||||||
|         api_response = self._download_webpage( |         api_response = self._download_webpage( | ||||||
|             'http://www.novamov.com/api/player.api.php?key=%s&file=%s' % (filekey, video_id), |             'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id, | ||||||
|             video_id, 'Downloading video api response') |             'Downloading video api response') | ||||||
|  |  | ||||||
|         response = compat_urlparse.parse_qs(api_response) |         response = compat_urlparse.parse_qs(api_response) | ||||||
|  |  | ||||||
|         if 'error_msg' in response: |         if 'error_msg' in response: | ||||||
|             raise ExtractorError('novamov returned error: %s' % response['error_msg'][0], expected=True) |             raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True) | ||||||
|  |  | ||||||
|         video_url = response['url'][0] |         video_url = response['url'][0] | ||||||
|  |  | ||||||
| @@ -60,4 +66,4 @@ class NovamovIE(InfoExtractor): | |||||||
|             'url': video_url, |             'url': video_url, | ||||||
|             'title': title, |             'title': title, | ||||||
|             'description': description |             'description': description | ||||||
|         } |         } | ||||||
| @@ -1,46 +1,28 @@ | |||||||
| import re | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .novamov import NovaMovIE | ||||||
| from ..utils import compat_urlparse |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class NowVideoIE(InfoExtractor): | class NowVideoIE(NovaMovIE): | ||||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.(?:ch|sx)/video/(?P<id>\w+)' |     IE_NAME = 'nowvideo' | ||||||
|  |     IE_DESC = 'NowVideo' | ||||||
|  |  | ||||||
|  |     _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'} | ||||||
|  |  | ||||||
|  |     _HOST = 'www.nowvideo.ch' | ||||||
|  |  | ||||||
|  |     _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' | ||||||
|  |     _FILEKEY_REGEX = r'var fkzd="([^"]+)";' | ||||||
|  |     _TITLE_REGEX = r'<h4>([^<]+)</h4>' | ||||||
|  |     _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa', |         'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa', | ||||||
|         u'file': u'0mw0yow7b6dxa.flv', |         'md5': 'f8fbbc8add72bd95b7850c6a02fc8817', | ||||||
|         u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '0mw0yow7b6dxa', | ||||||
|             u"title": u"youtubedl test video _BaW_jenozKc.mp4" |             'ext': 'flv', | ||||||
|  |             'title': 'youtubedl test video _BaW_jenozKc.mp4', | ||||||
|  |             'description': 'Description', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         mobj = re.match(self._VALID_URL, url) |  | ||||||
|  |  | ||||||
|         video_id = mobj.group('id') |  | ||||||
|         webpage_url = 'http://www.nowvideo.ch/video/' + video_id |  | ||||||
|         embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id |  | ||||||
|         webpage = self._download_webpage(webpage_url, video_id) |  | ||||||
|         embed_page = self._download_webpage(embed_url, video_id, |  | ||||||
|             u'Downloading embed page') |  | ||||||
|  |  | ||||||
|         self.report_extraction(video_id) |  | ||||||
|  |  | ||||||
|         video_title = self._html_search_regex(r'<h4>(.*)</h4>', |  | ||||||
|             webpage, u'video title') |  | ||||||
|  |  | ||||||
|         video_key = self._search_regex(r'var fkzd="(.*)";', |  | ||||||
|             embed_page, u'video key') |  | ||||||
|  |  | ||||||
|         api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key) |  | ||||||
|         api_response = self._download_webpage(api_call, video_id, |  | ||||||
|             u'Downloading API page') |  | ||||||
|         video_url = compat_urlparse.parse_qs(api_response)[u'url'][0] |  | ||||||
|  |  | ||||||
|         return [{ |  | ||||||
|             'id':        video_id, |  | ||||||
|             'url':       video_url, |  | ||||||
|             'ext':       'flv', |  | ||||||
|             'title':     video_title, |  | ||||||
|         }] |  | ||||||
| @@ -8,6 +8,7 @@ from .common import InfoExtractor | |||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     HEADRequest, |     HEADRequest, | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
|  |     ExtractorError, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -35,7 +36,15 @@ class ORFIE(InfoExtractor): | |||||||
|         data_json = self._search_regex( |         data_json = self._search_regex( | ||||||
|             r'initializeAdworx\((.+?)\);\n', webpage, 'video info') |             r'initializeAdworx\((.+?)\);\n', webpage, 'video info') | ||||||
|         all_data = json.loads(data_json) |         all_data = json.loads(data_json) | ||||||
|         sdata = all_data[0]['values']['segments'] |  | ||||||
|  |         def get_segments(all_data): | ||||||
|  |             for data in all_data: | ||||||
|  |                 if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM': | ||||||
|  |                     return data['values']['segments'] | ||||||
|  |  | ||||||
|  |         sdata = get_segments(all_data) | ||||||
|  |         if not sdata: | ||||||
|  |             raise ExtractorError('Unable to extract segments') | ||||||
|  |  | ||||||
|         def quality_to_int(s): |         def quality_to_int(s): | ||||||
|             m = re.search('([0-9]+)', s) |             m = re.search('([0-9]+)', s) | ||||||
|   | |||||||
							
								
								
									
										57
									
								
								youtube_dl/extractor/parliamentliveuk.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								youtube_dl/extractor/parliamentliveuk.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|  |     unified_strdate, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ParliamentLiveUKIE(InfoExtractor): | ||||||
|  |     IE_NAME = 'parliamentlive.tv' | ||||||
|  |     IE_DESC = 'UK parliament videos' | ||||||
|  |     _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)' | ||||||
|  |  | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '15121', | ||||||
|  |             'ext': 'asf', | ||||||
|  |             'title': 'hoc home affairs committee, 18 mar 2014.pm', | ||||||
|  |             'description': 'md5:033b3acdf83304cd43946b2d5e5798d1', | ||||||
|  |         }, | ||||||
|  |         'params': { | ||||||
|  |             'skip_download': True,  # Requires mplayer (mms) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|  |         asx_url = self._html_search_regex( | ||||||
|  |             r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage, | ||||||
|  |             'metadata URL') | ||||||
|  |         asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata') | ||||||
|  |         video_url = asx.find('.//REF').attrib['HREF'] | ||||||
|  |  | ||||||
|  |         title = self._search_regex( | ||||||
|  |             r'''(?x)player\.setClipDetails\( | ||||||
|  |                 (?:(?:[0-9]+|"[^"]+"),\s*){2} | ||||||
|  |                 "([^"]+",\s*"[^"]+)" | ||||||
|  |                 ''', | ||||||
|  |             webpage, 'title').replace('", "', ', ') | ||||||
|  |         description = self._html_search_regex( | ||||||
|  |             r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>', | ||||||
|  |             webpage, 'description') | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'ext': 'asf', | ||||||
|  |             'url': video_url, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |         } | ||||||
| @@ -3,6 +3,9 @@ from __future__ import unicode_literals | |||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     US_RATINGS, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class PBSIE(InfoExtractor): | class PBSIE(InfoExtractor): | ||||||
| @@ -13,7 +16,7 @@ class PBSIE(InfoExtractor): | |||||||
|             # Article with embedded player |             # Article with embedded player | ||||||
|            (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) | |            (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) | | ||||||
|            # Player |            # Player | ||||||
|            video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/ |            video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ | ||||||
|         ) |         ) | ||||||
|     ''' |     ''' | ||||||
|  |  | ||||||
| @@ -57,6 +60,11 @@ class PBSIE(InfoExtractor): | |||||||
|         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id |         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id | ||||||
|         info = self._download_json(info_url, display_id) |         info = self._download_json(info_url, display_id) | ||||||
|  |  | ||||||
|  |         rating_str = info.get('rating') | ||||||
|  |         if rating_str is not None: | ||||||
|  |             rating_str = rating_str.rpartition('-')[2] | ||||||
|  |         age_limit = US_RATINGS.get(rating_str) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': info['title'], |             'title': info['title'], | ||||||
| @@ -65,4 +73,5 @@ class PBSIE(InfoExtractor): | |||||||
|             'description': info['program'].get('description'), |             'description': info['program'].get('description'), | ||||||
|             'thumbnail': info.get('image_url'), |             'thumbnail': info.get('image_url'), | ||||||
|             'duration': info.get('duration'), |             'duration': info.get('duration'), | ||||||
|  |             'age_limit': age_limit, | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -1,76 +1,43 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import datetime | import datetime | ||||||
| import json | import json | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  |  | ||||||
| from ..utils import ( |  | ||||||
|     ExtractorError, |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| class PhotobucketIE(InfoExtractor): | class PhotobucketIE(InfoExtractor): | ||||||
|     """Information extractor for photobucket.com.""" |     _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' | ||||||
|  |  | ||||||
|     # TODO: the original _VALID_URL was: |  | ||||||
|     # r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' |  | ||||||
|     # Check if it's necessary to keep the old extracion process |  | ||||||
|     _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' |  | ||||||
|     IE_NAME = u'photobucket' |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', |         'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', | ||||||
|         u'file': u'zpsc0c3b9fa.mp4', |         'file': 'zpsc0c3b9fa.mp4', | ||||||
|         u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99', |         'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', | ||||||
|         u'info_dict': { |         'info_dict': { | ||||||
|             u"upload_date": u"20130504",  |             'upload_date': '20130504', | ||||||
|             u"uploader": u"rachaneronas",  |             'uploader': 'rachaneronas', | ||||||
|             u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!" |             'title': 'Tired of Link Building? Try BacklinkMyDomain.com!', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         # Extract id from URL |  | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         if mobj is None: |  | ||||||
|             raise ExtractorError(u'Invalid URL: %s' % url) |  | ||||||
|  |  | ||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|         video_extension = mobj.group('ext') |         video_extension = mobj.group('ext') | ||||||
|  |  | ||||||
|         # Retrieve video webpage to extract further information |  | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|         # Extract URL, uploader, and title from webpage |         # Extract URL, uploader, and title from webpage | ||||||
|         self.report_extraction(video_id) |         self.report_extraction(video_id) | ||||||
|         # We try first by looking the javascript code: |         info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);', | ||||||
|         mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage) |             webpage, 'info json') | ||||||
|         if mobj is not None: |         info = json.loads(info_json) | ||||||
|             info = json.loads(mobj.group('json')) |         return { | ||||||
|             return [{ |             'id': video_id, | ||||||
|                 'id':       video_id, |             'url': info['downloadUrl'], | ||||||
|                 'url':      info[u'downloadUrl'], |             'uploader': info['username'], | ||||||
|                 'uploader': info[u'username'], |             'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'), | ||||||
|                 'upload_date':  datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'), |             'title': info['title'], | ||||||
|                 'title':    info[u'title'], |             'ext': video_extension, | ||||||
|                 'ext':      video_extension, |             'thumbnail': info['thumbUrl'], | ||||||
|                 'thumbnail': info[u'thumbUrl'], |         } | ||||||
|             }] |  | ||||||
|  |  | ||||||
|         # We try looking in other parts of the webpage |  | ||||||
|         video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />', |  | ||||||
|             webpage, u'video URL') |  | ||||||
|  |  | ||||||
|         mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage) |  | ||||||
|         if mobj is None: |  | ||||||
|             raise ExtractorError(u'Unable to extract title') |  | ||||||
|         video_title = mobj.group(1).decode('utf-8') |  | ||||||
|         video_uploader = mobj.group(2).decode('utf-8') |  | ||||||
|  |  | ||||||
|         return [{ |  | ||||||
|             'id':       video_id.decode('utf-8'), |  | ||||||
|             'url':      video_url.decode('utf-8'), |  | ||||||
|             'uploader': video_uploader, |  | ||||||
|             'upload_date':  None, |  | ||||||
|             'title':    video_title, |  | ||||||
|             'ext':      video_extension.decode('utf-8'), |  | ||||||
|         }] |  | ||||||
|   | |||||||
							
								
								
									
										80
									
								
								youtube_dl/extractor/playvid.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								youtube_dl/extractor/playvid.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     compat_urllib_parse, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class PlayvidIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://www.playvid.com/watch/agbDDi7WZTV', | ||||||
|  |         'md5': '44930f8afa616efdf9482daf4fe53e1e', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'agbDDi7WZTV', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Michelle Lewin in Miami Beach', | ||||||
|  |             'duration': 240, | ||||||
|  |             'age_limit': 18, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|  |         video_title = None | ||||||
|  |         duration = None | ||||||
|  |         video_thumbnail = None | ||||||
|  |         formats = [] | ||||||
|  |  | ||||||
|  |         # most of the information is stored in the flashvars | ||||||
|  |         flashvars = self._html_search_regex( | ||||||
|  |             r'flashvars="(.+?)"', webpage, 'flashvars') | ||||||
|  |  | ||||||
|  |         infos = compat_urllib_parse.unquote(flashvars).split(r'&') | ||||||
|  |         for info in infos: | ||||||
|  |             videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info) | ||||||
|  |             if videovars_match: | ||||||
|  |                 key = videovars_match.group(1) | ||||||
|  |                 val = videovars_match.group(2) | ||||||
|  |  | ||||||
|  |                 if key == 'title': | ||||||
|  |                     video_title = compat_urllib_parse.unquote_plus(val) | ||||||
|  |                 if key == 'duration': | ||||||
|  |                     try: | ||||||
|  |                         duration = int(val) | ||||||
|  |                     except ValueError: | ||||||
|  |                         pass | ||||||
|  |                 if key == 'big_thumb': | ||||||
|  |                     video_thumbnail = val | ||||||
|  |  | ||||||
|  |                 videourl_match = re.match( | ||||||
|  |                     r'^video_urls\]\[(?P<resolution>[0-9]+)p', key) | ||||||
|  |                 if videourl_match: | ||||||
|  |                     height = int(videourl_match.group('resolution')) | ||||||
|  |                     formats.append({ | ||||||
|  |                         'height': height, | ||||||
|  |                         'url': val, | ||||||
|  |                     }) | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         # Extract title - should be in the flashvars; if not, look elsewhere | ||||||
|  |         if video_title is None: | ||||||
|  |             video_title = self._html_search_regex( | ||||||
|  |                 r'<title>(.*?)</title', webpage, 'title') | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'formats': formats, | ||||||
|  |             'title': video_title, | ||||||
|  |             'thumbnail': video_thumbnail, | ||||||
|  |             'duration': duration, | ||||||
|  |             'description': None, | ||||||
|  |             'age_limit': 18 | ||||||
|  |         } | ||||||
| @@ -1,7 +1,10 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import json | import json | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import int_or_none | ||||||
|  |  | ||||||
|  |  | ||||||
| class PodomaticIE(InfoExtractor): | class PodomaticIE(InfoExtractor): | ||||||
| @@ -9,14 +12,14 @@ class PodomaticIE(InfoExtractor): | |||||||
|     _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)' |     _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00", |         "url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00", | ||||||
|         u"file": u"2009-01-02T16_03_35-08_00.mp3", |         "file": "2009-01-02T16_03_35-08_00.mp3", | ||||||
|         u"md5": u"84bb855fcf3429e6bf72460e1eed782d", |         "md5": "84bb855fcf3429e6bf72460e1eed782d", | ||||||
|         u"info_dict": { |         "info_dict": { | ||||||
|             u"uploader": u"Science Teaching Tips", |             "uploader": "Science Teaching Tips", | ||||||
|             u"uploader_id": u"scienceteachingtips", |             "uploader_id": "scienceteachingtips", | ||||||
|             u"title": u"64.  When the Moon Hits Your Eye", |             "title": "64.  When the Moon Hits Your Eye", | ||||||
|             u"duration": 446, |             "duration": 446, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -36,7 +39,7 @@ class PodomaticIE(InfoExtractor): | |||||||
|         uploader = data['podcast'] |         uploader = data['podcast'] | ||||||
|         title = data['title'] |         title = data['title'] | ||||||
|         thumbnail = data['imageLocation'] |         thumbnail = data['imageLocation'] | ||||||
|         duration = int(data['length'] / 1000.0) |         duration = int_or_none(data.get('length'), 1000) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|   | |||||||
| @@ -44,7 +44,7 @@ class PornHubIE(InfoExtractor): | |||||||
|  |  | ||||||
|         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) |         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) | ||||||
|         if webpage.find('"encrypted":true') != -1: |         if webpage.find('"encrypted":true') != -1: | ||||||
|             password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ') |             password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password')) | ||||||
|             video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) |             video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|   | |||||||
							
								
								
									
										297
									
								
								youtube_dl/extractor/prosiebensat1.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										297
									
								
								youtube_dl/extractor/prosiebensat1.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,297 @@ | |||||||
|  | # encoding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from hashlib import sha1 | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     compat_urllib_parse, | ||||||
|  |     unified_strdate, | ||||||
|  |     clean_html, | ||||||
|  |     RegexNotFoundError, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ProSiebenSat1IE(InfoExtractor): | ||||||
|  |     IE_NAME = 'prosiebensat1' | ||||||
|  |     IE_DESC = 'ProSiebenSat.1 Digital' | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|ran|the-voice-of-germany)\.de|fem\.com)/(?P<id>.+)' | ||||||
|  |  | ||||||
|  |     _TESTS = [ | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '2104602', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Staffel 2, Episode 18 - Jahresrückblick', | ||||||
|  |                 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', | ||||||
|  |                 'upload_date': '20131231', | ||||||
|  |                 'duration': 5845.04, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # rtmp download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '2570327', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Lady-Umstyling für Audrina', | ||||||
|  |                 'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d', | ||||||
|  |                 'upload_date': '20131014', | ||||||
|  |                 'duration': 606.76, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # rtmp download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |             'skip': 'Seems to be broken', | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '2429369', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Countdown für die Autowerkstatt', | ||||||
|  |                 'description': 'md5:809fc051a457b5d8666013bc40698817', | ||||||
|  |                 'upload_date': '20140223', | ||||||
|  |                 'duration': 2595.04, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # rtmp download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '2904997', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Sexy laufen in Ugg Boots', | ||||||
|  |                 'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6', | ||||||
|  |                 'upload_date': '20140122', | ||||||
|  |                 'duration': 245.32, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # rtmp download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '2906572', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Im Interview: Kai Wiesinger', | ||||||
|  |                 'description': 'md5:e4e5370652ec63b95023e914190b4eb9', | ||||||
|  |                 'upload_date': '20140225', | ||||||
|  |                 'duration': 522.56, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # rtmp download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '2992323', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2', | ||||||
|  |                 'description': 'md5:2669cde3febe9bce13904f701e774eb6', | ||||||
|  |                 'upload_date': '20140225', | ||||||
|  |                 'duration': 2410.44, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # rtmp download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '3004256', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Schalke: Tönnies möchte Raul zurück', | ||||||
|  |                 'description': 'md5:4b5b271d9bcde223b54390754c8ece3f', | ||||||
|  |                 'upload_date': '20140226', | ||||||
|  |                 'duration': 228.96, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # rtmp download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '2572814', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Andreas Kümmert: Rocket Man', | ||||||
|  |                 'description': 'md5:6ddb02b0781c6adf778afea606652e38', | ||||||
|  |                 'upload_date': '20131017', | ||||||
|  |                 'duration': 469.88, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # rtmp download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '2156342', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Kurztrips zum Valentinstag', | ||||||
|  |                 'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528', | ||||||
|  |                 'upload_date': '20130206', | ||||||
|  |                 'duration': 307.24, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # rtmp download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |     ] | ||||||
|  |  | ||||||
    # Candidate patterns for the numeric clip id embedded in the page.
    # _real_extract tries each list below in order and uses the first pattern
    # that matches; failing to find a clip id is fatal there.
    _CLIPID_REGEXES = [
        r'"clip_id"\s*:\s+"(\d+)"',
        r'clipid: "(\d+)"',
    ]
    # Candidate patterns for the video title (required by _real_extract).
    # NOTE(review): presumably one pattern per supported site layout -- confirm.
    _TITLE_REGEXES = [
        r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
        r'<header class="clearfix">\s*<h3>(.+?)</h3>',
        r'<!-- start video -->\s*<h1>(.+?)</h1>',
        r'<div class="ep-femvideos-pi4-video-txt">\s*<h2>(.+?)</h2>',
    ]
    # Candidate patterns for the description (optional: no match yields None).
    _DESCRIPTION_REGEXES = [
        r'<p itemprop="description">\s*(.+?)</p>',
        r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
        r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
        r'<p>(.+?)</p>\s*<div class="ep-femvideos-pi4-video-footer">',
    ]
    # Candidate patterns for the upload date (optional).  The captured text is
    # handed to unified_strdate() by _real_extract.
    _UPLOAD_DATE_REGEXES = [
        r'<meta property="og:published_time" content="(.+?)">',
        r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
        r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
        r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
        r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
    ]
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         page = self._download_webpage(url, video_id, 'Downloading page') | ||||||
|  |  | ||||||
|  |         def extract(patterns, name, page, fatal=False): | ||||||
|  |             for pattern in patterns: | ||||||
|  |                 mobj = re.search(pattern, page) | ||||||
|  |                 if mobj: | ||||||
|  |                     return clean_html(mobj.group(1)) | ||||||
|  |             if fatal: | ||||||
|  |                 raise RegexNotFoundError(u'Unable to extract %s' % name) | ||||||
|  |             return None | ||||||
|  |  | ||||||
|  |         clip_id = extract(self._CLIPID_REGEXES, 'clip id', page, fatal=True) | ||||||
|  |  | ||||||
|  |         access_token = 'testclient' | ||||||
|  |         client_name = 'kolibri-1.2.5' | ||||||
|  |         client_location = url | ||||||
|  |  | ||||||
|  |         videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({ | ||||||
|  |             'access_token': access_token, | ||||||
|  |             'client_location': client_location, | ||||||
|  |             'client_name': client_name, | ||||||
|  |             'ids': clip_id, | ||||||
|  |         }) | ||||||
|  |  | ||||||
|  |         videos = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON') | ||||||
|  |  | ||||||
|  |         duration = float(videos[0]['duration']) | ||||||
|  |         source_ids = [source['id'] for source in videos[0]['sources']] | ||||||
|  |         source_ids_str = ','.join(map(str, source_ids)) | ||||||
|  |  | ||||||
|  |         g = '01!8d8F_)r9]4s[qeuXfP%' | ||||||
|  |  | ||||||
|  |         client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]) | ||||||
|  |                                  .encode('utf-8')).hexdigest() | ||||||
|  |  | ||||||
|  |         sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse.urlencode({ | ||||||
|  |             'access_token': access_token, | ||||||
|  |             'client_id': client_id, | ||||||
|  |             'client_location': client_location, | ||||||
|  |             'client_name': client_name, | ||||||
|  |         })) | ||||||
|  |  | ||||||
|  |         sources = self._download_json(sources_api_url, clip_id, 'Downloading sources JSON') | ||||||
|  |         server_id = sources['server_id'] | ||||||
|  |  | ||||||
|  |         client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, | ||||||
|  |                                           client_location, source_ids_str, g, client_name]) | ||||||
|  |                                  .encode('utf-8')).hexdigest() | ||||||
|  |  | ||||||
|  |         url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse.urlencode({ | ||||||
|  |             'access_token': access_token, | ||||||
|  |             'client_id': client_id, | ||||||
|  |             'client_location': client_location, | ||||||
|  |             'client_name': client_name, | ||||||
|  |             'server_id': server_id, | ||||||
|  |             'source_ids': source_ids_str, | ||||||
|  |         })) | ||||||
|  |  | ||||||
|  |         urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON') | ||||||
|  |  | ||||||
|  |         title = extract(self._TITLE_REGEXES, 'title', page, fatal=True) | ||||||
|  |         description = extract(self._DESCRIPTION_REGEXES, 'description', page) | ||||||
|  |         thumbnail = self._og_search_thumbnail(page) | ||||||
|  |  | ||||||
|  |         upload_date = extract(self._UPLOAD_DATE_REGEXES, 'upload date', page) | ||||||
|  |         if upload_date: | ||||||
|  |             upload_date = unified_strdate(upload_date) | ||||||
|  |  | ||||||
|  |         formats = [] | ||||||
|  |  | ||||||
|  |         urls_sources = urls['sources'] | ||||||
|  |         if isinstance(urls_sources, dict): | ||||||
|  |             urls_sources = urls_sources.values() | ||||||
|  |  | ||||||
|  |         def fix_bitrate(bitrate): | ||||||
|  |             return bitrate / 1000 if bitrate % 1000 == 0 else bitrate | ||||||
|  |  | ||||||
|  |         for source in urls_sources: | ||||||
|  |             protocol = source['protocol'] | ||||||
|  |             if protocol == 'rtmp' or protocol == 'rtmpe': | ||||||
|  |                 mobj = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', source['url']) | ||||||
|  |                 if not mobj: | ||||||
|  |                     continue | ||||||
|  |                 formats.append({ | ||||||
|  |                     'url': mobj.group('url'), | ||||||
|  |                     'app': mobj.group('app'), | ||||||
|  |                     'play_path': mobj.group('playpath'), | ||||||
|  |                     'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', | ||||||
|  |                     'page_url': 'http://www.prosieben.de', | ||||||
|  |                     'vbr': fix_bitrate(source['bitrate']), | ||||||
|  |                     'ext': 'mp4', | ||||||
|  |                     'format_id': '%s_%s' % (source['cdn'], source['bitrate']), | ||||||
|  |                 }) | ||||||
|  |             else: | ||||||
|  |                 formats.append({ | ||||||
|  |                     'url': source['url'], | ||||||
|  |                     'vbr': fix_bitrate(source['bitrate']), | ||||||
|  |                 }) | ||||||
|  |  | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': clip_id, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |             'upload_date': upload_date, | ||||||
|  |             'duration': duration, | ||||||
|  |             'formats': formats, | ||||||
|  |         } | ||||||
| @@ -1,148 +1,165 @@ | |||||||
| # encoding: utf-8 | # encoding: utf-8 | ||||||
|  |  | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     clean_html, |  | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|  |     clean_html, | ||||||
|  |     unified_strdate, | ||||||
|  |     int_or_none, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class RTLnowIE(InfoExtractor): | class RTLnowIE(InfoExtractor): | ||||||
|     """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" |     """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" | ||||||
|     _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' |     _VALID_URL = r'''(?x) | ||||||
|     _TESTS = [{ |                         (?:https?://)? | ||||||
|         'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', |                         (?P<url> | ||||||
|         'file': '90419.flv', |                             (?P<domain> | ||||||
|         'info_dict': { |                                 rtl-now\.rtl\.de| | ||||||
|             'upload_date': '20070416', |                                 rtl2now\.rtl2\.de| | ||||||
|             'title': 'Ahornallee - Folge 1 - Der Einzug', |                                 (?:www\.)?voxnow\.de| | ||||||
|             'description': 'Folge 1 - Der Einzug', |                                 (?:www\.)?rtlnitronow\.de| | ||||||
|  |                                 (?:www\.)?superrtlnow\.de| | ||||||
|  |                                 (?:www\.)?n-tvnow\.de) | ||||||
|  |                             /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\? | ||||||
|  |                             (?:container_id|film_id)=(?P<video_id>[0-9]+)& | ||||||
|  |                             player=1(?:&season=[0-9]+)?(?:&.*)? | ||||||
|  |                         )''' | ||||||
|  |  | ||||||
|  |     _TESTS = [ | ||||||
|  |         { | ||||||
|  |             'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '90419', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'Ahornallee - Folge 1 - Der Einzug', | ||||||
|  |                 'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de', | ||||||
|  |                 'upload_date': '20070416', | ||||||
|  |                 'duration': 1685, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |             'skip': 'Only works from Germany', | ||||||
|         }, |         }, | ||||||
|         'params': { |         { | ||||||
|             'skip_download': True, |             'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '69756', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', | ||||||
|  |                 'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0', | ||||||
|  |                 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', | ||||||
|  |                 'upload_date': '20120519', | ||||||
|  |                 'duration': 1245, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |             'skip': 'Only works from Germany', | ||||||
|         }, |         }, | ||||||
|         'skip': 'Only works from Germany', |         { | ||||||
|     }, |             'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', | ||||||
|     { |             'info_dict': { | ||||||
|         'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', |                 'id': '13883', | ||||||
|         'file': '69756.flv', |                 'ext': 'flv', | ||||||
|         'info_dict': { |                 'title': 'Voxtours - Südafrika-Reporter II', | ||||||
|             'upload_date': '20120519', |                 'description': 'md5:de7f8d56be6fd4fed10f10f57786db00', | ||||||
|             'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', |                 'upload_date': '20090627', | ||||||
|             'description': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', |                 'duration': 1800, | ||||||
|             'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|         }, |         }, | ||||||
|         'params': { |         { | ||||||
|             'skip_download': True, |             'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '99205', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'Medicopter 117 - Angst!', | ||||||
|  |                 'description': 'md5:895b1df01639b5f61a04fc305a5cb94d', | ||||||
|  |                 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg', | ||||||
|  |                 'upload_date': '20080928', | ||||||
|  |                 'duration': 2691, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|         }, |         }, | ||||||
|         'skip': 'Only works from Germany', |         { | ||||||
|     }, |             'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0', | ||||||
|     { |             'info_dict': { | ||||||
|         'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', |                 'id': '153819', | ||||||
|         'file': '13883.flv', |                 'ext': 'flv', | ||||||
|         'info_dict': { |                 'title': 'Deluxe - Alles was Spaß macht - Thema u.a.: Luxushotel für Vierbeiner', | ||||||
|             'upload_date': '20090627', |                 'description': 'md5:c3705e1bb32e1a5b2bcd634fc065c631', | ||||||
|             'title': 'Voxtours - Südafrika-Reporter II', |                 'thumbnail': 'http://autoimg.static-fra.de/ntvnow/383157/1500x1500/image2.jpg', | ||||||
|             'description': 'Südafrika-Reporter II', |                 'upload_date': '20140221', | ||||||
|  |                 'duration': 2429, | ||||||
|  |             }, | ||||||
|  |             'skip': 'Only works from Germany', | ||||||
|         }, |         }, | ||||||
|         'params': { |     ] | ||||||
|             'skip_download': True, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     { |  | ||||||
|         'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', |  | ||||||
|         'file': '99205.flv', |  | ||||||
|         'info_dict': { |  | ||||||
|             'upload_date': '20080928',  |  | ||||||
|             'title': 'Medicopter 117 - Angst!', |  | ||||||
|             'description': 'Angst!', |  | ||||||
|             'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' |  | ||||||
|         }, |  | ||||||
|         'params': { |  | ||||||
|             'skip_download': True, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     { |  | ||||||
|         'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', |  | ||||||
|         'file': '124903.flv', |  | ||||||
|         'info_dict': { |  | ||||||
|             'upload_date': '20130101', |  | ||||||
|             'title': 'Top Gear vom 01.01.2013', |  | ||||||
|             'description': 'Episode 1', |  | ||||||
|         }, |  | ||||||
|         'params': { |  | ||||||
|             'skip_download': True, |  | ||||||
|         }, |  | ||||||
|         'skip': 'Only works from Germany', |  | ||||||
|     }] |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_page_url = 'http://%s/' % mobj.group('domain') | ||||||
|         webpage_url = 'http://' + mobj.group('url') |  | ||||||
|         video_page_url = 'http://' + mobj.group('domain') + '/' |  | ||||||
|         video_id = mobj.group('video_id') |         video_id = mobj.group('video_id') | ||||||
|  |  | ||||||
|         webpage = self._download_webpage(webpage_url, video_id) |         webpage = self._download_webpage('http://' + mobj.group('url'), video_id) | ||||||
|  |  | ||||||
|         note_m = re.search(r'''(?sx) |         mobj = re.search(r'(?s)<div style="margin-left: 20px; font-size: 13px;">(.*?)<div id="playerteaser">', webpage) | ||||||
|             <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?) |         if mobj: | ||||||
|             <div[ ]id="playerteaser">''', webpage) |             raise ExtractorError(clean_html(mobj.group(1)), expected=True) | ||||||
|         if note_m: |  | ||||||
|             msg = clean_html(note_m.group(1)) |         title = self._og_search_title(webpage) | ||||||
|             raise ExtractorError(msg) |         description = self._og_search_description(webpage) | ||||||
|  |         thumbnail = self._og_search_thumbnail(webpage, default=None) | ||||||
|  |  | ||||||
|  |         upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date')) | ||||||
|  |  | ||||||
|  |         mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage) | ||||||
|  |         duration = int(mobj.group('seconds')) if mobj else None | ||||||
|  |  | ||||||
|         video_title = self._html_search_regex( |  | ||||||
|             r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>', |  | ||||||
|             webpage, 'title') |  | ||||||
|         playerdata_url = self._html_search_regex( |         playerdata_url = self._html_search_regex( | ||||||
|             r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'', |             r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url') | ||||||
|             webpage, 'playerdata_url') |  | ||||||
|  |  | ||||||
|         playerdata = self._download_webpage(playerdata_url, video_id) |         playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML') | ||||||
|         mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata) |  | ||||||
|         if mobj: |         videoinfo = playerdata.find('./playlist/videoinfo') | ||||||
|             video_description = mobj.group('description') |          | ||||||
|             if mobj.group('upload_date_Y'): |         formats = [] | ||||||
|                 video_upload_date = mobj.group('upload_date_Y') |         for filename in videoinfo.findall('filename'): | ||||||
|             elif mobj.group('upload_date_y'): |             mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text) | ||||||
|                 video_upload_date = '20' + mobj.group('upload_date_y') |             if mobj: | ||||||
|  |                 fmt = { | ||||||
|  |                     'url': mobj.group('url'), | ||||||
|  |                     'play_path': 'mp4:' + mobj.group('play_path'), | ||||||
|  |                     'page_url': video_page_url, | ||||||
|  |                     'player_url': video_page_url + 'includes/vodplayer.swf', | ||||||
|  |                 } | ||||||
|             else: |             else: | ||||||
|                 video_upload_date = None |                 fmt = { | ||||||
|             if video_upload_date: |                     'url': filename.text, | ||||||
|                 video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d') |                 } | ||||||
|         else: |             fmt.update({ | ||||||
|             video_description = None |                 'width': int_or_none(filename.get('width')), | ||||||
|             video_upload_date = None |                 'height': int_or_none(filename.get('height')), | ||||||
|             self._downloader.report_warning('Unable to extract description and upload date') |                 'vbr': int_or_none(filename.get('bitrate')), | ||||||
|  |                 'ext': 'flv', | ||||||
|         # Thumbnail: not every video has an thumbnail |             }) | ||||||
|         mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage) |             formats.append(fmt) | ||||||
|         if mobj: |  | ||||||
|             video_thumbnail = mobj.group('thumbnail') |  | ||||||
|         else: |  | ||||||
|             video_thumbnail = None |  | ||||||
|  |  | ||||||
|         mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata) |  | ||||||
|         if mobj is None: |  | ||||||
|             raise ExtractorError('Unable to extract media URL') |  | ||||||
|         video_url = mobj.group('url') |  | ||||||
|         video_play_path = 'mp4:' + mobj.group('play_path') |  | ||||||
|         video_player_url = video_page_url + 'includes/vodplayer.swf' |  | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'url': video_url, |             'title': title, | ||||||
|             'play_path': video_play_path, |             'description': description, | ||||||
|             'page_url': video_page_url, |             'thumbnail': thumbnail, | ||||||
|             'player_url': video_player_url, |             'upload_date': upload_date, | ||||||
|             'ext': 'flv', |             'duration': duration, | ||||||
|             'title': video_title, |             'formats': formats, | ||||||
|             'description': video_description, |         } | ||||||
|             'upload_date': video_upload_date, |  | ||||||
|             'thumbnail': video_thumbnail, |  | ||||||
|         } |  | ||||||
							
								
								
									
										183
									
								
								youtube_dl/extractor/rutv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										183
									
								
								youtube_dl/extractor/rutv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,183 @@ | |||||||
|  | # encoding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|  |     int_or_none | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class RUTVIE(InfoExtractor): | ||||||
    IE_DESC = 'RUTV.RU'
    # Accepts two URL shapes on player.rutv.ru / player.vgtrk.com:
    #   * the flash container:  /flash2v/container.swf?id=<id>
    #   * the iframe player:    /iframe/<type>/id/<id>, type in swf|video|live
    # The 'type' group is only set for the iframe shape; _real_extract treats
    # a missing type (and 'swf') as 'video'.
    _VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)'
|  |  | ||||||
|  |     _TESTS = [ | ||||||
|  |         { | ||||||
|  |             'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '774471', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Монологи на все времена', | ||||||
|  |                 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5', | ||||||
|  |                 'duration': 2906, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # m3u8 download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '774016', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Чужой в семье Сталина', | ||||||
|  |                 'description': '', | ||||||
|  |                 'duration': 2539, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # m3u8 download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '766888', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', | ||||||
|  |                 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', | ||||||
|  |                 'duration': 279, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # m3u8 download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '771852', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет', | ||||||
|  |                 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8', | ||||||
|  |                 'duration': 3096, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # m3u8 download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '51499', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ', | ||||||
|  |                 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # rtmp download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |             'skip': 'Translation has finished', | ||||||
|  |         }, | ||||||
|  |     ] | ||||||
|  |  | ||||||
|  |     @classmethod | ||||||
|  |     def _extract_url(cls, webpage): | ||||||
|  |         mobj = re.search( | ||||||
|  |             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage) | ||||||
|  |         if mobj: | ||||||
|  |             return mobj.group('url') | ||||||
|  |  | ||||||
|  |         mobj = re.search( | ||||||
|  |             r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)', | ||||||
|  |             webpage) | ||||||
|  |         if mobj: | ||||||
|  |             return mobj.group('url') | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |         video_type = mobj.group('type') | ||||||
|  |  | ||||||
|  |         if not video_type or video_type == 'swf': | ||||||
|  |             video_type = 'video' | ||||||
|  |  | ||||||
|  |         json_data = self._download_json( | ||||||
|  |             'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id), | ||||||
|  |             video_id, 'Downloading JSON') | ||||||
|  |  | ||||||
|  |         if json_data['errors']: | ||||||
|  |             raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True) | ||||||
|  |  | ||||||
|  |         playlist = json_data['data']['playlist'] | ||||||
|  |         medialist = playlist['medialist'] | ||||||
|  |         media = medialist[0] | ||||||
|  |  | ||||||
|  |         if media['errors']: | ||||||
|  |             raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True) | ||||||
|  |  | ||||||
|  |         view_count = playlist.get('count_views') | ||||||
|  |         priority_transport = playlist['priority_transport'] | ||||||
|  |  | ||||||
|  |         thumbnail = media['picture'] | ||||||
|  |         width = int_or_none(media['width']) | ||||||
|  |         height = int_or_none(media['height']) | ||||||
|  |         description = media['anons'] | ||||||
|  |         title = media['title'] | ||||||
|  |         duration = int_or_none(media.get('duration')) | ||||||
|  |  | ||||||
|  |         formats = [] | ||||||
|  |  | ||||||
|  |         for transport, links in media['sources'].items(): | ||||||
|  |             for quality, url in links.items(): | ||||||
|  |                 if transport == 'rtmp': | ||||||
|  |                     mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url) | ||||||
|  |                     if not mobj: | ||||||
|  |                         continue | ||||||
|  |                     fmt = { | ||||||
|  |                         'url': mobj.group('url'), | ||||||
|  |                         'play_path': mobj.group('playpath'), | ||||||
|  |                         'app': mobj.group('app'), | ||||||
|  |                         'page_url': 'http://player.rutv.ru', | ||||||
|  |                         'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22', | ||||||
|  |                         'rtmp_live': True, | ||||||
|  |                         'ext': 'flv', | ||||||
|  |                         'vbr': int(quality), | ||||||
|  |                     } | ||||||
|  |                 elif transport == 'm3u8': | ||||||
|  |                     fmt = { | ||||||
|  |                         'url': url, | ||||||
|  |                         'ext': 'mp4', | ||||||
|  |                     } | ||||||
|  |                 else: | ||||||
|  |                     fmt = { | ||||||
|  |                         'url': url | ||||||
|  |                     } | ||||||
|  |                 fmt.update({ | ||||||
|  |                     'width': width, | ||||||
|  |                     'height': height, | ||||||
|  |                     'format_id': '%s-%s' % (transport, quality), | ||||||
|  |                     'preference': -1 if priority_transport == transport else -2, | ||||||
|  |                 }) | ||||||
|  |                 formats.append(fmt) | ||||||
|  |  | ||||||
|  |         if not formats: | ||||||
|  |             raise ExtractorError('No media links available for %s' % video_id) | ||||||
|  |  | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |             'view_count': view_count, | ||||||
|  |             'duration': duration, | ||||||
|  |             'formats': formats, | ||||||
|  |         } | ||||||
							
								
								
									
										37
									
								
								youtube_dl/extractor/savefrom.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								youtube_dl/extractor/savefrom.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import os.path | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  |  | ||||||
|  |  | ||||||
class SaveFromIE(InfoExtractor):
    """Extractor for savefrom.net links that carry another URL after ``#url=``."""

    IE_NAME = 'savefrom.net'
    _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'

    _TEST = {
        'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
        'info_dict': {
            'id': 'UlVRAPW2WJY',
            'ext': 'mp4',
            'title': 'About Team Radical MMA | MMA Fighting',
            'upload_date': '20120816',
            'uploader': 'Howcast',
            'uploader_id': 'Howcast',
            'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
        },
        'params': {
            'skip_download': True
        }
    }

    def _real_extract(self, url):
        """Return a result of type ``url`` carrying the URL captured after ``#url=``."""
        wrapped_url = re.match(self._VALID_URL, url).group('url')

        # The id is the last path segment of the full URL, minus whatever
        # os.path.splitext treats as its extension.
        last_segment = url.rpartition('/')[2]
        video_id, _ = os.path.splitext(last_segment)

        return {
            '_type': 'url',
            'id': video_id,
            'url': wrapped_url,
        }
| @@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor): | |||||||
|                 'id': '47127627', |                 'id': '47127627', | ||||||
|                 'ext': 'mp3', |                 'ext': 'mp3', | ||||||
|                 'title': 'Goldrushed', |                 'title': 'Goldrushed', | ||||||
|  |                 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', | ||||||
|                 'uploader': 'The Royal Concept', |                 'uploader': 'The Royal Concept', | ||||||
|                 'upload_date': '20120521', |                 'upload_date': '20120521', | ||||||
|             }, |             }, | ||||||
| @@ -217,7 +218,7 @@ class SoundcloudIE(InfoExtractor): | |||||||
|         return self._extract_info_dict(info, full_title, secret_token=token) |         return self._extract_info_dict(info, full_title, secret_token=token) | ||||||
|  |  | ||||||
| class SoundcloudSetIE(SoundcloudIE): | class SoundcloudSetIE(SoundcloudIE): | ||||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$' |     _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' | ||||||
|     IE_NAME = 'soundcloud:set' |     IE_NAME = 'soundcloud:set' | ||||||
|     # it's in tests/test_playlists.py |     # it's in tests/test_playlists.py | ||||||
|     _TESTS = [] |     _TESTS = [] | ||||||
|   | |||||||
| @@ -1,6 +1,5 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import os |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| @@ -8,23 +7,27 @@ from ..utils import ( | |||||||
|     compat_urllib_parse_urlparse, |     compat_urllib_parse_urlparse, | ||||||
|     compat_urllib_request, |     compat_urllib_request, | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|  |     unified_strdate, | ||||||
|  |     str_to_int, | ||||||
|  |     int_or_none, | ||||||
| ) | ) | ||||||
| from ..aes import ( | from ..aes import aes_decrypt_text | ||||||
|     aes_decrypt_text |  | ||||||
| ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class SpankwireIE(InfoExtractor): | class SpankwireIE(InfoExtractor): | ||||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)' |     _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', |         'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', | ||||||
|         'file': '103545.mp4', |         'md5': '8bbfde12b101204b39e4b9fe7eb67095', | ||||||
|         'md5': '1b3f55e345500552dbc252a3e9c1af43', |  | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             "uploader": "oreusz", |             'id': '103545', | ||||||
|             "title": "Buckcherry`s X Rated Music Video Crazy Bitch", |             'ext': 'mp4', | ||||||
|             "description": "Crazy Bitch X rated music video.", |             'title': 'Buckcherry`s X Rated Music Video Crazy Bitch', | ||||||
|             "age_limit": 18, |             'description': 'Crazy Bitch X rated music video.', | ||||||
|  |             'uploader': 'oreusz', | ||||||
|  |             'uploader_id': '124697', | ||||||
|  |             'upload_date': '20070508', | ||||||
|  |             'age_limit': 18, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -37,13 +40,26 @@ class SpankwireIE(InfoExtractor): | |||||||
|         req.add_header('Cookie', 'age_verified=1') |         req.add_header('Cookie', 'age_verified=1') | ||||||
|         webpage = self._download_webpage(req, video_id) |         webpage = self._download_webpage(req, video_id) | ||||||
|  |  | ||||||
|         video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title') |         title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title') | ||||||
|         video_uploader = self._html_search_regex( |  | ||||||
|             r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False) |  | ||||||
|         thumbnail = self._html_search_regex( |  | ||||||
|             r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False) |  | ||||||
|         description = self._html_search_regex( |         description = self._html_search_regex( | ||||||
|             r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False) |             r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False) | ||||||
|  |         thumbnail = self._html_search_regex( | ||||||
|  |             r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False) | ||||||
|  |  | ||||||
|  |         uploader = self._html_search_regex( | ||||||
|  |             r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False) | ||||||
|  |         uploader_id = self._html_search_regex( | ||||||
|  |             r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False) | ||||||
|  |         upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False) | ||||||
|  |         if upload_date: | ||||||
|  |             upload_date = unified_strdate(upload_date) | ||||||
|  |          | ||||||
|  |         view_count = self._html_search_regex( | ||||||
|  |             r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False) | ||||||
|  |         if view_count: | ||||||
|  |             view_count = str_to_int(view_count) | ||||||
|  |         comment_count = int_or_none(self._html_search_regex( | ||||||
|  |             r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False)) | ||||||
|  |  | ||||||
|         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage))) |         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage))) | ||||||
|         if webpage.find('flashvars\.encrypted = "true"') != -1: |         if webpage.find('flashvars\.encrypted = "true"') != -1: | ||||||
| @@ -53,16 +69,13 @@ class SpankwireIE(InfoExtractor): | |||||||
|         formats = [] |         formats = [] | ||||||
|         for video_url in video_urls: |         for video_url in video_urls: | ||||||
|             path = compat_urllib_parse_urlparse(video_url).path |             path = compat_urllib_parse_urlparse(video_url).path | ||||||
|             extension = os.path.splitext(path)[1][1:] |  | ||||||
|             format = path.split('/')[4].split('_')[:2] |             format = path.split('/')[4].split('_')[:2] | ||||||
|             resolution, bitrate_str = format |             resolution, bitrate_str = format | ||||||
|             format = "-".join(format) |             format = "-".join(format) | ||||||
|             height = int(resolution.rstrip('P')) |             height = int(resolution.rstrip('Pp')) | ||||||
|             tbr = int(bitrate_str.rstrip('K')) |             tbr = int(bitrate_str.rstrip('Kk')) | ||||||
|  |  | ||||||
|             formats.append({ |             formats.append({ | ||||||
|                 'url': video_url, |                 'url': video_url, | ||||||
|                 'ext': extension, |  | ||||||
|                 'resolution': resolution, |                 'resolution': resolution, | ||||||
|                 'format': format, |                 'format': format, | ||||||
|                 'tbr': tbr, |                 'tbr': tbr, | ||||||
| @@ -75,10 +88,14 @@ class SpankwireIE(InfoExtractor): | |||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'uploader': video_uploader, |             'title': title, | ||||||
|             'title': video_title, |  | ||||||
|             'thumbnail': thumbnail, |  | ||||||
|             'description': description, |             'description': description, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |             'uploader': uploader, | ||||||
|  |             'uploader_id': uploader_id, | ||||||
|  |             'upload_date': upload_date, | ||||||
|  |             'view_count': view_count, | ||||||
|  |             'comment_count': comment_count, | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'age_limit': age_limit, |             'age_limit': age_limit, | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -1,10 +1,15 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
| from .mtv import MTVServicesInfoExtractor | from .mtv import MTVServicesInfoExtractor | ||||||
|  |  | ||||||
|  |  | ||||||
| class SpikeIE(MTVServicesInfoExtractor): | class SpikeIE(MTVServicesInfoExtractor): | ||||||
|     _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+' |     _VALID_URL = r'''(?x)https?:// | ||||||
|  |         (www\.spike\.com/(video-clips|episodes)/.+| | ||||||
|  |          m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+)) | ||||||
|  |         ''' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle', |         'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle', | ||||||
|         'md5': '1a9265f32b0c375793d6c4ce45255256', |         'md5': '1a9265f32b0c375793d6c4ce45255256', | ||||||
| @@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor): | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     _FEED_URL = 'http://www.spike.com/feeds/mrss/' |     _FEED_URL = 'http://www.spike.com/feeds/mrss/' | ||||||
|  |     _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.search(self._VALID_URL, url) | ||||||
|  |         mobile_id = mobj.group('mobile_id') | ||||||
|  |         if mobile_id is not None: | ||||||
|  |             url = 'http://www.spike.com/video-clips/%s' % mobile_id | ||||||
|  |         return super(SpikeIE, self)._real_extract(url) | ||||||
|   | |||||||
| @@ -6,115 +6,120 @@ import re | |||||||
| from .subtitles import SubtitlesInfoExtractor | from .subtitles import SubtitlesInfoExtractor | ||||||
|  |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     RegexNotFoundError, |     compat_str, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class TEDIE(SubtitlesInfoExtractor): | class TEDIE(SubtitlesInfoExtractor): | ||||||
|     _VALID_URL=r'''http://www\.ted\.com/ |     _VALID_URL = r'''(?x) | ||||||
|                    ( |         (?P<proto>https?://) | ||||||
|                         ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist |         (?P<type>www|embed)(?P<urlmain>\.ted\.com/ | ||||||
|                         | |         ( | ||||||
|                         ((?P<type_talk>talks)) # We have a simple talk |             (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist | ||||||
|                    ) |             | | ||||||
|                    (/lang/(.*?))? # The url may contain the language |             ((?P<type_talk>talks)) # We have a simple talk | ||||||
|                    /(?P<name>\w+) # Here goes the name and then ".html" |         ) | ||||||
|                    ''' |         (/lang/(.*?))? # The url may contain the language | ||||||
|  |         /(?P<name>\w+) # Here goes the name and then ".html" | ||||||
|  |         .*)$ | ||||||
|  |         ''' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', |         'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', | ||||||
|         'file': '102.mp4', |  | ||||||
|         'md5': '4ea1dada91e4174b53dac2bb8ace429d', |         'md5': '4ea1dada91e4174b53dac2bb8ace429d', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922", |             'id': '102', | ||||||
|             "title": "Dan Dennett: The illusion of consciousness" |             'ext': 'mp4', | ||||||
|  |             'title': 'The illusion of consciousness', | ||||||
|  |             'description': ('Philosopher Dan Dennett makes a compelling ' | ||||||
|  |                 'argument that not only don\'t we understand our own ' | ||||||
|  |                 'consciousness, but that half the time our brains are ' | ||||||
|  |                 'actively fooling us.'), | ||||||
|  |             'uploader': 'Dan Dennett', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     @classmethod |     _FORMATS_PREFERENCE = { | ||||||
|     def suitable(cls, url): |         'low': 1, | ||||||
|         """Receives a URL and returns True if suitable for this IE.""" |         'medium': 2, | ||||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None |         'high': 3, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _extract_info(self, webpage): | ||||||
|  |         info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>', | ||||||
|  |             webpage, 'info json') | ||||||
|  |         return json.loads(info_json) | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         m=re.match(self._VALID_URL, url, re.VERBOSE) |         m = re.match(self._VALID_URL, url, re.VERBOSE) | ||||||
|  |         if m.group('type') == 'embed': | ||||||
|  |             desktop_url = m.group('proto') + 'www' + m.group('urlmain') | ||||||
|  |             return self.url_result(desktop_url, 'TED') | ||||||
|  |         name = m.group('name') | ||||||
|         if m.group('type_talk'): |         if m.group('type_talk'): | ||||||
|             return self._talk_info(url) |             return self._talk_info(url, name) | ||||||
|         else : |         else: | ||||||
|             playlist_id=m.group('playlist_id') |             return self._playlist_videos_info(url, name) | ||||||
|             name=m.group('name') |  | ||||||
|             self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name)) |  | ||||||
|             return [self._playlist_videos_info(url,name,playlist_id)] |  | ||||||
|  |  | ||||||
|  |     def _playlist_videos_info(self, url, name): | ||||||
|     def _playlist_videos_info(self, url, name, playlist_id): |  | ||||||
|         '''Returns the videos of the playlist''' |         '''Returns the videos of the playlist''' | ||||||
|  |  | ||||||
|         webpage = self._download_webpage( |         webpage = self._download_webpage(url, name, | ||||||
|             url, playlist_id, 'Downloading playlist webpage') |             'Downloading playlist webpage') | ||||||
|         matches = re.finditer( |         info = self._extract_info(webpage) | ||||||
|             r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>', |         playlist_info = info['playlist'] | ||||||
|             webpage) |  | ||||||
|  |  | ||||||
|         playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>', |  | ||||||
|                                                  webpage, 'playlist title') |  | ||||||
|  |  | ||||||
|         playlist_entries = [ |         playlist_entries = [ | ||||||
|             self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED') |             self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key()) | ||||||
|             for m in matches |             for talk in info['talks'] | ||||||
|         ] |         ] | ||||||
|         return self.playlist_result( |         return self.playlist_result( | ||||||
|             playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title) |             playlist_entries, | ||||||
|  |             playlist_id=compat_str(playlist_info['id']), | ||||||
|  |             playlist_title=playlist_info['title']) | ||||||
|  |  | ||||||
|     def _talk_info(self, url, video_id=0): |     def _talk_info(self, url, video_name): | ||||||
|         """Return the video for the talk in the url""" |         webpage = self._download_webpage(url, video_name) | ||||||
|         m = re.match(self._VALID_URL, url,re.VERBOSE) |  | ||||||
|         video_name = m.group('name') |  | ||||||
|         webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name) |  | ||||||
|         self.report_extraction(video_name) |         self.report_extraction(video_name) | ||||||
|         # If the url includes the language we get the title translated |  | ||||||
|         title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>', |         talk_info = self._extract_info(webpage)['talks'][0] | ||||||
|                                         webpage, 'title') |  | ||||||
|         json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>', |  | ||||||
|                                     webpage, 'json data') |  | ||||||
|         info = json.loads(json_data) |  | ||||||
|         desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>', |  | ||||||
|                                        webpage, 'description', flags = re.DOTALL) |  | ||||||
|          |  | ||||||
|         thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"', |  | ||||||
|                                        webpage, 'thumbnail') |  | ||||||
|         formats = [{ |         formats = [{ | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'url': stream['file'], |             'url': format_url, | ||||||
|             'format': stream['id'] |             'format_id': format_id, | ||||||
|         } for stream in info['htmlStreams']] |             'format': format_id, | ||||||
|  |             'preference': self._FORMATS_PREFERENCE.get(format_id, -1), | ||||||
|         video_id = info['id'] |         } for (format_id, format_url) in talk_info['nativeDownloads'].items()] | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         video_id = compat_str(talk_info['id']) | ||||||
|         # subtitles |         # subtitles | ||||||
|         video_subtitles = self.extract_subtitles(video_id, webpage) |         video_subtitles = self.extract_subtitles(video_id, talk_info) | ||||||
|         if self._downloader.params.get('listsubtitles', False): |         if self._downloader.params.get('listsubtitles', False): | ||||||
|             self._list_available_subtitles(video_id, webpage) |             self._list_available_subtitles(video_id, talk_info) | ||||||
|             return |             return | ||||||
|  |  | ||||||
|  |         thumbnail = talk_info['thumb'] | ||||||
|  |         if not thumbnail.startswith('http'): | ||||||
|  |             thumbnail = 'http://' + thumbnail | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': title, |             'title': talk_info['title'], | ||||||
|  |             'uploader': talk_info['speaker'], | ||||||
|             'thumbnail': thumbnail, |             'thumbnail': thumbnail, | ||||||
|             'description': desc, |             'description': self._og_search_description(webpage), | ||||||
|             'subtitles': video_subtitles, |             'subtitles': video_subtitles, | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|     def _get_available_subtitles(self, video_id, webpage): |     def _get_available_subtitles(self, video_id, talk_info): | ||||||
|         try: |         languages = [lang['languageCode'] for lang in talk_info.get('languages', [])] | ||||||
|             options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL) |         if languages: | ||||||
|             languages = re.findall(r'(?:<option value=")(\S+)"', options) |             sub_lang_list = {} | ||||||
|             if languages: |             for l in languages: | ||||||
|                 sub_lang_list = {} |                 url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) | ||||||
|                 for l in languages: |                 sub_lang_list[l] = url | ||||||
|                     url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) |             return sub_lang_list | ||||||
|                     sub_lang_list[l] = url |         else: | ||||||
|                 return sub_lang_list |  | ||||||
|         except RegexNotFoundError: |  | ||||||
|             self._downloader.report_warning(u'video doesn\'t have subtitles') |             self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||||
|         return {} |             return {} | ||||||
|   | |||||||
| @@ -39,6 +39,8 @@ class TestURLIE(InfoExtractor): | |||||||
|                     ('Found multiple matching extractors: %s' % |                     ('Found multiple matching extractors: %s' % | ||||||
|                         ' '.join(ie.IE_NAME for ie in matching_extractors)), |                         ' '.join(ie.IE_NAME for ie in matching_extractors)), | ||||||
|                     expected=True) |                     expected=True) | ||||||
|  |         else: | ||||||
|  |             extractor = matching_extractors[0] | ||||||
|  |  | ||||||
|         num_str = mobj.group('num') |         num_str = mobj.group('num') | ||||||
|         num = int(num_str) if num_str else 0 |         num = int(num_str) if num_str else 0 | ||||||
|   | |||||||
| @@ -13,7 +13,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language | |||||||
| class ThePlatformIE(InfoExtractor): | class ThePlatformIE(InfoExtractor): | ||||||
|     _VALID_URL = r'''(?x) |     _VALID_URL = r'''(?x) | ||||||
|         (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/ |         (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/ | ||||||
|            (?P<config>[^/\?]+/(?:swf|config)/select/)? |            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? | ||||||
|          |theplatform:)(?P<id>[^/\?&]+)''' |          |theplatform:)(?P<id>[^/\?&]+)''' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
| @@ -54,10 +54,15 @@ class ThePlatformIE(InfoExtractor): | |||||||
|  |  | ||||||
|         f4m_node = body.find(_x('smil:seq/smil:video')) |         f4m_node = body.find(_x('smil:seq/smil:video')) | ||||||
|         if f4m_node is not None: |         if f4m_node is not None: | ||||||
|  |             f4m_url = f4m_node.attrib['src'] | ||||||
|  |             if 'manifest.f4m?' not in f4m_url: | ||||||
|  |                 f4m_url += '?' | ||||||
|  |             # the parameters are from syfy.com, other sites may use others, | ||||||
|  |             # they also work for nbc.com | ||||||
|  |             f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' | ||||||
|             formats = [{ |             formats = [{ | ||||||
|                 'ext': 'flv', |                 'ext': 'flv', | ||||||
|                 # the parameters are from syfy.com, other sites may use others |                 'url': f4m_url, | ||||||
|                 'url': f4m_node.attrib['src'] + '?g=UXWGVKRWHFSP&hdcore=3.0.3', |  | ||||||
|             }] |             }] | ||||||
|         else: |         else: | ||||||
|             base_url = head.find(_x('smil:meta')).attrib['base'] |             base_url = head.find(_x('smil:meta')).attrib['base'] | ||||||
| @@ -95,9 +100,10 @@ class ThePlatformIE(InfoExtractor): | |||||||
|         if mobj.group('config'): |         if mobj.group('config'): | ||||||
|             config_url = url+ '&form=json' |             config_url = url+ '&form=json' | ||||||
|             config_url = config_url.replace('swf/', 'config/') |             config_url = config_url.replace('swf/', 'config/') | ||||||
|  |             config_url = config_url.replace('onsite/', 'onsite/config/') | ||||||
|             config_json = self._download_webpage(config_url, video_id, u'Downloading config') |             config_json = self._download_webpage(config_url, video_id, u'Downloading config') | ||||||
|             config = json.loads(config_json) |             config = json.loads(config_json) | ||||||
|             smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4' |             smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' | ||||||
|         else: |         else: | ||||||
|             smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' |             smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' | ||||||
|                 'format=smil&mbr=true'.format(video_id)) |                 'format=smil&mbr=true'.format(video_id)) | ||||||
|   | |||||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | |||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from youtube_dl.utils import ExtractorError | from ..utils import ExtractorError | ||||||
|  |  | ||||||
|  |  | ||||||
| class TinyPicIE(InfoExtractor): | class TinyPicIE(InfoExtractor): | ||||||
|   | |||||||
							
								
								
									
										44
									
								
								youtube_dl/extractor/trutube.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								youtube_dl/extractor/trutube.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,44 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class TruTubeIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*' | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', | ||||||
|  |         'md5': 'c5b6e301b0a2040b074746cbeaa26ca1', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '14880', | ||||||
|  |             'ext': 'flv', | ||||||
|  |             'title': 'Ramses II - Proven To Be A Red Headed Caucasoid', | ||||||
|  |             'thumbnail': 're:^http:.*\.jpg$', | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |         video_title = self._og_search_title(webpage).strip() | ||||||
|  |         thumbnail = self._search_regex( | ||||||
|  |             r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False) | ||||||
|  |  | ||||||
|  |         all_formats = re.finditer( | ||||||
|  |             r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage) | ||||||
|  |         formats = [{ | ||||||
|  |             'format_id': m.group('key'), | ||||||
|  |             'quality': -i, | ||||||
|  |             'url': m.group('url'), | ||||||
|  |         } for i, m in enumerate(all_formats)] | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': video_title, | ||||||
|  |             'formats': formats, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |         } | ||||||
							
								
								
									
										84
									
								
								youtube_dl/extractor/tvigle.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/tvigle.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | |||||||
|  | # encoding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     unified_strdate, | ||||||
|  |     clean_html, | ||||||
|  |     int_or_none, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class TvigleIE(InfoExtractor): | ||||||
|  |     IE_NAME = 'tvigle' | ||||||
|  |     IE_DESC = 'Интернет-телевидение Tvigle.ru' | ||||||
|  |     _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)' | ||||||
|  |  | ||||||
|  |     _TESTS = [ | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081', | ||||||
|  |             'md5': '09afba4616666249f087efc6dcf83cb3', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '503081', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'Брат 2 ', | ||||||
|  |                 'description': 'md5:f5a42970f50648cee3d7ad740f3ae769', | ||||||
|  |                 'upload_date': '20110919', | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433', | ||||||
|  |             'md5': 'e7efe5350dd5011d0de6550b53c3ba7b', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '676433', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком', | ||||||
|  |                 'description': 'md5:027f7dc872948f14c96d19b4178428a4', | ||||||
|  |                 'upload_date': '20121218', | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |     ] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         video_data = self._download_xml( | ||||||
|  |             'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id, video_id, 'Downloading video XML') | ||||||
|  |  | ||||||
|  |         video = video_data.find('./video') | ||||||
|  |  | ||||||
|  |         title = video.get('name') | ||||||
|  |         description = video.get('anons') | ||||||
|  |         if description: | ||||||
|  |             description = clean_html(description) | ||||||
|  |         thumbnail = video_data.get('img') | ||||||
|  |         upload_date = unified_strdate(video.get('date')) | ||||||
|  |         like_count = int_or_none(video.get('vtp')) | ||||||
|  |  | ||||||
|  |         formats = [] | ||||||
|  |         for num, (format_id, format_note) in enumerate([['low_file', 'SQ'], ['file', 'HQ'], ['hd', 'HD 720']]): | ||||||
|  |             video_url = video.get(format_id) | ||||||
|  |             if not video_url: | ||||||
|  |                 continue | ||||||
|  |             formats.append({ | ||||||
|  |                 'url': video_url, | ||||||
|  |                 'format_id': format_id, | ||||||
|  |                 'format_note': format_note, | ||||||
|  |                 'quality': num, | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |             'upload_date': upload_date, | ||||||
|  |             'like_count': like_count, | ||||||
|  |             'age_limit': 18, | ||||||
|  |             'formats': formats, | ||||||
|  |         } | ||||||
							
								
								
									
										164
									
								
								youtube_dl/extractor/udemy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										164
									
								
								youtube_dl/extractor/udemy.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,164 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     compat_urllib_parse, | ||||||
|  |     compat_urllib_request, | ||||||
|  |     ExtractorError, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class UdemyIE(InfoExtractor): | ||||||
|  |     IE_NAME = 'udemy' | ||||||
|  |     _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)' | ||||||
|  |     _LOGIN_URL = 'https://www.udemy.com/join/login-submit/' | ||||||
|  |     _NETRC_MACHINE = 'udemy' | ||||||
|  |  | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757', | ||||||
|  |         'md5': '98eda5b657e752cf945d8445e261b5c5', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '160614', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Introduction and Installation', | ||||||
|  |             'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876', | ||||||
|  |             'duration': 579.29, | ||||||
|  |         }, | ||||||
|  |         'skip': 'Requires udemy account credentials', | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     def _handle_error(self, response): | ||||||
|  |         if not isinstance(response, dict): | ||||||
|  |             return | ||||||
|  |         error = response.get('error') | ||||||
|  |         if error: | ||||||
|  |             error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message')) | ||||||
|  |             error_data = error.get('data') | ||||||
|  |             if error_data: | ||||||
|  |                 error_str += ' - %s' % error_data.get('formErrors') | ||||||
|  |             raise ExtractorError(error_str, expected=True) | ||||||
|  |  | ||||||
|  |     def _download_json(self, url, video_id, note='Downloading JSON metadata'): | ||||||
|  |         response = super(UdemyIE, self)._download_json(url, video_id, note) | ||||||
|  |         self._handle_error(response) | ||||||
|  |         return response | ||||||
|  |  | ||||||
|  |     def _real_initialize(self): | ||||||
|  |         self._login() | ||||||
|  |  | ||||||
|  |     def _login(self): | ||||||
|  |         (username, password) = self._get_login_info() | ||||||
|  |         if username is None: | ||||||
|  |             raise ExtractorError( | ||||||
|  |                 'Udemy account is required, use --username and --password options to provide account credentials.', | ||||||
|  |                 expected=True) | ||||||
|  |  | ||||||
|  |         login_popup = self._download_webpage( | ||||||
|  |             'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None, | ||||||
|  |             'Downloading login popup') | ||||||
|  |  | ||||||
|  |         if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>': | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         csrf = self._html_search_regex(r'<input type="hidden" name="csrf" value="(.+?)"', login_popup, 'csrf token') | ||||||
|  |  | ||||||
|  |         login_form = { | ||||||
|  |             'email': username, | ||||||
|  |             'password': password, | ||||||
|  |             'csrf': csrf, | ||||||
|  |             'displayType': 'json', | ||||||
|  |             'isSubmitted': '1', | ||||||
|  |         } | ||||||
|  |         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) | ||||||
|  |         response = self._download_json(request, None, 'Logging in as %s' % username) | ||||||
|  |  | ||||||
|  |         if 'returnUrl' not in response: | ||||||
|  |             raise ExtractorError('Unable to log in') | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         lecture_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         lecture = self._download_json( | ||||||
|  |             'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, lecture_id, 'Downloading lecture JSON') | ||||||
|  |  | ||||||
|  |         if lecture['assetType'] != 'Video': | ||||||
|  |             raise ExtractorError('Lecture %s is not a video' % lecture_id, expected=True) | ||||||
|  |  | ||||||
|  |         asset = lecture['asset'] | ||||||
|  |  | ||||||
|  |         stream_url = asset['streamUrl'] | ||||||
|  |         mobj = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url) | ||||||
|  |         if mobj: | ||||||
|  |             return self.url_result(mobj.group(1), 'Youtube') | ||||||
|  |  | ||||||
|  |         video_id = asset['id'] | ||||||
|  |         thumbnail = asset['thumbnailUrl'] | ||||||
|  |         duration = asset['data']['duration'] | ||||||
|  |  | ||||||
|  |         download_url = asset['downloadUrl'] | ||||||
|  |  | ||||||
|  |         formats = [ | ||||||
|  |             { | ||||||
|  |                 'url': download_url['Video480p'][0], | ||||||
|  |                 'format_id': '360p', | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 'url': download_url['Video'][0], | ||||||
|  |                 'format_id': '720p', | ||||||
|  |             }, | ||||||
|  |         ] | ||||||
|  |  | ||||||
|  |         title = lecture['title'] | ||||||
|  |         description = lecture['description'] | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |             'duration': duration, | ||||||
|  |             'formats': formats | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class UdemyCourseIE(UdemyIE): | ||||||
|  |     IE_NAME = 'udemy:course' | ||||||
|  |     _VALID_URL = r'https?://www\.udemy\.com/(?P<coursepath>[\da-z-]+)' | ||||||
|  |     _SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<' | ||||||
|  |     _ALREADY_ENROLLED = '>You are already taking this course.<' | ||||||
|  |     _TESTS = [] | ||||||
|  |  | ||||||
|  |     @classmethod | ||||||
|  |     def suitable(cls, url): | ||||||
|  |         return False if UdemyIE.suitable(url) else super(UdemyCourseIE, cls).suitable(url) | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         course_path = mobj.group('coursepath') | ||||||
|  |  | ||||||
|  |         response = self._download_json( | ||||||
|  |             'https://www.udemy.com/api-1.1/courses/%s' % course_path, course_path, 'Downloading course JSON') | ||||||
|  |  | ||||||
|  |         course_id = int(response['id']) | ||||||
|  |         course_title = response['title'] | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage( | ||||||
|  |             'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id, course_id, 'Enrolling in the course') | ||||||
|  |  | ||||||
|  |         if self._SUCCESSFULLY_ENROLLED in webpage: | ||||||
|  |             self.to_screen('%s: Successfully enrolled in' % course_id) | ||||||
|  |         elif self._ALREADY_ENROLLED in webpage: | ||||||
|  |             self.to_screen('%s: Already enrolled in' % course_id) | ||||||
|  |  | ||||||
|  |         response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id, | ||||||
|  |             course_id, 'Downloading course curriculum') | ||||||
|  |  | ||||||
|  |         entries = [ | ||||||
|  |             self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy') | ||||||
|  |             for asset in response if asset.get('assetType') == 'Video' | ||||||
|  |         ] | ||||||
|  |  | ||||||
|  |         return self.playlist_result(entries, course_id, course_title) | ||||||
| @@ -4,6 +4,7 @@ import re | |||||||
| import json | import json | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import compat_urllib_request | ||||||
|  |  | ||||||
|  |  | ||||||
| class VeohIE(InfoExtractor): | class VeohIE(InfoExtractor): | ||||||
| @@ -24,6 +25,13 @@ class VeohIE(InfoExtractor): | |||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|  |         age_limit = 0 | ||||||
|  |         if 'class="adultwarning-container"' in webpage: | ||||||
|  |             self.report_age_confirmation() | ||||||
|  |             age_limit = 18 | ||||||
|  |             request = compat_urllib_request.Request(url) | ||||||
|  |             request.add_header('Cookie', 'confirmedAdult=true') | ||||||
|  |             webpage = self._download_webpage(request, video_id) | ||||||
|  |  | ||||||
|         m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage) |         m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage) | ||||||
|         if m_youtube is not None: |         if m_youtube is not None: | ||||||
| @@ -44,4 +52,5 @@ class VeohIE(InfoExtractor): | |||||||
|             'thumbnail': info.get('highResImage') or info.get('medResImage'), |             'thumbnail': info.get('highResImage') or info.get('medResImage'), | ||||||
|             'description': info['description'], |             'description': info['description'], | ||||||
|             'view_count': info['views'], |             'view_count': info['views'], | ||||||
|  |             'age_limit': age_limit, | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -4,14 +4,11 @@ from __future__ import unicode_literals | |||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ExtractorError | ||||||
|     ExtractorError, | from .rutv import RUTVIE | ||||||
|     int_or_none |  | ||||||
| ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class VestiIE(InfoExtractor): | class VestiIE(InfoExtractor): | ||||||
|     IE_NAME = 'vesti' |  | ||||||
|     IE_DESC = 'Вести.Ru' |     IE_DESC = 'Вести.Ru' | ||||||
|     _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)' |     _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)' | ||||||
|  |  | ||||||
| @@ -30,6 +27,20 @@ class VestiIE(InfoExtractor): | |||||||
|                 'skip_download': True, |                 'skip_download': True, | ||||||
|             }, |             }, | ||||||
|         }, |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.vesti.ru/doc.html?id=1349233', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '773865', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Участники митинга штурмуют Донецкую областную администрацию', | ||||||
|  |                 'description': 'md5:1a160e98b3195379b4c849f2f4958009', | ||||||
|  |                 'duration': 210, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # m3u8 download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|         { |         { | ||||||
|             'url': 'http://www.vesti.ru/only_video.html?vid=576180', |             'url': 'http://www.vesti.ru/only_video.html?vid=576180', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
| @@ -44,6 +55,20 @@ class VestiIE(InfoExtractor): | |||||||
|                 'skip_download': True, |                 'skip_download': True, | ||||||
|             }, |             }, | ||||||
|         }, |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://hitech.vesti.ru/news/view/id/4000', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '766888', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', | ||||||
|  |                 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', | ||||||
|  |                 'duration': 279, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 # m3u8 download | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|         { |         { | ||||||
|             'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403', |             'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
| @@ -57,7 +82,7 @@ class VestiIE(InfoExtractor): | |||||||
|                 # m3u8 download |                 # m3u8 download | ||||||
|                 'skip_download': True, |                 'skip_download': True, | ||||||
|             }, |             }, | ||||||
|             'skip': 'Blocked outside Russia' |             'skip': 'Blocked outside Russia', | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
|             'url': 'http://sochi2014.vesti.ru/live/play/live_id/301', |             'url': 'http://sochi2014.vesti.ru/live/play/live_id/301', | ||||||
| @@ -72,7 +97,7 @@ class VestiIE(InfoExtractor): | |||||||
|                 'skip_download': True, |                 'skip_download': True, | ||||||
|             }, |             }, | ||||||
|             'skip': 'Translation has finished' |             'skip': 'Translation has finished' | ||||||
|         } |         }, | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -81,90 +106,16 @@ class VestiIE(InfoExtractor): | |||||||
|  |  | ||||||
|         page = self._download_webpage(url, video_id, 'Downloading page') |         page = self._download_webpage(url, video_id, 'Downloading page') | ||||||
|  |  | ||||||
|         mobj = re.search(r'<meta property="og:video" content=".+?\.swf\?v?id=(?P<id>\d+).*?" />', page) |         mobj = re.search( | ||||||
|  |             r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)', | ||||||
|  |             page) | ||||||
|         if mobj: |         if mobj: | ||||||
|             video_type = 'video' |  | ||||||
|             video_id = mobj.group('id') |             video_id = mobj.group('id') | ||||||
|         else: |             page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id, | ||||||
|             mobj = re.search( |                 'Downloading video page') | ||||||
|                 r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>', page) |  | ||||||
|  |  | ||||||
|             if not mobj: |         rutv_url = RUTVIE._extract_url(page) | ||||||
|                 raise ExtractorError('No media found') |         if rutv_url: | ||||||
|  |             return self.url_result(rutv_url, 'RUTV') | ||||||
|  |  | ||||||
|             video_type = mobj.group('type') |         raise ExtractorError('No video found', expected=True) | ||||||
|             video_id = mobj.group('id') |  | ||||||
|  |  | ||||||
|         json_data = self._download_json( |  | ||||||
|             'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id), |  | ||||||
|             video_id, 'Downloading JSON') |  | ||||||
|  |  | ||||||
|         if json_data['errors']: |  | ||||||
|             raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True) |  | ||||||
|  |  | ||||||
|         playlist = json_data['data']['playlist'] |  | ||||||
|         medialist = playlist['medialist'] |  | ||||||
|         media = medialist[0] |  | ||||||
|  |  | ||||||
|         if media['errors']: |  | ||||||
|             raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True) |  | ||||||
|  |  | ||||||
|         view_count = playlist.get('count_views') |  | ||||||
|         priority_transport = playlist['priority_transport'] |  | ||||||
|  |  | ||||||
|         thumbnail = media['picture'] |  | ||||||
|         width = media['width'] |  | ||||||
|         height = media['height'] |  | ||||||
|         description = media['anons'] |  | ||||||
|         title = media['title'] |  | ||||||
|         duration = int_or_none(media.get('duration')) |  | ||||||
|  |  | ||||||
|         formats = [] |  | ||||||
|  |  | ||||||
|         for transport, links in media['sources'].items(): |  | ||||||
|             for quality, url in links.items(): |  | ||||||
|                 if transport == 'rtmp': |  | ||||||
|                     mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url) |  | ||||||
|                     if not mobj: |  | ||||||
|                         continue |  | ||||||
|                     fmt = { |  | ||||||
|                         'url': mobj.group('url'), |  | ||||||
|                         'play_path': mobj.group('playpath'), |  | ||||||
|                         'app': mobj.group('app'), |  | ||||||
|                         'page_url': 'http://player.rutv.ru', |  | ||||||
|                         'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22', |  | ||||||
|                         'rtmp_live': True, |  | ||||||
|                         'ext': 'flv', |  | ||||||
|                         'vbr': int(quality), |  | ||||||
|                     } |  | ||||||
|                 elif transport == 'm3u8': |  | ||||||
|                     fmt = { |  | ||||||
|                         'url': url, |  | ||||||
|                         'ext': 'mp4', |  | ||||||
|                     } |  | ||||||
|                 else: |  | ||||||
|                     fmt = { |  | ||||||
|                         'url': url |  | ||||||
|                     } |  | ||||||
|                 fmt.update({ |  | ||||||
|                     'width': width, |  | ||||||
|                     'height': height, |  | ||||||
|                     'format_id': '%s-%s' % (transport, quality), |  | ||||||
|                     'preference': -1 if priority_transport == transport else -2, |  | ||||||
|                 }) |  | ||||||
|                 formats.append(fmt) |  | ||||||
|  |  | ||||||
|         if not formats: |  | ||||||
|             raise ExtractorError('No media links available for %s' % video_id) |  | ||||||
|  |  | ||||||
|         self._sort_formats(formats) |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             'id': video_id, |  | ||||||
|             'title': title, |  | ||||||
|             'description': description, |  | ||||||
|             'thumbnail': thumbnail, |  | ||||||
|             'view_count': view_count, |  | ||||||
|             'duration': duration, |  | ||||||
|             'formats': formats, |  | ||||||
|         } |  | ||||||
| @@ -2,7 +2,6 @@ from __future__ import unicode_literals | |||||||
|  |  | ||||||
| import re | import re | ||||||
| import xml.etree.ElementTree | import xml.etree.ElementTree | ||||||
| import datetime |  | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
| @@ -22,17 +21,50 @@ class VevoIE(InfoExtractor): | |||||||
|            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| |            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| | ||||||
|            vevo:) |            vevo:) | ||||||
|         (?P<id>[^&?#]+)''' |         (?P<id>[^&?#]+)''' | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', |         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', | ||||||
|         'file': 'GB1101300280.mp4', |  | ||||||
|         "md5": "06bea460acb744eab74a9d7dcb4bfd61", |         "md5": "06bea460acb744eab74a9d7dcb4bfd61", | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|  |             'id': 'GB1101300280', | ||||||
|  |             'ext': 'mp4', | ||||||
|             "upload_date": "20130624", |             "upload_date": "20130624", | ||||||
|             "uploader": "Hurts", |             "uploader": "Hurts", | ||||||
|             "title": "Somebody to Die For", |             "title": "Somebody to Die For", | ||||||
|             "duration": 230.12, |             "duration": 230.12, | ||||||
|             "width": 1920, |             "width": 1920, | ||||||
|             "height": 1080, |             "height": 1080, | ||||||
|  |             # timestamp and upload_date are often incorrect; seem to change randomly | ||||||
|  |             'timestamp': int, | ||||||
|  |         } | ||||||
|  |     }, { | ||||||
|  |         'note': 'v3 SMIL format', | ||||||
|  |         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', | ||||||
|  |         'md5': '893ec0e0d4426a1d96c01de8f2bdff58', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'USUV71302923', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'upload_date': '20140219', | ||||||
|  |             'uploader': 'Cassadee Pope', | ||||||
|  |             'title': 'I Wish I Could Break Your Heart', | ||||||
|  |             'duration': 226.101, | ||||||
|  |             'age_limit': 0, | ||||||
|  |             'timestamp': int, | ||||||
|  |         } | ||||||
|  |     }, { | ||||||
|  |         'note': 'Age-limited video', | ||||||
|  |         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'USRV81300282', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'age_limit': 18, | ||||||
|  |             'title': 'Tunnel Vision (Explicit)', | ||||||
|  |             'uploader': 'Justin Timberlake', | ||||||
|  |             'upload_date': 're:2013070[34]', | ||||||
|  |             'timestamp': int, | ||||||
|  |         }, | ||||||
|  |         'params': { | ||||||
|  |             'skip_download': 'true', | ||||||
|         } |         } | ||||||
|     }] |     }] | ||||||
|     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' |     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' | ||||||
| @@ -105,9 +137,31 @@ class VevoIE(InfoExtractor): | |||||||
|         video_info = self._download_json(json_url, video_id)['video'] |         video_info = self._download_json(json_url, video_id)['video'] | ||||||
|  |  | ||||||
|         formats = self._formats_from_json(video_info) |         formats = self._formats_from_json(video_info) | ||||||
|  |  | ||||||
|  |         is_explicit = video_info.get('isExplicit') | ||||||
|  |         if is_explicit is True: | ||||||
|  |             age_limit = 18 | ||||||
|  |         elif is_explicit is False: | ||||||
|  |             age_limit = 0 | ||||||
|  |         else: | ||||||
|  |             age_limit = None | ||||||
|  |  | ||||||
|  |         # Download SMIL | ||||||
|  |         smil_blocks = sorted(( | ||||||
|  |             f for f in video_info['videoVersions'] | ||||||
|  |             if f['sourceType'] == 13), | ||||||
|  |             key=lambda f: f['version']) | ||||||
|  |  | ||||||
|  |         smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( | ||||||
|  |             self._SMIL_BASE_URL, video_id, video_id.lower()) | ||||||
|  |         if smil_blocks: | ||||||
|  |             smil_url_m = self._search_regex( | ||||||
|  |                 r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL', | ||||||
|  |                 fatal=False) | ||||||
|  |             if smil_url_m is not None: | ||||||
|  |                 smil_url = smil_url_m | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( |  | ||||||
|                 self._SMIL_BASE_URL, video_id, video_id.lower()) |  | ||||||
|             smil_xml = self._download_webpage(smil_url, video_id, |             smil_xml = self._download_webpage(smil_url, video_id, | ||||||
|                                               'Downloading SMIL info') |                                               'Downloading SMIL info') | ||||||
|             formats.extend(self._formats_from_smil(smil_xml)) |             formats.extend(self._formats_from_smil(smil_xml)) | ||||||
| @@ -119,13 +173,14 @@ class VevoIE(InfoExtractor): | |||||||
|  |  | ||||||
|         timestamp_ms = int(self._search_regex( |         timestamp_ms = int(self._search_regex( | ||||||
|             r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date')) |             r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date')) | ||||||
|         upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000) |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': video_info['title'], |             'title': video_info['title'], | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'thumbnail': video_info['imageUrl'], |             'thumbnail': video_info['imageUrl'], | ||||||
|             'upload_date': upload_date.strftime('%Y%m%d'), |             'timestamp': timestamp_ms // 1000, | ||||||
|             'uploader': video_info['mainArtists'][0]['artistName'], |             'uploader': video_info['mainArtists'][0]['artistName'], | ||||||
|             'duration': video_info['duration'], |             'duration': video_info['duration'], | ||||||
|  |             'age_limit': age_limit, | ||||||
|         } |         } | ||||||
|   | |||||||
							
								
								
									
										81
									
								
								youtube_dl/extractor/videobam.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								youtube_dl/extractor/videobam.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,81 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  | import json | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import int_or_none | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class VideoBamIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)' | ||||||
|  |  | ||||||
|  |     _TESTS = [ | ||||||
|  |         { | ||||||
|  |             'url': 'http://videobam.com/OiJQM', | ||||||
|  |             'md5': 'db471f27763a531f10416a0c58b5a1e0', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': 'OiJQM', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Is Alcohol Worse Than Ecstasy?', | ||||||
|  |                 'description': 'md5:d25b96151515c91debc42bfbb3eb2683', | ||||||
|  |                 'uploader': 'frihetsvinge', | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://videobam.com/pqLvq', | ||||||
|  |             'md5': 'd9a565b5379a99126ef94e1d7f9a383e', | ||||||
|  |             'note': 'HD video', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': 'pqLvq', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': '_', | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |     ] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page') | ||||||
|  |  | ||||||
|  |         formats = [] | ||||||
|  |  | ||||||
|  |         for preference, format_id in enumerate(['low', 'high']): | ||||||
|  |             mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page) | ||||||
|  |             if not mobj: | ||||||
|  |                 continue | ||||||
|  |             formats.append({ | ||||||
|  |                 'url': mobj.group('url'), | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'format_id': format_id, | ||||||
|  |                 'preference': preference, | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         if not formats: | ||||||
|  |             player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config')) | ||||||
|  |             formats = [{ | ||||||
|  |                 'url': item['url'], | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |             } for item in player_config['playlist'] if 'autoPlay' in item] | ||||||
|  |  | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         title = self._og_search_title(page, default='_', fatal=False) | ||||||
|  |         description = self._og_search_description(page, default=None) | ||||||
|  |         thumbnail = self._og_search_thumbnail(page) | ||||||
|  |         uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None) | ||||||
|  |         view_count = int_or_none( | ||||||
|  |             self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False)) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |             'uploader': uploader, | ||||||
|  |             'view_count': view_count, | ||||||
|  |             'formats': formats, | ||||||
|  |             'age_limit': 18, | ||||||
|  |         } | ||||||
| @@ -1,22 +1,23 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from .internetvideoarchive import InternetVideoArchiveIE | from .internetvideoarchive import InternetVideoArchiveIE | ||||||
| from ..utils import ( | from ..utils import compat_urlparse | ||||||
|     compat_urlparse, |  | ||||||
| ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class VideoDetectiveIE(InfoExtractor): | class VideoDetectiveIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' |     _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487', |         'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', | ||||||
|         u'file': u'194487.mp4', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '194487', | ||||||
|             u'title': u'KICK-ASS 2', |             'ext': 'mp4', | ||||||
|             u'description': u'md5:65ba37ad619165afac7d432eaded6013', |             'title': 'KICK-ASS 2', | ||||||
|             u'duration': 135, |             'description': 'md5:65ba37ad619165afac7d432eaded6013', | ||||||
|  |             'duration': 135, | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor): | |||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|         og_video = self._og_search_video_url(webpage) |         og_video = self._og_search_video_url(webpage) | ||||||
|         query = compat_urlparse.urlparse(og_video).query |         query = compat_urlparse.urlparse(og_video).query | ||||||
|         return self.url_result(InternetVideoArchiveIE._build_url(query), |         return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key()) | ||||||
|             ie=InternetVideoArchiveIE.ie_key()) |  | ||||||
|   | |||||||
							
								
								
									
										67
									
								
								youtube_dl/extractor/videolecturesnet.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								youtube_dl/extractor/videolecturesnet.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,67 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     find_xpath_attr, | ||||||
|  |     int_or_none, | ||||||
|  |     parse_duration, | ||||||
|  |     unified_strdate, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class VideoLecturesNetIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/' | ||||||
|  |     IE_NAME = 'videolectures.net' | ||||||
|  |  | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'promogram_igor_mekjavic_eng', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Automatics, robotics and biocybernetics', | ||||||
|  |             'description': 'md5:815fc1deb6b3a2bff99de2d5325be482', | ||||||
|  |             'upload_date': '20130627', | ||||||
|  |             'duration': 565, | ||||||
|  |             'thumbnail': 're:http://.*\.jpg', | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id | ||||||
|  |         smil = self._download_xml(smil_url, video_id) | ||||||
|  |  | ||||||
|  |         title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content'] | ||||||
|  |         description = find_xpath_attr(smil, './/meta', 'name', 'abstract').attrib['content'] | ||||||
|  |         upload_date = unified_strdate( | ||||||
|  |             find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content']) | ||||||
|  |  | ||||||
|  |         switch = smil.find('.//switch') | ||||||
|  |         duration = parse_duration(switch.attrib.get('dur')) | ||||||
|  |         thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail') | ||||||
|  |         thumbnail = ( | ||||||
|  |             None if thumbnail_el is None else thumbnail_el.attrib.get('src')) | ||||||
|  |  | ||||||
|  |         formats = [{ | ||||||
|  |             'url': v.attrib['src'], | ||||||
|  |             'width': int_or_none(v.attrib.get('width')), | ||||||
|  |             'height': int_or_none(v.attrib.get('height')), | ||||||
|  |             'filesize': int_or_none(v.attrib.get('size')), | ||||||
|  |             'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0, | ||||||
|  |             'ext': v.attrib.get('ext'), | ||||||
|  |         } for v in switch.findall('./video') | ||||||
|  |             if v.attrib.get('proto') == 'http'] | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |             'upload_date': upload_date, | ||||||
|  |             'duration': duration, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |             'formats': formats, | ||||||
|  |         } | ||||||
| @@ -1,29 +1,33 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
|  |     US_RATINGS, | ||||||
| ) | ) | ||||||
| from .subtitles import SubtitlesInfoExtractor | from .subtitles import SubtitlesInfoExtractor | ||||||
|  |  | ||||||
|  |  | ||||||
| class VikiIE(SubtitlesInfoExtractor): | class VikiIE(SubtitlesInfoExtractor): | ||||||
|     IE_NAME = u'viki' |     IE_NAME = 'viki' | ||||||
|  |  | ||||||
|     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)' |     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14', |         'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', | ||||||
|         u'file': u'1023585v.mp4', |         'md5': 'a21454021c2646f5433514177e2caa5f', | ||||||
|         u'md5': u'a21454021c2646f5433514177e2caa5f', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '1023585v', | ||||||
|             u'title': u'Heirs Episode 14', |             'ext': 'mp4', | ||||||
|             u'uploader': u'SBS', |             'title': 'Heirs Episode 14', | ||||||
|             u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e', |             'uploader': 'SBS', | ||||||
|             u'upload_date': u'20131121', |             'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e', | ||||||
|             u'age_limit': 13, |             'upload_date': '20131121', | ||||||
|  |             'age_limit': 13, | ||||||
|         }, |         }, | ||||||
|         u'skip': u'Blocked in the US', |         'skip': 'Blocked in the US', | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -44,28 +48,21 @@ class VikiIE(SubtitlesInfoExtractor): | |||||||
|  |  | ||||||
|         rating_str = self._html_search_regex( |         rating_str = self._html_search_regex( | ||||||
|             r'<strong>Rating: </strong>\s*([^<]*)<', webpage, |             r'<strong>Rating: </strong>\s*([^<]*)<', webpage, | ||||||
|             u'rating information', default='').strip() |             'rating information', default='').strip() | ||||||
|         RATINGS = { |         age_limit = US_RATINGS.get(rating_str) | ||||||
|             'G': 0, |  | ||||||
|             'PG': 10, |  | ||||||
|             'PG-13': 13, |  | ||||||
|             'R': 16, |  | ||||||
|             'NC': 18, |  | ||||||
|         } |  | ||||||
|         age_limit = RATINGS.get(rating_str) |  | ||||||
|  |  | ||||||
|         info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id |         info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id | ||||||
|         info_webpage = self._download_webpage( |         info_webpage = self._download_webpage( | ||||||
|             info_url, video_id, note=u'Downloading info page') |             info_url, video_id, note='Downloading info page') | ||||||
|         if re.match(r'\s*<div\s+class="video-error', info_webpage): |         if re.match(r'\s*<div\s+class="video-error', info_webpage): | ||||||
|             raise ExtractorError( |             raise ExtractorError( | ||||||
|                 u'Video %s is blocked from your location.' % video_id, |                 'Video %s is blocked from your location.' % video_id, | ||||||
|                 expected=True) |                 expected=True) | ||||||
|         video_url = self._html_search_regex( |         video_url = self._html_search_regex( | ||||||
|             r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL') |             r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL') | ||||||
|  |  | ||||||
|         upload_date_str = self._html_search_regex( |         upload_date_str = self._html_search_regex( | ||||||
|             r'"created_at":"([^"]+)"', info_webpage, u'upload date') |             r'"created_at":"([^"]+)"', info_webpage, 'upload date') | ||||||
|         upload_date = ( |         upload_date = ( | ||||||
|             unified_strdate(upload_date_str) |             unified_strdate(upload_date_str) | ||||||
|             if upload_date_str is not None |             if upload_date_str is not None | ||||||
|   | |||||||
| @@ -8,6 +8,7 @@ import itertools | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from .subtitles import SubtitlesInfoExtractor | from .subtitles import SubtitlesInfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|  |     compat_HTTPError, | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|     compat_urllib_request, |     compat_urllib_request, | ||||||
|     clean_html, |     clean_html, | ||||||
| @@ -101,6 +102,15 @@ class VimeoIE(SubtitlesInfoExtractor): | |||||||
|         }, |         }, | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|  |     @classmethod | ||||||
|  |     def suitable(cls, url): | ||||||
|  |         if VimeoChannelIE.suitable(url): | ||||||
|  |             # Otherwise channel urls like http://vimeo.com/channels/31259 would | ||||||
|  |             # match | ||||||
|  |             return False | ||||||
|  |         else: | ||||||
|  |             return super(VimeoIE, cls).suitable(url) | ||||||
|  |  | ||||||
|     def _login(self): |     def _login(self): | ||||||
|         (username, password) = self._get_login_info() |         (username, password) = self._get_login_info() | ||||||
|         if username is None: |         if username is None: | ||||||
| @@ -172,7 +182,18 @@ class VimeoIE(SubtitlesInfoExtractor): | |||||||
|  |  | ||||||
|         # Retrieve video webpage to extract further information |         # Retrieve video webpage to extract further information | ||||||
|         request = compat_urllib_request.Request(url, None, headers) |         request = compat_urllib_request.Request(url, None, headers) | ||||||
|         webpage = self._download_webpage(request, video_id) |         try: | ||||||
|  |             webpage = self._download_webpage(request, video_id) | ||||||
|  |         except ExtractorError as ee: | ||||||
|  |             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: | ||||||
|  |                 errmsg = ee.cause.read() | ||||||
|  |                 if b'Because of its privacy settings, this video cannot be played here' in errmsg: | ||||||
|  |                     raise ExtractorError( | ||||||
|  |                         'Cannot download embed-only video without embedding ' | ||||||
|  |                         'URL. Please call youtube-dl with the URL of the page ' | ||||||
|  |                         'that embeds this video.', | ||||||
|  |                         expected=True) | ||||||
|  |             raise | ||||||
|  |  | ||||||
|         # Now we begin extracting as much information as we can from what we |         # Now we begin extracting as much information as we can from what we | ||||||
|         # retrieved. First we extract the information common to all extractors, |         # retrieved. First we extract the information common to all extractors, | ||||||
| @@ -221,7 +242,9 @@ class VimeoIE(SubtitlesInfoExtractor): | |||||||
|         # Extract video thumbnail |         # Extract video thumbnail | ||||||
|         video_thumbnail = config["video"].get("thumbnail") |         video_thumbnail = config["video"].get("thumbnail") | ||||||
|         if video_thumbnail is None: |         if video_thumbnail is None: | ||||||
|             _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1] |             video_thumbs = config["video"].get("thumbs") | ||||||
|  |             if video_thumbs and isinstance(video_thumbs, dict): | ||||||
|  |                 _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1] | ||||||
|  |  | ||||||
|         # Extract video description |         # Extract video description | ||||||
|         video_description = None |         video_description = None | ||||||
| @@ -318,7 +341,7 @@ class VimeoIE(SubtitlesInfoExtractor): | |||||||
|  |  | ||||||
| class VimeoChannelIE(InfoExtractor): | class VimeoChannelIE(InfoExtractor): | ||||||
|     IE_NAME = 'vimeo:channel' |     IE_NAME = 'vimeo:channel' | ||||||
|     _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)' |     _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$' | ||||||
|     _MORE_PAGES_INDICATOR = r'<a.+?rel="next"' |     _MORE_PAGES_INDICATOR = r'<a.+?rel="next"' | ||||||
|     _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"' |     _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"' | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,8 +1,10 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  | import json | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import unified_strdate | ||||||
|  |  | ||||||
|  |  | ||||||
| class VineIE(InfoExtractor): | class VineIE(InfoExtractor): | ||||||
| @@ -13,31 +15,46 @@ class VineIE(InfoExtractor): | |||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'b9KOOWX7HUx', |             'id': 'b9KOOWX7HUx', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'uploader': 'Jack Dorsey', |  | ||||||
|             'title': 'Chicken.', |             'title': 'Chicken.', | ||||||
|  |             'description': 'Chicken.', | ||||||
|  |             'upload_date': '20130519', | ||||||
|  |             'uploader': 'Jack Dorsey', | ||||||
|  |             'uploader_id': '76', | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |  | ||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|         webpage_url = 'https://vine.co/v/' + video_id |  | ||||||
|         webpage = self._download_webpage(webpage_url, video_id) |  | ||||||
|  |  | ||||||
|         self.report_extraction(video_id) |         webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id) | ||||||
|  |  | ||||||
|         video_url = self._html_search_meta('twitter:player:stream', webpage, |         data = json.loads(self._html_search_regex( | ||||||
|             'video URL') |             r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data')) | ||||||
|  |  | ||||||
|         uploader = self._html_search_regex(r'<p class="username">(.*?)</p>', |         formats = [ | ||||||
|             webpage, 'uploader', fatal=False, flags=re.DOTALL) |             { | ||||||
|  |                 'url': data['videoLowURL'], | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'format_id': 'low', | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 'url': data['videoUrl'], | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'format_id': 'standard', | ||||||
|  |             } | ||||||
|  |         ] | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'url': video_url, |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'title': self._og_search_title(webpage), |             'title': self._og_search_title(webpage), | ||||||
|             'thumbnail': self._og_search_thumbnail(webpage), |             'description': data['description'], | ||||||
|             'uploader': uploader, |             'thumbnail': data['thumbnailUrl'], | ||||||
|         } |             'upload_date': unified_strdate(data['created']), | ||||||
|  |             'uploader': data['username'], | ||||||
|  |             'uploader_id': data['userIdStr'], | ||||||
|  |             'like_count': data['likes']['count'], | ||||||
|  |             'comment_count': data['comments']['count'], | ||||||
|  |             'repost_count': data['reposts']['count'], | ||||||
|  |             'formats': formats, | ||||||
|  |         } | ||||||
| @@ -16,7 +16,7 @@ from ..utils import ( | |||||||
|  |  | ||||||
| class VKIE(InfoExtractor): | class VKIE(InfoExtractor): | ||||||
|     IE_NAME = 'vk.com' |     IE_NAME = 'vk.com' | ||||||
|     _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)' |     _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))' | ||||||
|     _NETRC_MACHINE = 'vk' |     _NETRC_MACHINE = 'vk' | ||||||
|  |  | ||||||
|     _TESTS = [ |     _TESTS = [ | ||||||
| @@ -42,6 +42,18 @@ class VKIE(InfoExtractor): | |||||||
|                 'duration': 558, |                 'duration': 558, | ||||||
|             } |             } | ||||||
|         }, |         }, | ||||||
|  |         { | ||||||
|  |             'note': 'Embedded video', | ||||||
|  |             'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1', | ||||||
|  |             'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '162925554', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'uploader': 'Vladimir Gavrin', | ||||||
|  |                 'title': 'Lin Dan', | ||||||
|  |                 'duration': 101, | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|         { |         { | ||||||
|             'url': 'http://vk.com/video-8871596_164049491', |             'url': 'http://vk.com/video-8871596_164049491', | ||||||
|             'md5': 'a590bcaf3d543576c9bd162812387666', |             'md5': 'a590bcaf3d543576c9bd162812387666', | ||||||
| @@ -54,7 +66,7 @@ class VKIE(InfoExtractor): | |||||||
|                 'duration': 8352, |                 'duration': 8352, | ||||||
|             }, |             }, | ||||||
|             'skip': 'Requires vk account credentials', |             'skip': 'Requires vk account credentials', | ||||||
|         } |         }, | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|     def _login(self): |     def _login(self): | ||||||
| @@ -82,7 +94,10 @@ class VKIE(InfoExtractor): | |||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('videoid') | ||||||
|  |  | ||||||
|  |         if not video_id: | ||||||
|  |             video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) | ||||||
|  |  | ||||||
|         info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id |         info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id | ||||||
|         info_page = self._download_webpage(info_url, video_id) |         info_page = self._download_webpage(info_url, video_id) | ||||||
|   | |||||||
| @@ -13,7 +13,7 @@ class VubeIE(InfoExtractor): | |||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', |         'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', | ||||||
|         'md5': 'f81dcf6d0448e3291f54380181695821', |         'md5': 'db7aba89d4603dadd627e9d1973946fe', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'YL2qNPkqon', |             'id': 'YL2qNPkqon', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
| @@ -77,4 +77,4 @@ class VubeIE(InfoExtractor): | |||||||
|             'like_count': like_count, |             'like_count': like_count, | ||||||
|             'dislike_count': dislike_count, |             'dislike_count': dislike_count, | ||||||
|             'comment_count': comment_count, |             'comment_count': comment_count, | ||||||
|         } |         } | ||||||
|   | |||||||
							
								
								
									
										114
									
								
								youtube_dl/extractor/wdr.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								youtube_dl/extractor/wdr.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,114 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     unified_strdate, | ||||||
|  |     compat_urlparse, | ||||||
|  |     determine_ext, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class WDRIE(InfoExtractor): | ||||||
|  |     _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?' | ||||||
|  |     _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX | ||||||
|  |  | ||||||
|  |     _TESTS = [ | ||||||
|  |         { | ||||||
|  |             'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': 'mdb-362427', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'Servicezeit', | ||||||
|  |                 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb', | ||||||
|  |                 'upload_date': '20140310', | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': 'mdb-363194', | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'title': 'Marga Spiegel ist tot', | ||||||
|  |                 'description': 'md5:2309992a6716c347891c045be50992e4', | ||||||
|  |                 'upload_date': '20140311', | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html', | ||||||
|  |             'md5': '83e9e8fefad36f357278759870805898', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': 'mdb-194332', | ||||||
|  |                 'ext': 'mp3', | ||||||
|  |                 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)', | ||||||
|  |                 'description': 'md5:2309992a6716c347891c045be50992e4', | ||||||
|  |                 'upload_date': '20091129', | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html', | ||||||
|  |             'md5': 'cfff440d4ee64114083ac44676df5d15', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': 'mdb-363068', | ||||||
|  |                 'ext': 'mp3', | ||||||
|  |                 'title': 'Grenzenlos lecker - Baklava', | ||||||
|  |                 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', | ||||||
|  |                 'upload_date': '20140311', | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |     ] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         page_url = mobj.group('url') | ||||||
|  |         page_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage(url, page_id) | ||||||
|  |  | ||||||
|  |         if mobj.group('player') is None: | ||||||
|  |             entries = [ | ||||||
|  |                 self.url_result(page_url + href, 'WDR') | ||||||
|  |                 for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage) | ||||||
|  |             ] | ||||||
|  |             return self.playlist_result(entries, page_id) | ||||||
|  |  | ||||||
|  |         flashvars = compat_urlparse.parse_qs( | ||||||
|  |             self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars')) | ||||||
|  |  | ||||||
|  |         page_id = flashvars['trackerClipId'][0] | ||||||
|  |         video_url = flashvars['dslSrc'][0] | ||||||
|  |         title = flashvars['trackerClipTitle'][0] | ||||||
|  |         thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None | ||||||
|  |  | ||||||
|  |         if 'trackerClipAirTime' in flashvars: | ||||||
|  |             upload_date = flashvars['trackerClipAirTime'][0] | ||||||
|  |         else: | ||||||
|  |             upload_date = self._html_search_meta('DC.Date', webpage, 'upload date') | ||||||
|  |  | ||||||
|  |         if upload_date: | ||||||
|  |             upload_date = unified_strdate(upload_date) | ||||||
|  |  | ||||||
|  |         if video_url.endswith('.f4m'): | ||||||
|  |             video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' | ||||||
|  |             ext = 'flv' | ||||||
|  |         else: | ||||||
|  |             ext = determine_ext(video_url) | ||||||
|  |  | ||||||
|  |         description = self._html_search_meta('Description', webpage, 'description') | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': page_id, | ||||||
|  |             'url': video_url, | ||||||
|  |             'ext': ext, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |             'upload_date': upload_date, | ||||||
|  |         } | ||||||
| @@ -6,14 +6,15 @@ from .common import InfoExtractor | |||||||
|  |  | ||||||
|  |  | ||||||
| class WimpIE(InfoExtractor): | class WimpIE(InfoExtractor): | ||||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/' |     _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.wimp.com/deerfence/', |         'url': 'http://www.wimp.com/maruexhausted/', | ||||||
|         'file': 'deerfence.flv', |         'md5': 'f1acced123ecb28d9bb79f2479f2b6a1', | ||||||
|         'md5': '8b215e2e0168c6081a1cf84b2846a2b5', |  | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             "title": "Watch Till End: Herd of deer jump over a fence.", |             'id': 'maruexhausted', | ||||||
|             "description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.", |             'ext': 'flv', | ||||||
|  |             'title': 'Maru is exhausted.', | ||||||
|  |             'description': 'md5:57e099e857c0a4ea312542b684a869b8', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -30,4 +31,4 @@ class WimpIE(InfoExtractor): | |||||||
|             'title': self._og_search_title(webpage), |             'title': self._og_search_title(webpage), | ||||||
|             'thumbnail': self._og_search_thumbnail(webpage), |             'thumbnail': self._og_search_thumbnail(webpage), | ||||||
|             'description': self._og_search_description(webpage), |             'description': self._og_search_description(webpage), | ||||||
|         } |         } | ||||||
| @@ -22,8 +22,8 @@ class WorldStarHipHopIE(InfoExtractor): | |||||||
|         webpage_src = self._download_webpage(url, video_id) |         webpage_src = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|         m_vevo_id = re.search(r'videoId=(.*?)&?', |         m_vevo_id = re.search(r'videoId=(.*?)&?', | ||||||
|             webpage_src) |                               webpage_src) | ||||||
|          |  | ||||||
|         if m_vevo_id is not None: |         if m_vevo_id is not None: | ||||||
|             self.to_screen(u'Vevo video detected:') |             self.to_screen(u'Vevo video detected:') | ||||||
|             return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo') |             return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo') | ||||||
|   | |||||||
| @@ -4,51 +4,51 @@ import re | |||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     compat_urllib_parse, |  | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|  |     unified_strdate, | ||||||
|  |     str_to_int, | ||||||
|  |     int_or_none, | ||||||
|  |     parse_duration, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class XHamsterIE(InfoExtractor): | class XHamsterIE(InfoExtractor): | ||||||
|     """Information Extractor for xHamster""" |     """Information Extractor for xHamster""" | ||||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' |     _VALID_URL = r'http://(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' | ||||||
|     _TESTS = [{ |     _TESTS = [ | ||||||
|         'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', |         { | ||||||
|         'file': '1509445.mp4', |             'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', | ||||||
|         'md5': '8281348b8d3c53d39fffb377d24eac4e', |             'md5': '8281348b8d3c53d39fffb377d24eac4e', | ||||||
|         'info_dict': { |             'info_dict': { | ||||||
|             "upload_date": "20121014", |                 'id': '1509445', | ||||||
|             "uploader_id": "Ruseful2011", |                 'ext': 'mp4', | ||||||
|             "title": "FemaleAgent Shy beauty takes the bait", |                 'title': 'FemaleAgent Shy beauty takes the bait', | ||||||
|             "age_limit": 18, |                 'upload_date': '20121014', | ||||||
|  |                 'uploader_id': 'Ruseful2011', | ||||||
|  |                 'duration': 893, | ||||||
|  |                 'age_limit': 18, | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', | ||||||
|  |             'md5': '4cbd8d56708ecb4fb4124c23e4acb81a', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '2221348', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Britney Spears  Sexy Booty', | ||||||
|  |                 'upload_date': '20130914', | ||||||
|  |                 'uploader_id': 'jojo747400', | ||||||
|  |                 'duration': 200, | ||||||
|  |                 'age_limit': 18, | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|     }, |     ] | ||||||
|     { |  | ||||||
|         'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', |  | ||||||
|         'file': '2221348.flv', |  | ||||||
|         'md5': 'e767b9475de189320f691f49c679c4c7', |  | ||||||
|         'info_dict': { |  | ||||||
|             "upload_date": "20130914", |  | ||||||
|             "uploader_id": "jojo747400", |  | ||||||
|             "title": "Britney Spears  Sexy Booty", |  | ||||||
|             "age_limit": 18, |  | ||||||
|         } |  | ||||||
|     }] |  | ||||||
|  |  | ||||||
|     def _real_extract(self,url): |     def _real_extract(self,url): | ||||||
|         def extract_video_url(webpage): |         def extract_video_url(webpage): | ||||||
|             mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage) |             mp4 = re.search(r'<video\s+.*?file="([^"]+)".*?>', webpage) | ||||||
|             if mobj is None: |  | ||||||
|                 raise ExtractorError('Unable to extract media URL') |  | ||||||
|             if len(mobj.group('server')) == 0: |  | ||||||
|                 return compat_urllib_parse.unquote(mobj.group('file')) |  | ||||||
|             else: |  | ||||||
|                 return mobj.group('server')+'/key='+mobj.group('file') |  | ||||||
|  |  | ||||||
|         def extract_mp4_video_url(webpage): |  | ||||||
|             mp4 = re.search(r'<a href=\"(.+?)\" class=\"mp4Play\"',webpage) |  | ||||||
|             if mp4 is None: |             if mp4 is None: | ||||||
|                 return None |                 raise ExtractorError('Unable to extract media URL') | ||||||
|             else: |             else: | ||||||
|                 return mp4.group(1) |                 return mp4.group(1) | ||||||
|  |  | ||||||
| @@ -62,50 +62,49 @@ class XHamsterIE(InfoExtractor): | |||||||
|         mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo) |         mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo) | ||||||
|         webpage = self._download_webpage(mrss_url, video_id) |         webpage = self._download_webpage(mrss_url, video_id) | ||||||
|  |  | ||||||
|         video_title = self._html_search_regex( |         title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title') | ||||||
|             r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title') |  | ||||||
|  |  | ||||||
|         # Only a few videos have an description |         # Only a few videos have an description | ||||||
|         mobj = re.search(r'<span>Description: </span>([^<]+)', webpage) |         mobj = re.search(r'<span>Description: </span>([^<]+)', webpage) | ||||||
|         video_description = mobj.group(1) if mobj else None |         description = mobj.group(1) if mobj else None | ||||||
|  |  | ||||||
|         mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage) |         upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'', | ||||||
|         if mobj: |             webpage, 'upload date', fatal=False) | ||||||
|             video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d') |         if upload_date: | ||||||
|         else: |             upload_date = unified_strdate(upload_date) | ||||||
|             video_upload_date = None |  | ||||||
|             self._downloader.report_warning('Unable to extract upload date') |  | ||||||
|  |  | ||||||
|         video_uploader_id = self._html_search_regex( |         uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)', | ||||||
|             r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)', |  | ||||||
|             webpage, 'uploader id', default='anonymous') |             webpage, 'uploader id', default='anonymous') | ||||||
|  |  | ||||||
|         video_thumbnail = self._search_regex( |         thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False) | ||||||
|             r'\'image\':\'(?P<thumbnail>[^\']+)\'', |  | ||||||
|             webpage, 'thumbnail', fatal=False) |         duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>', | ||||||
|  |             webpage, 'duration', fatal=False)) | ||||||
|  |  | ||||||
|  |         view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False) | ||||||
|  |         if view_count: | ||||||
|  |             view_count = str_to_int(view_count) | ||||||
|  |  | ||||||
|  |         mobj = re.search(r"hint='(?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes'", webpage) | ||||||
|  |         (like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None) | ||||||
|  |  | ||||||
|  |         mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage) | ||||||
|  |         comment_count = mobj.group('commentcount') if mobj else 0 | ||||||
|  |  | ||||||
|         age_limit = self._rta_search(webpage) |         age_limit = self._rta_search(webpage) | ||||||
|  |  | ||||||
|         hd = is_hd(webpage) |         hd = is_hd(webpage) | ||||||
|  |  | ||||||
|         video_url = extract_video_url(webpage) |         video_url = extract_video_url(webpage) | ||||||
|         formats = [{ |         formats = [{ | ||||||
|             'url': video_url, |             'url': video_url, | ||||||
|             'format_id': 'hd' if hd else 'sd', |             'format_id': 'hd' if hd else 'sd', | ||||||
|             'preference': 0, |             'preference': 1, | ||||||
|         }] |         }] | ||||||
|  |  | ||||||
|         video_mp4_url = extract_mp4_video_url(webpage) |  | ||||||
|         if video_mp4_url is not None: |  | ||||||
|             formats.append({ |  | ||||||
|                 'url': video_mp4_url, |  | ||||||
|                 'ext': 'mp4', |  | ||||||
|                 'format_id': 'mp4-hd' if hd else 'mp4-sd', |  | ||||||
|                 'preference': 1, |  | ||||||
|             }) |  | ||||||
|  |  | ||||||
|         if not hd: |         if not hd: | ||||||
|             webpage = self._download_webpage( |             mrss_url = self._search_regex(r'<link rel="canonical" href="([^"]+)', webpage, 'mrss_url') | ||||||
|                 mrss_url + '?hd', video_id, note='Downloading HD webpage') |             webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage') | ||||||
|             if is_hd(webpage): |             if is_hd(webpage): | ||||||
|                 video_url = extract_video_url(webpage) |                 video_url = extract_video_url(webpage) | ||||||
|                 formats.append({ |                 formats.append({ | ||||||
| @@ -118,11 +117,16 @@ class XHamsterIE(InfoExtractor): | |||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': video_title, |             'title': title, | ||||||
|             'formats': formats, |             'description': description, | ||||||
|             'description': video_description, |             'upload_date': upload_date, | ||||||
|             'upload_date': video_upload_date, |             'uploader_id': uploader_id, | ||||||
|             'uploader_id': video_uploader_id, |             'thumbnail': thumbnail, | ||||||
|             'thumbnail': video_thumbnail, |             'duration': duration, | ||||||
|  |             'view_count': view_count, | ||||||
|  |             'like_count': int_or_none(like_count), | ||||||
|  |             'dislike_count': int_or_none(dislike_count), | ||||||
|  |             'comment_count': int_or_none(comment_count), | ||||||
|             'age_limit': age_limit, |             'age_limit': age_limit, | ||||||
|  |             'formats': formats, | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -1,55 +1,49 @@ | |||||||
|  | # encoding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|  |  | ||||||
|     ExtractorError, |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class XNXXIE(InfoExtractor): | class XNXXIE(InfoExtractor): | ||||||
|     _VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)' |     _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)' | ||||||
|     VIDEO_URL_RE = r'flv_url=(.*?)&' |  | ||||||
|     VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM' |  | ||||||
|     VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&' |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', |         'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', | ||||||
|         u'file': u'1135332.flv', |         'md5': '0831677e2b4761795f68d417e0b7b445', | ||||||
|         u'md5': u'0831677e2b4761795f68d417e0b7b445', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '1135332', | ||||||
|             u"title": u"lida \u00bb Naked Funny Actress  (5)", |             'ext': 'flv', | ||||||
|             u"age_limit": 18, |             'title': 'lida » Naked Funny Actress  (5)', | ||||||
|  |             'age_limit': 18, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         if mobj is None: |         video_id = mobj.group('id') | ||||||
|             raise ExtractorError(u'Invalid URL: %s' % url) |  | ||||||
|         video_id = mobj.group(1) |  | ||||||
|  |  | ||||||
|         # Get webpage content |         # Get webpage content | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|         video_url = self._search_regex(self.VIDEO_URL_RE, |         video_url = self._search_regex(r'flv_url=(.*?)&', | ||||||
|             webpage, u'video URL') |             webpage, 'video URL') | ||||||
|         video_url = compat_urllib_parse.unquote(video_url) |         video_url = compat_urllib_parse.unquote(video_url) | ||||||
|  |  | ||||||
|         video_title = self._html_search_regex(self.VIDEO_TITLE_RE, |         video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM', | ||||||
|             webpage, u'title') |             webpage, 'title') | ||||||
|  |  | ||||||
|         video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE, |         video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&', | ||||||
|             webpage, u'thumbnail', fatal=False) |             webpage, 'thumbnail', fatal=False) | ||||||
|  |  | ||||||
|         return [{ |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'url': video_url, |             'url': video_url, | ||||||
|             'uploader': None, |  | ||||||
|             'upload_date': None, |  | ||||||
|             'title': video_title, |             'title': video_title, | ||||||
|             'ext': 'flv', |             'ext': 'flv', | ||||||
|             'thumbnail': video_thumbnail, |             'thumbnail': video_thumbnail, | ||||||
|             'description': None, |  | ||||||
|             'age_limit': 18, |             'age_limit': 18, | ||||||
|         }] |         } | ||||||
|   | |||||||
| @@ -7,19 +7,24 @@ from .common import InfoExtractor | |||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     compat_urllib_parse_urlparse, |     compat_urllib_parse_urlparse, | ||||||
|     compat_urllib_request, |     compat_urllib_request, | ||||||
|  |     parse_duration, | ||||||
|  |     str_to_int, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class XTubeIE(InfoExtractor): | class XTubeIE(InfoExtractor): | ||||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))' |     _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_', |         'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_', | ||||||
|         'file': 'kVTUy_G222_.mp4', |  | ||||||
|         'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab', |         'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             "title": "strange erotica", |             'id': 'kVTUy_G222_', | ||||||
|             "description": "surreal gay themed erotica...almost an ET kind of thing", |             'ext': 'mp4', | ||||||
|             "uploader": "greenshowers", |             'title': 'strange erotica', | ||||||
|             "age_limit": 18, |             'description': 'surreal gay themed erotica...almost an ET kind of thing', | ||||||
|  |             'uploader': 'greenshowers', | ||||||
|  |             'duration': 450, | ||||||
|  |             'age_limit': 18, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -32,10 +37,23 @@ class XTubeIE(InfoExtractor): | |||||||
|         req.add_header('Cookie', 'age_verified=1') |         req.add_header('Cookie', 'age_verified=1') | ||||||
|         webpage = self._download_webpage(req, video_id) |         webpage = self._download_webpage(req, video_id) | ||||||
|  |  | ||||||
|         video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, 'title') |         video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title') | ||||||
|         video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False) |         video_uploader = self._html_search_regex( | ||||||
|         video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, 'description', fatal=False) |             r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False) | ||||||
|         video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/') |         video_description = self._html_search_regex( | ||||||
|  |             r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False) | ||||||
|  |         video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/') | ||||||
|  |         duration = parse_duration(self._html_search_regex( | ||||||
|  |             r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False)) | ||||||
|  |         view_count = self._html_search_regex( | ||||||
|  |             r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False) | ||||||
|  |         if view_count: | ||||||
|  |             view_count = str_to_int(view_count) | ||||||
|  |         comment_count = self._html_search_regex( | ||||||
|  |             r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False) | ||||||
|  |         if comment_count: | ||||||
|  |             comment_count = str_to_int(comment_count) | ||||||
|  |  | ||||||
|         path = compat_urllib_parse_urlparse(video_url).path |         path = compat_urllib_parse_urlparse(video_url).path | ||||||
|         extension = os.path.splitext(path)[1][1:] |         extension = os.path.splitext(path)[1][1:] | ||||||
|         format = path.split('/')[5].split('_')[:2] |         format = path.split('/')[5].split('_')[:2] | ||||||
| @@ -48,6 +66,9 @@ class XTubeIE(InfoExtractor): | |||||||
|             'title': video_title, |             'title': video_title, | ||||||
|             'uploader': video_uploader, |             'uploader': video_uploader, | ||||||
|             'description': video_description, |             'description': video_description, | ||||||
|  |             'duration': duration, | ||||||
|  |             'view_count': view_count, | ||||||
|  |             'comment_count': comment_count, | ||||||
|             'url': video_url, |             'url': video_url, | ||||||
|             'ext': extension, |             'ext': extension, | ||||||
|             'format': format, |             'format': format, | ||||||
|   | |||||||
| @@ -1,3 +1,6 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  |  | ||||||
| import json | import json | ||||||
| import re | import re | ||||||
| import sys | import sys | ||||||
| @@ -17,24 +20,25 @@ from ..aes import ( | |||||||
|  |  | ||||||
|  |  | ||||||
| class YouPornIE(InfoExtractor): | class YouPornIE(InfoExtractor): | ||||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))' |     _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', |         'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', | ||||||
|         u'file': u'505835.mp4', |         'md5': '71ec5fcfddacf80f495efa8b6a8d9a89', | ||||||
|         u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': '505835', | ||||||
|             u"upload_date": u"20101221", |             'ext': 'mp4', | ||||||
|             u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", |             'upload_date': '20101221', | ||||||
|             u"uploader": u"Ask Dan And Jennifer", |             'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?', | ||||||
|             u"title": u"Sex Ed: Is It Safe To Masturbate Daily?", |             'uploader': 'Ask Dan And Jennifer', | ||||||
|             u"age_limit": 18, |             'title': 'Sex Ed: Is It Safe To Masturbate Daily?', | ||||||
|  |             'age_limit': 18, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         video_id = mobj.group('videoid') |         video_id = mobj.group('videoid') | ||||||
|         url = 'http://www.' + mobj.group('url') |         url = mobj.group('proto') + 'www.' + mobj.group('url') | ||||||
|  |  | ||||||
|         req = compat_urllib_request.Request(url) |         req = compat_urllib_request.Request(url) | ||||||
|         req.add_header('Cookie', 'age_verified=1') |         req.add_header('Cookie', 'age_verified=1') | ||||||
| @@ -42,7 +46,7 @@ class YouPornIE(InfoExtractor): | |||||||
|         age_limit = self._rta_search(webpage) |         age_limit = self._rta_search(webpage) | ||||||
|  |  | ||||||
|         # Get JSON parameters |         # Get JSON parameters | ||||||
|         json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters') |         json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters') | ||||||
|         try: |         try: | ||||||
|             params = json.loads(json_params) |             params = json.loads(json_params) | ||||||
|         except: |         except: | ||||||
| @@ -61,7 +65,7 @@ class YouPornIE(InfoExtractor): | |||||||
|         # Get all of the links from the page |         # Get all of the links from the page | ||||||
|         DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>' |         DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>' | ||||||
|         download_list_html = self._search_regex(DOWNLOAD_LIST_RE, |         download_list_html = self._search_regex(DOWNLOAD_LIST_RE, | ||||||
|             webpage, u'download list').strip() |             webpage, 'download list').strip() | ||||||
|         LINK_RE = r'<a href="([^"]+)">' |         LINK_RE = r'<a href="([^"]+)">' | ||||||
|         links = re.findall(LINK_RE, download_list_html) |         links = re.findall(LINK_RE, download_list_html) | ||||||
|  |  | ||||||
| @@ -86,7 +90,7 @@ class YouPornIE(InfoExtractor): | |||||||
|             resolution = format_parts[0] |             resolution = format_parts[0] | ||||||
|             height = int(resolution[:-len('p')]) |             height = int(resolution[:-len('p')]) | ||||||
|             bitrate = int(format_parts[1][:-len('k')]) |             bitrate = int(format_parts[1][:-len('k')]) | ||||||
|             format = u'-'.join(format_parts) + u'-' + dn |             format = '-'.join(format_parts) + '-' + dn | ||||||
|  |  | ||||||
|             formats.append({ |             formats.append({ | ||||||
|                 'url': video_url, |                 'url': video_url, | ||||||
|   | |||||||
| @@ -29,7 +29,6 @@ from ..utils import ( | |||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     PagedList, |     PagedList, | ||||||
|     RegexNotFoundError, |  | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
|     orderedSet, |     orderedSet, | ||||||
| @@ -195,14 +194,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|         '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10}, |         '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10}, | ||||||
|  |  | ||||||
|         # DASH mp4 video |         # DASH mp4 video | ||||||
|         '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'preference': -40}, |         '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||||
|         '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'preference': -40}, |         '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||||
|         '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40}, |         '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||||
|         '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40}, |         '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||||
|         '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, |         '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||||
|         '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40}, |         '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||||
|         '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40}, |         '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||||
|         '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, |         '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||||
|  |  | ||||||
|         # Dash mp4 audio |         # Dash mp4 audio | ||||||
|         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, |         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, | ||||||
| @@ -210,12 +209,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50}, |         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50}, | ||||||
|  |  | ||||||
|         # Dash webm |         # Dash webm | ||||||
|         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, |         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||||
|         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, |         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||||
|         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, |         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||||
|         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, |         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||||
|         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, |         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||||
|         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, |         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, | ||||||
|         '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40}, |         '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40}, | ||||||
|         '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40}, |         '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40}, | ||||||
|         '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40}, |         '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40}, | ||||||
| @@ -297,6 +296,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|                 u"format": "141", |                 u"format": "141", | ||||||
|             }, |             }, | ||||||
|         }, |         }, | ||||||
|  |         # DASH manifest with encrypted signature | ||||||
|  |         { | ||||||
|  |             u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA', | ||||||
|  |             u'info_dict': { | ||||||
|  |                 u'id': u'IB3lcPjvWLA', | ||||||
|  |                 u'ext': u'm4a', | ||||||
|  |                 u'title': u'Afrojack - The Spark ft. Spree Wilson', | ||||||
|  |                 u'description': u'md5:3199ed45ee8836572865580804d7ac0f', | ||||||
|  |                 u'uploader': u'AfrojackVEVO', | ||||||
|  |                 u'uploader_id': u'AfrojackVEVO', | ||||||
|  |                 u'upload_date': u'20131011', | ||||||
|  |             }, | ||||||
|  |             u"params": { | ||||||
|  |                 u'youtube_include_dash_manifest': True, | ||||||
|  |                 u'format': '141', | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -1114,14 +1130,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.') |         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.') | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|  |         proto = ( | ||||||
|  |             u'http' if self._downloader.params.get('prefer_insecure', False) | ||||||
|  |             else u'https') | ||||||
|  |  | ||||||
|         # Extract original video URL from URL with redirection, like age verification, using next_url parameter |         # Extract original video URL from URL with redirection, like age verification, using next_url parameter | ||||||
|         mobj = re.search(self._NEXT_URL_RE, url) |         mobj = re.search(self._NEXT_URL_RE, url) | ||||||
|         if mobj: |         if mobj: | ||||||
|             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/') |             url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/') | ||||||
|         video_id = self.extract_id(url) |         video_id = self.extract_id(url) | ||||||
|  |  | ||||||
|         # Get video webpage |         # Get video webpage | ||||||
|         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id |         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id | ||||||
|         video_webpage = self._download_webpage(url, video_id) |         video_webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|         # Attempt to extract SWF player URL |         # Attempt to extract SWF player URL | ||||||
| @@ -1146,7 +1166,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|                                                   'asv': 3, |                                                   'asv': 3, | ||||||
|                                                   'sts':'1588', |                                                   'sts':'1588', | ||||||
|                                                   }) |                                                   }) | ||||||
|             video_info_url = 'https://www.youtube.com/get_video_info?' + data |             video_info_url = proto + '://www.youtube.com/get_video_info?' + data | ||||||
|             video_info_webpage = self._download_webpage(video_info_url, video_id, |             video_info_webpage = self._download_webpage(video_info_url, video_id, | ||||||
|                                     note=False, |                                     note=False, | ||||||
|                                     errnote='unable to download video info webpage') |                                     errnote='unable to download video info webpage') | ||||||
| @@ -1154,7 +1174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|         else: |         else: | ||||||
|             age_gate = False |             age_gate = False | ||||||
|             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: |             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: | ||||||
|                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' |                 video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' | ||||||
|                         % (video_id, el_type)) |                         % (video_id, el_type)) | ||||||
|                 video_info_webpage = self._download_webpage(video_info_url, video_id, |                 video_info_webpage = self._download_webpage(video_info_url, video_id, | ||||||
|                                         note=False, |                                         note=False, | ||||||
| @@ -1269,11 +1289,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|  |  | ||||||
|         # Decide which formats to download |         # Decide which formats to download | ||||||
|         try: |         try: | ||||||
|             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage) |             mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage) | ||||||
|             if not mobj: |             if not mobj: | ||||||
|                 raise ValueError('Could not find vevo ID') |                 raise ValueError('Could not find vevo ID') | ||||||
|             info = json.loads(mobj.group(1)) |             json_code = uppercase_escape(mobj.group(1)) | ||||||
|             args = info['args'] |             ytplayer_config = json.loads(json_code) | ||||||
|  |             args = ytplayer_config['args'] | ||||||
|             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map |             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map | ||||||
|             # this signatures are encrypted |             # this signatures are encrypted | ||||||
|             if 'url_encoded_fmt_stream_map' not in args: |             if 'url_encoded_fmt_stream_map' not in args: | ||||||
| @@ -1366,12 +1387,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') |             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') | ||||||
|  |  | ||||||
|         # Look for the DASH manifest |         # Look for the DASH manifest | ||||||
|         dash_manifest_url_lst = video_info.get('dashmpd') |         if (self._downloader.params.get('youtube_include_dash_manifest', False)): | ||||||
|         if (dash_manifest_url_lst and dash_manifest_url_lst[0] and |  | ||||||
|                 self._downloader.params.get('youtube_include_dash_manifest', False)): |  | ||||||
|             try: |             try: | ||||||
|  |                 # The DASH manifest used needs to be the one from the original video_webpage. | ||||||
|  |                 # The one found in get_video_info seems to be using different signatures. | ||||||
|  |                 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage. | ||||||
|  |                 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the | ||||||
|  |                 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here. | ||||||
|  |                 if age_gate: | ||||||
|  |                     dash_manifest_url = video_info.get('dashmpd')[0] | ||||||
|  |                 else: | ||||||
|  |                     dash_manifest_url = ytplayer_config['args']['dashmpd'] | ||||||
|  |                 def decrypt_sig(mobj): | ||||||
|  |                     s = mobj.group(1) | ||||||
|  |                     dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) | ||||||
|  |                     return '/signature/%s' % dec_s | ||||||
|  |                 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) | ||||||
|                 dash_doc = self._download_xml( |                 dash_doc = self._download_xml( | ||||||
|                     dash_manifest_url_lst[0], video_id, |                     dash_manifest_url, video_id, | ||||||
|                     note=u'Downloading DASH manifest', |                     note=u'Downloading DASH manifest', | ||||||
|                     errnote=u'Could not download DASH manifest') |                     errnote=u'Could not download DASH manifest') | ||||||
|                 for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): |                 for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): | ||||||
| @@ -1416,7 +1449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|             'duration':     video_duration, |             'duration':     video_duration, | ||||||
|             'age_limit':    18 if age_gate else 0, |             'age_limit':    18 if age_gate else 0, | ||||||
|             'annotations':  video_annotations, |             'annotations':  video_annotations, | ||||||
|             'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id, |             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id, | ||||||
|             'view_count':   view_count, |             'view_count':   view_count, | ||||||
|             'like_count': like_count, |             'like_count': like_count, | ||||||
|             'dislike_count': dislike_count, |             'dislike_count': dislike_count, | ||||||
| @@ -1443,9 +1476,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | |||||||
|                      | |                      | | ||||||
|                         ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) |                         ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) | ||||||
|                      )""" |                      )""" | ||||||
|     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s' |     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' | ||||||
|     _MORE_PAGES_INDICATOR = r'data-link-type="next"' |     _MORE_PAGES_INDICATOR = r'data-link-type="next"' | ||||||
|     _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)' |     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)' | ||||||
|     IE_NAME = u'youtube:playlist' |     IE_NAME = u'youtube:playlist' | ||||||
|  |  | ||||||
|     def _real_initialize(self): |     def _real_initialize(self): | ||||||
| @@ -1460,11 +1493,15 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | |||||||
|         # the id of the playlist is just 'RD' + video_id |         # the id of the playlist is just 'RD' + video_id | ||||||
|         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) |         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) | ||||||
|         webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix') |         webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix') | ||||||
|         title_span = (get_element_by_attribute('class', 'title long-title', webpage) or |         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) | ||||||
|             get_element_by_attribute('class', 'title ', webpage)) |         title_span = (search_title('playlist-title') or | ||||||
|  |             search_title('title long-title') or search_title('title')) | ||||||
|         title = clean_html(title_span) |         title = clean_html(title_span) | ||||||
|         video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s' % re.escape(playlist_id) |         video_re = r'''(?x)data-video-username="(.*?)".*? | ||||||
|         ids = orderedSet(re.findall(video_re, webpage)) |                        href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id) | ||||||
|  |         matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) | ||||||
|  |         # Some of the videos may have been deleted, their username field is empty | ||||||
|  |         ids = [video_id for (username, video_id) in matches if username] | ||||||
|         url_results = self._ids_to_results(ids) |         url_results = self._ids_to_results(ids) | ||||||
|  |  | ||||||
|         return self.playlist_result(url_results, playlist_id, title) |         return self.playlist_result(url_results, playlist_id, title) | ||||||
| @@ -1493,29 +1530,31 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | |||||||
|             raise ExtractorError(u'For downloading YouTube.com top lists, use ' |             raise ExtractorError(u'For downloading YouTube.com top lists, use ' | ||||||
|                 u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) |                 u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) | ||||||
|  |  | ||||||
|  |         url = self._TEMPLATE_URL % playlist_id | ||||||
|  |         page = self._download_webpage(url, playlist_id) | ||||||
|  |         more_widget_html = content_html = page | ||||||
|  |  | ||||||
|         # Extract the video ids from the playlist pages |         # Extract the video ids from the playlist pages | ||||||
|         ids = [] |         ids = [] | ||||||
|  |  | ||||||
|         for page_num in itertools.count(1): |         for page_num in itertools.count(1): | ||||||
|             url = self._TEMPLATE_URL % (playlist_id, page_num) |             matches = re.finditer(self._VIDEO_RE, content_html) | ||||||
|             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) |  | ||||||
|             matches = re.finditer(self._VIDEO_RE, page) |  | ||||||
|             # We remove the duplicates and the link with index 0 |             # We remove the duplicates and the link with index 0 | ||||||
|             # (it's not the first video of the playlist) |             # (it's not the first video of the playlist) | ||||||
|             new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0') |             new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0') | ||||||
|             ids.extend(new_ids) |             ids.extend(new_ids) | ||||||
|  |  | ||||||
|             if re.search(self._MORE_PAGES_INDICATOR, page) is None: |             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html) | ||||||
|  |             if not mobj: | ||||||
|                 break |                 break | ||||||
|  |  | ||||||
|         try: |             more = self._download_json( | ||||||
|             playlist_title = self._og_search_title(page) |                 'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num) | ||||||
|         except RegexNotFoundError: |             content_html = more['content_html'] | ||||||
|             self.report_warning( |             more_widget_html = more['load_more_widget_html'] | ||||||
|                 u'Playlist page is missing OpenGraph title, falling back ...', |  | ||||||
|                 playlist_id) |         playlist_title = self._html_search_regex( | ||||||
|             playlist_title = self._html_search_regex( |                 r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title') | ||||||
|                 r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title') |  | ||||||
|  |  | ||||||
|         url_results = self._ids_to_results(ids) |         url_results = self._ids_to_results(ids) | ||||||
|         return self.playlist_result(url_results, playlist_id, playlist_title) |         return self.playlist_result(url_results, playlist_id, playlist_title) | ||||||
| @@ -1611,7 +1650,7 @@ class YoutubeChannelIE(InfoExtractor): | |||||||
|  |  | ||||||
| class YoutubeUserIE(InfoExtractor): | class YoutubeUserIE(InfoExtractor): | ||||||
|     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' |     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' | ||||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' |     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' | ||||||
|     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s' |     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s' | ||||||
|     _GDATA_PAGE_SIZE = 50 |     _GDATA_PAGE_SIZE = 50 | ||||||
|     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' |     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' | ||||||
| @@ -1710,12 +1749,50 @@ class YoutubeSearchIE(SearchInfoExtractor): | |||||||
|                   for video_id in video_ids] |                   for video_id in video_ids] | ||||||
|         return self.playlist_result(videos, query) |         return self.playlist_result(videos, query) | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeSearchDateIE(YoutubeSearchIE): | class YoutubeSearchDateIE(YoutubeSearchIE): | ||||||
|     IE_NAME = YoutubeSearchIE.IE_NAME + ':date' |     IE_NAME = YoutubeSearchIE.IE_NAME + ':date' | ||||||
|     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' |     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' | ||||||
|     _SEARCH_KEY = 'ytsearchdate' |     _SEARCH_KEY = 'ytsearchdate' | ||||||
|     IE_DESC = u'YouTube.com searches, newest videos first' |     IE_DESC = u'YouTube.com searches, newest videos first' | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class YoutubeSearchURLIE(InfoExtractor): | ||||||
|  |     IE_DESC = u'YouTube.com search URLs' | ||||||
|  |     IE_NAME = u'youtube:search_url' | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         query = compat_urllib_parse.unquote_plus(mobj.group('query')) | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage(url, query) | ||||||
|  |         result_code = self._search_regex( | ||||||
|  |             r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML') | ||||||
|  |  | ||||||
|  |         part_codes = re.findall( | ||||||
|  |             r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code) | ||||||
|  |         entries = [] | ||||||
|  |         for part_code in part_codes: | ||||||
|  |             part_title = self._html_search_regex( | ||||||
|  |                 r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False) | ||||||
|  |             part_url_snippet = self._html_search_regex( | ||||||
|  |                 r'(?s)href="([^"]+)"', part_code, 'item URL') | ||||||
|  |             part_url = compat_urlparse.urljoin( | ||||||
|  |                 'https://www.youtube.com/', part_url_snippet) | ||||||
|  |             entries.append({ | ||||||
|  |                 '_type': 'url', | ||||||
|  |                 'url': part_url, | ||||||
|  |                 'title': part_title, | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'entries': entries, | ||||||
|  |             'title': query, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeShowIE(InfoExtractor): | class YoutubeShowIE(InfoExtractor): | ||||||
|     IE_DESC = u'YouTube.com (multi-season) shows' |     IE_DESC = u'YouTube.com (multi-season) shows' | ||||||
|     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' |     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' | ||||||
|   | |||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user