Compare commits
	
		
			98 Commits
		
	
	
		
			2014.04.03
			...
			2014.04.11
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 140012d0f6 | ||
|  | 4be9f8c814 | ||
|  | 5c802bac37 | ||
|  | 6c30ff756a | ||
|  | 62749e4708 | ||
|  | 6b7dee4b38 | ||
|  | ef2041eb4e | ||
|  | 29e3e682af | ||
|  | f983c44199 | ||
|  | e4db19511a | ||
|  | c47d21da80 | ||
|  | 269aecd0c0 | ||
|  | aafddb2b0a | ||
|  | 6262ac8ac5 | ||
|  | 89938c719e | ||
|  | ec0fafbb19 | ||
|  | a5863bdf33 | ||
|  | b58ddb32ba | ||
|  | b9e12a8140 | ||
|  | 104aa7388a | ||
|  | c3855d28b0 | ||
|  | 734f90bb41 | ||
|  | 91a6addeeb | ||
|  | 9afb76c5ad | ||
|  | dfb2cb5cfd | ||
|  | 650d688d10 | ||
|  | 0ba77818f3 | ||
|  | 09baa7da7e | ||
|  | 85e787f51d | ||
|  | 2a9e1e453a | ||
|  | ee1e199685 | ||
|  | 17c5a00774 | ||
|  | 15c0e8e7b2 | ||
|  | cca37fba48 | ||
|  | 9d0993ec4a | ||
|  | 342f33bf9e | ||
|  | 7cd3bc5f99 | ||
|  | 931055e6cb | ||
|  | d0e4cf82f1 | ||
|  | 6f88df2c57 | ||
|  | 4479bf2762 | ||
|  | 1ff7c0f7d8 | ||
|  | 610e47c87e | ||
|  | 50f566076f | ||
|  | 92810ff497 | ||
|  | 60ccc59a1c | ||
|  | 91745595d3 | ||
|  | d6e40507d0 | ||
|  | deed48b472 | ||
|  | e4d41bfca5 | ||
|  | a355b70f27 | ||
|  | f8514f6186 | ||
|  | e09b8fcd9d | ||
|  | 7d1b527ff9 | ||
|  | f943c7b622 | ||
|  | 676eb3f2dd | ||
|  | 98b7cf1ace | ||
|  | c465afd736 | ||
|  | b84d6e7fc4 | ||
|  | 2efd5d78c1 | ||
|  | c8edf47b3a | ||
|  | 3b4c26a428 | ||
|  | 1525148114 | ||
|  | 9e0c5791c1 | ||
|  | 29a1ab2afc | ||
|  | fa387d2d99 | ||
|  | 6d0d573eca | ||
|  | bb799e811b | ||
|  | 04ee53eca1 | ||
|  | 659eb98a53 | ||
|  | ca6aada48e | ||
|  | 43df5a7e71 | ||
|  | 88f1c6de7b | ||
|  | 65a40ab82b | ||
|  | 4b9cced103 | ||
|  | 5c38625259 | ||
|  | 6344fa04bb | ||
|  | e3ced9ed61 | ||
|  | 5075d598bc | ||
|  | 68eb8e90e6 | ||
|  | d3a96346c4 | ||
|  | 0e518e2fea | ||
|  | 1e0a235f39 | ||
|  | 9ad400f75e | ||
|  | 3537b93d8a | ||
|  | 56eca2e956 | ||
|  | 2ad4d1ba07 | ||
|  | 4853de808b | ||
|  | 6ff5f12218 | ||
|  | 52a180684f | ||
|  | b21e25702f | ||
|  | 983af2600f | ||
|  | f34e6a2cd6 | ||
|  | a9f304031b | ||
|  | 9271bc8355 | ||
|  | d1b3e3dd75 | ||
|  | 968ed2a777 | ||
|  | 5fbd672c38 | 
							
								
								
									
										62
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										62
									
								
								README.md
									
									
									
									
									
								
							| @@ -371,7 +371,67 @@ If you want to create a build of youtube-dl yourself, you'll need | ||||
|  | ||||
| ### Adding support for a new site | ||||
|  | ||||
| If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/). | ||||
| If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`): | ||||
|  | ||||
| 1. [Fork this repository](https://github.com/rg3/youtube-dl/fork) | ||||
| 2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git` | ||||
| 3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor` | ||||
| 4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`: | ||||
|  | ||||
|         # coding: utf-8 | ||||
|         from __future__ import unicode_literals | ||||
|  | ||||
|         import re | ||||
|  | ||||
|         from .common import InfoExtractor | ||||
|          | ||||
|          | ||||
|         class YourExtractorIE(InfoExtractor): | ||||
|             _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)' | ||||
|             _TEST = { | ||||
|                 'url': 'http://yourextractor.com/watch/42', | ||||
|                 'md5': 'TODO: md5 sum of the first 10KiB of the video file', | ||||
|                 'info_dict': { | ||||
|                     'id': '42', | ||||
|                     'ext': 'mp4', | ||||
|                     'title': 'Video title goes here', | ||||
|                     # TODO more properties, either as: | ||||
|                     # * A value | ||||
|                     # * MD5 checksum; start the string with md5: | ||||
|                     # * A regular expression; start the string with re: | ||||
|                     # * Any Python type (for example int or float) | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             def _real_extract(self, url): | ||||
|                 mobj = re.match(self._VALID_URL, url) | ||||
|                 video_id = mobj.group('id') | ||||
|  | ||||
|                 # TODO more code goes here, for example ... | ||||
|                 webpage = self._download_webpage(url, video_id) | ||||
|                 title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title') | ||||
|  | ||||
|                 return { | ||||
|                     'id': video_id, | ||||
|                     'title': title, | ||||
|                     # TODO more properties (see youtube_dl/extractor/common.py) | ||||
|                 } | ||||
|  | ||||
|  | ||||
| 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). | ||||
| 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. | ||||
| 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. | ||||
| 8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501). | ||||
| 9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this: | ||||
|  | ||||
|         $ git add youtube_dl/extractor/__init__.py | ||||
|         $ git add youtube_dl/extractor/yourextractor.py | ||||
|         $ git commit -m '[yourextractor] Add new extractor' | ||||
|         $ git push origin yourextractor | ||||
|  | ||||
| 10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. | ||||
|  | ||||
| In any case, thank you very much for your contributions! | ||||
|  | ||||
| # BUGS | ||||
|  | ||||
|   | ||||
| @@ -26,16 +26,27 @@ class YDL(FakeYDL): | ||||
|         self.msgs.append(msg) | ||||
|  | ||||
|  | ||||
| def _make_result(formats, **kwargs): | ||||
|     res = { | ||||
|         'formats': formats, | ||||
|         'id': 'testid', | ||||
|         'title': 'testttitle', | ||||
|         'extractor': 'testex', | ||||
|     } | ||||
|     res.update(**kwargs) | ||||
|     return res | ||||
|  | ||||
|  | ||||
| class TestFormatSelection(unittest.TestCase): | ||||
|     def test_prefer_free_formats(self): | ||||
|         # Same resolution => download webm | ||||
|         ydl = YDL() | ||||
|         ydl.params['prefer_free_formats'] = True | ||||
|         formats = [ | ||||
|             {'ext': 'webm', 'height': 460}, | ||||
|             {'ext': 'mp4',  'height': 460}, | ||||
|             {'ext': 'webm', 'height': 460, 'url': 'x'}, | ||||
|             {'ext': 'mp4', 'height': 460, 'url': 'y'}, | ||||
|         ] | ||||
|         info_dict = {'formats': formats, 'extractor': 'test'} | ||||
|         info_dict = _make_result(formats) | ||||
|         yie = YoutubeIE(ydl) | ||||
|         yie._sort_formats(info_dict['formats']) | ||||
|         ydl.process_ie_result(info_dict) | ||||
| @@ -46,8 +57,8 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         ydl = YDL() | ||||
|         ydl.params['prefer_free_formats'] = True | ||||
|         formats = [ | ||||
|             {'ext': 'webm', 'height': 720}, | ||||
|             {'ext': 'mp4', 'height': 1080}, | ||||
|             {'ext': 'webm', 'height': 720, 'url': 'a'}, | ||||
|             {'ext': 'mp4', 'height': 1080, 'url': 'b'}, | ||||
|         ] | ||||
|         info_dict['formats'] = formats | ||||
|         yie = YoutubeIE(ydl) | ||||
| @@ -60,9 +71,9 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         ydl = YDL() | ||||
|         ydl.params['prefer_free_formats'] = False | ||||
|         formats = [ | ||||
|             {'ext': 'webm', 'height': 720}, | ||||
|             {'ext': 'mp4', 'height': 720}, | ||||
|             {'ext': 'flv', 'height': 720}, | ||||
|             {'ext': 'webm', 'height': 720, 'url': '_'}, | ||||
|             {'ext': 'mp4', 'height': 720, 'url': '_'}, | ||||
|             {'ext': 'flv', 'height': 720, 'url': '_'}, | ||||
|         ] | ||||
|         info_dict['formats'] = formats | ||||
|         yie = YoutubeIE(ydl) | ||||
| @@ -74,8 +85,8 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         ydl = YDL() | ||||
|         ydl.params['prefer_free_formats'] = False | ||||
|         formats = [ | ||||
|             {'ext': 'flv', 'height': 720}, | ||||
|             {'ext': 'webm', 'height': 720}, | ||||
|             {'ext': 'flv', 'height': 720, 'url': '_'}, | ||||
|             {'ext': 'webm', 'height': 720, 'url': '_'}, | ||||
|         ] | ||||
|         info_dict['formats'] = formats | ||||
|         yie = YoutubeIE(ydl) | ||||
| @@ -91,8 +102,7 @@ class TestFormatSelection(unittest.TestCase): | ||||
|             {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3}, | ||||
|             {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4}, | ||||
|         ] | ||||
|         info_dict = { | ||||
|             'formats': formats, 'extractor': 'test', 'id': 'testvid'} | ||||
|         info_dict = _make_result(formats) | ||||
|  | ||||
|         ydl = YDL() | ||||
|         ydl.process_ie_result(info_dict) | ||||
| @@ -120,12 +130,12 @@ class TestFormatSelection(unittest.TestCase): | ||||
|  | ||||
|     def test_format_selection(self): | ||||
|         formats = [ | ||||
|             {'format_id': '35', 'ext': 'mp4', 'preference': 1}, | ||||
|             {'format_id': '45', 'ext': 'webm', 'preference': 2}, | ||||
|             {'format_id': '47', 'ext': 'webm', 'preference': 3}, | ||||
|             {'format_id': '2', 'ext': 'flv', 'preference': 4}, | ||||
|             {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'}, | ||||
|             {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'}, | ||||
|             {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'}, | ||||
|             {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'}, | ||||
|         ] | ||||
|         info_dict = {'formats': formats, 'extractor': 'test'} | ||||
|         info_dict = _make_result(formats) | ||||
|  | ||||
|         ydl = YDL({'format': '20/47'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
| @@ -154,12 +164,12 @@ class TestFormatSelection(unittest.TestCase): | ||||
|  | ||||
|     def test_format_selection_audio(self): | ||||
|         formats = [ | ||||
|             {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'}, | ||||
|             {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'}, | ||||
|             {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'}, | ||||
|             {'format_id': 'vid', 'ext': 'mp4', 'preference': 4}, | ||||
|             {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'}, | ||||
|             {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'}, | ||||
|             {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'}, | ||||
|             {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'}, | ||||
|         ] | ||||
|         info_dict = {'formats': formats, 'extractor': 'test'} | ||||
|         info_dict = _make_result(formats) | ||||
|  | ||||
|         ydl = YDL({'format': 'bestaudio'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
| @@ -172,10 +182,10 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         self.assertEqual(downloaded['format_id'], 'audio-low') | ||||
|  | ||||
|         formats = [ | ||||
|             {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1}, | ||||
|             {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2}, | ||||
|             {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'}, | ||||
|             {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'}, | ||||
|         ] | ||||
|         info_dict = {'formats': formats, 'extractor': 'test'} | ||||
|         info_dict = _make_result(formats) | ||||
|  | ||||
|         ydl = YDL({'format': 'bestaudio/worstaudio/best'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
| @@ -184,11 +194,11 @@ class TestFormatSelection(unittest.TestCase): | ||||
|  | ||||
|     def test_format_selection_video(self): | ||||
|         formats = [ | ||||
|             {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'}, | ||||
|             {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'}, | ||||
|             {'format_id': 'vid', 'ext': 'mp4', 'preference': 3}, | ||||
|             {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'}, | ||||
|             {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'}, | ||||
|             {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'}, | ||||
|         ] | ||||
|         info_dict = {'formats': formats, 'extractor': 'test'} | ||||
|         info_dict = _make_result(formats) | ||||
|  | ||||
|         ydl = YDL({'format': 'bestvideo'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
| @@ -217,10 +227,12 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         for f1id, f2id in zip(order, order[1:]): | ||||
|             f1 = YoutubeIE._formats[f1id].copy() | ||||
|             f1['format_id'] = f1id | ||||
|             f1['url'] = 'url:' + f1id | ||||
|             f2 = YoutubeIE._formats[f2id].copy() | ||||
|             f2['format_id'] = f2id | ||||
|             f2['url'] = 'url:' + f2id | ||||
|  | ||||
|             info_dict = {'formats': [f1, f2], 'extractor': 'youtube'} | ||||
|             info_dict = _make_result([f1, f2], extractor='youtube') | ||||
|             ydl = YDL() | ||||
|             yie = YoutubeIE(ydl) | ||||
|             yie._sort_formats(info_dict['formats']) | ||||
| @@ -228,7 +240,7 @@ class TestFormatSelection(unittest.TestCase): | ||||
|             downloaded = ydl.downloaded_info_dicts[0] | ||||
|             self.assertEqual(downloaded['format_id'], f1id) | ||||
|  | ||||
|             info_dict = {'formats': [f2, f1], 'extractor': 'youtube'} | ||||
|             info_dict = _make_result([f2, f1], extractor='youtube') | ||||
|             ydl = YDL() | ||||
|             yie = YoutubeIE(ydl) | ||||
|             yie._sort_formats(info_dict['formats']) | ||||
|   | ||||
| @@ -153,6 +153,25 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         self.assertMatch( | ||||
|             'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights', | ||||
|             ['ComedyCentralShows']) | ||||
|  | ||||
|     def test_yahoo_https(self): | ||||
|         # https://github.com/rg3/youtube-dl/issues/2701 | ||||
|         self.assertMatch( | ||||
|             'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', | ||||
|             ['Yahoo']) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -324,7 +324,6 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertEqual(result['id'], '342759') | ||||
|         self.assertEqual( | ||||
|             result['title'], 'General Motors Ignition Switch Recall') | ||||
|         self.assertEqual(len(result['entries']), 9) | ||||
|         whole_duration = sum(e['duration'] for e in result['entries']) | ||||
|         self.assertEqual(whole_duration, 14855) | ||||
|  | ||||
|   | ||||
| @@ -38,6 +38,7 @@ from youtube_dl.utils import ( | ||||
|     xpath_with_ns, | ||||
|     parse_iso8601, | ||||
|     strip_jsonp, | ||||
|     uppercase_escape, | ||||
| ) | ||||
|  | ||||
| if sys.version_info < (3, 0): | ||||
| @@ -279,6 +280,9 @@ class TestUtil(unittest.TestCase): | ||||
|         d = json.loads(stripped) | ||||
|         self.assertEqual(d, [{"id": "532cb", "x": 3}]) | ||||
|  | ||||
|     def test_uppercase_escpae(self): | ||||
|         self.assertEqual(uppercase_escape(u'aä'), u'aä') | ||||
|         self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐') | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
							
								
								
									
										34
									
								
								youtube_dl/YoutubeDL.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										34
									
								
								youtube_dl/YoutubeDL.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							| @@ -286,6 +286,9 @@ class YoutubeDL(object): | ||||
|         """Print message to stdout if not in quiet mode.""" | ||||
|         return self.to_stdout(message, skip_eol, check_quiet=True) | ||||
|  | ||||
|     def _write_string(self, s, out=None): | ||||
|         write_string(s, out=out, encoding=self.params.get('encoding')) | ||||
|  | ||||
|     def to_stdout(self, message, skip_eol=False, check_quiet=False): | ||||
|         """Print message to stdout if not in quiet mode.""" | ||||
|         if self.params.get('logger'): | ||||
| @@ -295,7 +298,7 @@ class YoutubeDL(object): | ||||
|             terminator = ['\n', ''][skip_eol] | ||||
|             output = message + terminator | ||||
|  | ||||
|             write_string(output, self._screen_file) | ||||
|             self._write_string(output, self._screen_file) | ||||
|  | ||||
|     def to_stderr(self, message): | ||||
|         """Print message to stderr.""" | ||||
| @@ -305,7 +308,7 @@ class YoutubeDL(object): | ||||
|         else: | ||||
|             message = self._bidi_workaround(message) | ||||
|             output = message + '\n' | ||||
|             write_string(output, self._err_file) | ||||
|             self._write_string(output, self._err_file) | ||||
|  | ||||
|     def to_console_title(self, message): | ||||
|         if not self.params.get('consoletitle', False): | ||||
| @@ -315,21 +318,21 @@ class YoutubeDL(object): | ||||
|             # already of type unicode() | ||||
|             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) | ||||
|         elif 'TERM' in os.environ: | ||||
|             write_string('\033]0;%s\007' % message, self._screen_file) | ||||
|             self._write_string('\033]0;%s\007' % message, self._screen_file) | ||||
|  | ||||
|     def save_console_title(self): | ||||
|         if not self.params.get('consoletitle', False): | ||||
|             return | ||||
|         if 'TERM' in os.environ: | ||||
|             # Save the title on stack | ||||
|             write_string('\033[22;0t', self._screen_file) | ||||
|             self._write_string('\033[22;0t', self._screen_file) | ||||
|  | ||||
|     def restore_console_title(self): | ||||
|         if not self.params.get('consoletitle', False): | ||||
|             return | ||||
|         if 'TERM' in os.environ: | ||||
|             # Restore the title from stack | ||||
|             write_string('\033[23;0t', self._screen_file) | ||||
|             self._write_string('\033[23;0t', self._screen_file) | ||||
|  | ||||
|     def __enter__(self): | ||||
|         self.save_console_title() | ||||
| @@ -933,7 +936,7 @@ class YoutubeDL(object): | ||||
|                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||
|                                 subfile.write(sub) | ||||
|                 except (OSError, IOError): | ||||
|                     self.report_error('Cannot write subtitles file ' + descfn) | ||||
|                     self.report_error('Cannot write subtitles file ' + sub_filename) | ||||
|                     return | ||||
|  | ||||
|         if self.params.get('writeinfojson', False): | ||||
| @@ -1211,9 +1214,16 @@ class YoutubeDL(object): | ||||
|         if not self.params.get('verbose'): | ||||
|             return | ||||
|  | ||||
|         write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % | ||||
|                  (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding())) | ||||
|         write_string('[debug] youtube-dl version ' + __version__ + '\n') | ||||
|         write_string( | ||||
|             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % ( | ||||
|                 locale.getpreferredencoding(), | ||||
|                 sys.getfilesystemencoding(), | ||||
|                 sys.stdout.encoding, | ||||
|                 self.get_encoding()), | ||||
|             encoding=None | ||||
|         ) | ||||
|  | ||||
|         self._write_string('[debug] youtube-dl version ' + __version__ + '\n') | ||||
|         try: | ||||
|             sp = subprocess.Popen( | ||||
|                 ['git', 'rev-parse', '--short', 'HEAD'], | ||||
| @@ -1222,20 +1232,20 @@ class YoutubeDL(object): | ||||
|             out, err = sp.communicate() | ||||
|             out = out.decode().strip() | ||||
|             if re.match('[0-9a-f]+', out): | ||||
|                 write_string('[debug] Git HEAD: ' + out + '\n') | ||||
|                 self._write_string('[debug] Git HEAD: ' + out + '\n') | ||||
|         except: | ||||
|             try: | ||||
|                 sys.exc_clear() | ||||
|             except: | ||||
|                 pass | ||||
|         write_string('[debug] Python version %s - %s' % | ||||
|         self._write_string('[debug] Python version %s - %s' % | ||||
|                      (platform.python_version(), platform_name()) + '\n') | ||||
|  | ||||
|         proxy_map = {} | ||||
|         for handler in self._opener.handlers: | ||||
|             if hasattr(handler, 'proxies'): | ||||
|                 proxy_map.update(handler.proxies) | ||||
|         write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') | ||||
|         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') | ||||
|  | ||||
|     def _setup_opener(self): | ||||
|         timeout_val = self.params.get('socket_timeout') | ||||
|   | ||||
| @@ -52,6 +52,7 @@ __authors__  = ( | ||||
|     'Juan C. Olivares', | ||||
|     'Mattias Harrysson', | ||||
|     'phaer', | ||||
|     'Sainyam Kapoor', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
| @@ -242,7 +243,7 @@ def parseOpts(overrideArguments=None): | ||||
|         help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') | ||||
|     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') | ||||
|     general.add_option( | ||||
|         '--prefer-insecure', action='store_true', dest='prefer_insecure', | ||||
|         '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', | ||||
|         help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') | ||||
|     general.add_option( | ||||
|         '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', | ||||
|   | ||||
| @@ -4,9 +4,10 @@ import sys | ||||
| import time | ||||
|  | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     encodeFilename, | ||||
|     timeconvert, | ||||
|     format_bytes, | ||||
|     timeconvert, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -173,7 +174,7 @@ class FileDownloader(object): | ||||
|                 return | ||||
|             os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) | ||||
|         except (IOError, OSError) as err: | ||||
|             self.report_error(u'unable to rename file: %s' % str(err)) | ||||
|             self.report_error(u'unable to rename file: %s' % compat_str(err)) | ||||
|  | ||||
|     def try_utime(self, filename, last_modified_hdr): | ||||
|         """Try to set the last-modified time of the given file.""" | ||||
|   | ||||
| @@ -32,6 +32,7 @@ from .canal13cl import Canal13clIE | ||||
| from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .cbs import CBSIE | ||||
| from .cbsnews import CBSNewsIE | ||||
| from .ceskatelevize import CeskaTelevizeIE | ||||
| from .channel9 import Channel9IE | ||||
| from .chilloutzone import ChilloutzoneIE | ||||
| @@ -40,6 +41,7 @@ from .clipfish import ClipfishIE | ||||
| from .cliphunter import CliphunterIE | ||||
| from .clipsyndicate import ClipsyndicateIE | ||||
| from .cmt import CMTIE | ||||
| from .cnet import CNETIE | ||||
| from .cnn import ( | ||||
|     CNNIE, | ||||
|     CNNBlogsIE, | ||||
| @@ -61,6 +63,7 @@ from .dotsub import DotsubIE | ||||
| from .dreisat import DreiSatIE | ||||
| from .defense import DefenseGouvFrIE | ||||
| from .discovery import DiscoveryIE | ||||
| from .divxstage import DivxStageIE | ||||
| from .dropbox import DropboxIE | ||||
| from .ebaumsworld import EbaumsWorldIE | ||||
| from .ehow import EHowIE | ||||
| @@ -153,6 +156,9 @@ from .mixcloud import MixcloudIE | ||||
| from .mpora import MporaIE | ||||
| from .mofosex import MofosexIE | ||||
| from .mooshare import MooshareIE | ||||
| from .morningstar import MorningstarIE | ||||
| from .motorsport import MotorsportIE | ||||
| from .movshare import MovShareIE | ||||
| from .mtv import ( | ||||
|     MTVIE, | ||||
|     MTVIggyIE, | ||||
| @@ -202,6 +208,7 @@ from .rottentomatoes import RottenTomatoesIE | ||||
| from .roxwel import RoxwelIE | ||||
| from .rtlnow import RTLnowIE | ||||
| from .rts import RTSIE | ||||
| from .rtve import RTVEALaCartaIE | ||||
| from .rutube import ( | ||||
|     RutubeIE, | ||||
|     RutubeChannelIE, | ||||
| @@ -273,6 +280,7 @@ from .videodetective import VideoDetectiveIE | ||||
| from .videolecturesnet import VideoLecturesNetIE | ||||
| from .videofyme import VideofyMeIE | ||||
| from .videopremium import VideoPremiumIE | ||||
| from .videoweed import VideoWeedIE | ||||
| from .vimeo import ( | ||||
|     VimeoIE, | ||||
|     VimeoChannelIE, | ||||
|   | ||||
| @@ -4,39 +4,72 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BRIE(InfoExtractor): | ||||
|     IE_DESC = "Bayerischer Rundfunk Mediathek" | ||||
|     _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$" | ||||
|     _BASE_URL = "http://www.br.de" | ||||
|     IE_DESC = 'Bayerischer Rundfunk Mediathek' | ||||
|     _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html' | ||||
|     _BASE_URL = 'http://www.br.de' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", | ||||
|             "md5": "c4f83cf0f023ba5875aba0bf46860df2", | ||||
|             "info_dict": { | ||||
|                 "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", | ||||
|                 "ext": "mp4", | ||||
|                 "title": "Feiern und Verzichten", | ||||
|                 "description": "Anselm Grün: Feiern und Verzichten", | ||||
|                 "uploader": "BR/Birgit Baier", | ||||
|                 "upload_date": "20140301" | ||||
|             'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html', | ||||
|             'md5': 'c4f83cf0f023ba5875aba0bf46860df2', | ||||
|             'info_dict': { | ||||
|                 'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Feiern und Verzichten', | ||||
|                 'description': 'Anselm Grün: Feiern und Verzichten', | ||||
|                 'uploader': 'BR/Birgit Baier', | ||||
|                 'upload_date': '20140301', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             "url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html", | ||||
|             "md5": "ab451b09d861dbed7d7cc9ab0be19ebe", | ||||
|             "info_dict": { | ||||
|                 "id": "2c060e69-3a27-4e13-b0f0-668fac17d812", | ||||
|                 "ext": "mp4", | ||||
|                 "title": "Über den Pass", | ||||
|                 "description": "Die Eroberung der Alpen: Über den Pass", | ||||
|                 "uploader": None, | ||||
|                 "upload_date": None | ||||
|             'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html', | ||||
|             'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe', | ||||
|             'info_dict': { | ||||
|                 'id': '2c060e69-3a27-4e13-b0f0-668fac17d812', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Über den Pass', | ||||
|                 'description': 'Die Eroberung der Alpen: Über den Pass', | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html', | ||||
|             'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820', | ||||
|             'info_dict': { | ||||
|                 'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab', | ||||
|                 'ext': 'aac', | ||||
|                 'title': '"Keine neuen Schulden im nächsten Jahr"', | ||||
|                 'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html', | ||||
|             'md5': 'dbab0aef2e047060ea7a21fc1ce1078a', | ||||
|             'info_dict': { | ||||
|                 'id': '6ba73750-d405-45d3-861d-1ce8c524e059', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Umweltbewusster Häuslebauer', | ||||
|                 'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html', | ||||
|             'md5': '23bca295f1650d698f94fc570977dae3', | ||||
|             'info_dict': { | ||||
|                 'id': 'd982c9ce-8648-4753-b358-98abb8aec43d', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Folge 1 - Metaphysik', | ||||
|                 'description': 'Kant für Anfänger: Folge 1 - Metaphysik', | ||||
|                 'uploader': 'Eva Maria Steimle', | ||||
|                 'upload_date': '20140117', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -44,56 +77,63 @@ class BRIE(InfoExtractor): | ||||
|         display_id = mobj.group('id') | ||||
|         page = self._download_webpage(url, display_id) | ||||
|         xml_url = self._search_regex( | ||||
|             r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL") | ||||
|             r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL') | ||||
|         xml = self._download_xml(self._BASE_URL + xml_url, None) | ||||
|  | ||||
|         videos = [] | ||||
|         for xml_video in xml.findall("video"): | ||||
|             video = { | ||||
|                 "id": xml_video.get("externalId"), | ||||
|                 "title": xml_video.find("title").text, | ||||
|                 "formats": self._extract_formats(xml_video.find("assets")), | ||||
|                 "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")), | ||||
|                 "description": " ".join(xml_video.find("shareTitle").text.splitlines()), | ||||
|                 "webpage_url": xml_video.find("permalink").text | ||||
|             } | ||||
|             if xml_video.find("author").text: | ||||
|                 video["uploader"] = xml_video.find("author").text | ||||
|             if xml_video.find("broadcastDate").text: | ||||
|                 video["upload_date"] =  "".join(reversed(xml_video.find("broadcastDate").text.split("."))) | ||||
|             videos.append(video) | ||||
|         medias = [] | ||||
|  | ||||
|         if len(videos) > 1: | ||||
|         for xml_media in xml.findall('video') + xml.findall('audio'): | ||||
|             media = { | ||||
|                 'id': xml_media.get('externalId'), | ||||
|                 'title': xml_media.find('title').text, | ||||
|                 'formats': self._extract_formats(xml_media.find('assets')), | ||||
|                 'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')), | ||||
|                 'description': ' '.join(xml_media.find('shareTitle').text.splitlines()), | ||||
|                 'webpage_url': xml_media.find('permalink').text | ||||
|             } | ||||
|             if xml_media.find('author').text: | ||||
|                 media['uploader'] = xml_media.find('author').text | ||||
|             if xml_media.find('broadcastDate').text: | ||||
|                 media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.'))) | ||||
|             medias.append(media) | ||||
|  | ||||
|         if len(medias) > 1: | ||||
|             self._downloader.report_warning( | ||||
|                 'found multiple videos; please ' | ||||
|                 'found multiple medias; please ' | ||||
|                 'report this with the video URL to http://yt-dl.org/bug') | ||||
|         if not videos: | ||||
|             raise ExtractorError('No video entries found') | ||||
|         return videos[0] | ||||
|         if not medias: | ||||
|             raise ExtractorError('No media entries found') | ||||
|         return medias[0] | ||||
|  | ||||
|     def _extract_formats(self, assets): | ||||
|  | ||||
|         def text_or_none(asset, tag): | ||||
|             elem = asset.find(tag) | ||||
|             return None if elem is None else elem.text | ||||
|  | ||||
|         formats = [{ | ||||
|             "url": asset.find("downloadUrl").text, | ||||
|             "ext": asset.find("mediaType").text, | ||||
|             "format_id": asset.get("type"), | ||||
|             "width": int(asset.find("frameWidth").text), | ||||
|             "height": int(asset.find("frameHeight").text), | ||||
|             "tbr": int(asset.find("bitrateVideo").text), | ||||
|             "abr": int(asset.find("bitrateAudio").text), | ||||
|             "vcodec": asset.find("codecVideo").text, | ||||
|             "container": asset.find("mediaType").text, | ||||
|             "filesize": int(asset.find("size").text), | ||||
|         } for asset in assets.findall("asset") | ||||
|             if asset.find("downloadUrl") is not None] | ||||
|             'url': text_or_none(asset, 'downloadUrl'), | ||||
|             'ext': text_or_none(asset, 'mediaType'), | ||||
|             'format_id': asset.get('type'), | ||||
|             'width': int_or_none(text_or_none(asset, 'frameWidth')), | ||||
|             'height': int_or_none(text_or_none(asset, 'frameHeight')), | ||||
|             'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')), | ||||
|             'abr': int_or_none(text_or_none(asset, 'bitrateAudio')), | ||||
|             'vcodec': text_or_none(asset, 'codecVideo'), | ||||
|             'acodec': text_or_none(asset, 'codecAudio'), | ||||
|             'container': text_or_none(asset, 'mediaType'), | ||||
|             'filesize': int_or_none(text_or_none(asset, 'size')), | ||||
|         } for asset in assets.findall('asset') | ||||
|             if asset.find('downloadUrl') is not None] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|         return formats | ||||
|  | ||||
|     def _extract_thumbnails(self, variants): | ||||
|         thumbnails = [{ | ||||
|             "url": self._BASE_URL + variant.find("url").text, | ||||
|             "width": int(variant.find("width").text), | ||||
|             "height": int(variant.find("height").text), | ||||
|         } for variant in variants.findall("variant")] | ||||
|         thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True) | ||||
|             'url': self._BASE_URL + variant.find('url').text, | ||||
|             'width': int_or_none(variant.find('width').text), | ||||
|             'height': int_or_none(variant.find('height').text), | ||||
|         } for variant in variants.findall('variant')] | ||||
|         thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True) | ||||
|         return thumbnails | ||||
|   | ||||
| @@ -27,9 +27,10 @@ class BreakIE(InfoExtractor): | ||||
|             webpage, 'info json', flags=re.DOTALL) | ||||
|         info = json.loads(info_json) | ||||
|         video_url = info['videoUri'] | ||||
|         m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url) | ||||
|         if m_youtube is not None: | ||||
|             return self.url_result(m_youtube.group(1), 'Youtube') | ||||
|         youtube_id = info.get('youtubeId') | ||||
|         if youtube_id: | ||||
|             return self.url_result(youtube_id, 'Youtube') | ||||
|  | ||||
|         final_url = video_url + '?' + info['AuthToken'] | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -4,9 +4,7 @@ import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class BYUtvIE(InfoExtractor): | ||||
| @@ -16,7 +14,7 @@ class BYUtvIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': 'granite-flats-talking', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f', | ||||
|             'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c', | ||||
|             'title': 'Talking', | ||||
|             'thumbnail': 're:^https?://.*promo.*' | ||||
|         }, | ||||
|   | ||||
							
								
								
									
										87
									
								
								youtube_dl/extractor/cbsnews.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								youtube_dl/extractor/cbsnews.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,87 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class CBSNewsIE(InfoExtractor): | ||||
|     IE_DESC = 'CBS News' | ||||
|     _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/', | ||||
|             'info_dict': { | ||||
|                 'id': 'tesla-and-spacex-elon-musks-industrial-empire', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire', | ||||
|                 'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg', | ||||
|                 'duration': 791, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', | ||||
|             'info_dict': { | ||||
|                 'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', | ||||
|                 'thumbnail': 'http://cbsnews2.cbsistatic.com/hub/i/r/2014/04/04/0c9fbc66-576b-41ca-8069-02d122060dd2/thumbnail/140x90/6dad7a502f88875ceac38202984b6d58/en-0404-werner-replace-640x360.jpg', | ||||
|                 'duration': 205, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_info = json.loads(self._html_search_regex( | ||||
|             r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'', | ||||
|             webpage, 'video JSON info')) | ||||
|  | ||||
|         item = video_info['item'] if 'item' in video_info else video_info | ||||
|         title = item.get('articleTitle') or item.get('hed') | ||||
|         duration = item.get('duration') | ||||
|         thumbnail = item.get('mediaImage') or item.get('thumbnail') | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']: | ||||
|             uri = item.get('media' + format_id + 'URI') | ||||
|             if not uri: | ||||
|                 continue | ||||
|             fmt = { | ||||
|                 'url': uri, | ||||
|                 'format_id': format_id, | ||||
|             } | ||||
|             if uri.startswith('rtmp'): | ||||
|                 fmt.update({ | ||||
|                     'app': 'ondemand?auth=cbs', | ||||
|                     'play_path': 'mp4:' + uri.split('<break>')[-1], | ||||
|                     'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf', | ||||
|                     'page_url': 'http://www.cbsnews.com', | ||||
|                     'ext': 'flv', | ||||
|                 }) | ||||
|             elif uri.endswith('.m3u8'): | ||||
|                 fmt['ext'] = 'mp4' | ||||
|             formats.append(fmt) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										75
									
								
								youtube_dl/extractor/cnet.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								youtube_dl/extractor/cnet.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CNETIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/', | ||||
|         'md5': '041233212a0d06b179c87cbcca1577b8', | ||||
|         'info_dict': { | ||||
|             'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Hands-on with Microsoft Windows 8.1 Update', | ||||
|             'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.', | ||||
|             'thumbnail': 're:^http://.*/flmswindows8.jpg$', | ||||
|             'uploader_id': 'sarah.mitroff@cbsinteractive.com', | ||||
|             'uploader': 'Sarah Mitroff', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         data_json = self._html_search_regex( | ||||
|             r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'", | ||||
|             webpage, 'data json') | ||||
|         data = json.loads(data_json) | ||||
|         vdata = data['video'] | ||||
|         if not vdata: | ||||
|             vdata = data['videos'][0] | ||||
|         if not vdata: | ||||
|             raise ExtractorError('Cannot find video data') | ||||
|  | ||||
|         video_id = vdata['id'] | ||||
|         title = vdata['headline'] | ||||
|         description = vdata.get('dek') | ||||
|         thumbnail = vdata.get('image', {}).get('path') | ||||
|         author = vdata.get('author') | ||||
|         if author: | ||||
|             uploader = '%s %s' % (author['firstName'], author['lastName']) | ||||
|             uploader_id = author.get('email') | ||||
|         else: | ||||
|             uploader = None | ||||
|             uploader_id = None | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': '%s-%s-%s' % ( | ||||
|                 f['type'], f['format'], | ||||
|                 int_or_none(f.get('bitrate'), 1000, default='')), | ||||
|             'url': f['uri'], | ||||
|             'tbr': int_or_none(f.get('bitrate'), 1000), | ||||
|         } for f in vdata['files']['data']] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
| @@ -41,9 +41,9 @@ class ComedyCentralShowsIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport) | ||||
|                       |https?://(:www\.)? | ||||
|                           (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ | ||||
|                          (full-episodes/(?P<episode>.*)| | ||||
|                          ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| | ||||
|                           (?P<clip> | ||||
|                               (?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+)) | ||||
|                               (?:(?:guests/[^/]+|videos|video-playlists)/[^/]+/(?P<videotitle>[^/?#]+)) | ||||
|                               |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) | ||||
|                               |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) | ||||
|                           )| | ||||
|   | ||||
| @@ -251,7 +251,10 @@ class InfoExtractor(object): | ||||
|             with open(filename, 'wb') as outf: | ||||
|                 outf.write(webpage_bytes) | ||||
|  | ||||
|         content = webpage_bytes.decode(encoding, 'replace') | ||||
|         try: | ||||
|             content = webpage_bytes.decode(encoding, 'replace') | ||||
|         except LookupError: | ||||
|             content = webpage_bytes.decode('utf-8', 'replace') | ||||
|  | ||||
|         if (u'<title>Access to this site is blocked</title>' in content and | ||||
|                 u'Websense' in content[:512]): | ||||
|   | ||||
| @@ -8,7 +8,6 @@ from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     compat_str, | ||||
|     get_element_by_attribute, | ||||
|     get_element_by_id, | ||||
|     orderedSet, | ||||
|     str_to_int, | ||||
| @@ -180,7 +179,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
| class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|     IE_NAME = u'dailymotion:playlist' | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' | ||||
|     _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>' | ||||
|     _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"' | ||||
|     _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s' | ||||
|  | ||||
|     def _extract_entries(self, id): | ||||
| @@ -190,10 +189,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|             webpage = self._download_webpage(request, | ||||
|                                              id, u'Downloading page %s' % pagenum) | ||||
|  | ||||
|             playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage) | ||||
|             video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el)) | ||||
|             video_ids.extend(re.findall(r'data-id="(.+?)"', webpage)) | ||||
|  | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: | ||||
|                 break | ||||
|         return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') | ||||
|                    for video_id in orderedSet(video_ids)] | ||||
| @@ -212,8 +210,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|  | ||||
| class DailymotionUserIE(DailymotionPlaylistIE): | ||||
|     IE_NAME = u'dailymotion:user' | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)' | ||||
|     _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)' | ||||
|     _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
							
								
								
									
										27
									
								
								youtube_dl/extractor/divxstage.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								youtube_dl/extractor/divxstage.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .novamov import NovaMovIE | ||||
|  | ||||
|  | ||||
| class DivxStageIE(NovaMovIE): | ||||
|     IE_NAME = 'divxstage' | ||||
|     IE_DESC = 'DivxStage' | ||||
|  | ||||
|     _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'} | ||||
|  | ||||
|     _HOST = 'www.divxstage.eu' | ||||
|  | ||||
|     _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' | ||||
|     _TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>' | ||||
|     _DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.divxstage.eu/video/57f238e2e5e01', | ||||
|         'md5': '63969f6eb26533a1968c4d325be63e72', | ||||
|         'info_dict': { | ||||
|             'id': '57f238e2e5e01', | ||||
|             'ext': 'flv', | ||||
|             'title': 'youtubedl test video', | ||||
|             'description': 'This is a test video for youtubedl.', | ||||
|         } | ||||
|     } | ||||
| @@ -114,20 +114,6 @@ class GenericIE(InfoExtractor): | ||||
|                 'title': '2cc213299525360.mov',  # that's what we get | ||||
|             }, | ||||
|         }, | ||||
|         # second style of embedded ooyala videos | ||||
|         { | ||||
|             'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html', | ||||
|             'info_dict': { | ||||
|                 'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Behind-the-scenes: Financial Review Sunday ', | ||||
|                 'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # google redirect | ||||
|         { | ||||
|             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', | ||||
| @@ -198,6 +184,17 @@ class GenericIE(InfoExtractor): | ||||
|                 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b', | ||||
|             } | ||||
|         }, | ||||
|         # Embeded Ustream video | ||||
|         { | ||||
|             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm', | ||||
|             'md5': '27b99cdb639c9b12a79bca876a073417', | ||||
|             'info_dict': { | ||||
|                 'id': '45734260', | ||||
|                 'ext': 'flv', | ||||
|                 'uploader': 'AU SPA:  The NSA and Privacy', | ||||
|                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman' | ||||
|             } | ||||
|         }, | ||||
|         # nowvideo embed hidden behind percent encoding | ||||
|         { | ||||
|             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', | ||||
| @@ -514,17 +511,18 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group(1), 'Mpora') | ||||
|  | ||||
|         # Look for embedded NovaMov player | ||||
|         # Look for embedded NovaMov-based player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage) | ||||
|             r'''(?x)<iframe[^>]+?src=(["\']) | ||||
|                     (?P<url>http://(?:(?:embed|www)\.)? | ||||
|                         (?:novamov\.com| | ||||
|                            nowvideo\.(?:ch|sx|eu|at|ag|co)| | ||||
|                            videoweed\.(?:es|com)| | ||||
|                            movshare\.(?:net|sx|ag)| | ||||
|                            divxstage\.(?:eu|net|ch|co|at|ag)) | ||||
|                         /embed\.php.+?)\1''', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'NovaMov') | ||||
|  | ||||
|         # Look for embedded NowVideo player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'NowVideo') | ||||
|             return self.url_result(mobj.group('url')) | ||||
|  | ||||
|         # Look for embedded Facebook player | ||||
|         mobj = re.search( | ||||
| @@ -570,6 +568,12 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'TED') | ||||
|  | ||||
|         # Look for embedded Ustream videos | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Ustream') | ||||
|  | ||||
|         # Look for embedded arte.tv player | ||||
|         mobj = re.search( | ||||
|             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"', | ||||
|   | ||||
| @@ -1,9 +1,12 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import os | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     ExtractorError, | ||||
|     formatSeconds, | ||||
| ) | ||||
| @@ -24,34 +27,31 @@ class JustinTVIE(InfoExtractor): | ||||
|         /?(?:\#.*)?$ | ||||
|         """ | ||||
|     _JUSTIN_PAGE_LIMIT = 100 | ||||
|     IE_NAME = u'justin.tv' | ||||
|     IE_NAME = 'justin.tv' | ||||
|     IE_DESC = 'justin.tv and twitch.tv' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360', | ||||
|         u'file': u'296128360.flv', | ||||
|         u'md5': u'ecaa8a790c22a40770901460af191c9a', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20110927",  | ||||
|             u"uploader_id": 25114803,  | ||||
|             u"uploader": u"thegamedevhub",  | ||||
|             u"title": u"Beginner Series - Scripting With Python Pt.1" | ||||
|         'url': 'http://www.twitch.tv/thegamedevhub/b/296128360', | ||||
|         'md5': 'ecaa8a790c22a40770901460af191c9a', | ||||
|         'info_dict': { | ||||
|             'id': '296128360', | ||||
|             'ext': 'flv', | ||||
|             'upload_date': '20110927', | ||||
|             'uploader_id': 25114803, | ||||
|             'uploader': 'thegamedevhub', | ||||
|             'title': 'Beginner Series - Scripting With Python Pt.1' | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def report_download_page(self, channel, offset): | ||||
|         """Report attempt to download a single page of videos.""" | ||||
|         self.to_screen(u'%s: Downloading video information from %d to %d' % | ||||
|                 (channel, offset, offset + self._JUSTIN_PAGE_LIMIT)) | ||||
|  | ||||
|     # Return count of items, list of *valid* items | ||||
|     def _parse_page(self, url, video_id): | ||||
|         info_json = self._download_webpage(url, video_id, | ||||
|                                            u'Downloading video info JSON', | ||||
|                                            u'unable to download video info JSON') | ||||
|                                            'Downloading video info JSON', | ||||
|                                            'unable to download video info JSON') | ||||
|  | ||||
|         response = json.loads(info_json) | ||||
|         if type(response) != list: | ||||
|             error_text = response.get('error', 'unknown error') | ||||
|             raise ExtractorError(u'Justin.tv API: %s' % error_text) | ||||
|             raise ExtractorError('Justin.tv API: %s' % error_text) | ||||
|         info = [] | ||||
|         for clip in response: | ||||
|             video_url = clip['video_file_url'] | ||||
| @@ -62,7 +62,7 @@ class JustinTVIE(InfoExtractor): | ||||
|                 video_id = clip['id'] | ||||
|                 video_title = clip.get('title', video_id) | ||||
|                 info.append({ | ||||
|                     'id': video_id, | ||||
|                     'id': compat_str(video_id), | ||||
|                     'url': video_url, | ||||
|                     'title': video_title, | ||||
|                     'uploader': clip.get('channel_name', video_uploader_id), | ||||
| @@ -74,8 +74,6 @@ class JustinTVIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'invalid URL: %s' % url) | ||||
|  | ||||
|         api_base = 'http://api.justin.tv' | ||||
|         paged = False | ||||
| @@ -89,40 +87,41 @@ class JustinTVIE(InfoExtractor): | ||||
|             webpage = self._download_webpage(url, chapter_id) | ||||
|             m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage) | ||||
|             if not m: | ||||
|                 raise ExtractorError(u'Cannot find archive of a chapter') | ||||
|                 raise ExtractorError('Cannot find archive of a chapter') | ||||
|             archive_id = m.group(1) | ||||
|  | ||||
|             api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id | ||||
|             doc = self._download_xml(api, chapter_id, | ||||
|                                              note=u'Downloading chapter information', | ||||
|                                              errnote=u'Chapter information download failed') | ||||
|             doc = self._download_xml( | ||||
|                 api, chapter_id, | ||||
|                 note='Downloading chapter information', | ||||
|                 errnote='Chapter information download failed') | ||||
|             for a in doc.findall('.//archive'): | ||||
|                 if archive_id == a.find('./id').text: | ||||
|                     break | ||||
|             else: | ||||
|                 raise ExtractorError(u'Could not find chapter in chapter information') | ||||
|                 raise ExtractorError('Could not find chapter in chapter information') | ||||
|  | ||||
|             video_url = a.find('./video_file_url').text | ||||
|             video_ext = video_url.rpartition('.')[2] or u'flv' | ||||
|             video_ext = video_url.rpartition('.')[2] or 'flv' | ||||
|  | ||||
|             chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id | ||||
|             chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id, | ||||
|                                    note='Downloading chapter metadata', | ||||
|                                    errnote='Download of chapter metadata failed') | ||||
|             chapter_info = json.loads(chapter_info_json) | ||||
|             chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id | ||||
|             chapter_info = self._download_json( | ||||
|                 chapter_api_url, 'c' + chapter_id, | ||||
|                 note='Downloading chapter metadata', | ||||
|                 errnote='Download of chapter metadata failed') | ||||
|  | ||||
|             bracket_start = int(doc.find('.//bracket_start').text) | ||||
|             bracket_end = int(doc.find('.//bracket_end').text) | ||||
|  | ||||
|             # TODO determine start (and probably fix up file) | ||||
|             #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457 | ||||
|             #video_url += u'?start=' + TODO:start_timestamp | ||||
|             #video_url += '?start=' + TODO:start_timestamp | ||||
|             # bracket_start is 13290, but we want 51670615 | ||||
|             self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. ' | ||||
|                                             u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end))) | ||||
|             self._downloader.report_warning('Chapter detected, but we can just download the whole file. ' | ||||
|                                             'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end))) | ||||
|  | ||||
|             info = { | ||||
|                 'id': u'c' + chapter_id, | ||||
|                 'id': 'c' + chapter_id, | ||||
|                 'url': video_url, | ||||
|                 'ext': video_ext, | ||||
|                 'title': chapter_info['title'], | ||||
| @@ -131,14 +130,12 @@ class JustinTVIE(InfoExtractor): | ||||
|                 'uploader': chapter_info['channel']['display_name'], | ||||
|                 'uploader_id': chapter_info['channel']['name'], | ||||
|             } | ||||
|             return [info] | ||||
|             return info | ||||
|         else: | ||||
|             video_id = mobj.group('videoid') | ||||
|             api = api_base + '/broadcast/by_archive/%s.json' % video_id | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         info = [] | ||||
|         entries = [] | ||||
|         offset = 0 | ||||
|         limit = self._JUSTIN_PAGE_LIMIT | ||||
|         while True: | ||||
| @@ -146,8 +143,12 @@ class JustinTVIE(InfoExtractor): | ||||
|                 self.report_download_page(video_id, offset) | ||||
|             page_url = api + ('?offset=%d&limit=%d' % (offset, limit)) | ||||
|             page_count, page_info = self._parse_page(page_url, video_id) | ||||
|             info.extend(page_info) | ||||
|             entries.extend(page_info) | ||||
|             if not paged or page_count != limit: | ||||
|                 break | ||||
|             offset += limit | ||||
|         return info | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': video_id, | ||||
|             'entries': entries, | ||||
|         } | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
|  | ||||
| @@ -11,22 +13,22 @@ from ..aes import ( | ||||
|     aes_decrypt_text | ||||
| ) | ||||
|  | ||||
|  | ||||
| class KeezMoviesIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', | ||||
|         u'file': u'1214711.mp4', | ||||
|         u'md5': u'6e297b7e789329923fcf83abb67c9289', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Petite Asian Lady Mai Playing In Bathtub", | ||||
|             u"age_limit": 18, | ||||
|         'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', | ||||
|         'file': '1214711.mp4', | ||||
|         'md5': '6e297b7e789329923fcf83abb67c9289', | ||||
|         'info_dict': { | ||||
|             'title': 'Petite Asian Lady Mai Playing In Bathtub', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         url = 'http://www.' + mobj.group('url') | ||||
|  | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
| @@ -38,10 +40,10 @@ class KeezMoviesIE(InfoExtractor): | ||||
|             embedded_url = mobj.group(1) | ||||
|             return self.url_result(embedded_url) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title') | ||||
|         video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) | ||||
|         if webpage.find('encrypted=true')!=-1: | ||||
|             password = self._html_search_regex(r'video_title=(.+?)&', webpage, u'password') | ||||
|         video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title') | ||||
|         video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, 'video_url')) | ||||
|         if 'encrypted=true' in webpage: | ||||
|             password = self._html_search_regex(r'video_title=(.+?)&', webpage, 'password') | ||||
|             video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') | ||||
|         path = compat_urllib_parse_urlparse(video_url).path | ||||
|         extension = os.path.splitext(path)[1][1:] | ||||
|   | ||||
							
								
								
									
										47
									
								
								youtube_dl/extractor/morningstar.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								youtube_dl/extractor/morningstar.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class MorningstarIE(InfoExtractor):
    """Extractor for clips hosted on morningstar.com's video center pages."""
    IE_DESC = 'morningstar.com'
    _VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
        'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
        'info_dict': {
            'id': '615869',
            'ext': 'mp4',
            'title': 'Get Ahead of the Curve on 2013 Taxes',
            'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
            'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
        }
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # Title and description are plain page markup; the stream URL and
        # preview image are stashed in hidden <input> form fields.
        video_title = self._html_search_regex(
            r'<h1 id="titleLink">(.*?)</h1>', webpage, 'title')
        stream_url = self._html_search_regex(
            r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
            webpage, 'video URL')
        preview = self._html_search_regex(
            r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
            webpage, 'thumbnail', fatal=False)
        deck = self._html_search_regex(
            r'<div id="mstarDeck".*?>(.*?)</div>',
            webpage, 'description', fatal=False)

        result = {'id': video_id, 'url': stream_url, 'title': video_title}
        result['thumbnail'] = preview
        result['description'] = deck
        return result
							
								
								
									
										63
									
								
								youtube_dl/extractor/motorsport.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								youtube_dl/extractor/motorsport.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,63 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import hashlib | ||||
| import json | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_parse_qs, | ||||
|     compat_str, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class MotorsportIE(InfoExtractor):
    """Extractor for motorsport.com video pages."""
    IE_DESC = 'motorsport.com'
    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
    _TEST = {
        'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
        'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
        'info_dict': {
            'id': '7063',
            'ext': 'mp4',
            'title': 'Red Bull Racing: 2014 Rules Explained',
            'duration': 207,
            'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
            'uploader': 'rainiere',
            'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
        }
    }

    def _real_extract(self, url):
        display_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, display_id)

        # The player metadata travels as a JSON blob inside the Flash embed's
        # query-string-encoded flashvars attribute.
        flashvars = compat_parse_qs(self._html_search_regex(
            r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars'))
        params = json.loads(flashvars['parameters'][0])

        # Stream URLs must be signed: append an expiry timestamp (now + 24h)
        # and an md5 of (secret + unsigned URL).
        expiry = compat_str(int(time.time()) + 24 * 60 * 60)
        unsigned_url = params['location'] + '?e=' + expiry
        secret = 'h3hg713fh32'
        signature = hashlib.md5((secret + unsigned_url).encode('utf-8')).hexdigest()

        uploader = self._html_search_regex(
            r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
            'uploader', fatal=False)

        return {
            'id': params['video_id'],
            'display_id': display_id,
            'title': params['title'],
            'url': unsigned_url + '&h=' + signature,
            'description': params.get('description'),
            'thumbnail': params.get('main_thumb'),
            'duration': int_or_none(params.get('duration')),
            'uploader': uploader,
        }
							
								
								
									
										27
									
								
								youtube_dl/extractor/movshare.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								youtube_dl/extractor/movshare.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .novamov import NovaMovIE | ||||
|  | ||||
|  | ||||
class MovShareIE(NovaMovIE):
    # Thin configuration subclass: all extraction logic lives in the NovaMov
    # base extractor; this class only supplies the movshare-specific URL
    # pattern, host name and page-scraping regexes.
    IE_NAME = 'movshare'
    IE_DESC = 'MovShare'

    # Accepts the .net, .sx and .ag mirrors of the site.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'movshare\.(?:net|sx|ag)'}

    # Host used by the base class to build the canonical video-page URL.
    _HOST = 'www.movshare.net'

    # Markers the NovaMov base extractor searches for in the video page:
    # deleted-file detection, title and description.
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
    _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'

    _TEST = {
        'url': 'http://www.movshare.net/video/559e28be54d96',
        'md5': 'abd31a2132947262c50429e1d16c1bfd',
        'info_dict': {
            'id': '559e28be54d96',
            'ext': 'flv',
            'title': 'dissapeared image',
            'description': 'optical illusion  dissapeared image  magic illusion',
        }
    }
| @@ -4,9 +4,7 @@ import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
| ) | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class MporaIE(InfoExtractor): | ||||
| @@ -20,7 +18,7 @@ class MporaIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'title': 'Katy Curd -  Winter in the Forest', | ||||
|             'duration': 416, | ||||
|             'uploader': 'petenewman', | ||||
|             'uploader': 'Peter Newman Media', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -7,9 +7,14 @@ from .common import InfoExtractor | ||||
|  | ||||
| class NineGagIE(InfoExtractor): | ||||
|     IE_NAME = '9gag' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/ | ||||
|         (?: | ||||
|             v/(?P<numid>[0-9]+)| | ||||
|             p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+) | ||||
|         ) | ||||
|     ''' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         "url": "http://9gag.tv/v/1912", | ||||
|         "info_dict": { | ||||
|             "id": "1912", | ||||
| @@ -20,17 +25,33 @@ class NineGagIE(InfoExtractor): | ||||
|             "thumbnail": "re:^https?://", | ||||
|         }, | ||||
|         'add_ie': ['Youtube'] | ||||
|     } | ||||
|     }, | ||||
|     { | ||||
|         'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar', | ||||
|         'info_dict': { | ||||
|             'id': 'KklwM', | ||||
|             'ext': 'mp4', | ||||
|             'display_id': 'alternate-banned-opening-scene-of-gravity', | ||||
|             "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.", | ||||
|             'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie", | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = mobj.group('numid') or mobj.group('id') | ||||
|         display_id = mobj.group('display_id') or video_id | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         youtube_id = self._html_search_regex( | ||||
|             r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"', | ||||
|             webpage, 'video ID') | ||||
|         title = self._html_search_regex( | ||||
|             r'(?s)id="jsid-video-post-container".*?data-title="([^"]+)"', | ||||
|             webpage, 'title', default=None) | ||||
|         if not title: | ||||
|             title = self._og_search_title(webpage) | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage, | ||||
|             'description', fatal=False) | ||||
| @@ -46,7 +67,8 @@ class NineGagIE(InfoExtractor): | ||||
|             'url': youtube_id, | ||||
|             'ie_key': 'Youtube', | ||||
|             'id': video_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'view_count': view_count, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|   | ||||
| @@ -13,7 +13,8 @@ class NovaMovIE(InfoExtractor): | ||||
|     IE_NAME = 'novamov' | ||||
|     IE_DESC = 'NovaMov' | ||||
|  | ||||
|     _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'} | ||||
|     _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})' | ||||
|     _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'} | ||||
|  | ||||
|     _HOST = 'www.novamov.com' | ||||
|  | ||||
| @@ -36,18 +37,17 @@ class NovaMovIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         page = self._download_webpage( | ||||
|             'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page') | ||||
|  | ||||
|         if re.search(self._FILE_DELETED_REGEX, page) is not None: | ||||
|             raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) | ||||
|             raise ExtractorError('Video %s does not exist' % video_id, expected=True) | ||||
|  | ||||
|         filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey') | ||||
|  | ||||
|         title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False) | ||||
|  | ||||
|         description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False) | ||||
|  | ||||
|         api_response = self._download_webpage( | ||||
|   | ||||
| @@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE): | ||||
|     IE_NAME = 'nowvideo' | ||||
|     IE_DESC = 'NowVideo' | ||||
|  | ||||
|     _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'} | ||||
|     _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'} | ||||
|  | ||||
|     _HOST = 'www.nowvideo.ch' | ||||
|  | ||||
|   | ||||
| @@ -59,11 +59,11 @@ class NTVIE(InfoExtractor): | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/kino/Koma_film', | ||||
|             'info_dict': { | ||||
|                 'id': '750783', | ||||
|                 'id': '758100', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ', | ||||
|                 'description': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ', | ||||
|                 'duration': 28, | ||||
|                 'title': 'Остросюжетный фильм «Кома»', | ||||
|                 'description': 'Остросюжетный фильм «Кома»', | ||||
|                 'duration': 5592, | ||||
|             }, | ||||
|             'params': { | ||||
|                     # rtmp download | ||||
|   | ||||
| @@ -1,44 +1,81 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_parse | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class PornHdIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)' | ||||
|     _VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', | ||||
|         'file': '1962.flv', | ||||
|         'md5': '35272469887dca97abd30abecc6cdf75', | ||||
|         'md5': '956b8ca569f7f4d8ec563e2c41598441', | ||||
|         'info_dict': { | ||||
|             "title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video", | ||||
|             "age_limit": 18, | ||||
|             'id': '1962', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Sierra loves doing laundry', | ||||
|             'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('video_id') | ||||
|         video_title = mobj.group('video_title') | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         next_url = self._html_search_regex( | ||||
|             r'&hd=(http.+?)&', webpage, 'video URL') | ||||
|         next_url = compat_urllib_parse.unquote(next_url) | ||||
|         title = self._og_search_title(webpage) | ||||
|         TITLE_SUFFIX = ' porn HD Video | PornHD.com ' | ||||
|         if title.endswith(TITLE_SUFFIX): | ||||
|             title = title[:-len(TITLE_SUFFIX)] | ||||
|  | ||||
|         video_url = self._download_webpage( | ||||
|             next_url, video_id, note='Retrieving video URL', | ||||
|             errnote='Could not retrieve video URL') | ||||
|         age_limit = 18 | ||||
|         description = self._html_search_regex( | ||||
|             r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False) | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'(\d+) views 	</span>', webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': format_url, | ||||
|                 'ext': format.lower(), | ||||
|                 'format_id': '%s-%s' % (format.lower(), quality.lower()), | ||||
|                 'quality': 1 if quality.lower() == 'high' else 0, | ||||
|             } for format, quality, format_url in re.findall( | ||||
|                 r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage) | ||||
|         ] | ||||
|  | ||||
|         mobj = re.search(r'flashVars = (?P<flashvars>{.+?});', webpage) | ||||
|         if mobj: | ||||
|             flashvars = json.loads(mobj.group('flashvars')) | ||||
|             formats.extend([ | ||||
|                 { | ||||
|                     'url': flashvars['hashlink'].replace('?noProxy=1', ''), | ||||
|                     'ext': 'flv', | ||||
|                     'format_id': 'flv-low', | ||||
|                     'quality': 0, | ||||
|                 }, | ||||
|                 { | ||||
|                     'url': flashvars['hd'].replace('?noProxy=1', ''), | ||||
|                     'ext': 'flv', | ||||
|                     'format_id': 'flv-high', | ||||
|                     'quality': 1, | ||||
|                 } | ||||
|             ]) | ||||
|             thumbnail = flashvars['urlWallpaper'] | ||||
|         else: | ||||
|             thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'flv', | ||||
|             'title': video_title, | ||||
|             'age_limit': age_limit, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ class Ro220IE(InfoExtractor): | ||||
|         'md5': '03af18b73a07b4088753930db7a34add', | ||||
|         'info_dict': { | ||||
|             "title": "Luati-le Banii sez 4 ep 1", | ||||
|             "description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.", | ||||
|             "description": "re:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$", | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -9,46 +9,136 @@ from ..utils import ( | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     unescapeHTML, | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RTSIE(InfoExtractor): | ||||
|     IE_DESC = 'RTS.ch' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html', | ||||
|         'md5': '753b877968ad8afaeddccc374d4256a5', | ||||
|         'info_dict': { | ||||
|             'id': '3449373', | ||||
|             'ext': 'mp4', | ||||
|             'duration': 1488, | ||||
|             'title': 'Les Enfants Terribles', | ||||
|             'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.', | ||||
|             'uploader': 'Divers', | ||||
|             'upload_date': '19680921', | ||||
|             'timestamp': -40280400, | ||||
|             'thumbnail': 're:^https?://.*\.image' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html', | ||||
|             'md5': '753b877968ad8afaeddccc374d4256a5', | ||||
|             'info_dict': { | ||||
|                 'id': '3449373', | ||||
|                 'ext': 'mp4', | ||||
|                 'duration': 1488, | ||||
|                 'title': 'Les Enfants Terribles', | ||||
|                 'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.', | ||||
|                 'uploader': 'Divers', | ||||
|                 'upload_date': '19680921', | ||||
|                 'timestamp': -40280400, | ||||
|                 'thumbnail': 're:^https?://.*\.image' | ||||
|             }, | ||||
|         }, | ||||
|     } | ||||
|         { | ||||
|             'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html', | ||||
|             'md5': 'c148457a27bdc9e5b1ffe081a7a8337b', | ||||
|             'info_dict': { | ||||
|                 'id': '5624067', | ||||
|                 'ext': 'mp4', | ||||
|                 'duration': 3720, | ||||
|                 'title': 'Les yeux dans les cieux - Mon homard au Canada', | ||||
|                 'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7', | ||||
|                 'uploader': 'Passe-moi les jumelles', | ||||
|                 'upload_date': '20140404', | ||||
|                 'timestamp': 1396635300, | ||||
|                 'thumbnail': 're:^https?://.*\.image' | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html', | ||||
|             'md5': 'b4326fecd3eb64a458ba73c73e91299d', | ||||
|             'info_dict': { | ||||
|                 'id': '5745975', | ||||
|                 'ext': 'mp4', | ||||
|                 'duration': 48, | ||||
|                 'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski', | ||||
|                 'description': 'Hockey - Playoff', | ||||
|                 'uploader': 'Hockey', | ||||
|                 'upload_date': '20140403', | ||||
|                 'timestamp': 1396556882, | ||||
|                 'thumbnail': 're:^https?://.*\.image' | ||||
|             }, | ||||
|             'skip': 'Blocked outside Switzerland', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html', | ||||
|             'md5': '9bb06503773c07ce83d3cbd793cebb91', | ||||
|             'info_dict': { | ||||
|                 'id': '5745356', | ||||
|                 'ext': 'mp4', | ||||
|                 'duration': 33, | ||||
|                 'title': 'Londres cachée par un épais smog', | ||||
|                 'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.', | ||||
|                 'uploader': 'Le Journal en continu', | ||||
|                 'upload_date': '20140403', | ||||
|                 'timestamp': 1396537322, | ||||
|                 'thumbnail': 're:^https?://.*\.image' | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html', | ||||
|             'md5': 'dd8ef6a22dff163d063e2a52bc8adcae', | ||||
|             'info_dict': { | ||||
|                 'id': '5706148', | ||||
|                 'ext': 'mp3', | ||||
|                 'duration': 123, | ||||
|                 'title': '"Urban Hippie", de Damien Krisl', | ||||
|                 'description': 'Des Hippies super glam.', | ||||
|                 'upload_date': '20140403', | ||||
|                 'timestamp': 1396551600, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('id') | ||||
|  | ||||
|         all_info = self._download_json( | ||||
|             'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id) | ||||
|         info = all_info['video']['JSONinfo'] | ||||
|         def download_json(internal_id): | ||||
|             return self._download_json( | ||||
|                 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id, | ||||
|                 video_id) | ||||
|  | ||||
|         all_info = download_json(video_id) | ||||
|  | ||||
|         # video_id extracted out of URL is not always a real id | ||||
|         if 'video' not in all_info and 'audio' not in all_info: | ||||
|             page = self._download_webpage(url, video_id) | ||||
|             internal_id = self._html_search_regex( | ||||
|                 r'<(?:video|audio) data-id="([0-9]+)"', page, | ||||
|                 'internal video id') | ||||
|             all_info = download_json(internal_id) | ||||
|  | ||||
|         info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio'] | ||||
|  | ||||
|         upload_timestamp = parse_iso8601(info.get('broadcast_date')) | ||||
|         duration = parse_duration(info.get('duration')) | ||||
|         duration = info.get('duration') or info.get('cutout') or info.get('cutduration') | ||||
|         if isinstance(duration, compat_str): | ||||
|             duration = parse_duration(duration) | ||||
|         view_count = info.get('plays') | ||||
|         thumbnail = unescapeHTML(info.get('preview_image_url')) | ||||
|  | ||||
|         def extract_bitrate(url): | ||||
|             return int_or_none(self._search_regex( | ||||
|                 r'-([0-9]+)k\.', url, 'bitrate', default=None)) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': fid, | ||||
|             'url': furl, | ||||
|             'tbr': int_or_none(self._search_regex( | ||||
|                 r'-([0-9]+)k\.', furl, 'bitrate', default=None)), | ||||
|             'tbr': extract_bitrate(furl), | ||||
|         } for fid, furl in info['streams'].items()] | ||||
|  | ||||
|         if 'media' in info: | ||||
|             formats.extend([{ | ||||
|                 'format_id': '%s-%sk' % (media['ext'], media['rate']), | ||||
|                 'url': 'http://download-video.rts.ch/%s' % media['url'], | ||||
|                 'tbr': media['rate'] or extract_bitrate(media['url']), | ||||
|             } for media in info['media'] if media.get('rate')]) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
| @@ -57,6 +147,7 @@ class RTSIE(InfoExtractor): | ||||
|             'title': info['title'], | ||||
|             'description': info.get('intro'), | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'uploader': info.get('programName'), | ||||
|             'timestamp': upload_timestamp, | ||||
|             'thumbnail': thumbnail, | ||||
|   | ||||
							
								
								
									
										84
									
								
								youtube_dl/extractor/rtve.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/rtve.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import base64 | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     struct_unpack, | ||||
| ) | ||||
|  | ||||
|  | ||||
class RTVEALaCartaIE(InfoExtractor):
    """Extractor for videos from RTVE's 'a la carta' (on-demand) service.

    The real media URL is not exposed directly: rtve.es serves a PNG file
    whose tEXt chunk carries an obfuscated copy of the URL, which
    _decrypt_url() unscrambles.
    """
    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
        'info_dict': {
            'id': '2491869',
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
        },
    }

    def _decrypt_url(self, png):
        """Extract the media URL hidden in the base64-encoded PNG ``png``.

        The payload sits in the PNG's tEXt chunk and consists of an
        alphabet section and a URL section separated by '#'; both are
        interleaved with filler characters that the loops below skip.
        """
        encrypted_data = base64.b64decode(png)
        # Locate the tEXt chunk; per the PNG chunk layout, the 4 bytes
        # preceding the chunk type are its big-endian length.
        text_index = encrypted_data.find(b'tEXt')
        text_chunk = encrypted_data[text_index-4:]
        length = struct_unpack('!I', text_chunk[:4])[0]
        # Use bytearray to get integers when iterating in both python 2.x and 3.x
        data = bytearray(text_chunk[8:8+length])
        data = [chr(b) for b in data if b != 0]  # drop NUL padding bytes
        # '#' separates the scrambled alphabet from the scrambled URL digits
        hash_index = data.index('#')
        alphabet_data = data[:hash_index]
        url_data = data[hash_index+1:]

        # Rebuild the alphabet: keep one character, then skip a number of
        # filler characters that cycles 1, 2, 3, 0, 1, ...
        alphabet = []
        e = 0
        d = 0
        for l in alphabet_data:
            if d == 0:
                alphabet.append(l)
                d = e = (e + 1) % 4
            else:
                d -= 1
        # Rebuild the URL: each output character is encoded as two decimal
        # digits (tens then units) forming an index into `alphabet`, with a
        # varying number of filler characters between the two digits.
        url = ''
        f = 0
        e = 3
        b = 1
        for letter in url_data:
            if f == 0:
                l = int(letter)*10
                f = 1
            else:
                if e == 0:
                    l += int(letter)
                    url += alphabet[l]
                    e = (b + 3) % 4
                    f = 0
                    b += 1
                else:
                    e -= 1

        return url

    def _real_extract(self, url):
        """Download metadata and the URL-bearing PNG, return the info dict."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # JSON config supplies metadata (title, thumbnail image)
        info = self._download_json(
            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
            video_id)['page']['items'][0]
        # This "thumbnail" PNG actually carries the obfuscated media URL
        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
        video_url = self._decrypt_url(png)

        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
            'thumbnail': info['image'],
        }
| @@ -9,8 +9,18 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class TeamcocoIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)' | ||||
|     _TESTS = [ | ||||
|     { | ||||
|         'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', | ||||
|         'file': '80187.mp4', | ||||
|         'md5': '3f7746aa0dc86de18df7539903d399ea', | ||||
|         'info_dict': { | ||||
|             'title': 'Conan Becomes A Mary Kay Beauty Consultant', | ||||
|             'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.' | ||||
|         } | ||||
|     }, | ||||
|     { | ||||
|         'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', | ||||
|         'file': '19705.mp4', | ||||
|         'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', | ||||
| @@ -19,22 +29,23 @@ class TeamcocoIE(InfoExtractor): | ||||
|             "title": "Louis C.K. Interview Pt. 1 11/3/11" | ||||
|         } | ||||
|     } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|         url_title = mobj.group('url_title') | ||||
|         webpage = self._download_webpage(url, url_title) | ||||
|  | ||||
|         video_id = self._html_search_regex( | ||||
|             r'<article class="video" data-id="(\d+?)"', | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         display_id = mobj.group('display_id') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|          | ||||
|         video_id = mobj.group("video_id") | ||||
|         if not video_id: | ||||
|             video_id = self._html_search_regex( | ||||
|                 r'<article class="video" data-id="(\d+?)"', | ||||
|                 webpage, 'video id') | ||||
|  | ||||
|         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id | ||||
|         data = self._download_xml(data_url, video_id, 'Downloading data webpage') | ||||
|         data = self._download_xml( | ||||
|             data_url, display_id, 'Downloading data webpage') | ||||
|  | ||||
|         qualities = ['500k', '480p', '1000k', '720p', '1080p'] | ||||
|         formats = [] | ||||
| @@ -69,6 +80,7 @@ class TeamcocoIE(InfoExtractor): | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'formats': formats, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|   | ||||
| @@ -37,6 +37,7 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|                 'consciousness, but that half the time our brains are ' | ||||
|                 'actively fooling us.'), | ||||
|             'uploader': 'Dan Dennett', | ||||
|             'width': 854, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms', | ||||
| @@ -50,10 +51,10 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     _FORMATS_PREFERENCE = { | ||||
|         'low': 1, | ||||
|         'medium': 2, | ||||
|         'high': 3, | ||||
|     _NATIVE_FORMATS = { | ||||
|         'low': {'preference': 1, 'width': 320, 'height': 180}, | ||||
|         'medium': {'preference': 2, 'width': 512, 'height': 288}, | ||||
|         'high': {'preference': 3, 'width': 854, 'height': 480}, | ||||
|     } | ||||
|  | ||||
|     def _extract_info(self, webpage): | ||||
| @@ -98,12 +99,14 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|         talk_info = self._extract_info(webpage)['talks'][0] | ||||
|  | ||||
|         formats = [{ | ||||
|             'ext': 'mp4', | ||||
|             'url': format_url, | ||||
|             'format_id': format_id, | ||||
|             'format': format_id, | ||||
|             'preference': self._FORMATS_PREFERENCE.get(format_id, -1), | ||||
|         } for (format_id, format_url) in talk_info['nativeDownloads'].items()] | ||||
|         for f in formats: | ||||
|             finfo = self._NATIVE_FORMATS.get(f['format_id']) | ||||
|             if finfo: | ||||
|                 f.update(finfo) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         video_id = compat_str(talk_info['id']) | ||||
|   | ||||
| @@ -11,7 +11,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class UstreamIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)' | ||||
|     _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)' | ||||
|     IE_NAME = 'ustream' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.ustream.tv/recorded/20274954', | ||||
| @@ -25,6 +25,13 @@ class UstreamIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         if m.group('type') == 'embed': | ||||
|             video_id = m.group('videoID') | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|             desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id') | ||||
|             desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id | ||||
|             return self.url_result(desktop_url, 'Ustream') | ||||
|  | ||||
|         video_id = m.group('videoID') | ||||
|  | ||||
|         video_url = 'http://tcdn.ustream.tv/video/%s' % video_id | ||||
|   | ||||
							
								
								
									
										26
									
								
								youtube_dl/extractor/videoweed.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								youtube_dl/extractor/videoweed.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .novamov import NovaMovIE | ||||
|  | ||||
|  | ||||
class VideoWeedIE(NovaMovIE):
    """Extractor for videoweed.es / videoweed.com.

    All extraction logic is inherited from NovaMovIE; this subclass only
    overrides the host-specific constants the base class reads.
    """
    IE_NAME = 'videoweed'
    IE_DESC = 'VideoWeed'

    # Reuse the NovaMov URL pattern with the VideoWeed host names
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}

    _HOST = 'www.videoweed.es'

    # Site-specific page markers consumed by the inherited extraction code
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'

    _TEST = {
        'url': 'http://www.videoweed.es/file/b42178afbea14',
        'md5': 'abd31a2132947262c50429e1d16c1bfd',
        'info_dict': {
            'id': 'b42178afbea14',
            'ext': 'flv',
            'title': 'optical illusion  dissapeared image magic illusion',
            'description': ''
        },
    }
| @@ -3,11 +3,12 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
|  | ||||
|  | ||||
| class WimpIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.wimp.com/maruexhausted/', | ||||
|         'md5': 'f1acced123ecb28d9bb79f2479f2b6a1', | ||||
|         'info_dict': { | ||||
| @@ -16,7 +17,20 @@ class WimpIE(InfoExtractor): | ||||
|             'title': 'Maru is exhausted.', | ||||
|             'description': 'md5:57e099e857c0a4ea312542b684a869b8', | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         # youtube video | ||||
|         'url': 'http://www.wimp.com/clowncar/', | ||||
|         'info_dict': { | ||||
|             'id': 'cG4CEr2aiSg', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Basset hound clown car...incredible!', | ||||
|             'description': 'md5:8d228485e0719898c017203f900b3a35', | ||||
|             'uploader': 'Gretchen Hoey', | ||||
|             'uploader_id': 'gretchenandjeff1', | ||||
|             'upload_date': '20140303', | ||||
|         }, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -24,6 +38,13 @@ class WimpIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_url = self._search_regex( | ||||
|             r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL') | ||||
|         if YoutubeIE.suitable(video_url): | ||||
|             self.to_screen('Found YouTube video') | ||||
|             return { | ||||
|                 '_type': 'url', | ||||
|                 'url': video_url, | ||||
|                 'ie_key': YoutubeIE.ie_key(), | ||||
|             } | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -31,4 +52,4 @@ class WimpIE(InfoExtractor): | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
|         } | ||||
|   | ||||
| @@ -15,22 +15,24 @@ from ..utils import ( | ||||
|  | ||||
| class YahooIE(InfoExtractor): | ||||
|     IE_DESC = 'Yahoo screen' | ||||
|     _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html' | ||||
|     _VALID_URL = r'https?://screen\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', | ||||
|             'file': '214727115.mp4', | ||||
|             'md5': '4962b075c08be8690a922ee026d05e69', | ||||
|             'info_dict': { | ||||
|                 'id': '214727115', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Julian Smith & Travis Legg Watch Julian Smith', | ||||
|                 'description': 'Julian and Travis watch Julian Smith', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', | ||||
|             'file': '103000935.mp4', | ||||
|             'md5': 'd6e6fc6e1313c608f316ddad7b82b306', | ||||
|             'info_dict': { | ||||
|                 'id': '103000935', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Codefellas - The Cougar Lies with Spanish Moss', | ||||
|                 'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?', | ||||
|             }, | ||||
| @@ -60,10 +62,9 @@ class YahooIE(InfoExtractor): | ||||
|             'env': 'prod', | ||||
|             'format': 'json', | ||||
|         }) | ||||
|         query_result_json = self._download_webpage( | ||||
|         query_result = self._download_json( | ||||
|             'http://video.query.yahoo.com/v1/public/yql?' + data, | ||||
|             video_id, 'Downloading video info') | ||||
|         query_result = json.loads(query_result_json) | ||||
|         info = query_result['query']['results']['mediaObj'][0] | ||||
|         meta = info['meta'] | ||||
|  | ||||
| @@ -86,7 +87,6 @@ class YahooIE(InfoExtractor): | ||||
|             else: | ||||
|                 format_url = compat_urlparse.urljoin(host, path) | ||||
|                 format_info['url'] = format_url | ||||
|                  | ||||
|             formats.append(format_info) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
| @@ -134,27 +134,25 @@ class YahooSearchIE(SearchInfoExtractor): | ||||
|  | ||||
|     def _get_n_results(self, query, n): | ||||
|         """Get a specified number of results for a query""" | ||||
|  | ||||
|         res = { | ||||
|             '_type': 'playlist', | ||||
|             'id': query, | ||||
|             'entries': [] | ||||
|         } | ||||
|         for pagenum in itertools.count(0):  | ||||
|         entries = [] | ||||
|         for pagenum in itertools.count(0): | ||||
|             result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30) | ||||
|             webpage = self._download_webpage(result_url, query, | ||||
|                                              note='Downloading results page '+str(pagenum+1)) | ||||
|             info = json.loads(webpage) | ||||
|             info = self._download_json(result_url, query, | ||||
|                 note='Downloading results page '+str(pagenum+1)) | ||||
|             m = info['m'] | ||||
|             results = info['results'] | ||||
|  | ||||
|             for (i, r) in enumerate(results): | ||||
|                 if (pagenum * 30) +i >= n: | ||||
|                 if (pagenum * 30) + i >= n: | ||||
|                     break | ||||
|                 mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r) | ||||
|                 e = self.url_result('http://' + mobj.group('url'), 'Yahoo') | ||||
|                 res['entries'].append(e) | ||||
|             if (pagenum * 30 +i >= n) or (m['last'] >= (m['total'] -1)): | ||||
|                 entries.append(e) | ||||
|             if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)): | ||||
|                 break | ||||
|  | ||||
|         return res | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': query, | ||||
|             'entries': entries, | ||||
|         } | ||||
|   | ||||
| @@ -1453,7 +1453,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|             more_widget_html = more['load_more_widget_html'] | ||||
|  | ||||
|         playlist_title = self._html_search_regex( | ||||
|                 r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title') | ||||
|             r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>', | ||||
|             page, u'title') | ||||
|  | ||||
|         url_results = self._ids_to_results(ids) | ||||
|         return self.playlist_result(url_results, playlist_id, playlist_title) | ||||
|   | ||||
| @@ -53,8 +53,7 @@ class FFmpegPostProcessor(PostProcessor): | ||||
|  | ||||
|         if self._downloader.params.get('verbose', False): | ||||
|             self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd)) | ||||
|         bcmd = [self._downloader.encode(c) for c in cmd] | ||||
|         p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         stdout, stderr = p.communicate() | ||||
|         if p.returncode != 0: | ||||
|             stderr = stderr.decode('utf-8', 'replace') | ||||
|   | ||||
| @@ -2,6 +2,7 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| import calendar | ||||
| import codecs | ||||
| import contextlib | ||||
| import ctypes | ||||
| import datetime | ||||
| @@ -909,25 +910,84 @@ def platform_name(): | ||||
|     return res | ||||
|  | ||||
|  | ||||
| def write_string(s, out=None): | ||||
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070
    # Uses WriteConsoleW so Unicode text reaches a Windows console intact
    # instead of going through the codepage-based stdio layer.

    import ctypes
    import ctypes.wintypes

    # Map C file descriptors to GetStdHandle ids
    # (-11 = STD_OUTPUT_HANDLE, -12 = STD_ERROR_HANDLE).
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    def ucs2_len(s):
        # Length of s in UTF-16 code units (non-BMP chars count as two).
        return sum((2 if ord(c) > 0xffff else 1) for c in s)

    # Only stdout/stderr are handled; other streams fall back to the caller.
    fileno = out.fileno()
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # Redirected streams (files/pipes) are not character devices, or
        # GetConsoleMode fails on them; WriteConsoleW would not work there.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    # Write at most 1024 UTF-16 code units per WriteConsoleW call.
    # NOTE(review): after a partial write the loop passes `s` from the
    # start again instead of advancing past the code units already
    # written, so strings longer than one chunk appear to repeat their
    # prefix -- verify on a real Windows console.
    remaining = ucs2_len(s)
    while remaining > 0:
        ret = WriteConsoleW(
            h, s, min(remaining, 1024), ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        remaining -= written.value
    return True
|  | ||||
|  | ||||
def write_string(s, out=None, encoding=None):
    """Write the unicode string `s` to the stream `out` (default sys.stderr).

    Picks a safe write path across Python 2/3 and stream kinds:
    on Windows consoles it delegates to _windows_write_string() so that
    Unicode survives; byte-mode streams (and Python 2's sys.stderr, which
    lies about its mode) receive `s` encoded with `encoding` or the
    locale's preferred encoding; text streams exposing an underlying
    binary `buffer` are written through that buffer; anything else gets
    the str object directly. The stream is flushed before returning.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)

    out.flush()
|  | ||||
|  | ||||
| @@ -1176,12 +1236,12 @@ class HEADRequest(compat_urllib_request.Request): | ||||
|         return "HEAD" | ||||
|  | ||||
|  | ||||
def int_or_none(v, scale=1, default=None):
    """Convert `v` to an int divided by `scale`; return `default` when `v` is None.

    Backward-compatible generalization of the old two-argument form: the
    rendered diff left both the old (no `default`) and new definitions in
    place; only the generalized one is kept.
    """
    return default if v is None else (int(v) // scale)
|  | ||||
|  | ||||
def float_or_none(v, scale=1, default=None):
    """Convert `v` to a float divided by `scale`; return `default` when `v` is None.

    Backward-compatible generalization of the old two-argument form: the
    rendered diff left both the old (no `default`) and new definitions in
    place; only the generalized one is kept.
    """
    return default if v is None else (float(v) / scale)
|  | ||||
|  | ||||
| def parse_duration(s): | ||||
| @@ -1263,9 +1323,11 @@ class PagedList(object): | ||||
|  | ||||
|  | ||||
def uppercase_escape(s):
    """Expand literal ``\\UXXXXXXXX`` escape sequences in `s` to their characters.

    The rendered diff interleaved the old Python-2-only implementation
    (calling ``.decode('unicode-escape')`` on a str, which fails on
    Python 3) with the new codecs-based one; only the version that works
    on both Python 2 and 3 is kept. Text without such escapes passes
    through unchanged.
    """
    # getdecoder works on str in both Python 2 and 3, unlike str.decode
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
|  | ||||
| try: | ||||
|     struct.pack(u'!I', 0) | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.04.03.2' | ||||
| __version__ = '2014.04.11' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user