Compare commits
	
		
			146 Commits
		
	
	
		
			2013.07.24
			...
			2013.08.23
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 8ae7be3ef4 | ||
|  | 306170518f | ||
|  | aa6a10c44a | ||
|  | 9af73dc4fc | ||
|  | fc483bb6af | ||
|  | 53b0f3e4e2 | ||
|  | 4353cf51a0 | ||
|  | ce34e9ce5e | ||
|  | d4051a8e05 | ||
|  | df3df7fb64 | ||
|  | 9e9c164052 | ||
|  | 066090dd3f | ||
|  | 74e6672beb | ||
|  | 02bcf0d389 | ||
|  | 10204dc898 | ||
|  | 1865ed31b9 | ||
|  | 3669cdba10 | ||
|  | 939fbd26ac | ||
|  | e6ddb4e7af | ||
|  | 83390b83d9 | ||
|  | 4a55479fa9 | ||
|  | f527115b5f | ||
|  | 75e1b46add | ||
|  | 063fcc9676 | ||
|  | 8403612258 | ||
|  | 25b51c7816 | ||
|  | 9779b63bb6 | ||
|  | d81aef3adf | ||
|  | 45ed795cb0 | ||
|  | 683e98a8a4 | ||
|  | e0cfeb2ea7 | ||
|  | 75340ee383 | ||
|  | 668de34c6b | ||
|  | a91b954bb4 | ||
|  | 37b6d5f684 | ||
|  | b7a6838407 | ||
|  | cde846b3d3 | ||
|  | 6c3e6e88d3 | ||
|  | 739674cd77 | ||
|  | 4b2d7cae11 | ||
|  | 7fea7156cb | ||
|  | 3093468977 | ||
|  | 79cb25776f | ||
|  | 87f78946a5 | ||
|  | 211fbc1328 | ||
|  | 836a086ce9 | ||
|  | 90d3989b99 | ||
|  | d741e55a42 | ||
|  | 17d3aaaf16 | ||
|  | ea55b2a4ca | ||
|  | 3f0537dd4a | ||
|  | 12e895fc5a | ||
|  | bda2c49d75 | ||
|  | 01b32990da | ||
|  | dbda1b5147 | ||
|  | ddf3bd328b | ||
|  | b9c37b92cf | ||
|  | f9c3c90ca8 | ||
|  | 6daccbe317 | ||
|  | 71ea844c0e | ||
|  | 3a7256697e | ||
|  | d1ba998274 | ||
|  | 718ced8d8c | ||
|  | e1842025d0 | ||
|  | 2b9213cdc1 | ||
|  | 0577177e3e | ||
|  | 298f833b16 | ||
|  | 0f399e6e5e | ||
|  | 5b075e27cb | ||
|  | 8a9d86a2a7 | ||
|  | d468a09789 | ||
|  | 9f4ab73d7f | ||
|  | 02cf62e240 | ||
|  | 67fb0c5495 | ||
|  | 4efba05c56 | ||
|  | 0f90943e45 | ||
|  | 526e638c8a | ||
|  | 356e067390 | ||
|  | e2f48f9643 | ||
|  | b513a251f8 | ||
|  | 36cb11f068 | ||
|  | 7a4c6cc92f | ||
|  | 7edcb8f39c | ||
|  | 39b782b390 | ||
|  | 577664c8e8 | ||
|  | bba12cec89 | ||
|  | 70c4c03cb8 | ||
|  | f5791ed136 | ||
|  | fbf189a6ee | ||
|  | 09825cb5c0 | ||
|  | ed27d35674 | ||
|  | fd5539eb41 | ||
|  | 04bca64bde | ||
|  | 03cc7c20c1 | ||
|  | 4075311d94 | ||
|  | 95fdc7d69c | ||
|  | 86fe61c8f9 | ||
|  | 9bb6d2f21d | ||
|  | e3f4593e76 | ||
|  | 1d043b93cf | ||
|  | b15d4f624f | ||
|  | 4aa16a50f5 | ||
|  | bbcbf4d459 | ||
|  | 930ad9eecc | ||
|  | b072a9defd | ||
|  | 75952c6e3d | ||
|  | 05afc96b73 | ||
|  | fa80026915 | ||
|  | 2bc3de0f28 | ||
|  | 99c7bc94af | ||
|  | 152c8f349d | ||
|  | d75654c15e | ||
|  | 0725f584e1 | ||
|  | 8cda9241d1 | ||
|  | a3124ba49f | ||
|  | 579e2691fe | ||
|  | 63f05de10b | ||
|  | caeefc29eb | ||
|  | a3c736def2 | ||
|  | 58261235f0 | ||
|  | da70877a1b | ||
|  | 5c468ca8a8 | ||
|  | aedd6bb97d | ||
|  | 733d9cacb8 | ||
|  | 42f2805e48 | ||
|  | 0ffcb7c6fc | ||
|  | 27669bd11d | ||
|  | 6625f82940 | ||
|  | d0866f0bb4 | ||
|  | 09eeb75130 | ||
|  | 0a99956f71 | ||
|  | 12ef6aefa8 | ||
|  | e93aa81aa6 | ||
|  | 755eb0320e | ||
|  | 43ba5456b1 | ||
|  | 156d5ad6da | ||
|  | c626a3d9fa | ||
|  | b2e8bc1b20 | ||
|  | 771822ebb8 | ||
|  | eb6a41ba0f | ||
|  | 7d2392691c | ||
|  | 6804038d06 | ||
|  | 2f799533ae | ||
|  | 88ae5991cd | ||
|  | 5d51a883c2 | ||
|  | c4a91be726 | 
							
								
								
									
										16
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								README.md
									
									
									
									
									
								
							| @@ -120,18 +120,20 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --max-quality FORMAT       highest quality format to download | ||||
|     -F, --list-formats         list all available formats (currently youtube | ||||
|                                only) | ||||
|  | ||||
| ## Subtitle Options: | ||||
|     --write-sub                write subtitle file (currently youtube only) | ||||
|     --write-auto-sub           write automatic subtitle file (currently youtube | ||||
|                                only) | ||||
|     --only-sub                 [deprecated] alias of --skip-download | ||||
|     --all-subs                 downloads all the available subtitles of the | ||||
|                                video (currently youtube only) | ||||
|                                video | ||||
|     --list-subs                lists all available subtitles for the video | ||||
|                                (currently youtube only) | ||||
|     --sub-format FORMAT        subtitle format [srt/sbv/vtt] (default=srt) | ||||
|                                (currently youtube only) | ||||
|     --sub-lang LANG            language of the subtitles to download (optional) | ||||
|                                use IETF language tags like 'en' | ||||
|     --sub-format FORMAT        subtitle format (default=srt) ([sbv/vtt] youtube | ||||
|                                only) | ||||
|     --sub-lang LANGS           languages of the subtitles to download (optional) | ||||
|                                separated by commas, use IETF language tags like | ||||
|                                'en,pt' | ||||
|  | ||||
| ## Authentication Options: | ||||
|     -u, --username USERNAME    account username | ||||
| @@ -153,6 +155,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                processing; the video is erased by default | ||||
|     --no-post-overwrites       do not overwrite post-processed files; the post- | ||||
|                                processed files are overwritten by default | ||||
|     --embed-subs               embed subtitles in the video (only for mp4 | ||||
|                                videos) | ||||
|  | ||||
| # CONFIGURATION | ||||
|  | ||||
|   | ||||
| @@ -11,30 +11,45 @@ tests = [ | ||||
|     # 90 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`", | ||||
|      "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"), | ||||
|     # 89  | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'", | ||||
|      "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"), | ||||
|     # 88 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<", | ||||
|      "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), | ||||
|     # 87 - vflART1Nf 2013/07/24 | ||||
|     # 87 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", | ||||
|      "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"), | ||||
|     # 86 - vfl_ymO4Z 2013/06/27 | ||||
|      "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"), | ||||
|     # 86 - vflh9ybst 2013/08/23 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", | ||||
|      "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), | ||||
|     # 85 - vflSAFCP9 2013/07/19 | ||||
|      "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"), | ||||
|     # 85 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", | ||||
|      "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"), | ||||
|      ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), | ||||
|     # 84 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", | ||||
|      "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), | ||||
|     # 83 - vflcaqGO8 2013/07/11 | ||||
|      "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"), | ||||
|     # 83 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", | ||||
|      "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"), | ||||
|     # 82 | ||||
|      ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), | ||||
|     # 82 - vflZK4ZYR 2013/08/23 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", | ||||
|      "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), | ||||
|     # 81 | ||||
|      "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"), | ||||
|     # 81 - vflLC8JvQ 2013/07/25 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.", | ||||
|      "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."), | ||||
|      "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), | ||||
|     # 80 - vflZK4ZYR 2013/08/23 (sporadic) | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>", | ||||
|      "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"), | ||||
|     # 79 - vflLC8JvQ 2013/07/25 (sporadic) | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/", | ||||
|      "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), | ||||
| ] | ||||
|  | ||||
| tests_age_gate = [ | ||||
|     # 86 - vflqinMWD | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", | ||||
|      "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), | ||||
| ] | ||||
|  | ||||
| def find_matching(wrong, right): | ||||
| @@ -87,6 +102,8 @@ def genall(tests): | ||||
|  | ||||
| def main(): | ||||
|     print(genall(tests)) | ||||
|     print(u'    Age gate:') | ||||
|     print(genall(tests_age_gate)) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
|   | ||||
| @@ -50,6 +50,7 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|  | ||||
|     def test_no_duplicates(self): | ||||
|         ies = gen_extractors() | ||||
|   | ||||
							
								
								
									
										38
									
								
								test/test_playlists.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								test/test_playlists.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| import sys | ||||
| import unittest | ||||
| import json | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE | ||||
| from youtube_dl.utils import * | ||||
|  | ||||
| from helper import FakeYDL | ||||
|  | ||||
| class TestPlaylists(unittest.TestCase): | ||||
|     def assertIsPlaylist(self, info): | ||||
|         """Make sure the info has '_type' set to 'playlist'""" | ||||
|         self.assertEqual(info['_type'], 'playlist') | ||||
|  | ||||
|     def test_dailymotion_playlist(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = DailymotionPlaylistIE(dl) | ||||
|         result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], u'SPORT') | ||||
|         self.assertTrue(len(result['entries']) > 20) | ||||
|  | ||||
|     def test_vimeo_channel(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = VimeoChannelIE(dl) | ||||
|         result = ie.extract('http://vimeo.com/channels/tributes') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], u'Vimeo Tributes') | ||||
|         self.assertTrue(len(result['entries']) > 24) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -1,67 +0,0 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| import unittest | ||||
| import sys | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.extractor.youtube import YoutubeIE | ||||
| from helper import FakeYDL | ||||
|  | ||||
| sig = YoutubeIE(FakeYDL())._decrypt_signature | ||||
|  | ||||
| class TestYoutubeSig(unittest.TestCase): | ||||
|     def test_92(self): | ||||
|         wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8" | ||||
|         right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_90(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`" | ||||
|         right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_88(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<" | ||||
|         right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_87(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<" | ||||
|         right = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_86(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<" | ||||
|         right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_85(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<" | ||||
|         right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_84(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<" | ||||
|         right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_83(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<" | ||||
|         right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_82(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<" | ||||
|         right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_81(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>." | ||||
|         right = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>." | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -35,47 +35,47 @@ class TestYoutubeSubtitles(unittest.TestCase): | ||||
|         DL.params['writesubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') | ||||
|         sub = info_dict[0]['subtitles']['en'] | ||||
|         self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260') | ||||
|     def test_youtube_subtitles_it(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitleslang'] = 'it' | ||||
|         DL.params['subtitleslangs'] = ['it'] | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d') | ||||
|         sub = info_dict[0]['subtitles']['it'] | ||||
|         self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d') | ||||
|     def test_youtube_onlysubtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['onlysubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') | ||||
|         sub = info_dict[0]['subtitles']['en'] | ||||
|         self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260') | ||||
|     def test_youtube_allsubtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['allsubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         subtitles = info_dict[0]['subtitles'] | ||||
|         self.assertEqual(len(subtitles), 13) | ||||
|         self.assertEqual(len(subtitles.keys()), 13) | ||||
|     def test_youtube_subtitles_sbv_format(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitlesformat'] = 'sbv' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b') | ||||
|         sub = info_dict[0]['subtitles']['en'] | ||||
|         self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b') | ||||
|     def test_youtube_subtitles_vtt_format(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitlesformat'] = 'vtt' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7') | ||||
|         sub = info_dict[0]['subtitles']['en'] | ||||
|         self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7') | ||||
|     def test_youtube_list_subtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['listsubtitles'] = True | ||||
| @@ -85,11 +85,20 @@ class TestYoutubeSubtitles(unittest.TestCase): | ||||
|     def test_youtube_automatic_captions(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writeautomaticsub'] = True | ||||
|         DL.params['subtitleslang'] = 'it' | ||||
|         DL.params['subtitleslangs'] = ['it'] | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('8YoUxe5ncPo') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertTrue(sub[2] is not None) | ||||
|         sub = info_dict[0]['subtitles']['it'] | ||||
|         self.assertTrue(sub is not None) | ||||
|     def test_youtube_multiple_langs(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         langs = ['it', 'fr', 'de'] | ||||
|         DL.params['subtitleslangs'] = langs | ||||
|         IE = YoutubeIE(DL) | ||||
|         subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles'] | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -79,9 +79,13 @@ class FileDownloader(object): | ||||
|         rate = float(current) / dif | ||||
|         eta = int((float(total) - float(current)) / rate) | ||||
|         (eta_mins, eta_secs) = divmod(eta, 60) | ||||
|         if eta_mins > 99: | ||||
|             return '--:--' | ||||
|         return '%02d:%02d' % (eta_mins, eta_secs) | ||||
|         (eta_hours, eta_mins) = divmod(eta_mins, 60) | ||||
|         if eta_hours > 99: | ||||
|             return '--:--:--' | ||||
|         if eta_hours == 0: | ||||
|             return '%02d:%02d' % (eta_mins, eta_secs) | ||||
|         else: | ||||
|             return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs) | ||||
|  | ||||
|     @staticmethod | ||||
|     def calc_speed(start, now, bytes): | ||||
| @@ -329,6 +333,35 @@ class FileDownloader(object): | ||||
|             self.report_error(u'mplayer exited with code %d' % retval) | ||||
|             return False | ||||
|  | ||||
|     def _download_m3u8_with_ffmpeg(self, filename, url): | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|  | ||||
|         args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename] | ||||
|         # Check for ffmpeg first | ||||
|         try: | ||||
|             subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) | ||||
|         except (OSError, IOError): | ||||
|             self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] ) | ||||
|             return False | ||||
|  | ||||
|         retval = subprocess.call(args) | ||||
|         if retval == 0: | ||||
|             fsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize)) | ||||
|             self.try_rename(tmpfilename, filename) | ||||
|             self._hook_progress({ | ||||
|                 'downloaded_bytes': fsize, | ||||
|                 'total_bytes': fsize, | ||||
|                 'filename': filename, | ||||
|                 'status': 'finished', | ||||
|             }) | ||||
|             return True | ||||
|         else: | ||||
|             self.to_stderr(u"\n") | ||||
|             self.report_error(u'ffmpeg exited with code %d' % retval) | ||||
|             return False | ||||
|  | ||||
|  | ||||
|     def _do_download(self, filename, info_dict): | ||||
|         url = info_dict['url'] | ||||
| @@ -354,6 +387,10 @@ class FileDownloader(object): | ||||
|         if url.startswith('mms') or url.startswith('rtsp'): | ||||
|             return self._download_with_mplayer(filename, url) | ||||
|  | ||||
|         # m3u8 manifest are downloaded with ffmpeg | ||||
|         if determine_ext(url) == u'm3u8': | ||||
|             return self._download_m3u8_with_ffmpeg(filename, url) | ||||
|  | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|         stream = None | ||||
|  | ||||
|   | ||||
| @@ -71,12 +71,17 @@ class FFmpegPostProcessor(PostProcessor): | ||||
|         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] | ||||
|         return dict((program, executable(program)) for program in programs) | ||||
|  | ||||
|     def run_ffmpeg(self, path, out_path, opts): | ||||
|     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): | ||||
|         if not self._exes['ffmpeg'] and not self._exes['avconv']: | ||||
|             raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.') | ||||
|         cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)] | ||||
|  | ||||
|         files_cmd = [] | ||||
|         for path in input_paths: | ||||
|             files_cmd.extend(['-i', encodeFilename(path)]) | ||||
|         cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd | ||||
|                + opts + | ||||
|                [encodeFilename(self._ffmpeg_filename_argument(out_path))]) | ||||
|  | ||||
|         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         stdout,stderr = p.communicate() | ||||
|         if p.returncode != 0: | ||||
| @@ -84,6 +89,9 @@ class FFmpegPostProcessor(PostProcessor): | ||||
|             msg = stderr.strip().split('\n')[-1] | ||||
|             raise FFmpegPostProcessorError(msg) | ||||
|  | ||||
|     def run_ffmpeg(self, path, out_path, opts): | ||||
|         self.run_ffmpeg_multiple_files([path], out_path, opts) | ||||
|  | ||||
|     def _ffmpeg_filename_argument(self, fn): | ||||
|         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details | ||||
|         if fn.startswith(u'-'): | ||||
| @@ -100,7 +108,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|         self._nopostoverwrites = nopostoverwrites | ||||
|  | ||||
|     def get_audio_codec(self, path): | ||||
|         if not self._exes['ffprobe'] and not self._exes['avprobe']: return None | ||||
|         if not self._exes['ffprobe'] and not self._exes['avprobe']: | ||||
|             raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.') | ||||
|         try: | ||||
|             cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))] | ||||
|             handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) | ||||
| @@ -208,7 +217,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|             try: | ||||
|                 os.utime(encodeFilename(new_path), (time.time(), information['filetime'])) | ||||
|             except: | ||||
|                 self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file') | ||||
|                 self._downloader.report_warning(u'Cannot update utime of audio file') | ||||
|  | ||||
|         information['filepath'] = new_path | ||||
|         return self._nopostoverwrites,information | ||||
| @@ -231,3 +240,227 @@ class FFmpegVideoConvertor(FFmpegPostProcessor): | ||||
|         information['format'] = self._preferedformat | ||||
|         information['ext'] = self._preferedformat | ||||
|         return False,information | ||||
|  | ||||
|  | ||||
| class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): | ||||
|     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt | ||||
|     _lang_map = { | ||||
|         'aa': 'aar', | ||||
|         'ab': 'abk', | ||||
|         'ae': 'ave', | ||||
|         'af': 'afr', | ||||
|         'ak': 'aka', | ||||
|         'am': 'amh', | ||||
|         'an': 'arg', | ||||
|         'ar': 'ara', | ||||
|         'as': 'asm', | ||||
|         'av': 'ava', | ||||
|         'ay': 'aym', | ||||
|         'az': 'aze', | ||||
|         'ba': 'bak', | ||||
|         'be': 'bel', | ||||
|         'bg': 'bul', | ||||
|         'bh': 'bih', | ||||
|         'bi': 'bis', | ||||
|         'bm': 'bam', | ||||
|         'bn': 'ben', | ||||
|         'bo': 'bod', | ||||
|         'br': 'bre', | ||||
|         'bs': 'bos', | ||||
|         'ca': 'cat', | ||||
|         'ce': 'che', | ||||
|         'ch': 'cha', | ||||
|         'co': 'cos', | ||||
|         'cr': 'cre', | ||||
|         'cs': 'ces', | ||||
|         'cu': 'chu', | ||||
|         'cv': 'chv', | ||||
|         'cy': 'cym', | ||||
|         'da': 'dan', | ||||
|         'de': 'deu', | ||||
|         'dv': 'div', | ||||
|         'dz': 'dzo', | ||||
|         'ee': 'ewe', | ||||
|         'el': 'ell', | ||||
|         'en': 'eng', | ||||
|         'eo': 'epo', | ||||
|         'es': 'spa', | ||||
|         'et': 'est', | ||||
|         'eu': 'eus', | ||||
|         'fa': 'fas', | ||||
|         'ff': 'ful', | ||||
|         'fi': 'fin', | ||||
|         'fj': 'fij', | ||||
|         'fo': 'fao', | ||||
|         'fr': 'fra', | ||||
|         'fy': 'fry', | ||||
|         'ga': 'gle', | ||||
|         'gd': 'gla', | ||||
|         'gl': 'glg', | ||||
|         'gn': 'grn', | ||||
|         'gu': 'guj', | ||||
|         'gv': 'glv', | ||||
|         'ha': 'hau', | ||||
|         'he': 'heb', | ||||
|         'hi': 'hin', | ||||
|         'ho': 'hmo', | ||||
|         'hr': 'hrv', | ||||
|         'ht': 'hat', | ||||
|         'hu': 'hun', | ||||
|         'hy': 'hye', | ||||
|         'hz': 'her', | ||||
|         'ia': 'ina', | ||||
|         'id': 'ind', | ||||
|         'ie': 'ile', | ||||
|         'ig': 'ibo', | ||||
|         'ii': 'iii', | ||||
|         'ik': 'ipk', | ||||
|         'io': 'ido', | ||||
|         'is': 'isl', | ||||
|         'it': 'ita', | ||||
|         'iu': 'iku', | ||||
|         'ja': 'jpn', | ||||
|         'jv': 'jav', | ||||
|         'ka': 'kat', | ||||
|         'kg': 'kon', | ||||
|         'ki': 'kik', | ||||
|         'kj': 'kua', | ||||
|         'kk': 'kaz', | ||||
|         'kl': 'kal', | ||||
|         'km': 'khm', | ||||
|         'kn': 'kan', | ||||
|         'ko': 'kor', | ||||
|         'kr': 'kau', | ||||
|         'ks': 'kas', | ||||
|         'ku': 'kur', | ||||
|         'kv': 'kom', | ||||
|         'kw': 'cor', | ||||
|         'ky': 'kir', | ||||
|         'la': 'lat', | ||||
|         'lb': 'ltz', | ||||
|         'lg': 'lug', | ||||
|         'li': 'lim', | ||||
|         'ln': 'lin', | ||||
|         'lo': 'lao', | ||||
|         'lt': 'lit', | ||||
|         'lu': 'lub', | ||||
|         'lv': 'lav', | ||||
|         'mg': 'mlg', | ||||
|         'mh': 'mah', | ||||
|         'mi': 'mri', | ||||
|         'mk': 'mkd', | ||||
|         'ml': 'mal', | ||||
|         'mn': 'mon', | ||||
|         'mr': 'mar', | ||||
|         'ms': 'msa', | ||||
|         'mt': 'mlt', | ||||
|         'my': 'mya', | ||||
|         'na': 'nau', | ||||
|         'nb': 'nob', | ||||
|         'nd': 'nde', | ||||
|         'ne': 'nep', | ||||
|         'ng': 'ndo', | ||||
|         'nl': 'nld', | ||||
|         'nn': 'nno', | ||||
|         'no': 'nor', | ||||
|         'nr': 'nbl', | ||||
|         'nv': 'nav', | ||||
|         'ny': 'nya', | ||||
|         'oc': 'oci', | ||||
|         'oj': 'oji', | ||||
|         'om': 'orm', | ||||
|         'or': 'ori', | ||||
|         'os': 'oss', | ||||
|         'pa': 'pan', | ||||
|         'pi': 'pli', | ||||
|         'pl': 'pol', | ||||
|         'ps': 'pus', | ||||
|         'pt': 'por', | ||||
|         'qu': 'que', | ||||
|         'rm': 'roh', | ||||
|         'rn': 'run', | ||||
|         'ro': 'ron', | ||||
|         'ru': 'rus', | ||||
|         'rw': 'kin', | ||||
|         'sa': 'san', | ||||
|         'sc': 'srd', | ||||
|         'sd': 'snd', | ||||
|         'se': 'sme', | ||||
|         'sg': 'sag', | ||||
|         'si': 'sin', | ||||
|         'sk': 'slk', | ||||
|         'sl': 'slv', | ||||
|         'sm': 'smo', | ||||
|         'sn': 'sna', | ||||
|         'so': 'som', | ||||
|         'sq': 'sqi', | ||||
|         'sr': 'srp', | ||||
|         'ss': 'ssw', | ||||
|         'st': 'sot', | ||||
|         'su': 'sun', | ||||
|         'sv': 'swe', | ||||
|         'sw': 'swa', | ||||
|         'ta': 'tam', | ||||
|         'te': 'tel', | ||||
|         'tg': 'tgk', | ||||
|         'th': 'tha', | ||||
|         'ti': 'tir', | ||||
|         'tk': 'tuk', | ||||
|         'tl': 'tgl', | ||||
|         'tn': 'tsn', | ||||
|         'to': 'ton', | ||||
|         'tr': 'tur', | ||||
|         'ts': 'tso', | ||||
|         'tt': 'tat', | ||||
|         'tw': 'twi', | ||||
|         'ty': 'tah', | ||||
|         'ug': 'uig', | ||||
|         'uk': 'ukr', | ||||
|         'ur': 'urd', | ||||
|         'uz': 'uzb', | ||||
|         've': 'ven', | ||||
|         'vi': 'vie', | ||||
|         'vo': 'vol', | ||||
|         'wa': 'wln', | ||||
|         'wo': 'wol', | ||||
|         'xh': 'xho', | ||||
|         'yi': 'yid', | ||||
|         'yo': 'yor', | ||||
|         'za': 'zha', | ||||
|         'zh': 'zho', | ||||
|         'zu': 'zul', | ||||
|     } | ||||
|  | ||||
|     def __init__(self, downloader=None, subtitlesformat='srt'): | ||||
|         """Set up the post-processor and remember the subtitle format. | ||||
|  | ||||
|         downloader is handed to the parent class constructor; | ||||
|         subtitlesformat is stored in self._subformat, which run() passes to | ||||
|         subtitles_filename() to build the subtitle file names. | ||||
|         """ | ||||
|         super(FFmpegEmbedSubtitlePP, self).__init__(downloader) | ||||
|         self._subformat = subtitlesformat | ||||
|  | ||||
|     @classmethod | ||||
|     def _conver_lang_code(cls, code): | ||||
|         """Convert language code from ISO 639-1 to ISO 639-2/T. | ||||
|  | ||||
|         Only the first two characters of code are looked up in _lang_map; | ||||
|         None is returned when they are not a key of the map. | ||||
|         """ | ||||
|         return cls._lang_map.get(code[:2]) | ||||
|  | ||||
|     def run(self, information): | ||||
|         if information['ext'] != u'mp4': | ||||
|             self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files') | ||||
|             return True, information | ||||
|         sub_langs = [key for key in information['subtitles']] | ||||
|  | ||||
|         filename = information['filepath'] | ||||
|         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs] | ||||
|  | ||||
|         opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy'] | ||||
|         for (i, lang) in enumerate(sub_langs): | ||||
|             opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text']) | ||||
|             lang_code = self._conver_lang_code(lang) | ||||
|             if lang_code is not None: | ||||
|                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) | ||||
|         opts.extend(['-f', 'mp4']) | ||||
|  | ||||
|         temp_filename = filename + u'.temp' | ||||
|         self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename) | ||||
|         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) | ||||
|         os.remove(encodeFilename(filename)) | ||||
|         os.rename(encodeFilename(temp_filename), encodeFilename(filename)) | ||||
|  | ||||
|         return True, information | ||||
|   | ||||
| @@ -76,7 +76,7 @@ class YoutubeDL(object): | ||||
|     allsubtitles:      Downloads all the subtitles of the video | ||||
|     listsubtitles:     Lists all available subtitles for the video | ||||
|     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt) | ||||
|     subtitleslang:     Language of the subtitles to download | ||||
|     subtitleslangs:    List of languages of the subtitles to download | ||||
|     keepvideo:         Keep the video file after post-processing | ||||
|     daterange:         A DateRange object, download only if the upload_date is in the range. | ||||
|     skip_download:     Skip the actual download of the video file | ||||
| @@ -264,7 +264,7 @@ class YoutubeDL(object): | ||||
|             self.report_error(u'Erroneous output template') | ||||
|             return None | ||||
|         except ValueError as err: | ||||
|             self.report_error(u'Insufficient system charset ' + repr(preferredencoding())) | ||||
|             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')') | ||||
|             return None | ||||
|  | ||||
|     def _match_entry(self, info_dict): | ||||
| @@ -483,41 +483,28 @@ class YoutubeDL(object): | ||||
|                 self.report_error(u'Cannot write description file ' + descfn) | ||||
|                 return | ||||
|  | ||||
|         if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']: | ||||
|         subtitles_are_requested = any([self.params.get('writesubtitles', False), | ||||
|                                        self.params.get('writeautomaticsub'), | ||||
|                                        self.params.get('allsubtitles', False)]) | ||||
|  | ||||
|         if  subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: | ||||
|             # subtitles download errors are already managed as troubles in relevant IE | ||||
|             # that way it will silently go on when used with unsupporting IE | ||||
|             subtitle = info_dict['subtitles'][0] | ||||
|             (sub_error, sub_lang, sub) = subtitle | ||||
|             subtitles = info_dict['subtitles'] | ||||
|             sub_format = self.params.get('subtitlesformat') | ||||
|             if sub_error: | ||||
|                 self.report_warning("Some error while getting the subtitles") | ||||
|             else: | ||||
|             for sub_lang in subtitles.keys(): | ||||
|                 sub = subtitles[sub_lang] | ||||
|                 if sub is None: | ||||
|                     continue | ||||
|                 try: | ||||
|                     sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format | ||||
|                     sub_filename = subtitles_filename(filename, sub_lang, sub_format) | ||||
|                     self.report_writesubtitles(sub_filename) | ||||
|                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||
|                         subfile.write(sub) | ||||
|                             subfile.write(sub) | ||||
|                 except (OSError, IOError): | ||||
|                     self.report_error(u'Cannot write subtitles file ' + descfn) | ||||
|                     return | ||||
|  | ||||
|         if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: | ||||
|             subtitles = info_dict['subtitles'] | ||||
|             sub_format = self.params.get('subtitlesformat') | ||||
|             for subtitle in subtitles: | ||||
|                 (sub_error, sub_lang, sub) = subtitle | ||||
|                 if sub_error: | ||||
|                     self.report_warning("Some error while getting the subtitles") | ||||
|                 else: | ||||
|                     try: | ||||
|                         sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format | ||||
|                         self.report_writesubtitles(sub_filename) | ||||
|                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||
|                                 subfile.write(sub) | ||||
|                     except (OSError, IOError): | ||||
|                         self.report_error(u'Cannot write subtitles file ' + descfn) | ||||
|                         return | ||||
|  | ||||
|         if self.params.get('writeinfojson', False): | ||||
|             infofn = filename + u'.info.json' | ||||
|             self.report_writeinfojson(infofn) | ||||
| @@ -547,7 +534,7 @@ class YoutubeDL(object): | ||||
|                 try: | ||||
|                     success = self.fd._do_download(filename, info_dict) | ||||
|                 except (OSError, IOError) as err: | ||||
|                     raise UnavailableVideoError() | ||||
|                     raise UnavailableVideoError(err) | ||||
|                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                     self.report_error(u'unable to download video data: %s' % str(err)) | ||||
|                     return | ||||
| @@ -594,7 +581,7 @@ class YoutubeDL(object): | ||||
|                         # No clear decision yet, let IE decide | ||||
|                         keep_video = keep_video_wish | ||||
|             except PostProcessingError as e: | ||||
|                 self.to_stderr(u'ERROR: ' + e.msg) | ||||
|                 self.report_error(e.msg) | ||||
|         if keep_video is False and not self.params.get('keepvideo', False): | ||||
|             try: | ||||
|                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) | ||||
|   | ||||
| @@ -27,6 +27,7 @@ __authors__  = ( | ||||
|     'Johny Mo Swag', | ||||
|     'Axel Noack', | ||||
|     'Albert Kim', | ||||
|     'Pierre Rudloff', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
| @@ -82,6 +83,9 @@ def parseOpts(overrideArguments=None): | ||||
|  | ||||
|         return "".join(opts) | ||||
|  | ||||
|     def _comma_separated_values_options_callback(option, opt_str, value, parser): | ||||
|         """optparse callback: split value on commas and store the list in option.dest.""" | ||||
|         setattr(parser.values, option.dest, value.split(',')) | ||||
|  | ||||
|     def _find_term_columns(): | ||||
|         columns = os.environ.get('COLUMNS', None) | ||||
|         if columns: | ||||
| @@ -119,6 +123,7 @@ def parseOpts(overrideArguments=None): | ||||
|     selection      = optparse.OptionGroup(parser, 'Video Selection') | ||||
|     authentication = optparse.OptionGroup(parser, 'Authentication Options') | ||||
|     video_format   = optparse.OptionGroup(parser, 'Video Format Options') | ||||
|     subtitles      = optparse.OptionGroup(parser, 'Subtitle Options') | ||||
|     downloader     = optparse.OptionGroup(parser, 'Download Options') | ||||
|     postproc       = optparse.OptionGroup(parser, 'Post-processing Options') | ||||
|     filesystem     = optparse.OptionGroup(parser, 'Filesystem Options') | ||||
| @@ -185,27 +190,29 @@ def parseOpts(overrideArguments=None): | ||||
|             action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') | ||||
|     video_format.add_option('-F', '--list-formats', | ||||
|             action='store_true', dest='listformats', help='list all available formats (currently youtube only)') | ||||
|     video_format.add_option('--write-sub', '--write-srt', | ||||
|  | ||||
|     subtitles.add_option('--write-sub', '--write-srt', | ||||
|             action='store_true', dest='writesubtitles', | ||||
|             help='write subtitle file (currently youtube only)', default=False) | ||||
|     video_format.add_option('--write-auto-sub', '--write-automatic-sub', | ||||
|     subtitles.add_option('--write-auto-sub', '--write-automatic-sub', | ||||
|             action='store_true', dest='writeautomaticsub', | ||||
|             help='write automatic subtitle file (currently youtube only)', default=False) | ||||
|     video_format.add_option('--only-sub', | ||||
|     subtitles.add_option('--only-sub', | ||||
|             action='store_true', dest='skip_download', | ||||
|             help='[deprecated] alias of --skip-download', default=False) | ||||
|     video_format.add_option('--all-subs', | ||||
|     subtitles.add_option('--all-subs', | ||||
|             action='store_true', dest='allsubtitles', | ||||
|             help='downloads all the available subtitles of the video (currently youtube only)', default=False) | ||||
|     video_format.add_option('--list-subs', | ||||
|             help='downloads all the available subtitles of the video', default=False) | ||||
|     subtitles.add_option('--list-subs', | ||||
|             action='store_true', dest='listsubtitles', | ||||
|             help='lists all available subtitles for the video (currently youtube only)', default=False) | ||||
|     video_format.add_option('--sub-format', | ||||
|             help='lists all available subtitles for the video', default=False) | ||||
|     subtitles.add_option('--sub-format', | ||||
|             action='store', dest='subtitlesformat', metavar='FORMAT', | ||||
|             help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt') | ||||
|     video_format.add_option('--sub-lang', '--srt-lang', | ||||
|             action='store', dest='subtitleslang', metavar='LANG', | ||||
|             help='language of the subtitles to download (optional) use IETF language tags like \'en\'') | ||||
|             help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt') | ||||
|     subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang', | ||||
|             action='callback', dest='subtitleslang', metavar='LANGS', type='str', | ||||
|             default=[], callback=_comma_separated_values_options_callback, | ||||
|             help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') | ||||
|  | ||||
|     downloader.add_option('-r', '--rate-limit', | ||||
|             dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') | ||||
| @@ -320,6 +327,8 @@ def parseOpts(overrideArguments=None): | ||||
|             help='keeps the video file on disk after the post-processing; the video is erased by default') | ||||
|     postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, | ||||
|             help='do not overwrite post-processed files; the post-processed files are overwritten by default') | ||||
|     postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, | ||||
|             help='embed subtitles in the video (only for mp4 videos)') | ||||
|  | ||||
|  | ||||
|     parser.add_option_group(general) | ||||
| @@ -328,6 +337,7 @@ def parseOpts(overrideArguments=None): | ||||
|     parser.add_option_group(filesystem) | ||||
|     parser.add_option_group(verbosity) | ||||
|     parser.add_option_group(video_format) | ||||
|     parser.add_option_group(subtitles) | ||||
|     parser.add_option_group(authentication) | ||||
|     parser.add_option_group(postproc) | ||||
|  | ||||
| @@ -398,6 +408,8 @@ def _real_main(argv=None): | ||||
|             batchurls = batchfd.readlines() | ||||
|             batchurls = [x.strip() for x in batchurls] | ||||
|             batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] | ||||
|             if opts.verbose: | ||||
|                 sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') | ||||
|         except IOError: | ||||
|             sys.exit(u'ERROR: batch file could not be read') | ||||
|     all_urls = batchurls + args | ||||
| @@ -565,7 +577,7 @@ def _real_main(argv=None): | ||||
|         'allsubtitles': opts.allsubtitles, | ||||
|         'listsubtitles': opts.listsubtitles, | ||||
|         'subtitlesformat': opts.subtitlesformat, | ||||
|         'subtitleslang': opts.subtitleslang, | ||||
|         'subtitleslangs': opts.subtitleslang, | ||||
|         'matchtitle': decodeOption(opts.matchtitle), | ||||
|         'rejecttitle': decodeOption(opts.rejecttitle), | ||||
|         'max_downloads': opts.max_downloads, | ||||
| @@ -605,6 +617,8 @@ def _real_main(argv=None): | ||||
|         ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) | ||||
|     if opts.recodevideo: | ||||
|         ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) | ||||
|     if opts.embedsubtitles: | ||||
|         ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat)) | ||||
|  | ||||
|     # Update version | ||||
|     if opts.update_self: | ||||
|   | ||||
| @@ -12,7 +12,7 @@ from .comedycentral import ComedyCentralIE | ||||
| from .condenast import CondeNastIE | ||||
| from .criterion import CriterionIE | ||||
| from .cspan import CSpanIE | ||||
| from .dailymotion import DailymotionIE | ||||
| from .dailymotion import DailymotionIE, DailymotionPlaylistIE | ||||
| from .depositfiles import DepositFilesIE | ||||
| from .dotsub import DotsubIE | ||||
| from .dreisat import DreiSatIE | ||||
| @@ -36,23 +36,31 @@ from .ign import IGNIE, OneUPIE | ||||
| from .ina import InaIE | ||||
| from .infoq import InfoQIE | ||||
| from .instagram import InstagramIE | ||||
| from .jeuxvideo import JeuxVideoIE | ||||
| from .jukebox import JukeboxIE | ||||
| from .justintv import JustinTVIE | ||||
| from .kankan import KankanIE | ||||
| from .keek import KeekIE | ||||
| from .liveleak import LiveLeakIE | ||||
| from .livestream import LivestreamIE | ||||
| from .metacafe import MetacafeIE | ||||
| from .mixcloud import MixcloudIE | ||||
| from .mtv import MTVIE | ||||
| from .muzu import MuzuTVIE | ||||
| from .myspass import MySpassIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .nba import NBAIE | ||||
| from .ooyala import OoyalaIE | ||||
| from .pbs import PBSIE | ||||
| from .photobucket import PhotobucketIE | ||||
| from .pornotube import PornotubeIE | ||||
| from .rbmaradio import RBMARadioIE | ||||
| from .redtube import RedTubeIE | ||||
| from .ringtv import RingTVIE | ||||
| from .roxwel import RoxwelIE | ||||
| from .rtlnow import RTLnowIE | ||||
| from .sina import SinaIE | ||||
| from .slashdot import SlashdotIE | ||||
| from .soundcloud import SoundcloudIE, SoundcloudSetIE | ||||
| from .spiegel import SpiegelIE | ||||
| from .stanfordoc import StanfordOpenClassroomIE | ||||
| @@ -67,10 +75,12 @@ from .tudou import TudouIE | ||||
| from .tumblr import TumblrIE | ||||
| from .tutv import TutvIE | ||||
| from .ustream import UstreamIE | ||||
| from .unistra import UnistraIE | ||||
| from .vbox7 import Vbox7IE | ||||
| from .veoh import VeohIE | ||||
| from .vevo import VevoIE | ||||
| from .vimeo import VimeoIE | ||||
| from .videofyme import VideofyMeIE | ||||
| from .vimeo import VimeoIE, VimeoChannelIE | ||||
| from .vine import VineIE | ||||
| from .c56 import C56IE | ||||
| from .wat import WatIE | ||||
| @@ -93,6 +103,8 @@ from .youtube import ( | ||||
|     YoutubeShowIE, | ||||
|     YoutubeSubscriptionsIE, | ||||
|     YoutubeRecommendedIE, | ||||
|     YoutubeWatchLaterIE, | ||||
|     YoutubeFavouritesIE, | ||||
| ) | ||||
| from .zdf import ZDFIE | ||||
|  | ||||
|   | ||||
| @@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor): | ||||
|     """ | ||||
|     _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' | ||||
|     _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html' | ||||
|     _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)' | ||||
|     _LIVE_URL = r'index-[0-9]+\.html$' | ||||
|  | ||||
|     IE_NAME = u'arte.tv' | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL)) | ||||
|         return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL)) | ||||
|  | ||||
|     # TODO implement Live Stream | ||||
|     # from ..utils import compat_urllib_parse | ||||
| @@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor): | ||||
|             lang = mobj.group('lang') | ||||
|             return self._extract_video(url, id, lang) | ||||
|  | ||||
|         mobj = re.match(self._LIVEWEB_URL, url) | ||||
|         if mobj is not None: | ||||
|             name = mobj.group('name') | ||||
|             lang = mobj.group('lang') | ||||
|             return self._extract_liveweb(url, name, lang) | ||||
|  | ||||
|         if re.search(self._LIVE_URL, video_id) is not None: | ||||
|             raise ExtractorError(u'Arte live streams are not yet supported, sorry') | ||||
|             # self.extractLiveStream(url) | ||||
| @@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor): | ||||
|  | ||||
|         info_dict = {'id': player_info['VID'], | ||||
|                      'title': player_info['VTI'], | ||||
|                      'description': player_info['VDE'], | ||||
|                      'description': player_info.get('VDE'), | ||||
|                      'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]), | ||||
|                      'thumbnail': player_info['programImage'], | ||||
|                      'ext': 'flv', | ||||
| @@ -98,12 +105,14 @@ class ArteTvIE(InfoExtractor): | ||||
|                 l = 'F' | ||||
|             elif lang == 'de': | ||||
|                 l = 'A' | ||||
|             regexes = [r'VO?%s' % l, r'V%s-ST.' % l] | ||||
|             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] | ||||
|             return any(re.match(r, f['versionCode']) for r in regexes) | ||||
|         # Some formats may not be in the same language as the url | ||||
|         formats = filter(_match_lang, formats) | ||||
|         # We order the formats by quality | ||||
|         formats = sorted(formats, key=lambda f: int(f['height'])) | ||||
|         # Prefer videos without subtitles in the same language | ||||
|         formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None) | ||||
|         # Pick the best quality | ||||
|         format_info = formats[-1] | ||||
|         if format_info['mediaType'] == u'rtmp': | ||||
| @@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor): | ||||
|                 'url': video_url, | ||||
|                 'ext': 'flv', | ||||
|                 } | ||||
|  | ||||
|     def _extract_liveweb(self, url, name, lang): | ||||
|         """Extract form http://liveweb.arte.tv/""" | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') | ||||
|         config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, | ||||
|                                             video_id, u'Downloading information') | ||||
|         config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) | ||||
|         event_doc = config_doc.find('event') | ||||
|         url_node = event_doc.find('video').find('urlHd') | ||||
|         if url_node is None: | ||||
|             url_node = video_doc.find('urlSd') | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': event_doc.find('name%s' % lang.capitalize()).text, | ||||
|                 'url': url_node.text.replace('MP4', 'mp4'), | ||||
|                 'ext': 'flv', | ||||
|                 'thumbnail': self._og_search_thumbnail(webpage), | ||||
|                 } | ||||
|   | ||||
| @@ -4,15 +4,16 @@ import xml.etree.ElementTree | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     determine_ext, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CollegeHumorIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$' | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', | ||||
|         u'file': u'6902724.mp4', | ||||
|         u'md5': u'1264c12ad95dca142a9f0bf7968105a0', | ||||
| @@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             u'title': u'Comic-Con Cosplay Catastrophe', | ||||
|             u'description': u'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.', | ||||
|         }, | ||||
|     } | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://www.collegehumor.com/video/3505939/font-conference', | ||||
|         u'file': u'3505939.mp4', | ||||
|         u'md5': u'c51ca16b82bb456a4397987791a835f5', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Font Conference', | ||||
|             u'description': u'This video wasn\'t long enough, so we made it double-spaced.', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -49,25 +59,29 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             info['description'] = videoNode.findall('./description')[0].text | ||||
|             info['title'] = videoNode.findall('./caption')[0].text | ||||
|             info['thumbnail'] = videoNode.findall('./thumbnail')[0].text | ||||
|             manifest_url = videoNode.findall('./file')[0].text | ||||
|             next_url = videoNode.findall('./file')[0].text | ||||
|         except IndexError: | ||||
|             raise ExtractorError(u'Invalid metadata XML file') | ||||
|  | ||||
|         manifest_url += '?hdcore=2.10.3' | ||||
|         manifestXml = self._download_webpage(manifest_url, video_id, | ||||
|                                              u'Downloading XML manifest', | ||||
|                                              u'Unable to download video info XML') | ||||
|         if next_url.endswith(u'manifest.f4m'): | ||||
|             manifest_url = next_url + '?hdcore=2.10.3' | ||||
|             manifestXml = self._download_webpage(manifest_url, video_id, | ||||
|                                          u'Downloading XML manifest', | ||||
|                                          u'Unable to download video info XML') | ||||
|  | ||||
|         adoc = xml.etree.ElementTree.fromstring(manifestXml) | ||||
|         try: | ||||
|             media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0] | ||||
|             node_id = media_node.attrib['url'] | ||||
|             video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text | ||||
|         except IndexError as err: | ||||
|             raise ExtractorError(u'Invalid manifest file') | ||||
|             adoc = xml.etree.ElementTree.fromstring(manifestXml) | ||||
|             try: | ||||
|                 media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0] | ||||
|                 node_id = media_node.attrib['url'] | ||||
|                 video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text | ||||
|             except IndexError as err: | ||||
|                 raise ExtractorError(u'Invalid manifest file') | ||||
|             url_pr = compat_urllib_parse_urlparse(info['thumbnail']) | ||||
|             info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','') | ||||
|             info['ext'] = 'mp4' | ||||
|         else: | ||||
|             # Old-style direct links | ||||
|             info['url'] = next_url | ||||
|             info['ext'] = determine_ext(info['url']) | ||||
|  | ||||
|         url_pr = compat_urllib_parse_urlparse(info['thumbnail']) | ||||
|  | ||||
|         info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','') | ||||
|         info['ext'] = 'mp4' | ||||
|         return [info] | ||||
|         return info | ||||
|   | ||||
| @@ -47,7 +47,8 @@ class InfoExtractor(object): | ||||
|     uploader_id:    Nickname or id of the video uploader. | ||||
|     location:       Physical location of the video. | ||||
|     player_url:     SWF Player URL (used for rtmpdump). | ||||
|     subtitles:      The subtitle file contents. | ||||
|     subtitles:      The subtitle file contents as a dictionary in the format | ||||
|                     {language: subtitles}. | ||||
|     view_count:     How many users have watched the video on the platform. | ||||
|     urlhandle:      [internal] The urlHandle to be used to download the file, | ||||
|                     like returned by urllib.request.urlopen | ||||
| @@ -77,7 +78,13 @@ class InfoExtractor(object): | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         return re.match(cls._VALID_URL, url) is not None | ||||
|  | ||||
|         # This does not use has/getattr intentionally - we want to know whether | ||||
|         # we have cached the regexp for *this* class, whereas getattr would also | ||||
|         # match the superclass | ||||
|         if '_VALID_URL_RE' not in cls.__dict__: | ||||
|             cls._VALID_URL_RE = re.compile(cls._VALID_URL) | ||||
|         return cls._VALID_URL_RE.match(url) is not None | ||||
|  | ||||
|     @classmethod | ||||
|     def working(cls): | ||||
|   | ||||
| @@ -1,9 +1,12 @@ | ||||
| import re | ||||
| import json | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     get_element_by_attribute, | ||||
|     get_element_by_id, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
| @@ -77,3 +80,31 @@ class DailymotionIE(InfoExtractor): | ||||
|             'ext':      video_extension, | ||||
|             'thumbnail': info['thumbnail_url'] | ||||
|         }] | ||||
|  | ||||
|  | ||||
| class DailymotionPlaylistIE(InfoExtractor): | ||||
|     """Extractor for dailymotion playlist urls; returns a playlist of video url results.""" | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' | ||||
|     # Matched against a playlist page to decide whether another page follows | ||||
|     _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         """Collect the video ids of every page of the playlist. | ||||
|  | ||||
|         Returns a dict with '_type' set to 'playlist', the playlist id, | ||||
|         its title and one url result per video. | ||||
|         """ | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id =  mobj.group('id') | ||||
|         video_ids = [] | ||||
|  | ||||
|         # Pages are numbered from 1; stop at the first page without a "next" link | ||||
|         for pagenum in itertools.count(1): | ||||
|             webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum), | ||||
|                                              playlist_id, u'Downloading page %s' % pagenum) | ||||
|  | ||||
|             playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) | ||||
|             video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el)) | ||||
|  | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: | ||||
|                 break | ||||
|  | ||||
|         entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') | ||||
|                    for video_id in video_ids] | ||||
|         # The title is read from the last page that was downloaded | ||||
|         return {'_type': 'playlist', | ||||
|                 'id': playlist_id, | ||||
|                 'title': get_element_by_id(u'playlist_name', webpage), | ||||
|                 'entries': entries, | ||||
|                 } | ||||
|   | ||||
| @@ -8,17 +8,30 @@ class ExfmIE(InfoExtractor): | ||||
|     IE_NAME = u'exfm' | ||||
|     IE_DESC = u'ex.fm' | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)' | ||||
|     _SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream' | ||||
|     _TEST = { | ||||
|         u'url': u'http://ex.fm/song/1bgtzg', | ||||
|         u'file': u'1bgtzg.mp3', | ||||
|         u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf', | ||||
|         u'info_dict': { | ||||
|             u"title": u"We Can't Stop", | ||||
|             u"uploader": u"Miley Cyrus", | ||||
|             u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37' | ||||
|         } | ||||
|     } | ||||
|     _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u'url': u'http://ex.fm/song/1bgtzg', | ||||
|             u'file': u'95223130.mp3', | ||||
|             u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf', | ||||
|             u'info_dict': { | ||||
|                 u"title": u"We Can't Stop - Miley Cyrus", | ||||
|                 u"uploader": u"Miley Cyrus", | ||||
|                 u'upload_date': u'20130603', | ||||
|                 u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC', | ||||
|             }, | ||||
|             u'note': u'Soundcloud song', | ||||
|         }, | ||||
|         { | ||||
|             u'url': u'http://ex.fm/song/wddt8', | ||||
|             u'file': u'wddt8.mp3', | ||||
|             u'md5': u'966bd70741ac5b8570d8e45bfaed3643', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Safe and Sound', | ||||
|                 u'uploader': u'Capital Cities', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -26,11 +39,10 @@ class ExfmIE(InfoExtractor): | ||||
|         info_url = "http://ex.fm/api/v3/song/%s" %(song_id) | ||||
|         webpage = self._download_webpage(info_url, song_id) | ||||
|         info = json.loads(webpage) | ||||
|         song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url']) | ||||
|         if song_url is not None: | ||||
|         	song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28" | ||||
|         else: | ||||
|         	song_url = info['song']['url'] | ||||
|         song_url = info['song']['url'] | ||||
|         if re.match(self._SOUNDCLOUD_URL, song_url) is not None: | ||||
|             self.to_screen('Soundcloud song detected') | ||||
|             return self.url_result(song_url.replace('/stream',''), 'Soundcloud') | ||||
|         return [{ | ||||
|             'id':          song_id, | ||||
|             'url':         song_url, | ||||
|   | ||||
| @@ -21,17 +21,14 @@ class FunnyOrDieIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', | ||||
|         video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"', | ||||
|             webpage, u'video URL', flags=re.DOTALL) | ||||
|  | ||||
|         title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>", | ||||
|             r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL) | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': title, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
|         return [info] | ||||
|   | ||||
| @@ -107,8 +107,13 @@ class GenericIE(InfoExtractor): | ||||
|         return new_url | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         new_url = self._test_redirect(url) | ||||
|         if new_url: return [self.url_result(new_url)] | ||||
|         try: | ||||
|             new_url = self._test_redirect(url) | ||||
|             if new_url: | ||||
|                 return [self.url_result(new_url)] | ||||
|         except compat_urllib_error.HTTPError: | ||||
|             # This may be a stupid server that doesn't like HEAD, our UA, or so | ||||
|             pass | ||||
|  | ||||
|         video_id = url.split('/')[-1] | ||||
|         try: | ||||
| @@ -144,6 +149,9 @@ class GenericIE(InfoExtractor): | ||||
|             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: | ||||
|             if m_video_type is not None: | ||||
|                 mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) | ||||
|         if mobj is None: | ||||
|             # HTML5 video | ||||
|             mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from .common import InfoExtractor | ||||
|  | ||||
| class InaIE(InfoExtractor): | ||||
|     """Information Extractor for Ina.fr""" | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*' | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*' | ||||
|     _TEST = { | ||||
|         u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', | ||||
|         u'file': u'I12055569.mp4', | ||||
|   | ||||
							
								
								
									
										47
									
								
								youtube_dl/extractor/jeuxvideo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								youtube_dl/extractor/jeuxvideo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| # coding: utf-8 | ||||
|  | ||||
| import json | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
class JeuxVideoIE(InfoExtractor):
    """Information extractor for videos published on jeuxvideo.com pages."""

    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'

    _TEST = {
        u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
        u'file': u'5182.mp4',
        u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
        u'info_dict': {
            u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
            u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
        },
    }

    def _real_extract(self, url):
        """Extract the video information for a jeuxvideo.com page URL.

        The page embeds a flash player whose ``config`` flashvar points to an
        XML document; that document carries the title, the thumbnail and a
        JSON blob listing the available file versions.

        Returns a dict with the keys 'id', 'title', 'ext', 'url',
        'description' and 'thumbnail'.

        Missing patterns are reported through ``self._search_regex`` rather
        than surfacing as an AttributeError on a ``None`` match object.
        """
        # The slug taken from the URL is only used to label the downloads;
        # the real video id is read from the config URL below.
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)

        xml_link = self._search_regex(
            r'<param name="flashvars" value="config=(.*?)" />',
            webpage, u'config URL')
        video_id = self._search_regex(
            r'http://www\.jeuxvideo\.com/config/\w+/0011/(.*?)/\d+_player\.xml',
            xml_link, u'video id')

        xml_config = self._download_webpage(xml_link, title,
                                            'Downloading XML config')
        config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))

        # The JSON payload is embedded as text inside the XML document, so it
        # is cut out of the raw string instead of the parsed tree.
        versions_json = self._search_regex(
            r'<format\.json>(.*?)</format\.json>',
            xml_config, u'format info', flags=re.MULTILINE | re.DOTALL)
        # Only the first listed version is used.
        info = json.loads(versions_json)['versions'][0]

        video_url = 'http://video720.jeuxvideo.com/' + info['file']

        return {'id': video_id,
                'title': config.find('titre_video').text,
                'ext': 'mp4',
                'url': video_url,
                'description': self._og_search_description(webpage),
                'thumbnail': config.find('image').text,
                }
							
								
								
									
										37
									
								
								youtube_dl/extractor/kankan.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								youtube_dl/extractor/kankan.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import determine_ext | ||||
|  | ||||
|  | ||||
class KankanIE(InfoExtractor):
    """Information extractor for kankan.com video pages."""

    _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'

    _TEST = {
        u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
        u'file': u'48863.flv',
        u'md5': u'29aca1e47ae68fc28804aca89f29507e',
        u'info_dict': {
            u'title': u'Ready To Go',
        },
    }

    def _real_extract(self, url):
        """Return the info dict ('id', 'title', 'url', 'ext') for *url*.

        The page supplies the title and a gcid token; the token is then
        exchanged for the host and path of the media file.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        video_title = self._search_regex(
            r'G_TITLE=[\'"](.+?)[\'"]', page, u'video title')
        gcid = self._search_regex(
            r'lurl:[\'"]http://.+?/.+?/(.+?)/', page, u'gcid')

        # Second request: resolve the gcid into the server and file path.
        cdn_query_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
        cdn_info = self._download_webpage(
            cdn_query_url, video_id, u'Downloading video url info')
        host = self._search_regex(r'ip:"(.+?)"', cdn_info, u'video url ip')
        file_path = self._search_regex(
            r'path:"(.+?)"', cdn_info, u'video url path')
        media_url = 'http://%s%s' % (host, file_path)

        result = {
            'id': video_id,
            'title': video_title,
            'url': media_url,
            'ext': determine_ext(media_url),
        }
        return result
| @@ -4,10 +4,10 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class KeekIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)' | ||||
|     IE_NAME = u'keek' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.keek.com/ytdl/keeks/NODfbab', | ||||
|         u'url': u'https://www.keek.com/ytdl/keeks/NODfbab', | ||||
|         u'file': u'NODfbab.mp4', | ||||
|         u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83', | ||||
|         u'info_dict': { | ||||
|   | ||||
							
								
								
									
										64
									
								
								youtube_dl/extractor/muzu.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								youtube_dl/extractor/muzu.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,64 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
class MuzuTVIE(InfoExtractor):
    """Information extractor for music videos on www.muzu.tv."""

    # The dots of the host name are escaped so that only the literal
    # www.muzu.tv host is accepted.
    _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
    IE_NAME = u'muzu.tv'

    _TEST = {
        u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
        u'file': u'1981454.mp4',
        u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
        u'info_dict': {
            u'title': u'Cat Walk (Original Mix)',
            u'description': u'md5:90e868994de201b2570e4e5854e19420',
            u'uploader': u'MarcAshken featuring SOS',
        },
    }

    def _real_extract(self, url):
        """Extract the video information for a muzu.tv video URL.

        Three requests are made: the oembed API for the metadata, the
        player init endpoint for the available qualities, and the
        requestVideo endpoint for the media URL.

        Returns a dict with the keys 'id', 'title', 'url', 'ext',
        'thumbnail', 'description' and 'uploader'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        info_data = compat_urllib_parse.urlencode({'format': 'json',
                                                   'url': url,
                                                   })
        video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
                                                 video_id, u'Downloading video info')
        info = json.loads(video_info_page)

        player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
                                                  video_id, u'Downloading player info')
        video_info = json.loads(player_info_page)['videos'][0]
        # Pick the highest quality the player advertises. When none of the
        # 'v<quality>' entries is set the loop runs to completion and
        # 'quality' keeps its last value, so '360' acts as the fallback.
        for quality in ['1080', '720', '480', '360']:
            if video_info.get('v%s' % quality):
                break

        data = compat_urllib_parse.urlencode({'ai': video_id,
                                              # Even if each time you watch a video the hash changes,
                                              # it seems to work for different videos, and it will work
                                              # even if you use any non empty string as a hash
                                              'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
                                              'device': 'web',
                                              'qv': quality,
                                              })
        video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
                                                video_id, u'Downloading video url')
        video_url_info = json.loads(video_url_page)
        video_url = video_url_info['url']

        return {'id': video_id,
                'title': info['title'],
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': info['thumbnail_url'],
                'description': info['description'],
                'uploader': info['author_name'],
                }
| @@ -2,11 +2,13 @@ import binascii | ||||
| import base64 | ||||
| import hashlib | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_ord, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
| @@ -16,7 +18,7 @@ from ..utils import ( | ||||
| class MyVideoIE(InfoExtractor): | ||||
|     """Information Extractor for myvideo.de.""" | ||||
|  | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*' | ||||
|     IE_NAME = u'myvideo' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', | ||||
| @@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor): | ||||
|                 'ext':      video_ext, | ||||
|             }] | ||||
|  | ||||
|         mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage) | ||||
|         if mobj is not None: | ||||
|             request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '') | ||||
|             response = self._download_webpage(request, video_id, | ||||
|                                               u'Downloading video info') | ||||
|             info = json.loads(base64.b64decode(response).decode('utf-8')) | ||||
|             return {'id': video_id, | ||||
|                     'title': info['title'], | ||||
|                     'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), | ||||
|                     'play_path': info['filename'], | ||||
|                     'ext': 'flv', | ||||
|                     'thumbnail': info['thumbnail'][0]['url'], | ||||
|                     } | ||||
|  | ||||
|         # try encxml | ||||
|         mobj = re.search('var flashvars={(.+?)}', webpage) | ||||
|         if mobj is None: | ||||
|   | ||||
							
								
								
									
										52
									
								
								youtube_dl/extractor/ooyala.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								youtube_dl/extractor/ooyala.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,52 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unescapeHTML | ||||
|  | ||||
class OoyalaIE(InfoExtractor):
    """Information extractor for Ooyala player URLs carrying an embedCode."""

    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'

    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
        u'info_dict': {
            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
        },
    }

    def _extract_result(self, info, more_info):
        """Build one video dict from a stream entry and its metadata entry."""
        result = {
            'id': info['embedCode'],
            'ext': 'mp4',
            'title': unescapeHTML(info['title']),
            'url': info['url'],
            'description': unescapeHTML(more_info['description']),
            'thumbnail': more_info['promo'],
        }
        return result

    def _real_extract(self, url):
        """Return a single video dict, or a playlist when a lineup is present."""
        embed_code = re.match(self._VALID_URL, url).group('id')

        player_js = self._download_webpage(
            'http://player.ooyala.com/player.js?embedCode=%s' % embed_code,
            embed_code)
        mobile_url = self._search_regex(
            r'mobile_player_url="(.+?)&device="', player_js,
            u'mobile player url')
        mobile_js = self._download_webpage(mobile_url, embed_code)

        # Both JSON payloads sit inside eval("...") calls with escaped
        # double quotes; undo the escaping before parsing them.
        streams_raw = self._search_regex(
            r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);',
            mobile_js, u'info').replace('\\"', '"')
        details_raw = self._search_regex(
            r'eval\("\(({.*?\\"promo\\".*?})\)"',
            mobile_js, u'more info').replace('\\"', '"')
        streams = json.loads(streams_raw)
        details = json.loads(details_raw)

        if not details.get('lineup'):
            return self._extract_result(streams[0], details)

        entries = []
        for stream, stream_details in zip(streams, details['lineup']):
            entries.append(self._extract_result(stream, stream_details))
        return {'_type': 'playlist',
                'id': embed_code,
                'title': unescapeHTML(details['title']),
                'entries': entries,
                }
|          | ||||
							
								
								
									
										34
									
								
								youtube_dl/extractor/pbs.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								youtube_dl/extractor/pbs.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,34 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class PBSIE(InfoExtractor):
    """Information extractor for videos on video.pbs.org."""

    # The dots of the host name are escaped so that only the literal
    # video.pbs.org host is accepted.
    _VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'

    _TEST = {
        u'url': u'http://video.pbs.org/video/2365006249/',
        u'file': u'2365006249.mp4',
        u'md5': 'ce1888486f0908d555a8093cac9a7362',
        u'info_dict': {
            u'title': u'A More Perfect Union',
            u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
            u'duration': 3190,
        },
    }

    def _real_extract(self, url):
        """Extract the video information for a video.pbs.org URL.

        All fields come from the JSON document served by the videoInfo
        endpoint for the numeric id found in the URL.

        Returns a dict with the keys 'id', 'title', 'url', 'ext',
        'description', 'thumbnail' and 'duration'; the last three may be
        None when the JSON document lacks them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
        info_page = self._download_webpage(info_url, video_id)
        info = json.loads(info_page)
        return {'id': video_id,
                'title': info['title'],
                'url': info['alternate_encoding']['url'],
                'ext': 'mp4',
                'description': info['program'].get('description'),
                'thumbnail': info.get('image_url'),
                'duration': info.get('duration'),
                }
							
								
								
									
										49
									
								
								youtube_dl/extractor/roxwel.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								youtube_dl/extractor/roxwel.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate, determine_ext | ||||
|  | ||||
|  | ||||
class RoxwelIE(InfoExtractor):
    """Information extractor for roxwel.com player pages."""

    _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'

    _TEST = {
        u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
        u'file': u'passionpittakeawalklive.flv',
        u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
        u'info_dict': {
            u'title': u'Take A Walk (live)',
            u'uploader': u'Passion Pit',
            u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
        },
        u'skip': u'Requires rtmpdump',
    }

    def _real_extract(self, url):
        """Return the info dict for *url*, using the highest flv bitrate."""
        filename = re.match(self._VALID_URL, url).group('filename')
        api_page = self._download_webpage(
            'http://www.roxwel.com/api/videos/%s' % filename,
            filename, u'Downloading video info')

        self.report_extraction(filename)
        info = json.loads(api_page)

        # Rates are listed as 'flv_<number>'; keep the numbers and take the
        # largest one.
        flv_rates = [int(rate.replace('flv_', ''))
                     for rate in info['media_rates']
                     if rate.startswith('flv_')]
        flv_rates.sort()
        best_rate = flv_rates[-1]

        one_time_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
        rtmp_url = self._download_webpage(
            one_time_url, filename, u'Downloading video url')
        if determine_ext(rtmp_url) == 'f4v':
            # f4v streams get an 'mp4:' prefix in front of the file name.
            rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)

        result = {
            'id': filename,
            'title': info['title'],
            'url': rtmp_url,
            'ext': 'flv',
            'description': info['description'],
            'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
            'uploader': info['artist'],
            'uploader_id': info['artistname'],
            'upload_date': unified_strdate(info['dbdate']),
        }
        return result
							
								
								
									
										113
									
								
								youtube_dl/extractor/rtlnow.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										113
									
								
								youtube_dl/extractor/rtlnow.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,113 @@ | ||||
| # encoding: utf-8 | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
class RTLnowIE(InfoExtractor):
    """Information Extractor for RTLnow, RTL2now and VOXnow"""

    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    _TESTS = [
        {
            u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
            u'file': u'90419.flv',
            u'info_dict': {
                u'upload_date': u'20070416',
                u'title': u'Ahornallee - Folge 1 - Der Einzug',
                u'description': u'Folge 1 - Der Einzug',
            },
            u'params': {
                u'skip_download': True,
            },
            u'skip': u'Only works from Germany',
        },
        {
            u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
            u'file': u'69756.flv',
            u'info_dict': {
                u'upload_date': u'20120519',
                u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
                u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
                u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
            },
            u'params': {
                u'skip_download': True,
            },
            u'skip': u'Only works from Germany',
        },
        {
            u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
            u'file': u'13883.flv',
            u'info_dict': {
                u'upload_date': u'20090627',
                u'title': u'Voxtours - Südafrika-Reporter II',
                u'description': u'Südafrika-Reporter II',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for *url*.

        Raises ExtractorError with the site's own notice when the page shows
        one in place of the player, or when no media URL is found in the
        player data.
        """
        url_match = re.match(self._VALID_URL, url)

        webpage_url = u'http://' + url_match.group('url')
        video_page_url = u'http://' + url_match.group('base_url')
        video_id = url_match.group(u'video_id')

        webpage = self._download_webpage(webpage_url, video_id)

        # A notice block placed before the player teaser is relayed as the
        # error message.
        notice = re.search(r'''(?sx)
            <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
            <div[ ]id="playerteaser">''', webpage)
        if notice is not None:
            raise ExtractorError(clean_html(notice.group(1)))

        video_title = self._html_search_regex(
            r'<title>(?P<title>[^<]+)</title>', webpage, u'title')
        playerdata_url = self._html_search_regex(
            r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
            webpage, u'playerdata_url')

        playerdata = self._download_webpage(playerdata_url, video_id)

        # Description and broadcast date share the <title> of the player data.
        meta = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
        if meta is None:
            video_description = None
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract description and upload date')
        else:
            video_description = meta.group(u'description')
            # A two-digit year is completed with the prefix '20'.
            year = meta.group('upload_date_Y') or (u'20' + meta.group('upload_date_y'))
            video_upload_date = year + meta.group('upload_date_m') + meta.group('upload_date_d')

        # Thumbnail: not every video has a thumbnail
        thumb_match = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
        video_thumbnail = thumb_match.group(u'thumbnail') if thumb_match else None

        media = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
        if media is None:
            raise ExtractorError(u'Unable to extract media URL')
        video_url = media.group(u'url')
        video_play_path = u'mp4:' + media.group(u'play_path')
        video_player_url = video_page_url + u'includes/vodplayer.swf'

        info = {
            'id': video_id,
            'url': video_url,
            'play_path': video_play_path,
            'page_url': video_page_url,
            'player_url': video_player_url,
            'ext': 'flv',
            'title': video_title,
            'description': video_description,
            'upload_date': video_upload_date,
            'thumbnail': video_thumbnail,
        }
        return [info]
							
								
								
									
										23
									
								
								youtube_dl/extractor/slashdot.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								youtube_dl/extractor/slashdot.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class SlashdotIE(InfoExtractor):
    """Information extractor for embedded videos on tv.slashdot.org."""

    # The dots of the host name are escaped so that only the literal
    # tv.slashdot.org host is accepted.
    _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'

    _TEST = {
        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
        u'info_dict': {
            u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
        },
    }

    def _real_extract(self, url):
        """Hand the video over to the 'Ooyala' extractor.

        The URL of the first ``<script src="...">`` tag on the page is
        returned as a URL result for that extractor.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
        return self.url_result(ooyala_url, 'Ooyala')
| @@ -4,6 +4,7 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| @@ -19,7 +20,12 @@ class SoundcloudIE(InfoExtractor): | ||||
|        of the stream token and uid | ||||
|      """ | ||||
|  | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$' | ||||
|     _VALID_URL = r'''^(?:https?://)? | ||||
|                     (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$) | ||||
|                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) | ||||
|                        |(?P<widget>w.soundcloud.com/player/?.*?url=.*) | ||||
|                     ) | ||||
|                     ''' | ||||
|     IE_NAME = u'soundcloud' | ||||
|     _TEST = { | ||||
|         u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', | ||||
| @@ -33,59 +39,68 @@ class SoundcloudIE(InfoExtractor): | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None | ||||
|  | ||||
|     def report_resolve(self, video_id): | ||||
|         """Report information extraction.""" | ||||
|         self.to_screen(u'%s: Resolving id' % video_id) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|     @classmethod | ||||
|     def _resolv_url(cls, url): | ||||
|         return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID | ||||
|  | ||||
|         # extract uploader (which is in the url) | ||||
|         uploader = mobj.group(1) | ||||
|         # extract simple title (uploader + slug of song title) | ||||
|         slug_title =  mobj.group(2) | ||||
|         full_title = '%s/%s' % (uploader, slug_title) | ||||
|  | ||||
|         self.report_resolve(full_title) | ||||
|  | ||||
|         url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title) | ||||
|         resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|         info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON') | ||||
|  | ||||
|         info = json.loads(info_json) | ||||
|     def _extract_info_dict(self, info, full_title=None): | ||||
|         video_id = info['id'] | ||||
|         self.report_extraction(full_title) | ||||
|         name = full_title or video_id | ||||
|         self.report_extraction(name) | ||||
|  | ||||
|         streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|         stream_json = self._download_webpage(streams_url, full_title, | ||||
|                                              u'Downloading stream definitions', | ||||
|                                              u'unable to download stream definitions') | ||||
|  | ||||
|         streams = json.loads(stream_json) | ||||
|         mediaURL = streams['http_mp3_128_url'] | ||||
|         upload_date = unified_strdate(info['created_at']) | ||||
|  | ||||
|         return [{ | ||||
|         thumbnail = info['artwork_url'] | ||||
|         if thumbnail is not None: | ||||
|             thumbnail = thumbnail.replace('-large', '-t500x500') | ||||
|         return { | ||||
|             'id':       info['id'], | ||||
|             'url':      mediaURL, | ||||
|             'url':      info['stream_url'] + '?client_id=' + self._CLIENT_ID, | ||||
|             'uploader': info['user']['username'], | ||||
|             'upload_date': upload_date, | ||||
|             'upload_date': unified_strdate(info['created_at']), | ||||
|             'title':    info['title'], | ||||
|             'ext':      u'mp3', | ||||
|             'description': info['description'], | ||||
|         }] | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|  | ||||
| class SoundcloudSetIE(InfoExtractor): | ||||
|     """Information extractor for soundcloud.com sets | ||||
|        To access the media, the uid of the song and a stream token | ||||
|        must be extracted from the page source and the script must make | ||||
|        a request to media.soundcloud.com/crossdomain.xml. Then | ||||
|        the media can be grabbed by requesting from an url composed | ||||
|        of the stream token and uid | ||||
|      """ | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         track_id = mobj.group('track_id') | ||||
|         if track_id is not None: | ||||
|             info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID | ||||
|             full_title = track_id | ||||
|         elif mobj.group('widget'): | ||||
|             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) | ||||
|             return self.url_result(query['url'][0], ie='Soundcloud') | ||||
|         else: | ||||
|             # extract uploader (which is in the url) | ||||
|             uploader = mobj.group(1) | ||||
|             # extract simple title (uploader + slug of song title) | ||||
|             slug_title =  mobj.group(2) | ||||
|             full_title = '%s/%s' % (uploader, slug_title) | ||||
|      | ||||
|             self.report_resolve(full_title) | ||||
|      | ||||
|             url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title) | ||||
|             info_json_url = self._resolv_url(url) | ||||
|         info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON') | ||||
|  | ||||
|         info = json.loads(info_json) | ||||
|         return self._extract_info_dict(info, full_title) | ||||
|  | ||||
| class SoundcloudSetIE(SoundcloudIE): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$' | ||||
|     IE_NAME = u'soundcloud:set' | ||||
|     _TEST = { | ||||
| @@ -153,10 +168,6 @@ class SoundcloudSetIE(InfoExtractor): | ||||
|         ] | ||||
|     } | ||||
|  | ||||
|     def report_resolve(self, video_id): | ||||
|         """Report information extraction.""" | ||||
|         self.to_screen(u'%s: Resolving id' % video_id) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
| @@ -171,7 +182,7 @@ class SoundcloudSetIE(InfoExtractor): | ||||
|         self.report_resolve(full_title) | ||||
|  | ||||
|         url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) | ||||
|         resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|         resolv_url = self._resolv_url(url) | ||||
|         info_json = self._download_webpage(resolv_url, full_title) | ||||
|  | ||||
|         videos = [] | ||||
| @@ -182,23 +193,8 @@ class SoundcloudSetIE(InfoExtractor): | ||||
|             return | ||||
|  | ||||
|         self.report_extraction(full_title) | ||||
|         for track in info['tracks']: | ||||
|             video_id = track['id'] | ||||
|  | ||||
|             streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|             stream_json = self._download_webpage(streams_url, video_id, u'Downloading track info JSON') | ||||
|  | ||||
|             self.report_extraction(video_id) | ||||
|             streams = json.loads(stream_json) | ||||
|             mediaURL = streams['http_mp3_128_url'] | ||||
|  | ||||
|             videos.append({ | ||||
|                 'id':       video_id, | ||||
|                 'url':      mediaURL, | ||||
|                 'uploader': track['user']['username'], | ||||
|                 'upload_date':  unified_strdate(track['created_at']), | ||||
|                 'title':    track['title'], | ||||
|                 'ext':      u'mp3', | ||||
|                 'description': track['description'], | ||||
|             }) | ||||
|         return videos | ||||
|         return {'_type': 'playlist', | ||||
|                 'entries': [self._extract_info_dict(track) for track in info['tracks']], | ||||
|                 'id': info['id'], | ||||
|                 'title': info['title'], | ||||
|                 } | ||||
|   | ||||
| @@ -5,13 +5,13 @@ from .common import InfoExtractor | ||||
| class StatigramIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://statigr.am/p/484091715184808010_284179915', | ||||
|         u'file': u'484091715184808010_284179915.mp4', | ||||
|         u'md5': u'deda4ff333abe2e118740321e992605b', | ||||
|         u'url': u'http://statigr.am/p/522207370455279102_24101272', | ||||
|         u'file': u'522207370455279102_24101272.mp4', | ||||
|         u'md5': u'6eb93b882a3ded7c378ee1d6884b1814', | ||||
|         u'info_dict': { | ||||
|             u"uploader_id": u"videoseconds",  | ||||
|             u"title": u"Instagram photo by @videoseconds" | ||||
|         } | ||||
|             u'uploader_id': u'aguynamedpatrick', | ||||
|             u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -33,7 +33,7 @@ class TeamcocoIE(InfoExtractor): | ||||
|         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id | ||||
|         data = self._download_webpage(data_url, video_id, 'Downloading data webpage') | ||||
|  | ||||
|         video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>', | ||||
|         video_url = self._html_search_regex(r'<file [^>]*type="high".*?>(.*?)</file>', | ||||
|             data, u'video URL') | ||||
|  | ||||
|         return [{ | ||||
|   | ||||
| @@ -6,20 +6,17 @@ import re | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| class TF1IE(InfoExtractor): | ||||
|     """ | ||||
|     TF1 uses the wat.tv player, currently it can only download videos with the | ||||
|     html5 player enabled, it cannot download HD videos. | ||||
|     """ | ||||
|     _WORKING = False | ||||
|     """TF1 uses the wat.tv player.""" | ||||
|     _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html' | ||||
|     _TEST = { | ||||
|         u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', | ||||
|         u'file': u'10635995.mp4', | ||||
|         u'md5': u'66789d3e91278d332f75e1feb7aea327', | ||||
|         u'md5': u'2e378cc28b9957607d5e88f274e637d8', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle', | ||||
|             u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.', | ||||
|         } | ||||
|         }, | ||||
|         u'skip': u'Sometimes wat serves the whole file with the --test option', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
							
								
								
									
										32
									
								
								youtube_dl/extractor/unistra.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								youtube_dl/extractor/unistra.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,32 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| class UnistraIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://utv.unistra.fr/video.php?id_video=154', | ||||
|         u'file': u'154.mp4', | ||||
|         u'md5': u'736f605cfdc96724d55bb543ab3ced24', | ||||
|         u'info_dict': { | ||||
|             u'title': u'M!ss Yella', | ||||
|             u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         id = re.match(self._VALID_URL, url).group(1) | ||||
|         webpage = self._download_webpage(url, id) | ||||
|         file = re.search(r'file: "(.*?)",', webpage).group(1) | ||||
|         title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title') | ||||
|  | ||||
|         video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file | ||||
|  | ||||
|         return {'id': id, | ||||
|                 'title': title, | ||||
|                 'ext': 'mp4', | ||||
|                 'url': video_url, | ||||
|                 'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL), | ||||
|                 'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'), | ||||
|                 } | ||||
| @@ -8,10 +8,10 @@ from ..utils import ( | ||||
|  | ||||
| class VevoIE(InfoExtractor): | ||||
|     """ | ||||
|     Accecps urls from vevo.com or in the format 'vevo:{id}' | ||||
|     Accepts urls from vevo.com or in the format 'vevo:{id}' | ||||
|     (currently used by MTVIE) | ||||
|     """ | ||||
|     _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$' | ||||
|     _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', | ||||
|         u'file': u'GB1101300280.mp4', | ||||
| @@ -19,7 +19,7 @@ class VevoIE(InfoExtractor): | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20130624", | ||||
|             u"uploader": u"Hurts", | ||||
|             u"title": u"Somebody To Die For" | ||||
|             u"title": u"Somebody to Die For" | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -35,12 +35,12 @@ class VevoIE(InfoExtractor): | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         video_info = json.loads(info_json) | ||||
|         m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):(?P<url>.*?)"', links_webpage)) | ||||
|         m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage)) | ||||
|         if m_urls is None or len(m_urls) == 0: | ||||
|             raise ExtractorError(u'Unable to extract video url') | ||||
|         # They are sorted from worst to best quality | ||||
|         m_url = m_urls[-1] | ||||
|         video_url = base_url + m_url.group('url') | ||||
|         video_url = base_url + '/' + m_url.group('url') | ||||
|         ext = m_url.group('ext') | ||||
|  | ||||
|         return {'url': video_url, | ||||
|   | ||||
							
								
								
									
										48
									
								
								youtube_dl/extractor/videofyme.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								youtube_dl/extractor/videofyme.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     find_xpath_attr, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
| class VideofyMeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(www.videofy.me/.+?|p.videofy.me/v)/(?P<id>\d+)(&|#|$)' | ||||
|     IE_NAME = u'videofy.me' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.videofy.me/thisisvideofyme/1100701', | ||||
|         u'file':  u'1100701.mp4', | ||||
|         u'md5': u'c77d700bdc16ae2e9f3c26019bd96143', | ||||
|         u'info_dict': { | ||||
|             u'title': u'This is VideofyMe', | ||||
|             u'description': None, | ||||
|             u'uploader': u'VideofyMe', | ||||
|             u'uploader_id': u'thisisvideofyme', | ||||
|         }, | ||||
|          | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id, | ||||
|                                             video_id) | ||||
|         config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) | ||||
|         video = config.find('video') | ||||
|         sources = video.find('sources') | ||||
|         url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)  | ||||
|             for key in ['on', 'av', 'off']] if node is not None) | ||||
|         video_url = url_node.find('url').text | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': video.find('title').text, | ||||
|                 'url': video_url, | ||||
|                 'ext': determine_ext(video_url), | ||||
|                 'thumbnail': video.find('thumb').text, | ||||
|                 'description': video.find('description').text, | ||||
|                 'uploader': config.find('blog/name').text, | ||||
|                 'uploader_id': video.find('identifier').text, | ||||
|                 'view_count': re.search(r'\d+', video.find('views').text).group(), | ||||
|                 } | ||||
| @@ -1,5 +1,6 @@ | ||||
| import json | ||||
| import re | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| @@ -19,18 +20,31 @@ class VimeoIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$' | ||||
|     _NETRC_MACHINE = 'vimeo' | ||||
|     IE_NAME = u'vimeo' | ||||
|     _TEST = { | ||||
|         u'url': u'http://vimeo.com/56015672', | ||||
|         u'file': u'56015672.mp4', | ||||
|         u'md5': u'8879b6cc097e987f02484baf890129e5', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20121220",  | ||||
|             u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",  | ||||
|             u"uploader_id": u"user7108434",  | ||||
|             u"uploader": u"Filippo Valsorda",  | ||||
|             u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550" | ||||
|         } | ||||
|     } | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u'url': u'http://vimeo.com/56015672', | ||||
|             u'file': u'56015672.mp4', | ||||
|             u'md5': u'8879b6cc097e987f02484baf890129e5', | ||||
|             u'info_dict': { | ||||
|                 u"upload_date": u"20121220",  | ||||
|                 u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",  | ||||
|                 u"uploader_id": u"user7108434",  | ||||
|                 u"uploader": u"Filippo Valsorda",  | ||||
|                 u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             u'url': u'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', | ||||
|             u'file': u'68093876.mp4', | ||||
|             u'md5': u'3b5ca6aa22b60dfeeadf50b72e44ed82', | ||||
|             u'note': u'Vimeo Pro video (#1197)', | ||||
|             u'info_dict': { | ||||
|                 u'uploader_id': u'openstreetmapus',  | ||||
|                 u'uploader': u'OpenStreetMap US',  | ||||
|                 u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
| @@ -82,7 +96,9 @@ class VimeoIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|         if not mobj.group('proto'): | ||||
|             url = 'https://' + url | ||||
|         if mobj.group('direct_link') or mobj.group('pro'): | ||||
|         elif mobj.group('pro'): | ||||
|             url = 'http://player.vimeo.com/video/' + video_id | ||||
|         elif mobj.group('direct_link'): | ||||
|             url = 'https://vimeo.com/' + video_id | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
| @@ -171,3 +187,31 @@ class VimeoIE(InfoExtractor): | ||||
|             'thumbnail':    video_thumbnail, | ||||
|             'description':  video_description, | ||||
|         }] | ||||
|  | ||||
|  | ||||
| class VimeoChannelIE(InfoExtractor): | ||||
|     IE_NAME = u'vimeo:channel' | ||||
|     _VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)' | ||||
|     _MORE_PAGES_INDICATOR = r'<a.+?rel="next"' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         channel_id =  mobj.group('id') | ||||
|         video_ids = [] | ||||
|  | ||||
|         for pagenum in itertools.count(1): | ||||
|             webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum), | ||||
|                                              channel_id, u'Downloading page %s' % pagenum) | ||||
|             video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: | ||||
|                 break | ||||
|  | ||||
|         entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') | ||||
|                    for video_id in video_ids] | ||||
|         channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id, | ||||
|                                                 webpage, u'channel title') | ||||
|         return {'_type': 'playlist', | ||||
|                 'id': channel_id, | ||||
|                 'title': channel_title, | ||||
|                 'entries': entries, | ||||
|                 } | ||||
|   | ||||
| @@ -12,17 +12,17 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class WatIE(InfoExtractor): | ||||
|     _WORKING = False | ||||
|     _VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html' | ||||
|     IE_NAME = 'wat.tv' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html', | ||||
|         u'file': u'10631273.mp4', | ||||
|         u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a', | ||||
|         u'md5': u'd8b2231e1e333acd12aad94b80937e19', | ||||
|         u'info_dict': { | ||||
|             u'title': u'World War Z - Philadelphia VOST', | ||||
|             u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr', | ||||
|         } | ||||
|         }, | ||||
|         u'skip': u'Sometimes wat serves the whole file with the --test option', | ||||
|     } | ||||
|      | ||||
|     def download_video_info(self, real_id): | ||||
| @@ -59,20 +59,8 @@ class WatIE(InfoExtractor): | ||||
|  | ||||
|         # Otherwise we can continue and extract just one part, we have to use | ||||
|         # the short id for getting the video url | ||||
|         player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id, | ||||
|                                                      'html5': '1'}) | ||||
|         player_info = self._download_webpage('http://www.wat.tv/player?' + player_data, | ||||
|                                              real_id, u'Downloading player info') | ||||
|         player = json.loads(player_info)['player'] | ||||
|         html5_player = self._html_search_regex(r'iframe src="(.*?)"', player, | ||||
|                                                'html5 player') | ||||
|         player_webpage = self._download_webpage(html5_player, real_id, | ||||
|                                                 u'Downloading player webpage') | ||||
|  | ||||
|         video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage, | ||||
|                                        'video url') | ||||
|         info = {'id': real_id, | ||||
|                 'url': video_url, | ||||
|                 'url': 'http://wat.tv/get/android5/%s.mp4' % real_id, | ||||
|                 'ext': 'mp4', | ||||
|                 'title': first_chapter['title'], | ||||
|                 'thumbnail': first_chapter['preview'], | ||||
|   | ||||
| @@ -21,6 +21,13 @@ class WorldStarHipHopIE(InfoExtractor): | ||||
|  | ||||
|         webpage_src = self._download_webpage(url, video_id) | ||||
|  | ||||
|         m_vevo_id = re.search(r'videoId=(.*?)&?', | ||||
|             webpage_src) | ||||
|          | ||||
|         if m_vevo_id is not None: | ||||
|             self.to_screen(u'Vevo video detected:') | ||||
|             return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo') | ||||
|  | ||||
|         video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)', | ||||
|             webpage_src, u'video URL') | ||||
|  | ||||
|   | ||||
| @@ -3,7 +3,8 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     unescapeHTML, | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| @@ -36,15 +37,16 @@ class XHamsterIE(InfoExtractor): | ||||
|             video_url = compat_urllib_parse.unquote(mobj.group('file')) | ||||
|         else: | ||||
|             video_url = mobj.group('server')+'/key='+mobj.group('file') | ||||
|         video_extension = video_url.split('.')[-1] | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', | ||||
|             webpage, u'title') | ||||
|  | ||||
|         # Can't see the description anywhere in the UI | ||||
|         # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)', | ||||
|         #     webpage, u'description', fatal=False) | ||||
|         # if video_description: video_description = unescapeHTML(video_description) | ||||
|         # Only a few videos have an description | ||||
|         mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage) | ||||
|         if mobj: | ||||
|             video_description = unescapeHTML(mobj.group('description')) | ||||
|         else: | ||||
|             video_description = None | ||||
|  | ||||
|         mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage) | ||||
|         if mobj: | ||||
| @@ -62,9 +64,9 @@ class XHamsterIE(InfoExtractor): | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'ext':      video_extension, | ||||
|             'ext':      determine_ext(video_url), | ||||
|             'title':    video_title, | ||||
|             # 'description': video_description, | ||||
|             'description': video_description, | ||||
|             'upload_date': video_upload_date, | ||||
|             'uploader_id': video_uploader_id, | ||||
|             'thumbnail': video_thumbnail | ||||
|   | ||||
| @@ -23,8 +23,114 @@ from ..utils import ( | ||||
|     orderedSet, | ||||
| ) | ||||
|  | ||||
| class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|     """Provide base functions for Youtube extractors""" | ||||
|     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' | ||||
|     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' | ||||
|     _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' | ||||
|     _NETRC_MACHINE = 'youtube' | ||||
|     # If True it will raise an error if no login info is provided | ||||
|     _LOGIN_REQUIRED = False | ||||
|  | ||||
| class YoutubeIE(InfoExtractor): | ||||
|     def report_lang(self): | ||||
|         """Report attempt to set language.""" | ||||
|         self.to_screen(u'Setting language') | ||||
|  | ||||
|     def _set_language(self): | ||||
|         request = compat_urllib_request.Request(self._LANG_URL) | ||||
|         try: | ||||
|             self.report_lang() | ||||
|             compat_urllib_request.urlopen(request).read() | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to set language: %s' % compat_str(err)) | ||||
|             return False | ||||
|         return True | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         # No authentication to be performed | ||||
|         if username is None: | ||||
|             if self._LOGIN_REQUIRED: | ||||
|                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) | ||||
|             return False | ||||
|  | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL) | ||||
|         try: | ||||
|             login_page = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err)) | ||||
|             return False | ||||
|  | ||||
|         galx = None | ||||
|         dsh = None | ||||
|         match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page) | ||||
|         if match: | ||||
|           galx = match.group(1) | ||||
|         match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page) | ||||
|         if match: | ||||
|           dsh = match.group(1) | ||||
|  | ||||
|         # Log in | ||||
|         login_form_strs = { | ||||
|                 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', | ||||
|                 u'Email': username, | ||||
|                 u'GALX': galx, | ||||
|                 u'Passwd': password, | ||||
|                 u'PersistentCookie': u'yes', | ||||
|                 u'_utf8': u'霱', | ||||
|                 u'bgresponse': u'js_disabled', | ||||
|                 u'checkConnection': u'', | ||||
|                 u'checkedDomains': u'youtube', | ||||
|                 u'dnConn': u'', | ||||
|                 u'dsh': dsh, | ||||
|                 u'pstMsg': u'0', | ||||
|                 u'rmShown': u'1', | ||||
|                 u'secTok': u'', | ||||
|                 u'signIn': u'Sign in', | ||||
|                 u'timeStmp': u'', | ||||
|                 u'service': u'youtube', | ||||
|                 u'uilel': u'3', | ||||
|                 u'hl': u'en_US', | ||||
|         } | ||||
|         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode | ||||
|         # chokes on unicode | ||||
|         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) | ||||
|         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, login_data) | ||||
|         try: | ||||
|             self.report_login() | ||||
|             login_results = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: | ||||
|                 self._downloader.report_warning(u'unable to log in: bad username or password') | ||||
|                 return False | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) | ||||
|             return False | ||||
|         return True | ||||
|  | ||||
|     def _confirm_age(self): | ||||
|         age_form = { | ||||
|                 'next_url':     '/', | ||||
|                 'action_confirm':   'Confirm', | ||||
|                 } | ||||
|         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) | ||||
|         try: | ||||
|             self.report_age_confirmation() | ||||
|             compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err)) | ||||
|         return True | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         if self._downloader is None: | ||||
|             return | ||||
|         if not self._set_language(): | ||||
|             return | ||||
|         if not self._login(): | ||||
|             return | ||||
|         self._confirm_age() | ||||
|  | ||||
| class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|     IE_DESC = u'YouTube.com' | ||||
|     _VALID_URL = r"""^ | ||||
|                      ( | ||||
| @@ -35,7 +141,7 @@ class YoutubeIE(InfoExtractor): | ||||
|                          (?:                                                  # the various things that can precede the ID: | ||||
|                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ | ||||
|                              |(?:                                             # or the v= param in all its forms | ||||
|                                  (?:watch|movie(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx) | ||||
|                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx) | ||||
|                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #! | ||||
|                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx) | ||||
|                                  v= | ||||
| @@ -45,14 +151,27 @@ class YoutubeIE(InfoExtractor): | ||||
|                      ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID | ||||
|                      (?(1).+)?                                                # if we found the ID, everything can follow | ||||
|                      $""" | ||||
|     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' | ||||
|     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' | ||||
|     _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' | ||||
|     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' | ||||
|     _NETRC_MACHINE = 'youtube' | ||||
|     # Listed in order of quality | ||||
|     _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] | ||||
|     _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13'] | ||||
|     _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13', | ||||
|                           '95', '94', '93', '92', '132', '151', | ||||
|                           # 3D | ||||
|                           '85', '84', '102', '83', '101', '82', '100', | ||||
|                           # Dash video | ||||
|                           '138', '137', '248', '136', '247', '135', '246', | ||||
|                           '245', '244', '134', '243', '133', '242', '160', | ||||
|                           # Dash audio | ||||
|                           '141', '172', '140', '171', '139', | ||||
|                           ] | ||||
|     _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13', | ||||
|                                       '95', '94', '93', '92', '132', '151', | ||||
|                                       '85', '102', '84', '101', '83', '100', '82', | ||||
|                                       # Dash video | ||||
|                                       '138', '248', '137', '247', '136', '246', '245', | ||||
|                                       '244', '135', '243', '134', '242', '133', '160', | ||||
|                                       # Dash audio | ||||
|                                       '172', '141', '171', '140', '139', | ||||
|                                       ] | ||||
|     _video_extensions = { | ||||
|         '13': '3gp', | ||||
|         '17': 'mp4', | ||||
| @@ -64,6 +183,47 @@ class YoutubeIE(InfoExtractor): | ||||
|         '44': 'webm', | ||||
|         '45': 'webm', | ||||
|         '46': 'webm', | ||||
|  | ||||
|         # 3d videos | ||||
|         '82': 'mp4', | ||||
|         '83': 'mp4', | ||||
|         '84': 'mp4', | ||||
|         '85': 'mp4', | ||||
|         '100': 'webm', | ||||
|         '101': 'webm', | ||||
|         '102': 'webm', | ||||
|  | ||||
|         # videos that use m3u8 | ||||
|         '92': 'mp4', | ||||
|         '93': 'mp4', | ||||
|         '94': 'mp4', | ||||
|         '95': 'mp4', | ||||
|         '96': 'mp4', | ||||
|         '132': 'mp4', | ||||
|         '151': 'mp4', | ||||
|  | ||||
|         # Dash mp4 | ||||
|         '133': 'mp4', | ||||
|         '134': 'mp4', | ||||
|         '135': 'mp4', | ||||
|         '136': 'mp4', | ||||
|         '137': 'mp4', | ||||
|         '138': 'mp4', | ||||
|         '139': 'mp4', | ||||
|         '140': 'mp4', | ||||
|         '141': 'mp4', | ||||
|         '160': 'mp4', | ||||
|  | ||||
|         # Dash webm | ||||
|         '171': 'webm', | ||||
|         '172': 'webm', | ||||
|         '242': 'webm', | ||||
|         '243': 'webm', | ||||
|         '244': 'webm', | ||||
|         '245': 'webm', | ||||
|         '246': 'webm', | ||||
|         '247': 'webm', | ||||
|         '248': 'webm', | ||||
|     } | ||||
|     _video_dimensions = { | ||||
|         '5': '240x400', | ||||
| @@ -80,7 +240,69 @@ class YoutubeIE(InfoExtractor): | ||||
|         '44': '480x854', | ||||
|         '45': '720x1280', | ||||
|         '46': '1080x1920', | ||||
|         '82': '360p', | ||||
|         '83': '480p', | ||||
|         '84': '720p', | ||||
|         '85': '1080p', | ||||
|         '92': '240p', | ||||
|         '93': '360p', | ||||
|         '94': '480p', | ||||
|         '95': '720p', | ||||
|         '96': '1080p', | ||||
|         '100': '360p', | ||||
|         '101': '480p', | ||||
|         '102': '720p', | ||||
|         '132': '240p', | ||||
|         '151': '72p', | ||||
|         '133': '240p', | ||||
|         '134': '360p', | ||||
|         '135': '480p', | ||||
|         '136': '720p', | ||||
|         '137': '1080p', | ||||
|         '138': '>1080p', | ||||
|         '139': '48k', | ||||
|         '140': '128k', | ||||
|         '141': '256k', | ||||
|         '160': '192p', | ||||
|         '171': '128k', | ||||
|         '172': '256k', | ||||
|         '242': '240p', | ||||
|         '243': '360p', | ||||
|         '244': '480p', | ||||
|         '245': '480p', | ||||
|         '246': '480p', | ||||
|         '247': '720p', | ||||
|         '248': '1080p', | ||||
|     } | ||||
|     _special_itags = { | ||||
|         '82': '3D', | ||||
|         '83': '3D', | ||||
|         '84': '3D', | ||||
|         '85': '3D', | ||||
|         '100': '3D', | ||||
|         '101': '3D', | ||||
|         '102': '3D', | ||||
|         '133': 'DASH Video', | ||||
|         '134': 'DASH Video', | ||||
|         '135': 'DASH Video', | ||||
|         '136': 'DASH Video', | ||||
|         '137': 'DASH Video', | ||||
|         '138': 'DASH Video', | ||||
|         '139': 'DASH Audio', | ||||
|         '140': 'DASH Audio', | ||||
|         '141': 'DASH Audio', | ||||
|         '160': 'DASH Video', | ||||
|         '171': 'DASH Audio', | ||||
|         '172': 'DASH Audio', | ||||
|         '242': 'DASH Video', | ||||
|         '243': 'DASH Video', | ||||
|         '244': 'DASH Video', | ||||
|         '245': 'DASH Video', | ||||
|         '246': 'DASH Video', | ||||
|         '247': 'DASH Video', | ||||
|         '248': 'DASH Video', | ||||
|     } | ||||
|  | ||||
|     IE_NAME = u'youtube' | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -114,7 +336,7 @@ class YoutubeIE(InfoExtractor): | ||||
|                 u"upload_date": u"20120506", | ||||
|                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", | ||||
|                 u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c", | ||||
|                 u"uploader": u"IconaPop", | ||||
|                 u"uploader": u"Icona Pop", | ||||
|                 u"uploader_id": u"IconaPop" | ||||
|             } | ||||
|         }, | ||||
| @@ -130,6 +352,21 @@ class YoutubeIE(InfoExtractor): | ||||
|                 u"uploader_id": u"justintimberlakeVEVO" | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE', | ||||
|             u'file': u'TGi3HqYrWHE.mp4', | ||||
|             u'note': u'm3u8 video', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Triathlon - Men - London 2012 Olympic Games', | ||||
|                 u'description': u'- Men -  TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games', | ||||
|                 u'uploader': u'olympic', | ||||
|                 u'upload_date': u'20120807', | ||||
|                 u'uploader_id': u'olympic', | ||||
|             }, | ||||
|             u'params': { | ||||
|                 u'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|  | ||||
| @@ -139,10 +376,6 @@ class YoutubeIE(InfoExtractor): | ||||
|         if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|  | ||||
|     def report_lang(self): | ||||
|         """Report attempt to set language.""" | ||||
|         self.to_screen(u'Setting language') | ||||
|  | ||||
|     def report_video_webpage_download(self, video_id): | ||||
|         """Report attempt to download video webpage.""" | ||||
|         self.to_screen(u'%s: Downloading video webpage' % video_id) | ||||
| @@ -183,37 +416,55 @@ class YoutubeIE(InfoExtractor): | ||||
|             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] | ||||
|         elif len(s) == 90: | ||||
|             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] | ||||
|         elif len(s) == 89: | ||||
|             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1] | ||||
|         elif len(s) == 88: | ||||
|             return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] | ||||
|         elif len(s) == 87: | ||||
|             return s[4:23] + s[86] + s[24:85] | ||||
|             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] | ||||
|         elif len(s) == 86: | ||||
|             return s[2:63] + s[82] + s[64:82] + s[63] | ||||
|             return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86] | ||||
|         elif len(s) == 85: | ||||
|             return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21] | ||||
|             return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] | ||||
|         elif len(s) == 84: | ||||
|             return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] | ||||
|             return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27] | ||||
|         elif len(s) == 83: | ||||
|             return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:] | ||||
|             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] | ||||
|         elif len(s) == 82: | ||||
|             return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] | ||||
|             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82] | ||||
|         elif len(s) == 81: | ||||
|             return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[2] + s[34:53] + s[24] + s[54:81] | ||||
|             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] | ||||
|         elif len(s) == 80: | ||||
|             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80] | ||||
|         elif len(s) == 79: | ||||
|             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] | ||||
|  | ||||
|         else: | ||||
|             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) | ||||
|  | ||||
|     def _decrypt_signature_age_gate(self, s): | ||||
|         # The videos with age protection use another player, so the algorithms | ||||
|         # can be different. | ||||
|         if len(s) == 86: | ||||
|             return s[2:63] + s[82] + s[64:82] + s[63] | ||||
|         else: | ||||
|             # Fallback to the other algortihms | ||||
|             return self._decrypt_signature(s) | ||||
|  | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id): | ||||
|         self.report_video_subtitles_download(video_id) | ||||
|         request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) | ||||
|         try: | ||||
|             sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             return (u'unable to download video subtitles: %s' % compat_str(err), None) | ||||
|             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) | ||||
|             return {} | ||||
|         sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) | ||||
|         sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) | ||||
|         if not sub_lang_list: | ||||
|             return (u'video doesn\'t have subtitles', None) | ||||
|             self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||
|             return {} | ||||
|         return sub_lang_list | ||||
|  | ||||
|     def _list_available_subtitles(self, video_id): | ||||
| @@ -222,8 +473,7 @@ class YoutubeIE(InfoExtractor): | ||||
|  | ||||
|     def _request_subtitle(self, sub_lang, sub_name, video_id, format): | ||||
|         """ | ||||
|         Return tuple: | ||||
|         (error_message, sub_lang, sub) | ||||
|         Return the subtitle as a string or None if they are not found | ||||
|         """ | ||||
|         self.report_video_subtitles_request(video_id, sub_lang, format) | ||||
|         params = compat_urllib_parse.urlencode({ | ||||
| @@ -236,21 +486,24 @@ class YoutubeIE(InfoExtractor): | ||||
|         try: | ||||
|             sub = compat_urllib_request.urlopen(url).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             return (u'unable to download video subtitles: %s' % compat_str(err), None, None) | ||||
|             self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) | ||||
|             return | ||||
|         if not sub: | ||||
|             return (u'Did not fetch video subtitles', None, None) | ||||
|         return (None, sub_lang, sub) | ||||
|             self._downloader.report_warning(u'Did not fetch video subtitles') | ||||
|             return | ||||
|         return sub | ||||
|  | ||||
|     def _request_automatic_caption(self, video_id, webpage): | ||||
|         """We need the webpage for getting the captions url, pass it as an | ||||
|            argument to speed up the process.""" | ||||
|         sub_lang = self._downloader.params.get('subtitleslang') or 'en' | ||||
|         sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0] | ||||
|         sub_format = self._downloader.params.get('subtitlesformat') | ||||
|         self.to_screen(u'%s: Looking for automatic captions' % video_id) | ||||
|         mobj = re.search(r';ytplayer.config = ({.*?});', webpage) | ||||
|         err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang | ||||
|         if mobj is None: | ||||
|             return [(err_msg, None, None)] | ||||
|             self._downloader.report_warning(err_msg) | ||||
|             return {} | ||||
|         player_config = json.loads(mobj.group(1)) | ||||
|         try: | ||||
|             args = player_config[u'args'] | ||||
| @@ -265,131 +518,51 @@ class YoutubeIE(InfoExtractor): | ||||
|             }) | ||||
|             subtitles_url = caption_url + '&' + params | ||||
|             sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') | ||||
|             return [(None, sub_lang, sub)] | ||||
|         except KeyError: | ||||
|             return [(err_msg, None, None)] | ||||
|             return {sub_lang: sub} | ||||
|         # An extractor error can be raise by the download process if there are | ||||
|         # no automatic captions but there are subtitles | ||||
|         except (KeyError, ExtractorError): | ||||
|             self._downloader.report_warning(err_msg) | ||||
|             return {} | ||||
|      | ||||
|     def _extract_subtitle(self, video_id): | ||||
|     def _extract_subtitles(self, video_id): | ||||
|         """ | ||||
|         Return a list with a tuple: | ||||
|         [(error_message, sub_lang, sub)] | ||||
|         Return a dictionary: {language: subtitles} or {} if the subtitles | ||||
|         couldn't be found | ||||
|         """ | ||||
|         sub_lang_list = self._get_available_subtitles(video_id) | ||||
|         available_subs_list = self._get_available_subtitles(video_id) | ||||
|         sub_format = self._downloader.params.get('subtitlesformat') | ||||
|         if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles | ||||
|             return [(sub_lang_list[0], None, None)] | ||||
|         if self._downloader.params.get('subtitleslang', False): | ||||
|             sub_lang = self._downloader.params.get('subtitleslang') | ||||
|         elif 'en' in sub_lang_list: | ||||
|             sub_lang = 'en' | ||||
|         if  not available_subs_list: #There was some error, it didn't get the available subtitles | ||||
|             return {} | ||||
|         if self._downloader.params.get('allsubtitles', False): | ||||
|             sub_lang_list = available_subs_list | ||||
|         else: | ||||
|             sub_lang = list(sub_lang_list.keys())[0] | ||||
|         if not sub_lang in sub_lang_list: | ||||
|             return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)] | ||||
|             if self._downloader.params.get('subtitleslangs', False): | ||||
|                 reqested_langs = self._downloader.params.get('subtitleslangs') | ||||
|             elif 'en' in available_subs_list: | ||||
|                 reqested_langs = ['en'] | ||||
|             else: | ||||
|                 reqested_langs = [list(available_subs_list.keys())[0]] | ||||
|  | ||||
|         subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) | ||||
|         return [subtitle] | ||||
|  | ||||
|     def _extract_all_subtitles(self, video_id): | ||||
|         sub_lang_list = self._get_available_subtitles(video_id) | ||||
|         sub_format = self._downloader.params.get('subtitlesformat') | ||||
|         if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles | ||||
|             return [(sub_lang_list[0], None, None)] | ||||
|         subtitles = [] | ||||
|             sub_lang_list = {} | ||||
|             for sub_lang in reqested_langs: | ||||
|                 if not sub_lang in available_subs_list: | ||||
|                     self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) | ||||
|                     continue | ||||
|                 sub_lang_list[sub_lang] = available_subs_list[sub_lang] | ||||
|         subtitles = {} | ||||
|         for sub_lang in sub_lang_list: | ||||
|             subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) | ||||
|             subtitles.append(subtitle) | ||||
|             if subtitle: | ||||
|                 subtitles[sub_lang] = subtitle | ||||
|         return subtitles | ||||
|  | ||||
|     def _print_formats(self, formats): | ||||
|         print('Available formats:') | ||||
|         for x in formats: | ||||
|             print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))) | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         if self._downloader is None: | ||||
|             return | ||||
|  | ||||
|         # Set language | ||||
|         request = compat_urllib_request.Request(self._LANG_URL) | ||||
|         try: | ||||
|             self.report_lang() | ||||
|             compat_urllib_request.urlopen(request).read() | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to set language: %s' % compat_str(err)) | ||||
|             return | ||||
|  | ||||
|         (username, password) = self._get_login_info() | ||||
|  | ||||
|         # No authentication to be performed | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL) | ||||
|         try: | ||||
|             login_page = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err)) | ||||
|             return | ||||
|  | ||||
|         galx = None | ||||
|         dsh = None | ||||
|         match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page) | ||||
|         if match: | ||||
|           galx = match.group(1) | ||||
|  | ||||
|         match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page) | ||||
|         if match: | ||||
|           dsh = match.group(1) | ||||
|  | ||||
|         # Log in | ||||
|         login_form_strs = { | ||||
|                 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', | ||||
|                 u'Email': username, | ||||
|                 u'GALX': galx, | ||||
|                 u'Passwd': password, | ||||
|                 u'PersistentCookie': u'yes', | ||||
|                 u'_utf8': u'霱', | ||||
|                 u'bgresponse': u'js_disabled', | ||||
|                 u'checkConnection': u'', | ||||
|                 u'checkedDomains': u'youtube', | ||||
|                 u'dnConn': u'', | ||||
|                 u'dsh': dsh, | ||||
|                 u'pstMsg': u'0', | ||||
|                 u'rmShown': u'1', | ||||
|                 u'secTok': u'', | ||||
|                 u'signIn': u'Sign in', | ||||
|                 u'timeStmp': u'', | ||||
|                 u'service': u'youtube', | ||||
|                 u'uilel': u'3', | ||||
|                 u'hl': u'en_US', | ||||
|         } | ||||
|         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode | ||||
|         # chokes on unicode | ||||
|         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) | ||||
|         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, login_data) | ||||
|         try: | ||||
|             self.report_login() | ||||
|             login_results = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: | ||||
|                 self._downloader.report_warning(u'unable to log in: bad username or password') | ||||
|                 return | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) | ||||
|             return | ||||
|  | ||||
|         # Confirm age | ||||
|         age_form = { | ||||
|                 'next_url':     '/', | ||||
|                 'action_confirm':   'Confirm', | ||||
|                 } | ||||
|         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) | ||||
|         try: | ||||
|             self.report_age_confirmation() | ||||
|             compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err)) | ||||
|             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'), | ||||
|                                         self._video_dimensions.get(x, '???'), | ||||
|                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else '')) | ||||
|  | ||||
|     def _extract_id(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url, re.VERBOSE) | ||||
| @@ -398,6 +571,57 @@ class YoutubeIE(InfoExtractor): | ||||
|         video_id = mobj.group(2) | ||||
|         return video_id | ||||
|  | ||||
|     def _get_video_url_list(self, url_map): | ||||
|         """ | ||||
|         Transform a dictionary in the format {itag:url} to a list of (itag, url) | ||||
|         with the requested formats. | ||||
|         """ | ||||
|         req_format = self._downloader.params.get('format', None) | ||||
|         format_limit = self._downloader.params.get('format_limit', None) | ||||
|         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats | ||||
|         if format_limit is not None and format_limit in available_formats: | ||||
|             format_list = available_formats[available_formats.index(format_limit):] | ||||
|         else: | ||||
|             format_list = available_formats | ||||
|         existing_formats = [x for x in format_list if x in url_map] | ||||
|         if len(existing_formats) == 0: | ||||
|             raise ExtractorError(u'no known formats available for video') | ||||
|         if self._downloader.params.get('listformats', None): | ||||
|             self._print_formats(existing_formats) | ||||
|             return | ||||
|         if req_format is None or req_format == 'best': | ||||
|             video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality | ||||
|         elif req_format == 'worst': | ||||
|             video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality | ||||
|         elif req_format in ('-1', 'all'): | ||||
|             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats | ||||
|         else: | ||||
|             # Specific formats. We pick the first in a slash-delimeted sequence. | ||||
|             # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'. | ||||
|             req_formats = req_format.split('/') | ||||
|             video_url_list = None | ||||
|             for rf in req_formats: | ||||
|                 if rf in url_map: | ||||
|                     video_url_list = [(rf, url_map[rf])] | ||||
|                     break | ||||
|             if video_url_list is None: | ||||
|                 raise ExtractorError(u'requested format not available') | ||||
|         return video_url_list | ||||
|  | ||||
|     def _extract_from_m3u8(self, manifest_url, video_id): | ||||
|         url_map = {} | ||||
|         def _get_urls(_manifest): | ||||
|             lines = _manifest.split('\n') | ||||
|             urls = filter(lambda l: l and not l.startswith('#'), | ||||
|                             lines) | ||||
|             return urls | ||||
|         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest') | ||||
|         formats_urls = _get_urls(manifest) | ||||
|         for format_url in formats_urls: | ||||
|             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag') | ||||
|             url_map[itag] = format_url | ||||
|         return url_map | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url): | ||||
|             self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).') | ||||
| @@ -521,25 +745,10 @@ class YoutubeIE(InfoExtractor): | ||||
|         # subtitles | ||||
|         video_subtitles = None | ||||
|  | ||||
|         if self._downloader.params.get('writesubtitles', False): | ||||
|             video_subtitles = self._extract_subtitle(video_id) | ||||
|             if video_subtitles: | ||||
|                 (sub_error, sub_lang, sub) = video_subtitles[0] | ||||
|                 if sub_error: | ||||
|                     self._downloader.report_warning(sub_error) | ||||
|          | ||||
|         if self._downloader.params.get('writeautomaticsub', False): | ||||
|         if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False): | ||||
|             video_subtitles = self._extract_subtitles(video_id) | ||||
|         elif self._downloader.params.get('writeautomaticsub', False): | ||||
|             video_subtitles = self._request_automatic_caption(video_id, video_webpage) | ||||
|             (sub_error, sub_lang, sub) = video_subtitles[0] | ||||
|             if sub_error: | ||||
|                 self._downloader.report_warning(sub_error) | ||||
|  | ||||
|         if self._downloader.params.get('allsubtitles', False): | ||||
|             video_subtitles = self._extract_all_subtitles(video_id) | ||||
|             for video_subtitle in video_subtitles: | ||||
|                 (sub_error, sub_lang, sub) = video_subtitle | ||||
|                 if sub_error: | ||||
|                     self._downloader.report_warning(sub_error) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id) | ||||
| @@ -552,7 +761,6 @@ class YoutubeIE(InfoExtractor): | ||||
|             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]) | ||||
|  | ||||
|         # Decide which formats to download | ||||
|         req_format = self._downloader.params.get('format', None) | ||||
|  | ||||
|         try: | ||||
|             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage) | ||||
| @@ -566,6 +774,17 @@ class YoutubeIE(InfoExtractor): | ||||
|             if m_s is not None: | ||||
|                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id) | ||||
|                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] | ||||
|             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u'')) | ||||
|             if m_s is not None: | ||||
|                 if 'url_encoded_fmt_stream_map' in video_info: | ||||
|                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts'] | ||||
|                 else: | ||||
|                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']] | ||||
|             elif 'adaptive_fmts' in video_info: | ||||
|                 if 'url_encoded_fmt_stream_map' in video_info: | ||||
|                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0] | ||||
|                 else: | ||||
|                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts'] | ||||
|         except ValueError: | ||||
|             pass | ||||
|  | ||||
| @@ -587,8 +806,8 @@ class YoutubeIE(InfoExtractor): | ||||
|                             s = url_data['s'][0] | ||||
|                             if age_gate: | ||||
|                                 player_version = self._search_regex(r'ad3-(.+?)\.swf', | ||||
|                                     video_info['ad3_module'][0], 'flash player', | ||||
|                                     fatal=False) | ||||
|                                     video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND', | ||||
|                                     'flash player', fatal=False) | ||||
|                                 player = 'flash player %s' % player_version | ||||
|                             else: | ||||
|                                 player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage, | ||||
| @@ -596,41 +815,25 @@ class YoutubeIE(InfoExtractor): | ||||
|                             parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.')) | ||||
|                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % | ||||
|                                 (len(s), parts_sizes, url_data['itag'][0], player)) | ||||
|                         signature = self._decrypt_signature(url_data['s'][0]) | ||||
|                         encrypted_sig = url_data['s'][0] | ||||
|                         if age_gate: | ||||
|                             signature = self._decrypt_signature_age_gate(encrypted_sig) | ||||
|                         else: | ||||
|                             signature = self._decrypt_signature(encrypted_sig) | ||||
|                         url += '&signature=' + signature | ||||
|                     if 'ratebypass' not in url: | ||||
|                         url += '&ratebypass=yes' | ||||
|                     url_map[url_data['itag'][0]] = url | ||||
|  | ||||
|             format_limit = self._downloader.params.get('format_limit', None) | ||||
|             available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats | ||||
|             if format_limit is not None and format_limit in available_formats: | ||||
|                 format_list = available_formats[available_formats.index(format_limit):] | ||||
|             else: | ||||
|                 format_list = available_formats | ||||
|             existing_formats = [x for x in format_list if x in url_map] | ||||
|             if len(existing_formats) == 0: | ||||
|                 raise ExtractorError(u'no known formats available for video') | ||||
|             if self._downloader.params.get('listformats', None): | ||||
|                 self._print_formats(existing_formats) | ||||
|             video_url_list = self._get_video_url_list(url_map) | ||||
|             if not video_url_list: | ||||
|                 return | ||||
|             if req_format is None or req_format == 'best': | ||||
|                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality | ||||
|             elif req_format == 'worst': | ||||
|                 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality | ||||
|             elif req_format in ('-1', 'all'): | ||||
|                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats | ||||
|             else: | ||||
|                 # Specific formats. We pick the first in a slash-delimeted sequence. | ||||
|                 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'. | ||||
|                 req_formats = req_format.split('/') | ||||
|                 video_url_list = None | ||||
|                 for rf in req_formats: | ||||
|                     if rf in url_map: | ||||
|                         video_url_list = [(rf, url_map[rf])] | ||||
|                         break | ||||
|                 if video_url_list is None: | ||||
|                     raise ExtractorError(u'requested format not available') | ||||
|         elif video_info.get('hlsvp'): | ||||
|             manifest_url = video_info['hlsvp'][0] | ||||
|             url_map = self._extract_from_m3u8(manifest_url, video_id) | ||||
|             video_url_list = self._get_video_url_list(url_map) | ||||
|             if not video_url_list: | ||||
|                 return | ||||
|  | ||||
|         else: | ||||
|             raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info') | ||||
|  | ||||
| @@ -639,8 +842,9 @@ class YoutubeIE(InfoExtractor): | ||||
|             # Extension | ||||
|             video_extension = self._video_extensions.get(format_param, 'flv') | ||||
|  | ||||
|             video_format = '{0} - {1}'.format(format_param if format_param else video_extension, | ||||
|                                               self._video_dimensions.get(format_param, '???')) | ||||
|             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension, | ||||
|                                               self._video_dimensions.get(format_param, '???'), | ||||
|                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '') | ||||
|  | ||||
|             results.append({ | ||||
|                 'id':       video_id, | ||||
| @@ -670,10 +874,10 @@ class YoutubePlaylistIE(InfoExtractor): | ||||
|                            \? (?:.*?&)*? (?:p|a|list)= | ||||
|                         |  p/ | ||||
|                         ) | ||||
|                         ((?:PL|EC|UU)?[0-9A-Za-z-_]{10,}) | ||||
|                         ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,}) | ||||
|                         .* | ||||
|                      | | ||||
|                         ((?:PL|EC|UU)[0-9A-Za-z-_]{10,}) | ||||
|                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,}) | ||||
|                      )""" | ||||
|     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none' | ||||
|     _MAX_RESULTS = 50 | ||||
| @@ -692,11 +896,14 @@ class YoutubePlaylistIE(InfoExtractor): | ||||
|  | ||||
|         # Download playlist videos from API | ||||
|         playlist_id = mobj.group(1) or mobj.group(2) | ||||
|         page_num = 1 | ||||
|         videos = [] | ||||
|  | ||||
|         while True: | ||||
|             url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, self._MAX_RESULTS * (page_num - 1) + 1) | ||||
|         for page_num in itertools.count(1): | ||||
|             start_index = self._MAX_RESULTS * (page_num - 1) + 1 | ||||
|             if start_index >= 1000: | ||||
|                 self._downloader.report_warning(u'Max number of results reached') | ||||
|                 break | ||||
|             url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index) | ||||
|             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) | ||||
|  | ||||
|             try: | ||||
| @@ -716,10 +923,6 @@ class YoutubePlaylistIE(InfoExtractor): | ||||
|                 if 'media$group' in entry and 'media$player' in entry['media$group']: | ||||
|                     videos.append((index, entry['media$group']['media$player']['url'])) | ||||
|  | ||||
|             if len(response['feed']['entry']) < self._MAX_RESULTS: | ||||
|                 break | ||||
|             page_num += 1 | ||||
|  | ||||
|         videos = [v[1] for v in sorted(videos)] | ||||
|  | ||||
|         url_results = [self.url_result(vurl, 'Youtube') for vurl in videos] | ||||
| @@ -762,9 +965,7 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|  | ||||
|         # Download any subsequent channel pages using the json-based channel_ajax query | ||||
|         if self._MORE_PAGES_INDICATOR in page: | ||||
|             while True: | ||||
|                 pagenum = pagenum + 1 | ||||
|  | ||||
|             for pagenum in itertools.count(1): | ||||
|                 url = self._MORE_PAGES_URL % (pagenum, channel_id) | ||||
|                 page = self._download_webpage(url, channel_id, | ||||
|                                               u'Downloading page #%s' % pagenum) | ||||
| @@ -807,9 +1008,8 @@ class YoutubeUserIE(InfoExtractor): | ||||
|         # all of them. | ||||
|  | ||||
|         video_ids = [] | ||||
|         pagenum = 0 | ||||
|  | ||||
|         while True: | ||||
|         for pagenum in itertools.count(0): | ||||
|             start_index = pagenum * self._GDATA_PAGE_SIZE + 1 | ||||
|  | ||||
|             gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index) | ||||
| @@ -834,8 +1034,6 @@ class YoutubeUserIE(InfoExtractor): | ||||
|             if len(ids_in_page) < self._GDATA_PAGE_SIZE: | ||||
|                 break | ||||
|  | ||||
|             pagenum += 1 | ||||
|  | ||||
|         urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] | ||||
|         url_results = [self.url_result(rurl, 'Youtube') for rurl in urls] | ||||
|         return [self.playlist_result(url_results, playlist_title = username)] | ||||
| @@ -898,33 +1096,30 @@ class YoutubeShowIE(InfoExtractor): | ||||
|         return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] | ||||
|  | ||||
|  | ||||
| class YoutubeFeedsInfoExtractor(YoutubeIE): | ||||
| class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): | ||||
|     """ | ||||
|     Base class for extractors that fetch info from | ||||
|     http://www.youtube.com/feed_ajax | ||||
|     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties. | ||||
|     """ | ||||
|     _LOGIN_REQUIRED = True | ||||
|     _PAGING_STEP = 30 | ||||
|  | ||||
|     # Overwrite YoutubeIE properties we don't want | ||||
|     _TESTS = [] | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return re.match(cls._VALID_URL, url) is not None | ||||
|     # use action_load_personal_feed instead of action_load_system_feed | ||||
|     _PERSONAL_FEED = False | ||||
|  | ||||
|     @property | ||||
|     def _FEED_TEMPLATE(self): | ||||
|         return 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=%s&paging=%%s' % self._FEED_NAME | ||||
|         action = 'action_load_system_feed' | ||||
|         if self._PERSONAL_FEED: | ||||
|             action = 'action_load_personal_feed' | ||||
|         return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME) | ||||
|  | ||||
|     @property | ||||
|     def IE_NAME(self): | ||||
|         return u'youtube:%s' % self._FEED_NAME | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True) | ||||
|         super(YoutubeFeedsInfoExtractor, self)._real_initialize() | ||||
|         self._login() | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         feed_entries = [] | ||||
| @@ -936,7 +1131,7 @@ class YoutubeFeedsInfoExtractor(YoutubeIE): | ||||
|                                           u'Downloading page %s' % i) | ||||
|             info = json.loads(info) | ||||
|             feed_html = info['feed_html'] | ||||
|             m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html) | ||||
|             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) | ||||
|             ids = orderedSet(m.group(1) for m in m_ids) | ||||
|             feed_entries.extend(self.url_result(id, 'Youtube') for id in ids) | ||||
|             if info['paging'] is None: | ||||
| @@ -954,3 +1149,22 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' | ||||
|     _FEED_NAME = 'recommended' | ||||
|     _PLAYLIST_TITLE = u'Youtube Recommended videos' | ||||
|  | ||||
| class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): | ||||
|     IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater' | ||||
|     _FEED_NAME = 'watch_later' | ||||
|     _PLAYLIST_TITLE = u'Youtube Watch Later' | ||||
|     _PAGING_STEP = 100 | ||||
|     _PERSONAL_FEED = True | ||||
|  | ||||
| class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): | ||||
|     IE_NAME = u'youtube:favorites' | ||||
|     IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?' | ||||
|     _LOGIN_REQUIRED = True | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos') | ||||
|         playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id') | ||||
|         return self.url_result(playlist_id, 'YoutubePlaylist') | ||||
|   | ||||
| @@ -207,7 +207,7 @@ if sys.version_info >= (2,7): | ||||
|     def find_xpath_attr(node, xpath, key, val): | ||||
|         """ Find the xpath xpath[@key=val] """ | ||||
|         assert re.match(r'^[a-zA-Z]+$', key) | ||||
|         assert re.match(r'^[a-zA-Z@]*$', val) | ||||
|         assert re.match(r'^[a-zA-Z@\s]*$', val) | ||||
|         expr = xpath + u"[@%s='%s']" % (key, val) | ||||
|         return node.find(expr) | ||||
| else: | ||||
| @@ -497,7 +497,7 @@ class ExtractorError(Exception): | ||||
|         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): | ||||
|             expected = True | ||||
|         if not expected: | ||||
|             msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.' | ||||
|             msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.' | ||||
|         super(ExtractorError, self).__init__(msg) | ||||
|  | ||||
|         self.traceback = tb | ||||
| @@ -657,6 +657,9 @@ def determine_ext(url, default_ext=u'unknown_video'): | ||||
|     else: | ||||
|         return default_ext | ||||
|  | ||||
| def subtitles_filename(filename, sub_lang, sub_format): | ||||
|     return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format | ||||
|  | ||||
| def date_from_str(date_str): | ||||
|     """ | ||||
|     Return a datetime object from a string in the format YYYYMMDD or | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2013.07.24.1' | ||||
| __version__ = '2013.08.23' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user