Compare commits
	
		
			615 Commits
		
	
	
		
			2013.02.18
			...
			2013.07.02
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | d5a62e4f5f | ||
|  | 9a82b2389f | ||
|  | 8dba13f7e8 | ||
|  | deacef651f | ||
|  | 2e1b3afeca | ||
|  | 652e776893 | ||
|  | d055fe4cb0 | ||
|  | 131842bb0b | ||
|  | 59fc531f78 | ||
|  | 5c44c15438 | ||
|  | 62067cb9b8 | ||
|  | 0f81866329 | ||
|  | 2db67bc0f4 | ||
|  | 7dba9cd039 | ||
|  | 75dff0eef7 | ||
|  | d828f3a550 | ||
|  | bcd6e4bd07 | ||
|  | 53936f3d57 | ||
|  | 0beb3add18 | ||
|  | f9bd64c098 | ||
|  | d7f44b5bdb | ||
|  | 48bfb5f238 | ||
|  | 97ebe8dcaf | ||
|  | d4409747ba | ||
|  | 37b6a6617f | ||
|  | ca1c9cfe11 | ||
|  | adeb4d7469 | ||
|  | 50587ee8ec | ||
|  | 8244288dfe | ||
|  | 6ffe72835a | ||
|  | 8ba5e990a5 | ||
|  | 9afb1afcc6 | ||
|  | 0e21093a8f | ||
|  | 9c5cd0948f | ||
|  | 1083705fe8 | ||
|  | f3d294617f | ||
|  | de33a30858 | ||
|  | 887a227953 | ||
|  | 705f6f35bc | ||
|  | e648b22dbd | ||
|  | 257a2501fa | ||
|  | 99afb3ddd4 | ||
|  | a3c776203f | ||
|  | 53f350c165 | ||
|  | f46d31f948 | ||
|  | bf64ff72db | ||
|  | bc2884afc1 | ||
|  | 023fa8c440 | ||
|  | 427023a1e6 | ||
|  | a924876fed | ||
|  | 3f223f7b2e | ||
|  | fc2c063e1e | ||
|  | 20db33e299 | ||
|  | c0109aa497 | ||
|  | ba7a1de04d | ||
|  | 4269e78a80 | ||
|  | 6f5ac90cf3 | ||
|  | de282fc217 | ||
|  | ddbd903576 | ||
|  | 0c56a3f773 | ||
|  | 9d069c4778 | ||
|  | 0d843f796b | ||
|  | 67f51b3d8c | ||
|  | 5c5de1c79a | ||
|  | 0821771466 | ||
|  | 83f6f68e79 | ||
|  | 27473d18da | ||
|  | 0c6c096c20 | ||
|  | 52c8ade4ad | ||
|  | 0e853ca4c4 | ||
|  | 41beccbab0 | ||
|  | 2eb88d953f | ||
|  | 1f0483b4b1 | ||
|  | 6b47c7f24e | ||
|  | d798e1c7a9 | ||
|  | 3a8736bd74 | ||
|  | c8c5163618 | ||
|  | 500f3d2432 | ||
|  | ed4a915e08 | ||
|  | b8f7b1579a | ||
|  | ed54491c60 | ||
|  | e4decf2750 | ||
|  | c90f13d106 | ||
|  | 62008f69c1 | ||
|  | e88f5e0b4e | ||
|  | 769fda3c5a | ||
|  | 23300d7149 | ||
|  | f5756f388a | ||
|  | ee313cdcbf | ||
|  | 8b50fed04b | ||
|  | 5b66de8859 | ||
|  | e38af9e00c | ||
|  | 6b37f0be55 | ||
|  | 6e5d5f2fc1 | ||
|  | 75c9481224 | ||
|  | 5746f9da99 | ||
|  | 112da0a0ce | ||
|  | bcd606c0fe | ||
|  | ed92bc9f6e | ||
|  | 9b0756f8f2 | ||
|  | aa0c87391c | ||
|  | b1dfdc51b1 | ||
|  | 2e32528012 | ||
|  | f64e7695a1 | ||
|  | 5abeaf0650 | ||
|  | 8bcc355972 | ||
|  | 6b4642fae3 | ||
|  | d1bd37deac | ||
|  | 405ec05cb2 | ||
|  | 52e8e1dc88 | ||
|  | b98a6b2f72 | ||
|  | 0ca45b233f | ||
|  | 65cceef8f4 | ||
|  | b004821fa9 | ||
|  | 81b42336ad | ||
|  | c6c1974672 | ||
|  | a545d1d262 | ||
|  | 037fcd0047 | ||
|  | 318452bc0c | ||
|  | d746cd88c2 | ||
|  | 9c42603b5a | ||
|  | ea93cce4f6 | ||
|  | f4daa18152 | ||
|  | 9caa687d81 | ||
|  | 3b58c6fb54 | ||
|  | 5926c10690 | ||
|  | df725153d2 | ||
|  | d662896090 | ||
|  | db241e8645 | ||
|  | ead28ff30a | ||
|  | 515d7a5e73 | ||
|  | 14fbdc9cdd | ||
|  | 98bcd2834a | ||
|  | f7ab6cbe16 | ||
|  | 28ef06f7c2 | ||
|  | 577d02370d | ||
|  | 50be92c11c | ||
|  | d18596baf4 | ||
|  | 7ce7e39476 | ||
|  | 93eb15c573 | ||
|  | 9f4d83e3b1 | ||
|  | 1c251cd948 | ||
|  | 70d1924f8b | ||
|  | 7b4948b05f | ||
|  | 878b5d9f0d | ||
|  | 2bc1820660 | ||
|  | 8bf8b5a577 | ||
|  | 8222d8de88 | ||
|  | c7253e2e8c | ||
|  | d69cf69a6a | ||
|  | d02ecdefab | ||
|  | bc857bfce0 | ||
|  | f8bf74575a | ||
|  | 964ac8b584 | ||
|  | a3522dfddd | ||
|  | d3a8613b6e | ||
|  | 200b388752 | ||
|  | dabcaf3b06 | ||
|  | e646ffe795 | ||
|  | b0dcc3c47f | ||
|  | b07d9c23c5 | ||
|  | d71cae62cc | ||
|  | 633a50cf4b | ||
|  | 825e0984e2 | ||
|  | d1cade5ade | ||
|  | 190717e31f | ||
|  | 0824c28c8b | ||
|  | c59b4aaeef | ||
|  | f9c6cbf002 | ||
|  | b8fe71ab86 | ||
|  | cb10cded2a | ||
|  | cd8b830292 | ||
|  | 1ac4004f3a | ||
|  | e17d368ae2 | ||
|  | 27110b0567 | ||
|  | 9fe4de3471 | ||
|  | d26d440e19 | ||
|  | 9f5daf0006 | ||
|  | eb1634cbf8 | ||
|  | 01c10ca26e | ||
|  | 45aef47281 | ||
|  | ae287755b7 | ||
|  | a37f27ae99 | ||
|  | 49f5f315fd | ||
|  | 97d2db017c | ||
|  | 2c64df0399 | ||
|  | 828400422a | ||
|  | c3c77cec30 | ||
|  | 1183b85f50 | ||
|  | 0143dc029c | ||
|  | e10e576fed | ||
|  | 78af8eb1d1 | ||
|  | 79e93125d0 | ||
|  | 48db0b1f4a | ||
|  | 8f0578f0fc | ||
|  | 250f557872 | ||
|  | 462dc88b17 | ||
|  | 570fa151fc | ||
|  | 9c286cfa00 | ||
|  | 80cbb6ddbb | ||
|  | 9fd5ce0cbe | ||
|  | 1736dec629 | ||
|  | b8a360837a | ||
|  | fc28721960 | ||
|  | 51ce3a75c9 | ||
|  | 335056663a | ||
|  | 5b286728de | ||
|  | 291a168bcc | ||
|  | fda7d31aa0 | ||
|  | cbf46c737c | ||
|  | 7beb36a529 | ||
|  | 153697660d | ||
|  | 60a72e8d45 | ||
|  | 426ff04282 | ||
|  | a50e1b32e4 | ||
|  | 9eae41ddef | ||
|  | aad0d6d5ba | ||
|  | 7aca14a1ec | ||
|  | d1596ef439 | ||
|  | ea63e4998b | ||
|  | a08dfd27a8 | ||
|  | f58848011e | ||
|  | 934858ad86 | ||
|  | 3c25b9abae | ||
|  | 3fc03845a1 | ||
|  | 9b122384e9 | ||
|  | 9f4e6bbaeb | ||
|  | b05654f0e3 | ||
|  | 9b3a760bbb | ||
|  | d5822b96b0 | ||
|  | b3d14cbfa7 | ||
|  | d6039175e5 | ||
|  | 97d6faaced | ||
|  | 219b8130df | ||
|  | 38cbc40a64 | ||
|  | 93d3a642a9 | ||
|  | c5e8d7af0e | ||
|  | d6983cb460 | ||
|  | dd9829292e | ||
|  | 89cb0eb0b6 | ||
|  | 9b5fffb149 | ||
|  | 1f90438025 | ||
|  | a130adb25b | ||
|  | 8756c5fe7a | ||
|  | 828dba2983 | ||
|  | 6b3f5a329b | ||
|  | 63ef586b05 | ||
|  | 383a6a61b1 | ||
|  | 4fdd4e6f6f | ||
|  | 01ba4b80a7 | ||
|  | de66764e4e | ||
|  | 1037d53988 | ||
|  | c3ab8f866c | ||
|  | 94eb2dd1fe | ||
|  | 346b5ce8fd | ||
|  | b37fbb990b | ||
|  | ef75f76f5c | ||
|  | e296100005 | ||
|  | 953dd93a48 | ||
|  | e704f4d378 | ||
|  | 77d0f05f71 | ||
|  | 50d2376769 | ||
|  | 759d525301 | ||
|  | fcfa188548 | ||
|  | f4c8bbcfc2 | ||
|  | 31eead52e7 | ||
|  | 038a3a1a61 | ||
|  | 587c68b2cd | ||
|  | 377fdf5dde | ||
|  | 5c67601931 | ||
|  | 68f54207a3 | ||
|  | bb47437686 | ||
|  | 213b715893 | ||
|  | 449d5c910c | ||
|  | 0251f9c9c0 | ||
|  | 8bc7c3d858 | ||
|  | af44c94862 | ||
|  | 36ed7177f0 | ||
|  | 32aa88bcae | ||
|  | 51090d636b | ||
|  | 31513ea6b9 | ||
|  | 88cebbd7b8 | ||
|  | fb8f7280bc | ||
|  | f380401bbd | ||
|  | 9abc6c8b31 | ||
|  | 8cd252f115 | ||
|  | 53f72b11e5 | ||
|  | ee55fcbe12 | ||
|  | 78d3442b12 | ||
|  | 979a9dd4c4 | ||
|  | d5979c5d55 | ||
|  | 8027175600 | ||
|  | 3054ff0cbe | ||
|  | cd453d38bb | ||
|  | f5a290eed9 | ||
|  | ecb3e676a5 | ||
|  | 8b59a98610 | ||
|  | 8409501206 | ||
|  | be95cac157 | ||
|  | 476203d025 | ||
|  | 468e2e926b | ||
|  | ac3e9394e7 | ||
|  | 868d62a509 | ||
|  | 157b864a01 | ||
|  | 951b9dfd94 | ||
|  | 1142d31164 | ||
|  | 9131bde941 | ||
|  | 1132c10dc2 | ||
|  | c978a96c02 | ||
|  | 71e458d437 | ||
|  | 57bde0d9c7 | ||
|  | 50b4d25980 | ||
|  | eda60e8251 | ||
|  | c794cbbb19 | ||
|  | 4a76d1dbe5 | ||
|  | 418f734a58 | ||
|  | dc1c355b72 | ||
|  | 1b2b22ed9f | ||
|  | f2cd958c0a | ||
|  | 57adeaea87 | ||
|  | 8f3f1aef05 | ||
|  | 51d2453c7a | ||
|  | 45014296be | ||
|  | afef36c950 | ||
|  | b31756c18e | ||
|  | f008688520 | ||
|  | 5b68ea215b | ||
|  | b1d568f0bc | ||
|  | 17bd1b2f41 | ||
|  | 5b0d3cc0cd | ||
|  | d4f76f1674 | ||
|  | 340fa21198 | ||
|  | de5d66d431 | ||
|  | 7bdb17d4d5 | ||
|  | 419c64b107 | ||
|  | 99a5ae3f8e | ||
|  | c7563c528b | ||
|  | e30e9318da | ||
|  | 5c51028d38 | ||
|  | c1d58e1c67 | ||
|  | 02030ff7fe | ||
|  | f45c185fa9 | ||
|  | 1bd96c3a60 | ||
|  | 929f85d851 | ||
|  | 98d4a4e6bc | ||
|  | fb2f83360c | ||
|  | 3c5e7729e1 | ||
|  | 5a853e1423 | ||
|  | 2f58b12dad | ||
|  | 59f4fd4dc6 | ||
|  | 5738240ee8 | ||
|  | 86fd453ea8 | ||
|  | c83411b9ee | ||
|  | 057c9938a1 | ||
|  | 9259966132 | ||
|  | b08980412e | ||
|  | 532a1e0429 | ||
|  | 2a36c352a0 | ||
|  | 1a2adf3f49 | ||
|  | 43b62accbb | ||
|  | be74864ace | ||
|  | 0ae456f08a | ||
|  | 0f75d25991 | ||
|  | 67129e4a15 | ||
|  | dfb9323cf9 | ||
|  | 7f5bd09baf | ||
|  | 02d5eb935f | ||
|  | 94ca71b7cc | ||
|  | b338f1b154 | ||
|  | 486f0c9476 | ||
|  | d96680f58d | ||
|  | f8602d3242 | ||
|  | 0c021ad171 | ||
|  | 086d7b4500 | ||
|  | 891629c84a | ||
|  | ea6d901e51 | ||
|  | 4539dd30e6 | ||
|  | c43e57242e | ||
|  | db8fd71ca9 | ||
|  | f4f316881d | ||
|  | 0e16f09474 | ||
|  | 09dd418f53 | ||
|  | decd1d1737 | ||
|  | 180e689f7e | ||
|  | 7da5556ac2 | ||
|  | f23a03a89b | ||
|  | 84e4682f0e | ||
|  | 1f99511210 | ||
|  | 0d94f2474c | ||
|  | 480b6c1e8b | ||
|  | 95464f14d1 | ||
|  | c34407d16c | ||
|  | 5e34d2ebbf | ||
|  | 815dd2ffa8 | ||
|  | ecd5fb49c5 | ||
|  | b86174e7a3 | ||
|  | 2e2038dc35 | ||
|  | 46bfb42258 | ||
|  | feecf22511 | ||
|  | 4c4f15eb78 | ||
|  | 104ccdb8b4 | ||
|  | 6ccff79594 | ||
|  | aed523ecc1 | ||
|  | d496a75d0a | ||
|  | 5c01dd1e73 | ||
|  | 11d9224e3b | ||
|  | 34c29ba1d7 | ||
|  | 6cd657f9f2 | ||
|  | 4ae9e55822 | ||
|  | 8749b71273 | ||
|  | dbc50fdf82 | ||
|  | b1d2ef9255 | ||
|  | 5fb16555af | ||
|  | ba7c775a04 | ||
|  | fe348844d9 | ||
|  | 767e00277f | ||
|  | 6ce533a220 | ||
|  | 08b2ac745a | ||
|  | 46a127eecb | ||
|  | fc63faf070 | ||
|  | 9665577802 | ||
|  | 434aca5b14 | ||
|  | e31852aba9 | ||
|  | 37254abc36 | ||
|  | a11ea50319 | ||
|  | 81df121dd3 | ||
|  | 50f6412eb8 | ||
|  | bf50b0383e | ||
|  | bd55852517 | ||
|  | 4c9f7a9988 | ||
|  | aba8df23ed | ||
|  | 3820df0106 | ||
|  | e74c504f91 | ||
|  | fa70605db2 | ||
|  | 0d173446ff | ||
|  | 320e26a0af | ||
|  | a3d689cfb3 | ||
|  | 59cc5d9380 | ||
|  | 28535652ab | ||
|  | 7b670a4483 | ||
|  | 69fc019f26 | ||
|  | 613bf66939 | ||
|  | 9edb0916f4 | ||
|  | f4b659f782 | ||
|  | c70446c7df | ||
|  | c76cb6d548 | ||
|  | 71f37e90ef | ||
|  | 75b5c590a8 | ||
|  | 4469666780 | ||
|  | c15e024141 | ||
|  | 8cb94542f4 | ||
|  | c681a03918 | ||
|  | 30f2999962 | ||
|  | 74e3452b9e | ||
|  | 9e1cf0c200 | ||
|  | e11eb11906 | ||
|  | c04bca6f60 | ||
|  | b0936ef423 | ||
|  | 41a6eb949a | ||
|  | f17ce13a92 | ||
|  | 8c416ad29a | ||
|  | c72938240e | ||
|  | e905b6f80e | ||
|  | 6de8f1afb7 | ||
|  | 9341212642 | ||
|  | f7a9721e16 | ||
|  | 089e843b0f | ||
|  | c8056d866a | ||
|  | 49da66e459 | ||
|  | fb6c319904 | ||
|  | 5a8d13199c | ||
|  | dce9027045 | ||
|  | feba604e92 | ||
|  | d22f65413a | ||
|  | 0599ef8c08 | ||
|  | bfdf469295 | ||
|  | 32c96387c1 | ||
|  | c8c5443bb5 | ||
|  | a60b854d90 | ||
|  | b8ad4f02a2 | ||
|  | d281274bf2 | ||
|  | b625bc2c31 | ||
|  | f4381ab88a | ||
|  | 744435f2a4 | ||
|  | 855703e55e | ||
|  | 927c8c4924 | ||
|  | 0ba994e9e3 | ||
|  | af9ad45cd4 | ||
|  | e0fee250c3 | ||
|  | 72ca05016d | ||
|  | 844d1f9fa1 | ||
|  | 213c31ae16 | ||
|  | 04f3d551a0 | ||
|  | e8600d69fd | ||
|  | b03d65c237 | ||
|  | 8743974189 | ||
|  | dc36bc9434 | ||
|  | bce878a7c1 | ||
|  | 532d797824 | ||
|  | 146c12a2da | ||
|  | d39919c03e | ||
|  | df2dedeefb | ||
|  | adb029ed81 | ||
|  | 43ff1a347d | ||
|  | 14294236bf | ||
|  | c2b293ba30 | ||
|  | 37cd9f522f | ||
|  | f33154cd39 | ||
|  | bafeed9f5d | ||
|  | ef767f9fd5 | ||
|  | bc97f6d60c | ||
|  | 90a99c1b5e | ||
|  | f375d4b7de | ||
|  | fa41fbd318 | ||
|  | 6a205c8876 | ||
|  | 0fb3756409 | ||
|  | fbbdf475b1 | ||
|  | c238be3e3a | ||
|  | 1bf2801e6a | ||
|  | c9c8402093 | ||
|  | 6060788083 | ||
|  | e3700fc9e4 | ||
|  | b693216d8d | ||
|  | 46b9d8295d | ||
|  | 7decf8951c | ||
|  | 1f46c15262 | ||
|  | 0cd358676c | ||
|  | 43113d92cc | ||
|  | 7eab8dc750 | ||
|  | 44e939514e | ||
|  | 95506f1235 | ||
|  | a91556fd74 | ||
|  | 1447f728b5 | ||
|  | d2c690828a | ||
|  | cfa90f4adc | ||
|  | 898280a056 | ||
|  | 59b4a2f0e4 | ||
|  | 1ee9778405 | ||
|  | db74c11d2b | ||
|  | 5011cded16 | ||
|  | f10b2a9c14 | ||
|  | 5cb3c0b319 | ||
|  | b9fc428494 | ||
|  | c0ba104674 | ||
|  | 2a4093eaf3 | ||
|  | 9e62bc4439 | ||
|  | 553d097442 | ||
|  | ae608b8076 | ||
|  | c397187061 | ||
|  | e32b06e977 | ||
|  | 8c42c506cd | ||
|  | 8cc83b8dbe | ||
|  | 51af426d89 | ||
|  | 08ec0af7c6 | ||
|  | 3b221c5406 | ||
|  | 3d3423574d | ||
|  | e5edd51de4 | ||
|  | 64c78d50cc | ||
|  | b3bcca0844 | ||
|  | 61e40c88a9 | ||
|  | 40634747f7 | ||
|  | c2e21f2f0d | ||
|  | 47dcd621c0 | ||
|  | a0d6fe7b92 | ||
|  | c9fa1cbab6 | ||
|  | 8a38a194fb | ||
|  | 6ac7f082c4 | ||
|  | f6e6da9525 | ||
|  | 597cc8a455 | ||
|  | 3370abd509 | ||
|  | 631f73978c | ||
|  | e5f30ade10 | ||
|  | 6622d22c79 | ||
|  | 4e1582f372 | ||
|  | 967897fd22 | ||
|  | f918ec7ea2 | ||
|  | a2ae43a55f | ||
|  | 7ae153ee9c | ||
|  | f7b567ff84 | ||
|  | f2e237adc8 | ||
|  | 2e5457be1d | ||
|  | 7f9d41a55e | ||
|  | 8207626bbe | ||
|  | df8db1aa21 | ||
|  | 691db5ba02 | ||
|  | acb8752f80 | ||
|  | 679790eee1 | ||
|  | 6bf48bd866 | ||
|  | 790d4fcbe1 | ||
|  | 89de9eb125 | ||
|  | 6324fd1d74 | ||
|  | 9e07cf2955 | ||
|  | f03b88b3fb | ||
|  | 97d0365f49 | ||
|  | 12887875a2 | ||
|  | 450e709972 | ||
|  | 9befce2b8c | ||
|  | cb99797798 | ||
|  | f82b28146a | ||
|  | 4dc72b830c | ||
|  | ea05129ebd | ||
|  | 35d217133f | ||
|  | d1b7a24354 | ||
|  | c85538dba1 | ||
|  | 60bd48b175 | ||
|  | 4be0aa3539 | ||
|  | f636c34481 | ||
|  | 3bf79c752e | ||
|  | cdb130b09a | ||
|  | 2e5d60b7db | ||
|  | 8271226a55 | ||
|  | 1013186a17 | ||
|  | 7c038b3c32 | ||
|  | c8cd8e5f55 | ||
|  | 471cf47796 | 
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -17,4 +17,4 @@ youtube-dl.tar.gz | ||||
| .coverage | ||||
| cover/ | ||||
| updates_key.pem | ||||
| *.egg-info | ||||
| *.egg-info | ||||
| @@ -8,6 +8,7 @@ notifications: | ||||
|   email: | ||||
|     - filippo.valsorda@gmail.com | ||||
|     - phihag@phihag.de | ||||
|     - jaime.marquinez.ferrandiz+travis@gmail.com | ||||
| #  irc: | ||||
| #    channels: | ||||
| #      - "irc.freenode.org#youtube-dl" | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| include README.md | ||||
| include test/*.py | ||||
| include test/*.json | ||||
| include test/*.json | ||||
| include youtube-dl.bash-completion | ||||
| include youtube-dl.1 | ||||
|   | ||||
							
								
								
									
										20
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								Makefile
									
									
									
									
									
								
							| @@ -9,9 +9,19 @@ cleanall: clean | ||||
| PREFIX=/usr/local | ||||
| BINDIR=$(PREFIX)/bin | ||||
| MANDIR=$(PREFIX)/man | ||||
| SYSCONFDIR=/etc | ||||
| PYTHON=/usr/bin/env python | ||||
|  | ||||
| # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local | ||||
| ifeq ($(PREFIX),/usr) | ||||
|     SYSCONFDIR=/etc | ||||
| else | ||||
|     ifeq ($(PREFIX),/usr/local) | ||||
|         SYSCONFDIR=/etc | ||||
|     else | ||||
|         SYSCONFDIR=$(PREFIX)/etc | ||||
|     endif | ||||
| endif | ||||
|  | ||||
| install: youtube-dl youtube-dl.1 youtube-dl.bash-completion | ||||
| 	install -d $(DESTDIR)$(BINDIR) | ||||
| 	install -m 755 youtube-dl $(DESTDIR)$(BINDIR) | ||||
| @@ -30,15 +40,15 @@ tar: youtube-dl.tar.gz | ||||
|  | ||||
| pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 | ||||
|  | ||||
| youtube-dl: youtube_dl/*.py | ||||
| 	zip --quiet youtube-dl youtube_dl/*.py | ||||
| youtube-dl: youtube_dl/*.py youtube_dl/*/*.py | ||||
| 	zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py | ||||
| 	zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py | ||||
| 	echo '#!$(PYTHON)' > youtube-dl | ||||
| 	cat youtube-dl.zip >> youtube-dl | ||||
| 	rm youtube-dl.zip | ||||
| 	chmod a+x youtube-dl | ||||
|  | ||||
| README.md: youtube_dl/*.py | ||||
| README.md: youtube_dl/*.py youtube_dl/*/*.py | ||||
| 	COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py | ||||
|  | ||||
| README.txt: README.md | ||||
| @@ -47,7 +57,7 @@ README.txt: README.md | ||||
| youtube-dl.1: README.md | ||||
| 	pandoc -s -f markdown -t man README.md -o youtube-dl.1 | ||||
|  | ||||
| youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in | ||||
| youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in | ||||
| 	python devscripts/bash-completion.py | ||||
|  | ||||
| bash-completion: youtube-dl.bash-completion | ||||
|   | ||||
							
								
								
									
										243
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										243
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,7 +1,7 @@ | ||||
| % YOUTUBE-DL(1) | ||||
|  | ||||
| # NAME | ||||
| youtube-dl | ||||
| youtube-dl - download videos from youtube.com or other video platforms | ||||
|  | ||||
| # SYNOPSIS | ||||
| **youtube-dl** [OPTIONS] URL [URL...] | ||||
| @@ -14,113 +14,143 @@ your Unix box, on Windows or on Mac OS X. It is released to the public domain, | ||||
| which means you can modify it, redistribute it or use it however you like. | ||||
|  | ||||
| # OPTIONS | ||||
|     -h, --help               print this help text and exit | ||||
|     --version                print program version and exit | ||||
|     -U, --update             update this program to latest version | ||||
|     -i, --ignore-errors      continue on download errors | ||||
|     -r, --rate-limit LIMIT   download rate limit (e.g. 50k or 44.6m) | ||||
|     -R, --retries RETRIES    number of retries (default is 10) | ||||
|     --buffer-size SIZE       size of download buffer (e.g. 1024 or 16k) (default | ||||
|                              is 1024) | ||||
|     --no-resize-buffer       do not automatically adjust the buffer size. By | ||||
|                              default, the buffer size is automatically resized | ||||
|                              from an initial value of SIZE. | ||||
|     --dump-user-agent        display the current browser identification | ||||
|     --user-agent UA          specify a custom user agent | ||||
|     --list-extractors        List all supported extractors and the URLs they | ||||
|                              would handle | ||||
|     -h, --help                 print this help text and exit | ||||
|     --version                  print program version and exit | ||||
|     -U, --update               update this program to latest version | ||||
|     -i, --ignore-errors        continue on download errors | ||||
|     --dump-user-agent          display the current browser identification | ||||
|     --user-agent UA            specify a custom user agent | ||||
|     --referer REF              specify a custom referer, use if the video access | ||||
|                                is restricted to one domain | ||||
|     --list-extractors          List all supported extractors and the URLs they | ||||
|                                would handle | ||||
|     --extractor-descriptions   Output descriptions of all supported extractors | ||||
|     --proxy URL                Use the specified HTTP/HTTPS proxy | ||||
|     --no-check-certificate     Suppress HTTPS certificate validation. | ||||
|  | ||||
| ## Video Selection: | ||||
|     --playlist-start NUMBER  playlist video to start at (default is 1) | ||||
|     --playlist-end NUMBER    playlist video to end at (default is last) | ||||
|     --match-title REGEX      download only matching titles (regex or caseless | ||||
|                              sub-string) | ||||
|     --reject-title REGEX     skip download for matching titles (regex or | ||||
|                              caseless sub-string) | ||||
|     --max-downloads NUMBER   Abort after downloading NUMBER files | ||||
|     --min-filesize SIZE      Do not download any videos smaller than SIZE (e.g. | ||||
|                              50k or 44.6m) | ||||
|     --max-filesize SIZE      Do not download any videos larger than SIZE (e.g. | ||||
|                              50k or 44.6m) | ||||
|     --playlist-start NUMBER    playlist video to start at (default is 1) | ||||
|     --playlist-end NUMBER      playlist video to end at (default is last) | ||||
|     --match-title REGEX        download only matching titles (regex or caseless | ||||
|                                sub-string) | ||||
|     --reject-title REGEX       skip download for matching titles (regex or | ||||
|                                caseless sub-string) | ||||
|     --max-downloads NUMBER     Abort after downloading NUMBER files | ||||
|     --min-filesize SIZE        Do not download any videos smaller than SIZE | ||||
|                                (e.g. 50k or 44.6m) | ||||
|     --max-filesize SIZE        Do not download any videos larger than SIZE (e.g. | ||||
|                                50k or 44.6m) | ||||
|     --date DATE                download only videos uploaded on this date | ||||
|     --datebefore DATE          download only videos uploaded before this date | ||||
|     --dateafter DATE           download only videos uploaded after this date | ||||
|  | ||||
| ## Download Options: | ||||
|     -r, --rate-limit LIMIT     maximum download rate (e.g. 50k or 44.6m) | ||||
|     -R, --retries RETRIES      number of retries (default is 10) | ||||
|     --buffer-size SIZE         size of download buffer (e.g. 1024 or 16k) | ||||
|                                (default is 1024) | ||||
|     --no-resize-buffer         do not automatically adjust the buffer size. By | ||||
|                                default, the buffer size is automatically resized | ||||
|                                from an initial value of SIZE. | ||||
|  | ||||
| ## Filesystem Options: | ||||
|     -t, --title              use title in file name | ||||
|     --id                     use video ID in file name | ||||
|     -l, --literal            [deprecated] alias of --title | ||||
|     -A, --auto-number        number downloaded files starting from 00000 | ||||
|     -o, --output TEMPLATE    output filename template. Use %(title)s to get the | ||||
|                              title, %(uploader)s for the uploader name, | ||||
|                              %(uploader_id)s for the uploader nickname if | ||||
|                              different, %(autonumber)s to get an automatically | ||||
|                              incremented number, %(ext)s for the filename | ||||
|                              extension, %(upload_date)s for the upload date | ||||
|                              (YYYYMMDD), %(extractor)s for the provider | ||||
|                              (youtube, metacafe, etc), %(id)s for the video id | ||||
|                              and %% for a literal percent. Use - to output to | ||||
|                              stdout. Can also be used to download to a different | ||||
|                              directory, for example with -o '/my/downloads/%(upl | ||||
|                              oader)s/%(title)s-%(id)s.%(ext)s' . | ||||
|     --restrict-filenames     Restrict filenames to only ASCII characters, and | ||||
|                              avoid "&" and spaces in filenames | ||||
|     -a, --batch-file FILE    file containing URLs to download ('-' for stdin) | ||||
|     -w, --no-overwrites      do not overwrite files | ||||
|     -c, --continue           resume partially downloaded files | ||||
|     --no-continue            do not resume partially downloaded files (restart | ||||
|                              from beginning) | ||||
|     --cookies FILE           file to read cookies from and dump cookie jar in | ||||
|     --no-part                do not use .part files | ||||
|     --no-mtime               do not use the Last-modified header to set the file | ||||
|                              modification time | ||||
|     --write-description      write video description to a .description file | ||||
|     --write-info-json        write video metadata to a .info.json file | ||||
|     -t, --title                use title in file name (default) | ||||
|     --id                       use only video ID in file name | ||||
|     -l, --literal              [deprecated] alias of --title | ||||
|     -A, --auto-number          number downloaded files starting from 00000 | ||||
|     -o, --output TEMPLATE      output filename template. Use %(title)s to get | ||||
|                                the title, %(uploader)s for the uploader name, | ||||
|                                %(uploader_id)s for the uploader nickname if | ||||
|                                different, %(autonumber)s to get an automatically | ||||
|                                incremented number, %(ext)s for the filename | ||||
|                                extension, %(upload_date)s for the upload date | ||||
|                                (YYYYMMDD), %(extractor)s for the provider | ||||
|                                (youtube, metacafe, etc), %(id)s for the video id, | ||||
|                                %(playlist)s for the playlist the video is in, | ||||
|                                %(playlist_index)s for the position in the | ||||
|                                playlist and %% for a literal percent. Use - to | ||||
|                                output to stdout. Can also be used to download to | ||||
|                                a different directory, for example with -o '/my/d | ||||
|                                ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' . | ||||
|     --autonumber-size NUMBER   Specifies the number of digits in %(autonumber)s | ||||
|                                when it is present in output filename template or | ||||
|                                --auto-number option is given | ||||
|     --restrict-filenames       Restrict filenames to only ASCII characters, and | ||||
|                                avoid "&" and spaces in filenames | ||||
|     -a, --batch-file FILE      file containing URLs to download ('-' for stdin) | ||||
|     -w, --no-overwrites        do not overwrite files | ||||
|     -c, --continue             resume partially downloaded files | ||||
|     --no-continue              do not resume partially downloaded files (restart | ||||
|                                from beginning) | ||||
|     --cookies FILE             file to read cookies from and dump cookie jar in | ||||
|     --no-part                  do not use .part files | ||||
|     --no-mtime                 do not use the Last-modified header to set the | ||||
|                                file modification time | ||||
|     --write-description        write video description to a .description file | ||||
|     --write-info-json          write video metadata to a .info.json file | ||||
|     --write-thumbnail          write thumbnail image to disk | ||||
|  | ||||
| ## Verbosity / Simulation Options: | ||||
|     -q, --quiet              activates quiet mode | ||||
|     -s, --simulate           do not download the video and do not write anything | ||||
|                              to disk | ||||
|     --skip-download          do not download the video | ||||
|     -g, --get-url            simulate, quiet but print URL | ||||
|     -e, --get-title          simulate, quiet but print title | ||||
|     --get-thumbnail          simulate, quiet but print thumbnail URL | ||||
|     --get-description        simulate, quiet but print video description | ||||
|     --get-filename           simulate, quiet but print output filename | ||||
|     --get-format             simulate, quiet but print output format | ||||
|     --newline                output progress bar as new lines | ||||
|     --no-progress            do not print progress bar | ||||
|     --console-title          display progress in console titlebar | ||||
|     -v, --verbose            print various debugging information | ||||
|     -q, --quiet                activates quiet mode | ||||
|     -s, --simulate             do not download the video and do not write | ||||
|                                anything to disk | ||||
|     --skip-download            do not download the video | ||||
|     -g, --get-url              simulate, quiet but print URL | ||||
|     -e, --get-title            simulate, quiet but print title | ||||
|     --get-id                   simulate, quiet but print id | ||||
|     --get-thumbnail            simulate, quiet but print thumbnail URL | ||||
|     --get-description          simulate, quiet but print video description | ||||
|     --get-filename             simulate, quiet but print output filename | ||||
|     --get-format               simulate, quiet but print output format | ||||
|     --newline                  output progress bar as new lines | ||||
|     --no-progress              do not print progress bar | ||||
|     --console-title            display progress in console titlebar | ||||
|     -v, --verbose              print various debugging information | ||||
|     --dump-intermediate-pages  print downloaded pages to debug problems (very | ||||
|                                verbose) | ||||
|  | ||||
| ## Video Format Options: | ||||
|     -f, --format FORMAT      video format code | ||||
|     --all-formats            download all available video formats | ||||
|     --prefer-free-formats    prefer free video formats unless a specific one is | ||||
|                              requested | ||||
|     --max-quality FORMAT     highest quality format to download | ||||
|     -F, --list-formats       list all available formats (currently youtube only) | ||||
|     --write-srt              write video closed captions to a .srt file | ||||
|                              (currently youtube only) | ||||
|     --srt-lang LANG          language of the closed captions to download | ||||
|                              (optional) use IETF language tags like 'en' | ||||
|     -f, --format FORMAT        video format code, specify the order of | ||||
|                                preference using slashes: "-f 22/17/18" | ||||
|     --all-formats              download all available video formats | ||||
|     --prefer-free-formats      prefer free video formats unless a specific one | ||||
|                                is requested | ||||
|     --max-quality FORMAT       highest quality format to download | ||||
|     -F, --list-formats         list all available formats (currently youtube | ||||
|                                only) | ||||
|     --write-sub                write subtitle file (currently youtube only) | ||||
|     --write-auto-sub           write automatic subtitle file (currently youtube | ||||
|                                only) | ||||
|     --only-sub                 [deprecated] alias of --skip-download | ||||
|     --all-subs                 downloads all the available subtitles of the | ||||
|                                video (currently youtube only) | ||||
|     --list-subs                lists all available subtitles for the video | ||||
|                                (currently youtube only) | ||||
|     --sub-format FORMAT        subtitle format [srt/sbv/vtt] (default=srt) | ||||
|                                (currently youtube only) | ||||
|     --sub-lang LANG            language of the subtitles to download (optional) | ||||
|                                use IETF language tags like 'en' | ||||
|  | ||||
| ## Authentication Options: | ||||
|     -u, --username USERNAME  account username | ||||
|     -p, --password PASSWORD  account password | ||||
|     -n, --netrc              use .netrc authentication data | ||||
|     -u, --username USERNAME    account username | ||||
|     -p, --password PASSWORD    account password | ||||
|     -n, --netrc                use .netrc authentication data | ||||
|     --video-password PASSWORD  video password (vimeo only) | ||||
|  | ||||
| ## Post-processing Options: | ||||
|     -x, --extract-audio      convert video files to audio-only files (requires | ||||
|                              ffmpeg or avconv and ffprobe or avprobe) | ||||
|     --audio-format FORMAT    "best", "aac", "vorbis", "mp3", "m4a", "opus", or | ||||
|                              "wav"; best by default | ||||
|     --audio-quality QUALITY  ffmpeg/avconv audio quality specification, insert a | ||||
|                              value between 0 (better) and 9 (worse) for VBR or a | ||||
|                              specific bitrate like 128K (default 5) | ||||
|     --recode-video FORMAT    Encode the video to another format if necessary | ||||
|                              (currently supported: mp4|flv|ogg|webm) | ||||
|     -k, --keep-video         keeps the video file on disk after the post- | ||||
|                              processing; the video is erased by default | ||||
|     --no-post-overwrites     do not overwrite post-processed files; the post- | ||||
|                              processed files are overwritten by default | ||||
|     -x, --extract-audio        convert video files to audio-only files (requires | ||||
|                                ffmpeg or avconv and ffprobe or avprobe) | ||||
|     --audio-format FORMAT      "best", "aac", "vorbis", "mp3", "m4a", "opus", or | ||||
|                                "wav"; best by default | ||||
|     --audio-quality QUALITY    ffmpeg/avconv audio quality specification, insert | ||||
|                                a value between 0 (better) and 9 (worse) for VBR | ||||
|                                or a specific bitrate like 128K (default 5) | ||||
|     --recode-video FORMAT      Encode the video to another format if necessary | ||||
|                                (currently supported: mp4|flv|ogg|webm) | ||||
|     -k, --keep-video           keeps the video file on disk after the post- | ||||
|                                processing; the video is erased by default | ||||
|     --no-post-overwrites       do not overwrite post-processed files; the post- | ||||
|                                processed files are overwritten by default | ||||
|  | ||||
| # CONFIGURATION | ||||
|  | ||||
| @@ -138,8 +168,10 @@ The `-o` option allows users to indicate a template for the output file names. T | ||||
|  - `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4). | ||||
|  - `epoch`: The sequence will be replaced by the Unix epoch when creating the file. | ||||
|  - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. | ||||
|  - `playlist`: The name or the id of the playlist that contains the video. | ||||
|  - `playlist_index`: The index of the video in the playlist, a five-digit number. | ||||
|  | ||||
| The current default template is `%(id)s.%(ext)s`, but that will be switchted to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment). | ||||
| The current default template is `%(title)s-%(id)s.%(ext)s`. | ||||
|  | ||||
| In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: | ||||
|  | ||||
| @@ -148,15 +180,28 @@ In some cases, you don't want special characters such as 中, spaces, or &, such | ||||
|     $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames | ||||
|     youtube-dl_test_video_.mp4          # A simple file name | ||||
|  | ||||
| # VIDEO SELECTION | ||||
|  | ||||
| Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`. They accept dates in two formats: | ||||
|  | ||||
|  - Absolute dates: Dates in the format `YYYYMMDD`. | ||||
|  - Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?` | ||||
|   | ||||
| Examples: | ||||
|  | ||||
| 	$ youtube-dl --dateafter now-6months #will only download the videos uploaded in the last 6 months | ||||
| 	$ youtube-dl --date 19700101 #will only download the videos uploaded on January 1, 1970 | ||||
| 	$ youtube-dl --dateafter 20000101 --datebefore 20100101 #will only download the videos uploaded between 2000 and 2010 | ||||
|  | ||||
| # FAQ | ||||
|  | ||||
| ### Can you please put the -b option back? | ||||
|  | ||||
| Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. | ||||
| Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it. | ||||
|  | ||||
| ### I get HTTP error 402 when trying to download a video. What's this? | ||||
|  | ||||
| Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. | ||||
| Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering providing a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restarting youtube-dl. | ||||
|  | ||||
| ### I have downloaded a video but how can I play it? | ||||
|  | ||||
|   | ||||
							
								
								
									
										57
									
								
								devscripts/gh-pages/update-feed.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										57
									
								
								devscripts/gh-pages/update-feed.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,57 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import datetime | ||||
|  | ||||
| import textwrap | ||||
|  | ||||
| import json | ||||
|  | ||||
# Skeleton of the Atom document. @TIMESTAMP@ and @ENTRIES@ are placeholders
# filled in by build_feed(). Nested elements are indented with tabs.
atom_template = (
    "<?xml version='1.0' encoding='utf-8'?>\n"
    '<atom:feed xmlns:atom="http://www.w3.org/2005/Atom">\n'
    '\t<atom:title>youtube-dl releases</atom:title>\n'
    '\t<atom:id>youtube-dl-updates-feed</atom:id>\n'
    '\t<atom:updated>@TIMESTAMP@</atom:updated>\n'
    '\t@ENTRIES@\n'
    '</atom:feed>')

# One feed entry per released version. @VERSION@ and @TIMESTAMP@ are
# placeholders filled in by build_feed().
entry_template = (
    '\n'
    '<atom:entry>\n'
    '\t<atom:id>youtube-dl-@VERSION@</atom:id>\n'
    '\t<atom:title>New version @VERSION@</atom:title>\n'
    '\t<atom:link href="http://rg3.github.io/youtube-dl" />\n'
    '\t<atom:content type="xhtml">\n'
    '\t\t<div xmlns="http://www.w3.org/1999/xhtml">\n'
    '\t\t\tDownloads available at <a href="http://youtube-dl.org/downloads/@VERSION@/">http://youtube-dl.org/downloads/@VERSION@/</a>\n'
    '\t\t</div>\n'
    '\t</atom:content>\n'
    '\t<atom:author>\n'
    '\t\t<atom:name>The youtube-dl maintainers</atom:name>\n'
    '\t</atom:author>\n'
    '\t<atom:updated>@TIMESTAMP@</atom:updated>\n'
    '</atom:entry>\n')


def version_timestamp(version, fallback):
    """Return an RFC 3339 date-time for a date-style version string.

    version:  a release name such as '2013.07.02'; fields after the third
              dot-separated one are ignored.
    fallback: value returned when the first three fields do not form a
              valid calendar date.

    Atom requires <updated> to be a full date-time with a time zone, so a
    bare 'YYYY-MM-DD' is not sufficient; midnight UTC is used instead.
    """
    fields = version.split('.')
    try:
        year, month, day = (int(field) for field in fields[:3])
        release_day = datetime.date(year, month, day)
    except ValueError:
        # Too few fields, non-numeric fields or an impossible date.
        return fallback
    return release_day.isoformat() + 'T00:00:00Z'


def build_feed(versions, now_iso):
    """Render the complete Atom document as a string.

    versions: iterable of version strings; entries are emitted in sorted
              (lexicographic) order.
    now_iso:  RFC 3339 timestamp used for the feed-level <updated> element
              and for entries whose version is not a valid date.
    """
    entries = []
    for v in sorted(versions):
        entry = entry_template.replace(
            '@TIMESTAMP@', version_timestamp(v, now_iso))
        entry = entry.replace('@VERSION@', v)
        entries.append(entry)

    # Entries live one level inside <atom:feed>, hence the extra tab.
    entries_str = textwrap.indent(''.join(entries), '\t')
    feed = atom_template.replace('@TIMESTAMP@', now_iso)
    return feed.replace('@ENTRIES@', entries_str)


def main():
    """Read update/versions.json and write update/releases.atom."""
    # Timezone-aware UTC time so the serialized value carries an offset.
    now = datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0)
    now_iso = now.isoformat()

    # Context manager so the file handle is closed deterministically.
    with open('update/versions.json', encoding='utf-8') as versions_file:
        versions_info = json.load(versions_file)

    feed = build_feed(versions_info['versions'].keys(), now_iso)

    with open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
        atom_file.write(feed)


if __name__ == '__main__':
    main()
|  | ||||
|  | ||||
| @@ -14,6 +14,12 @@ | ||||
|  | ||||
| set -e | ||||
|  | ||||
| skip_tests=false | ||||
| if [ "$1" = '--skip-test' ]; then | ||||
|     skip_tests=true | ||||
|     shift | ||||
| fi | ||||
|  | ||||
| if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi | ||||
| version="$1" | ||||
| if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi | ||||
| @@ -22,7 +28,11 @@ if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit | ||||
|  | ||||
| /bin/echo -e "\n### First of all, testing..." | ||||
| make cleanall | ||||
| nosetests --with-coverage --cover-package=youtube_dl --cover-html test || exit 1 | ||||
| if $skip_tests ; then | ||||
|     echo 'SKIPPING TESTS' | ||||
| else | ||||
|     nosetests --verbose --with-coverage --cover-package=youtube_dl --cover-html test --stop || exit 1 | ||||
| fi | ||||
|  | ||||
| /bin/echo -e "\n### Changing version in version.py..." | ||||
| sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py | ||||
| @@ -59,7 +69,9 @@ git checkout HEAD -- youtube-dl youtube-dl.exe | ||||
|  | ||||
| /bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..." | ||||
| for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done | ||||
| scp -r "build/$version" ytdl@youtube-dl.org:html/downloads/ | ||||
| scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ | ||||
| ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" | ||||
| ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" | ||||
|  | ||||
| /bin/echo -e "\n### Now switching to gh-pages..." | ||||
| git clone --branch gh-pages --single-branch . build/gh-pages | ||||
| @@ -69,6 +81,7 @@ ROOT=$(pwd) | ||||
|     ORIGIN_URL=$(git config --get remote.origin.url) | ||||
|     cd build/gh-pages | ||||
|     "$ROOT/devscripts/gh-pages/add-version.py" $version | ||||
|     "$ROOT/devscripts/gh-pages/update-feed.py" | ||||
|     "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem" | ||||
|     "$ROOT/devscripts/gh-pages/generate-download.py" | ||||
|     "$ROOT/devscripts/gh-pages/update-copyright.py" | ||||
|   | ||||
| @@ -40,7 +40,7 @@ raw_input() | ||||
|  | ||||
| filename = sys.argv[0] | ||||
|  | ||||
| UPDATE_URL = "http://rg3.github.com/youtube-dl/update/" | ||||
| UPDATE_URL = "http://rg3.github.io/youtube-dl/update/" | ||||
| VERSION_URL = UPDATE_URL + 'LATEST_VERSION' | ||||
| JSON_URL = UPDATE_URL + 'versions.json' | ||||
| UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) | ||||
|   | ||||
							
								
								
									
										83
									
								
								devscripts/youtube_genalgo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								devscripts/youtube_genalgo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,83 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| # Generate youtube signature algorithm from test cases | ||||
|  | ||||
| import sys | ||||
|  | ||||
| tests = [ | ||||
|     # 88 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<", | ||||
|      "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), | ||||
|     # 87 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", | ||||
|      "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"), | ||||
|     # 86 - vfl_ymO4Z 2013/06/27 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", | ||||
|      "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), | ||||
|     # 85 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", | ||||
|      "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"), | ||||
|     # 84 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", | ||||
|      "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), | ||||
|     # 83 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", | ||||
|      "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"), | ||||
|     # 82 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", | ||||
|      "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), | ||||
| ] | ||||
|  | ||||
def find_matching(wrong, right):
    """Yield expressions over ``s`` that rebuild *right* from *wrong*.

    Each character of *right* is looked up in *wrong* (first occurrence),
    and the resulting index list is collapsed into index/slice expression
    strings by compress().

    Raises ValueError if *right* contains a character absent from *wrong*.
    """
    idxs = [wrong.index(c) for c in right]
    return compress(idxs)
|  | ||||
def compress(idxs):
    """Yield Python expressions over ``s`` that select the given indices.

    idxs is a sequence of non-negative integer indices. Runs of consecutive
    indices (step +1 or -1) are collapsed into one slice expression such as
    's[:3]' or 's[7:5:-1]'; isolated indices are emitted as 's[N]'.
    Concatenating the evaluated expressions yields the characters of ``s``
    at idxs, in order. An empty idxs yields nothing.
    """
    def _genslice(start, end, step):
        starts = '' if start == 0 else str(start)
        stop = end + step
        # A descending run that ends at index 0 would need stop == -1, but
        # Python reads a -1 bound as "last element"; omit the bound instead.
        ends = ':' if stop < 0 else ':%d' % stop
        steps = '' if step == 1 else (':%d' % step)
        return 's[%s%s%s]' % (starts, ends, steps)

    if not idxs:
        return

    step = None
    # Pre-bind so a single-element input (empty pair loop) still emits it.
    i = idxs[0]
    for i, prev in zip(idxs[1:], idxs[:-1]):
        if step is not None:
            if i - prev == step:
                continue
            # The run ended at prev; i starts over as a fresh element.
            yield _genslice(start, prev, step)
            step = None
            continue
        if i - prev in [-1, 1]:
            step = i - prev
            start = prev
            continue
        else:
            yield 's[%d]' % prev
    if step is None:
        yield 's[%d]' % i
    else:
        yield _genslice(start, i, step)
|  | ||||
def _assert_compress(inp, exp):
    """Check that compress(inp) produces exactly the list exp.

    On mismatch the actual and expected values are printed before the
    assertion fails, so the difference is visible in the output.
    """
    actual = list(compress(inp))
    if not (actual == exp):
        print('Got %r, expected %r' % (actual, exp))
    assert actual == exp


# Import-time self-checks: no runs, ascending runs, and a descending run.
for _indices, _expected in [
    ([0, 2, 4, 6], ['s[0]', 's[2]', 's[4]', 's[6]']),
    ([0, 1, 2, 4, 6, 7], ['s[:3]', 's[4]', 's[6:8]']),
    ([8, 0, 1, 2, 4, 7, 6, 9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]']),
]:
    _assert_compress(_indices, _expected)
|  | ||||
def gen(wrong, right, indent):
    """Return the source of one ``if len(s) == N:`` branch.

    The branch returns the concatenation of the expressions that turn
    *wrong* into *right*; *indent* is the indentation of the ``if`` line,
    and the ``return`` line is indented four spaces further.
    """
    expression = ' + '.join(find_matching(wrong, right))
    template = 'if len(s) == %d:\n%s    return %s\n'
    return template % (len(wrong), indent, expression)
|  | ||||
def genall(tests):
    """Return an if/elif chain with one branch per (wrong, right) pair.

    Every line is indented by eight spaces; branches after the first are
    prefixed with 'el' so that their 'if' becomes 'elif'.
    """
    indent = ' ' * 8
    branches = [gen(wrong, right, indent) for wrong, right in tests]
    separator = indent + 'el'
    return indent + separator.join(branches)
|  | ||||
def main():
    """Print the code generated from the module-level test vectors."""
    generated = genall(tests)
    print(generated)


if __name__ == '__main__':
    main()
							
								
								
									
										46
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										46
									
								
								setup.py
									
									
									
									
									
								
							| @@ -2,6 +2,7 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| from __future__ import print_function | ||||
|  | ||||
| import pkg_resources | ||||
| import sys | ||||
|  | ||||
| @@ -11,8 +12,9 @@ except ImportError: | ||||
|     from distutils.core import setup | ||||
|  | ||||
| try: | ||||
|     # This will create an exe that needs Microsoft Visual C++ 2008 | ||||
|     # Redistributable Package | ||||
|     import py2exe | ||||
|     """This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package""" | ||||
| except ImportError: | ||||
|     if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': | ||||
|         print("Cannot import py2exe", file=sys.stderr) | ||||
| @@ -23,15 +25,17 @@ py2exe_options = { | ||||
|     "compressed": 1, | ||||
|     "optimize": 2, | ||||
|     "dist_dir": '.', | ||||
|     "dll_excludes": ['w9xpopen.exe'] | ||||
|     "dll_excludes": ['w9xpopen.exe'], | ||||
| } | ||||
|  | ||||
| py2exe_console = [{ | ||||
|     "script": "./youtube_dl/__main__.py", | ||||
|     "dest_base": "youtube-dl", | ||||
| }] | ||||
|  | ||||
| py2exe_params = { | ||||
|     'console': py2exe_console, | ||||
|     'options': { "py2exe": py2exe_options }, | ||||
|     'options': {"py2exe": py2exe_options}, | ||||
|     'zipfile': None | ||||
| } | ||||
|  | ||||
| @@ -40,30 +44,34 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': | ||||
| else: | ||||
|     params = { | ||||
|         'scripts': ['bin/youtube-dl'], | ||||
|         'data_files': [('etc/bash_completion.d', ['youtube-dl.bash-completion']), # Installing system-wide would require sudo... | ||||
|                        ('share/doc/youtube_dl', ['README.txt']), | ||||
|                        ('share/man/man1/', ['youtube-dl.1'])] | ||||
|         'data_files': [  # Installing system-wide would require sudo... | ||||
|             ('etc/bash_completion.d', ['youtube-dl.bash-completion']), | ||||
|             ('share/doc/youtube_dl', ['README.txt']), | ||||
|             ('share/man/man1/', ['youtube-dl.1']) | ||||
|         ] | ||||
|     } | ||||
|  | ||||
| # Get the version from youtube_dl/version.py without importing the package | ||||
| exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) | ||||
| exec(compile(open('youtube_dl/version.py').read(), | ||||
|              'youtube_dl/version.py', 'exec')) | ||||
|  | ||||
| setup( | ||||
|     name = 'youtube_dl', | ||||
|     version = __version__, | ||||
|     description = 'YouTube video downloader', | ||||
|     long_description = 'Small command-line program to download videos from YouTube.com and other video sites.', | ||||
|     url = 'https://github.com/rg3/youtube-dl', | ||||
|     author = 'Ricardo Garcia', | ||||
|     maintainer = 'Philipp Hagemeister', | ||||
|     maintainer_email = 'phihag@phihag.de', | ||||
|     packages = ['youtube_dl'], | ||||
|     name='youtube_dl', | ||||
|     version=__version__, | ||||
|     description='YouTube video downloader', | ||||
|     long_description='Small command-line program to download videos from' | ||||
|     ' YouTube.com and other video sites.', | ||||
|     url='https://github.com/rg3/youtube-dl', | ||||
|     author='Ricardo Garcia', | ||||
|     maintainer='Philipp Hagemeister', | ||||
|     maintainer_email='phihag@phihag.de', | ||||
|     packages=['youtube_dl', 'youtube_dl.extractor'], | ||||
|  | ||||
|     # Provokes warning on most systems (why?!) | ||||
|     #test_suite = 'nose.collector', | ||||
|     #test_requires = ['nosetest'], | ||||
|     # test_suite = 'nose.collector', | ||||
|     # test_requires = ['nosetest'], | ||||
|  | ||||
|     classifiers = [ | ||||
|     classifiers=[ | ||||
|         "Topic :: Multimedia :: Video", | ||||
|         "Development Status :: 5 - Production/Stable", | ||||
|         "Environment :: Console", | ||||
|   | ||||
							
								
								
									
										44
									
								
								test/helper.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								test/helper.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,44 @@ | ||||
| import io | ||||
| import json | ||||
| import os.path | ||||
|  | ||||
| import youtube_dl.extractor | ||||
| from youtube_dl import YoutubeDL, YoutubeDLHandler | ||||
| from youtube_dl.utils import ( | ||||
|     compat_cookiejar, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
# General configuration (from __init__, not very elegant...): install a
# urllib opener with proxy, cookie and YoutubeDLHandler support.
jar = compat_cookiejar.CookieJar()
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
proxy_handler = compat_urllib_request.ProxyHandler()
opener = compat_urllib_request.build_opener(
    proxy_handler,
    cookie_processor,
    YoutubeDLHandler(),
)
compat_urllib_request.install_opener(opener)

# Default downloader parameters are stored in parameters.json, which sits
# in the same directory as this module.
PARAMETERS_FILE = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "parameters.json",
)
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
    parameters = json.load(pf)
|  | ||||
class FakeYDL(YoutubeDL):
    """YoutubeDL stand-in for tests that records downloads instead of running them.

    The parent constructor is not called; only the attributes set in
    __init__ below are initialised.
    """

    def __init__(self):
        # Each instance gets its own copy of the shared parameters: some
        # tests set the "sublang" parameter, which would otherwise leak
        # into other tests and break the md5 checks.
        self.params = dict(parameters)
        # Arguments passed to download(), in call order.
        self.result = []

    def to_screen(self, s):
        """Print the message s."""
        print(s)

    def trouble(self, s, tb=None):
        """Raise an Exception carrying s, so any reported problem fails the test."""
        raise Exception(s)

    def download(self, x):
        """Record x in self.result instead of downloading anything."""
        self.result.append(x)
|  | ||||
def get_testcases():
    """Yield every test-case dict declared on the known extractors.

    An extractor may declare a single case as ``_TEST`` and/or several as
    ``_TESTS``. Each yielded dict gets a 'name' key holding the extractor's
    class name with its last two characters (the 'IE' suffix) removed; the
    dict is modified in place.
    """
    suffix_len = len('IE')
    for extractor in youtube_dl.extractor.gen_extractors():
        single = getattr(extractor, '_TEST', None)
        if single:
            single['name'] = type(extractor).__name__[:-suffix_len]
            yield single
        for case in getattr(extractor, '_TESTS', []):
            case['name'] = type(extractor).__name__[:-suffix_len]
            yield case
| @@ -29,6 +29,7 @@ | ||||
|     "simulate": false,  | ||||
|     "skip_download": false,  | ||||
|     "subtitleslang": null,  | ||||
|     "subtitlesformat": "srt", | ||||
|     "test": true,  | ||||
|     "updatetime": true,  | ||||
|     "usenetrc": false,  | ||||
| @@ -36,5 +37,8 @@ | ||||
|     "verbose": true,  | ||||
|     "writedescription": false,  | ||||
|     "writeinfojson": true,  | ||||
|     "writesubtitles": false | ||||
| } | ||||
|     "writesubtitles": false, | ||||
|     "onlysubtitles": false, | ||||
|     "allsubtitles": false, | ||||
|     "listssubtitles": false | ||||
| } | ||||
|   | ||||
| @@ -7,21 +7,60 @@ import unittest | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE | ||||
| from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors | ||||
| from helper import get_testcases | ||||
|  | ||||
| class TestAllURLsMatching(unittest.TestCase): | ||||
|     def test_youtube_playlist_matching(self): | ||||
|         self.assertTrue(YoutubePlaylistIE().suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) | ||||
|         self.assertTrue(YoutubePlaylistIE().suitable(u'PL63F0C78739B09958')) | ||||
|         self.assertFalse(YoutubePlaylistIE().suitable(u'PLtS2H6bU1M')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585 | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 | ||||
|         self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M')) | ||||
|  | ||||
|     def test_youtube_matching(self): | ||||
|         self.assertTrue(YoutubeIE().suitable(u'PLtS2H6bU1M')) | ||||
|         self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M')) | ||||
|         self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 | ||||
|  | ||||
|     def test_youtube_channel_matching(self): | ||||
|         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM')) | ||||
|         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')) | ||||
|         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')) | ||||
|  | ||||
|     def test_justin_tv_channelid_matching(self): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/")) | ||||
|  | ||||
|     def test_justintv_videoid_matching(self): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483")) | ||||
|  | ||||
|     def test_justin_tv_chapterid_matching(self): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) | ||||
|  | ||||
|     def test_youtube_extract(self): | ||||
|         self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|  | ||||
|     def test_no_duplicates(self): | ||||
|         ies = gen_extractors() | ||||
|         for tc in get_testcases(): | ||||
|             url = tc['url'] | ||||
|             for ie in ies: | ||||
|                 if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']: | ||||
|                     self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) | ||||
|                 else: | ||||
|                     self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url)) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -7,19 +7,19 @@ import os | ||||
| import json | ||||
| import unittest | ||||
| import sys | ||||
| import hashlib | ||||
| import socket | ||||
| import binascii | ||||
|  | ||||
| # Allow direct execution | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| import youtube_dl.FileDownloader | ||||
| import youtube_dl.InfoExtractors | ||||
| import youtube_dl.YoutubeDL | ||||
| from youtube_dl.utils import * | ||||
|  | ||||
| DEF_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tests.json') | ||||
| PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") | ||||
|  | ||||
| RETRIES = 3 | ||||
|  | ||||
| # General configuration (from __init__, not very elegant...) | ||||
| jar = compat_cookiejar.CookieJar() | ||||
| cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) | ||||
| @@ -36,26 +36,33 @@ def _try_rm(filename): | ||||
|         if ose.errno != errno.ENOENT: | ||||
|             raise | ||||
|  | ||||
| class FileDownloader(youtube_dl.FileDownloader): | ||||
| md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() | ||||
|  | ||||
| class YoutubeDL(youtube_dl.YoutubeDL): | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         self.to_stderr = self.to_screen | ||||
|         self.processed_info_dicts = [] | ||||
|         return youtube_dl.FileDownloader.__init__(self, *args, **kwargs) | ||||
|         super(YoutubeDL, self).__init__(*args, **kwargs) | ||||
|     def report_warning(self, message): | ||||
|         # Don't accept warnings during tests | ||||
|         raise ExtractorError(message) | ||||
|     def process_info(self, info_dict): | ||||
|         self.processed_info_dicts.append(info_dict) | ||||
|         return youtube_dl.FileDownloader.process_info(self, info_dict) | ||||
|         return super(YoutubeDL, self).process_info(info_dict) | ||||
|  | ||||
| def _file_md5(fn): | ||||
|     with open(fn, 'rb') as f: | ||||
|         return hashlib.md5(f.read()).hexdigest() | ||||
|  | ||||
| with io.open(DEF_FILE, encoding='utf-8') as deff: | ||||
|     defs = json.load(deff) | ||||
| from helper import get_testcases | ||||
| defs = get_testcases() | ||||
|  | ||||
| with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: | ||||
|     parameters = json.load(pf) | ||||
|  | ||||
|  | ||||
| class TestDownload(unittest.TestCase): | ||||
|     maxDiff = None | ||||
|     def setUp(self): | ||||
|         self.parameters = parameters | ||||
|         self.defs = defs | ||||
| @@ -64,29 +71,29 @@ class TestDownload(unittest.TestCase): | ||||
| def generator(test_case): | ||||
|  | ||||
|     def test_template(self): | ||||
|         ie = getattr(youtube_dl.InfoExtractors, test_case['name'] + 'IE') | ||||
|         ie = youtube_dl.extractor.get_info_extractor(test_case['name']) | ||||
|         def print_skipping(reason): | ||||
|             print('Skipping %s: %s' % (test_case['name'], reason)) | ||||
|         if not ie._WORKING: | ||||
|             print('Skipping: IE marked as not _WORKING') | ||||
|             print_skipping('IE marked as not _WORKING') | ||||
|             return | ||||
|         if 'playlist' not in test_case and not test_case['file']: | ||||
|             print('Skipping: No output file specified') | ||||
|             print_skipping('No output file specified') | ||||
|             return | ||||
|         if 'skip' in test_case: | ||||
|             print('Skipping: {0}'.format(test_case['skip'])) | ||||
|             print_skipping(test_case['skip']) | ||||
|             return | ||||
|  | ||||
|         params = self.parameters.copy() | ||||
|         params.update(test_case.get('params', {})) | ||||
|  | ||||
|         fd = FileDownloader(params) | ||||
|         fd.add_info_extractor(ie()) | ||||
|         for ien in test_case.get('add_ie', []): | ||||
|             fd.add_info_extractor(getattr(youtube_dl.InfoExtractors, ien + 'IE')()) | ||||
|         ydl = YoutubeDL(params) | ||||
|         ydl.add_default_info_extractors() | ||||
|         finished_hook_called = set() | ||||
|         def _hook(status): | ||||
|             if status['status'] == 'finished': | ||||
|                 finished_hook_called.add(status['filename']) | ||||
|         fd.add_progress_hook(_hook) | ||||
|         ydl.fd.add_progress_hook(_hook) | ||||
|  | ||||
|         test_cases = test_case.get('playlist', [test_case]) | ||||
|         for tc in test_cases: | ||||
| @@ -94,7 +101,19 @@ def generator(test_case): | ||||
|             _try_rm(tc['file'] + '.part') | ||||
|             _try_rm(tc['file'] + '.info.json') | ||||
|         try: | ||||
|             fd.download([test_case['url']]) | ||||
|             for retry in range(1, RETRIES + 1): | ||||
|                 try: | ||||
|                     ydl.download([test_case['url']]) | ||||
|                 except (DownloadError, ExtractorError) as err: | ||||
|                     if retry == RETRIES: raise | ||||
|  | ||||
|                     # Check if the exception is not a network related one | ||||
|                     if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): | ||||
|                         raise | ||||
|  | ||||
|                     print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry)) | ||||
|                 else: | ||||
|                     break | ||||
|  | ||||
|             for tc in test_cases: | ||||
|                 if not test_case.get('params', {}).get('skip_download', False): | ||||
| @@ -106,8 +125,25 @@ def generator(test_case): | ||||
|                     self.assertEqual(md5_for_file, tc['md5']) | ||||
|                 with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof: | ||||
|                     info_dict = json.load(infof) | ||||
|                 for (info_field, value) in tc.get('info_dict', {}).items(): | ||||
|                     self.assertEqual(value, info_dict.get(info_field)) | ||||
|                 for (info_field, expected) in tc.get('info_dict', {}).items(): | ||||
|                     if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||
|                         self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field))) | ||||
|                     else: | ||||
|                         got = info_dict.get(info_field) | ||||
|                         self.assertEqual( | ||||
|                             expected, got, | ||||
|                             u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) | ||||
|  | ||||
|                 # If checkable fields are missing from the test case, print the info_dict | ||||
|                 test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) | ||||
|                     for key, value in info_dict.items() | ||||
|                     if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location')) | ||||
|                 if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): | ||||
|                     sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n') | ||||
|  | ||||
|                 # Check for the presence of mandatory fields | ||||
|                 for key in ('id', 'url', 'title', 'ext'): | ||||
|                     self.assertTrue(key in info_dict.keys() and info_dict[key]) | ||||
|         finally: | ||||
|             for tc in test_cases: | ||||
|                 _try_rm(tc['file']) | ||||
| @@ -117,9 +153,14 @@ def generator(test_case): | ||||
|     return test_template | ||||
|  | ||||
| ### And add them to TestDownload | ||||
| for test_case in defs: | ||||
| for n, test_case in enumerate(defs): | ||||
|     test_method = generator(test_case) | ||||
|     test_method.__name__ = "test_{0}".format(test_case["name"]) | ||||
|     tname = 'test_' + str(test_case['name']) | ||||
|     i = 1 | ||||
|     while hasattr(TestDownload, tname): | ||||
|         tname = 'test_'  + str(test_case['name']) + '_' + str(i) | ||||
|         i += 1 | ||||
|     test_method.__name__ = tname | ||||
|     setattr(TestDownload, test_method.__name__, test_method) | ||||
|     del test_method | ||||
|  | ||||
|   | ||||
| @@ -14,6 +14,8 @@ from youtube_dl.utils import timeconvert | ||||
| from youtube_dl.utils import sanitize_filename | ||||
| from youtube_dl.utils import unescapeHTML | ||||
| from youtube_dl.utils import orderedSet | ||||
| from youtube_dl.utils import DateRange | ||||
| from youtube_dl.utils import unified_strdate | ||||
|  | ||||
| if sys.version_info < (3, 0): | ||||
|     _compat_str = lambda b: b.decode('unicode-escape') | ||||
| @@ -95,6 +97,20 @@ class TestUtil(unittest.TestCase): | ||||
|  | ||||
|     def test_unescape_html(self): | ||||
|         self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;')) | ||||
|          | ||||
|     def test_daterange(self): | ||||
|         _20century = DateRange("19000101","20000101") | ||||
|         self.assertFalse("17890714" in _20century) | ||||
|         _ac = DateRange("00010101") | ||||
|         self.assertTrue("19690721" in _ac) | ||||
|         _firstmilenium = DateRange(end="10000101") | ||||
|         self.assertTrue("07110427" in _firstmilenium) | ||||
|  | ||||
|     def test_unified_dates(self): | ||||
|         self.assertEqual(unified_strdate('December 21, 2010'), '20101221') | ||||
|         self.assertEqual(unified_strdate('8/7/2009'), '20090708') | ||||
|         self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') | ||||
|         self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -9,8 +9,8 @@ import unittest | ||||
| # Allow direct execution | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| import youtube_dl.FileDownloader | ||||
| import youtube_dl.InfoExtractors | ||||
| import youtube_dl.YoutubeDL | ||||
| import youtube_dl.extractor | ||||
| from youtube_dl.utils import * | ||||
|  | ||||
| PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") | ||||
| @@ -22,9 +22,9 @@ proxy_handler = compat_urllib_request.ProxyHandler() | ||||
| opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) | ||||
| compat_urllib_request.install_opener(opener) | ||||
|  | ||||
| class FileDownloader(youtube_dl.FileDownloader): | ||||
| class YoutubeDL(youtube_dl.YoutubeDL): | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         youtube_dl.FileDownloader.__init__(self, *args, **kwargs) | ||||
|         super(YoutubeDL, self).__init__(*args, **kwargs) | ||||
|         self.to_stderr = self.to_screen | ||||
|  | ||||
| with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: | ||||
| @@ -48,10 +48,10 @@ class TestInfoJSON(unittest.TestCase): | ||||
|         self.tearDown() | ||||
|  | ||||
|     def test_info_json(self): | ||||
|         ie = youtube_dl.InfoExtractors.YoutubeIE() | ||||
|         fd = FileDownloader(params) | ||||
|         fd.add_info_extractor(ie) | ||||
|         fd.download([TEST_ID]) | ||||
|         ie = youtube_dl.extractor.YoutubeIE() | ||||
|         ydl = YoutubeDL(params) | ||||
|         ydl.add_info_extractor(ie) | ||||
|         ydl.download([TEST_ID]) | ||||
|         self.assertTrue(os.path.exists(INFO_JSON_FILE)) | ||||
|         with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf: | ||||
|             jd = json.load(jsonf) | ||||
|   | ||||
| @@ -8,66 +8,91 @@ import json | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.InfoExtractors import YoutubeUserIE,YoutubePlaylistIE | ||||
| from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE | ||||
| from youtube_dl.utils import * | ||||
|  | ||||
| PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") | ||||
| with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: | ||||
|     parameters = json.load(pf) | ||||
|  | ||||
| # General configuration (from __init__, not very elegant...) | ||||
| jar = compat_cookiejar.CookieJar() | ||||
| cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) | ||||
| proxy_handler = compat_urllib_request.ProxyHandler() | ||||
| opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) | ||||
| compat_urllib_request.install_opener(opener) | ||||
|  | ||||
| class FakeDownloader(object): | ||||
|     def __init__(self): | ||||
|         self.result = [] | ||||
|         self.params = parameters | ||||
|     def to_screen(self, s): | ||||
|         print(s) | ||||
|     def trouble(self, s): | ||||
|         raise Exception(s) | ||||
|     def download(self, x): | ||||
|         self.result.append(x) | ||||
| from helper import FakeYDL | ||||
|  | ||||
| class TestYoutubeLists(unittest.TestCase): | ||||
|     def assertIsPlaylist(self,info): | ||||
|         """Make sure the info has '_type' set to 'playlist'""" | ||||
|         self.assertEqual(info['_type'], 'playlist') | ||||
|  | ||||
|     def test_youtube_playlist(self): | ||||
|         DL = FakeDownloader() | ||||
|         IE = YoutubePlaylistIE(DL) | ||||
|         IE.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') | ||||
|         self.assertEqual(DL.result, [ | ||||
|             ['http://www.youtube.com/watch?v=bV9L5Ht9LgY'], | ||||
|             ['http://www.youtube.com/watch?v=FXxLjLQi3Fg'], | ||||
|             ['http://www.youtube.com/watch?v=tU3Bgo5qJZE'] | ||||
|         ]) | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0] | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], 'ytdl test PL') | ||||
|         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] | ||||
|         self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) | ||||
|  | ||||
|     def test_issue_673(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('PLBB231211A4F62143')[0] | ||||
|         self.assertTrue(len(result['entries']) > 25) | ||||
|  | ||||
|     def test_youtube_playlist_long(self): | ||||
|         DL = FakeDownloader() | ||||
|         IE = YoutubePlaylistIE(DL) | ||||
|         IE.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') | ||||
|         self.assertTrue(len(DL.result) >= 799) | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0] | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertTrue(len(result['entries']) >= 799) | ||||
|  | ||||
|     def test_youtube_playlist_with_deleted(self): | ||||
|         #651 | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0] | ||||
|         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] | ||||
|         self.assertFalse('pElCt5oNDuI' in ytie_results) | ||||
|         self.assertFalse('KdPEApIVdWM' in ytie_results) | ||||
|          | ||||
|     def test_youtube_playlist_empty(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0] | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(len(result['entries']), 0) | ||||
|  | ||||
|     def test_youtube_course(self): | ||||
|         DL = FakeDownloader() | ||||
|         IE = YoutubePlaylistIE(DL) | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         # TODO find a > 100 (paginating?) videos course | ||||
|         IE.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | ||||
|         self.assertEqual(DL.result[0], ['http://www.youtube.com/watch?v=j9WZyLZCBzs']) | ||||
|         self.assertEqual(len(DL.result), 25) | ||||
|         self.assertEqual(DL.result[-1], ['http://www.youtube.com/watch?v=rYefUsYuEp0']) | ||||
|         result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0] | ||||
|         entries = result['entries'] | ||||
|         self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs') | ||||
|         self.assertEqual(len(entries), 25) | ||||
|         self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0') | ||||
|  | ||||
|     def test_youtube_channel(self): | ||||
|         # I give up, please find a channel that does paginate and test this like test_youtube_playlist_long | ||||
|         pass # TODO | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeChannelIE(dl) | ||||
|         #test paginated channel | ||||
|         result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0] | ||||
|         self.assertTrue(len(result['entries']) > 90) | ||||
|         #test autogenerated channel | ||||
|         result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0] | ||||
|         self.assertTrue(len(result['entries']) >= 18) | ||||
|  | ||||
|     def test_youtube_user(self): | ||||
|         DL = FakeDownloader() | ||||
|         IE = YoutubeUserIE(DL) | ||||
|         IE.extract('https://www.youtube.com/user/TheLinuxFoundation') | ||||
|         self.assertTrue(len(DL.result) >= 320) | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeUserIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] | ||||
|         self.assertTrue(len(result['entries']) >= 320) | ||||
|  | ||||
|     def test_youtube_safe_search(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0] | ||||
|         self.assertEqual(len(result['entries']), 2) | ||||
|  | ||||
|     def test_youtube_show(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeShowIE(dl) | ||||
|         result = ie.extract('http://www.youtube.com/show/airdisasters') | ||||
|         self.assertTrue(len(result) >= 4) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
							
								
								
									
										57
									
								
								test/test_youtube_sig.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										57
									
								
								test/test_youtube_sig.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,57 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| import unittest | ||||
| import sys | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.extractor.youtube import YoutubeIE | ||||
| from helper import FakeYDL | ||||
|  | ||||
| sig = YoutubeIE(FakeYDL())._decrypt_signature | ||||
|  | ||||
| class TestYoutubeSig(unittest.TestCase): | ||||
|     def test_43_43(self): | ||||
|         wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135' | ||||
|         right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE' | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_88(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<" | ||||
|         right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_87(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<" | ||||
|         right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_86(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<" | ||||
|         right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_85(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<" | ||||
|         right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_84(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<" | ||||
|         right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_83(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<" | ||||
|         right = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_82(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<" | ||||
|         right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -10,48 +10,86 @@ import hashlib | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.InfoExtractors import YoutubeIE | ||||
| from youtube_dl.extractor import YoutubeIE | ||||
| from youtube_dl.utils import * | ||||
|  | ||||
| PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") | ||||
| with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: | ||||
|     parameters = json.load(pf) | ||||
|  | ||||
| # General configuration (from __init__, not very elegant...) | ||||
| jar = compat_cookiejar.CookieJar() | ||||
| cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) | ||||
| proxy_handler = compat_urllib_request.ProxyHandler() | ||||
| opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) | ||||
| compat_urllib_request.install_opener(opener) | ||||
|  | ||||
| class FakeDownloader(object): | ||||
|     def __init__(self): | ||||
|         self.result = [] | ||||
|         self.params = parameters | ||||
|     def to_screen(self, s): | ||||
|         print(s) | ||||
|     def trouble(self, s): | ||||
|         raise Exception(s) | ||||
|     def download(self, x): | ||||
|         self.result.append(x) | ||||
| from helper import FakeYDL | ||||
|  | ||||
| md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() | ||||
|  | ||||
| class TestYoutubeSubtitles(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['allsubtitles'] = False | ||||
|         DL.params['writesubtitles'] = False | ||||
|         DL.params['subtitlesformat'] = 'srt' | ||||
|         DL.params['listsubtitles'] = False | ||||
|     def test_youtube_no_subtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = False | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         subtitles = info_dict[0]['subtitles'] | ||||
|         self.assertEqual(subtitles, None) | ||||
|     def test_youtube_subtitles(self): | ||||
|         DL = FakeDownloader() | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         self.assertEqual(md5(info_dict[0]['subtitles']), 'c3228550d59116f3c29fba370b55d033') | ||||
|  | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') | ||||
|     def test_youtube_subtitles_it(self): | ||||
|         DL = FakeDownloader() | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitleslang'] = 'it' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         self.assertEqual(md5(info_dict[0]['subtitles']), '132a88a0daf8e1520f393eb58f1f646a') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d') | ||||
|     def test_youtube_onlysubtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['onlysubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') | ||||
|     def test_youtube_allsubtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['allsubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         subtitles = info_dict[0]['subtitles'] | ||||
|         self.assertEqual(len(subtitles), 13) | ||||
|     def test_youtube_subtitles_sbv_format(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitlesformat'] = 'sbv' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b') | ||||
|     def test_youtube_subtitles_vtt_format(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitlesformat'] = 'vtt' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7') | ||||
|     def test_youtube_list_subtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['listsubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         self.assertEqual(info_dict, None) | ||||
|     def test_youtube_automatic_captions(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writeautomaticsub'] = True | ||||
|         DL.params['subtitleslang'] = 'it' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('8YoUxe5ncPo') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertTrue(sub[2] is not None) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
							
								
								
									
										308
									
								
								test/tests.json
									
									
									
									
									
								
							
							
						
						
									
										308
									
								
								test/tests.json
									
									
									
									
									
								
							| @@ -1,308 +0,0 @@ | ||||
| [ | ||||
|   { | ||||
|     "name": "Youtube", | ||||
|     "url":  "http://www.youtube.com/watch?v=BaW_jenozKc", | ||||
|     "file":  "BaW_jenozKc.mp4", | ||||
|     "info_dict": { | ||||
|       "title": "youtube-dl test video \"'/\\ä↭𝕐", | ||||
|       "uploader": "Philipp Hagemeister", | ||||
|       "uploader_id": "phihag", | ||||
|       "upload_date": "20121002", | ||||
|       "description": "test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." | ||||
|     } | ||||
|   }, | ||||
|   { | ||||
|     "name": "Dailymotion", | ||||
|     "md5":  "392c4b85a60a90dc4792da41ce3144eb", | ||||
|     "url":  "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech", | ||||
|     "file":  "x33vw9.mp4" | ||||
|   }, | ||||
|   { | ||||
|     "name": "Metacafe", | ||||
|     "add_ie": ["Youtube"], | ||||
|     "url":  "http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", | ||||
|     "file":  "_aUehQsCQtM.flv" | ||||
|   }, | ||||
|   { | ||||
|     "name": "BlipTV", | ||||
|     "md5":  "b2d849efcf7ee18917e4b4d9ff37cafe", | ||||
|     "url":  "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352", | ||||
|     "file":  "5779306.m4v" | ||||
|   }, | ||||
|   { | ||||
|     "name": "XVideos", | ||||
|     "md5":  "1d0c835822f0a71a7bf011855db929d0", | ||||
|     "url":  "http://www.xvideos.com/video939581/funny_porns_by_s_-1", | ||||
|     "file":  "939581.flv" | ||||
|   }, | ||||
|   { | ||||
|     "name": "YouPorn", | ||||
|     "md5": "c37ddbaaa39058c76a7e86c6813423c1", | ||||
|     "url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/", | ||||
|     "file": "505835.mp4" | ||||
|   }, | ||||
|   { | ||||
|     "name": "Pornotube", | ||||
|     "md5": "374dd6dcedd24234453b295209aa69b6", | ||||
|     "url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing", | ||||
|     "file": "1689755.flv" | ||||
|   }, | ||||
|   { | ||||
|     "name": "YouJizz", | ||||
|     "md5": "07e15fa469ba384c7693fd246905547c", | ||||
|     "url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html", | ||||
|     "file": "2189178.flv" | ||||
|   }, | ||||
|   { | ||||
|     "name": "Vimeo", | ||||
|     "md5":  "8879b6cc097e987f02484baf890129e5", | ||||
|     "url":  "http://vimeo.com/56015672", | ||||
|     "file": "56015672.mp4", | ||||
|     "info_dict": { | ||||
|       "title": "youtube-dl test video - ★ \" ' 幸 / \\ ä ↭ 𝕐", | ||||
|       "uploader": "Filippo Valsorda", | ||||
|       "uploader_id": "user7108434", | ||||
|       "upload_date": "20121220", | ||||
|       "description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: ★ \" ' 幸 / \\ ä ↭ 𝕐" | ||||
|     } | ||||
|   }, | ||||
|   { | ||||
|     "name": "Soundcloud", | ||||
|     "md5":  "ebef0a451b909710ed1d7787dddbf0d7", | ||||
|     "url":  "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy", | ||||
|     "file":  "62986583.mp3" | ||||
|   }, | ||||
|   { | ||||
|     "name": "StanfordOpenClassroom", | ||||
|     "md5":  "544a9468546059d4e80d76265b0443b8", | ||||
|     "url":  "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100", | ||||
|     "file":  "PracticalUnix_intro-environment.mp4", | ||||
|     "skip": "Currently offline" | ||||
|   }, | ||||
|   { | ||||
|     "name": "XNXX", | ||||
|     "md5":  "0831677e2b4761795f68d417e0b7b445", | ||||
|     "url":  "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_", | ||||
|     "file":  "1135332.flv" | ||||
|   }, | ||||
|   { | ||||
|     "name": "Youku", | ||||
|     "url": "http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html", | ||||
|     "file": "XNDgyMDQ2NTQw_part00.flv", | ||||
|     "md5": "ffe3f2e435663dc2d1eea34faeff5b5b", | ||||
|     "params": { "test": false } | ||||
|   }, | ||||
|   { | ||||
|     "name": "NBA", | ||||
|     "url": "http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html", | ||||
|     "file": "0021200253-okc-bkn-recap.nba.mp4", | ||||
|     "md5": "c0edcfc37607344e2ff8f13c378c88a4" | ||||
|   }, | ||||
|   { | ||||
|     "name": "JustinTV", | ||||
|     "url": "http://www.twitch.tv/thegamedevhub/b/296128360", | ||||
|     "file": "296128360.flv", | ||||
|     "md5": "ecaa8a790c22a40770901460af191c9a" | ||||
|   }, | ||||
|   { | ||||
|     "name": "MyVideo", | ||||
|     "url": "http://www.myvideo.de/watch/8229274/bowling_fail_or_win", | ||||
|     "file": "8229274.flv", | ||||
|     "md5": "2d2753e8130479ba2cb7e0a37002053e" | ||||
|   }, | ||||
|   { | ||||
|     "name": "Escapist", | ||||
|     "url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate", | ||||
|     "file": "6618-Breaking-Down-Baldurs-Gate.flv", | ||||
|     "md5": "c6793dbda81388f4264c1ba18684a74d", | ||||
|     "skip": "Fails with timeout on Travis" | ||||
|   }, | ||||
|   { | ||||
|     "name": "GooglePlus", | ||||
|     "url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH", | ||||
|     "file": "ZButuJc6CtH.flv" | ||||
|   }, | ||||
|   { | ||||
|     "name": "FunnyOrDie", | ||||
|     "url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version", | ||||
|     "file": "0732f586d7.mp4", | ||||
|     "md5": "f647e9e90064b53b6e046e75d0241fbd" | ||||
|   }, | ||||
|   { | ||||
|     "name": "TweetReel", | ||||
|     "url": "http://tweetreel.com/?77smq", | ||||
|     "file": "77smq.mov", | ||||
|     "md5": "56b4d9ca9de467920f3f99a6d91255d6", | ||||
|     "info_dict": { | ||||
|         "uploader": "itszero", | ||||
|         "uploader_id": "itszero", | ||||
|         "upload_date": "20091225", | ||||
|         "description": "Installing Gentoo Linux on Powerbook G4, it turns out the sleep indicator becomes HDD activity indicator :D" | ||||
|     } | ||||
|   }, | ||||
|   { | ||||
|     "name": "Steam", | ||||
|     "url": "http://store.steampowered.com/video/105600/", | ||||
|     "playlist": [ | ||||
|       { | ||||
|         "file": "81300.flv", | ||||
|         "md5": "f870007cee7065d7c76b88f0a45ecc07", | ||||
|         "info_dict": { | ||||
|             "title": "Terraria 1.1 Trailer" | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "file": "80859.flv", | ||||
|         "md5": "61aaf31a5c5c3041afb58fb83cbb5751", | ||||
|         "info_dict": { | ||||
|           "title": "Terraria Trailer" | ||||
|         } | ||||
|       } | ||||
|     ] | ||||
|   }, | ||||
|   { | ||||
|     "name": "Ustream", | ||||
|     "url": "http://www.ustream.tv/recorded/20274954", | ||||
|     "file": "20274954.flv", | ||||
|     "md5": "088f151799e8f572f84eb62f17d73e5c", | ||||
|     "info_dict": { | ||||
|         "title": "Young Americans for Liberty February 7, 2012 2:28 AM" | ||||
|     } | ||||
|   }, | ||||
|   { | ||||
|     "name": "InfoQ", | ||||
|     "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", | ||||
|     "file": "12-jan-pythonthings.mp4", | ||||
|     "info_dict": { | ||||
|       "title": "A Few of My Favorite [Python] Things" | ||||
|     }, | ||||
|     "params": { | ||||
|       "skip_download": true | ||||
|     } | ||||
|   }, | ||||
|   { | ||||
|     "name": "ComedyCentral", | ||||
|     "url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart", | ||||
|     "file": "422212.mp4", | ||||
|     "md5": "4e2f5cb088a83cd8cdb7756132f9739d", | ||||
|     "info_dict": { | ||||
|         "title": "thedailyshow-kristen-stewart part 1" | ||||
|     } | ||||
|   }, | ||||
|   { | ||||
|     "name": "RBMARadio", | ||||
|     "url": "http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011", | ||||
|     "file": "ford-lopatin-live-at-primavera-sound-2011.mp3", | ||||
|     "md5": "6bc6f9bcb18994b4c983bc3bf4384d95", | ||||
|     "info_dict": { | ||||
|         "title": "Live at Primavera Sound 2011", | ||||
|         "description": "Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.", | ||||
|         "uploader": "Ford & Lopatin", | ||||
|         "uploader_id": "ford-lopatin", | ||||
|         "location": "Spain" | ||||
|     } | ||||
|   }, | ||||
|   { | ||||
|     "name": "Facebook", | ||||
|     "url": "https://www.facebook.com/photo.php?v=120708114770723", | ||||
|     "file": "120708114770723.mp4", | ||||
|     "md5": "48975a41ccc4b7a581abd68651c1a5a8", | ||||
|     "info_dict": { | ||||
|       "title": "PEOPLE ARE AWESOME 2013", | ||||
|       "duration": 279 | ||||
|     } | ||||
|   }, | ||||
|   { | ||||
|     "name": "EightTracks", | ||||
|     "url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a", | ||||
|     "playlist": [ | ||||
|       { | ||||
|         "file": "11885610.m4a", | ||||
|         "md5": "96ce57f24389fc8734ce47f4c1abcc55", | ||||
|         "info_dict": { | ||||
|           "title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", | ||||
|           "uploader_id": "ytdl" | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "file": "11885608.m4a", | ||||
|         "md5": "4ab26f05c1f7291ea460a3920be8021f", | ||||
|         "info_dict": { | ||||
|           "title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", | ||||
|           "uploader_id": "ytdl" | ||||
|  | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "file": "11885679.m4a", | ||||
|         "md5": "d30b5b5f74217410f4689605c35d1fd7", | ||||
|         "info_dict": { | ||||
|           "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad" | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "file": "11885680.m4a", | ||||
|         "md5": "4eb0a669317cd725f6bbd336a29f923a", | ||||
|         "info_dict": { | ||||
|           "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad" | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "file": "11885682.m4a", | ||||
|         "md5": "1893e872e263a2705558d1d319ad19e8", | ||||
|         "info_dict": { | ||||
|           "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad" | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "file": "11885683.m4a", | ||||
|         "md5": "b673c46f47a216ab1741ae8836af5899", | ||||
|         "info_dict": { | ||||
|           "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad" | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "file": "11885684.m4a", | ||||
|         "md5": "1d74534e95df54986da7f5abf7d842b7", | ||||
|         "info_dict": { | ||||
|           "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad" | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "file": "11885685.m4a", | ||||
|         "md5": "f081f47af8f6ae782ed131d38b9cd1c0", | ||||
|         "info_dict": { | ||||
|           "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad" | ||||
|         } | ||||
|       } | ||||
|     ] | ||||
|   }, | ||||
|   { | ||||
|     "name": "Keek", | ||||
|     "url": "http://www.keek.com/ytdl/keeks/NODfbab", | ||||
|     "file": "NODfbab.mp4", | ||||
|     "md5": "9b0636f8c0f7614afa4ea5e4c6e57e83", | ||||
|     "info_dict": { | ||||
|       "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ." | ||||
|     } | ||||
|  | ||||
|   }, | ||||
|   { | ||||
|     "name": "TED", | ||||
|     "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html", | ||||
|     "file": "102.mp4", | ||||
|     "md5": "7bc087e71d16f18f9b8ab9fa62a8a031", | ||||
|     "info_dict": { | ||||
|         "title": "Dan Dennett: The illusion of consciousness" | ||||
|     } | ||||
|   }, | ||||
|   { | ||||
|     "name": "MySpass", | ||||
|     "url": "http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/", | ||||
|     "file": "11741.mp4", | ||||
|     "md5": "0b49f4844a068f8b33f4b7c88405862b", | ||||
|     "info_dict": { | ||||
|         "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" | ||||
|     } | ||||
|   } | ||||
| ] | ||||
							
								
								
									
										
											BIN
										
									
								
								youtube-dl
									
									
									
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								youtube-dl
									
									
									
									
									
								
							
										
											Binary file not shown.
										
									
								
							| @@ -1,13 +1,6 @@ | ||||
| #!/usr/bin/env python | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| import math | ||||
| import io | ||||
| import os | ||||
| import re | ||||
| import socket | ||||
| import subprocess | ||||
| import sys | ||||
| import time | ||||
| @@ -23,89 +16,39 @@ class FileDownloader(object): | ||||
|     """File Downloader class. | ||||
|  | ||||
|     File downloader objects are the ones responsible of downloading the | ||||
|     actual video file and writing it to disk if the user has requested | ||||
|     it, among some other tasks. In most cases there should be one per | ||||
|     program. As, given a video URL, the downloader doesn't know how to | ||||
|     extract all the needed information, task that InfoExtractors do, it | ||||
|     has to pass the URL to one of them. | ||||
|  | ||||
|     For this, file downloader objects have a method that allows | ||||
|     InfoExtractors to be registered in a given order. When it is passed | ||||
|     a URL, the file downloader handles it to the first InfoExtractor it | ||||
|     finds that reports being able to handle it. The InfoExtractor extracts | ||||
|     all the information about the video or videos the URL refers to, and | ||||
|     asks the FileDownloader to process the video information, possibly | ||||
|     downloading the video. | ||||
|     actual video file and writing it to disk. | ||||
|  | ||||
|     File downloaders accept a lot of parameters. In order not to saturate | ||||
|     the object constructor with arguments, it receives a dictionary of | ||||
|     options instead. These options are available through the params | ||||
|     attribute for the InfoExtractors to use. The FileDownloader also | ||||
|     registers itself as the downloader in charge for the InfoExtractors | ||||
|     that are added to it, so this is a "mutual registration". | ||||
|     options instead. | ||||
|  | ||||
|     Available options: | ||||
|  | ||||
|     username:          Username for authentication purposes. | ||||
|     password:          Password for authentication purposes. | ||||
|     usenetrc:          Use netrc for authentication instead. | ||||
|     verbose:           Print additional info to stdout. | ||||
|     quiet:             Do not print messages to stdout. | ||||
|     forceurl:          Force printing final URL. | ||||
|     forcetitle:        Force printing title. | ||||
|     forcethumbnail:    Force printing thumbnail URL. | ||||
|     forcedescription:  Force printing description. | ||||
|     forcefilename:     Force printing final filename. | ||||
|     simulate:          Do not download the video files. | ||||
|     format:            Video format code. | ||||
|     format_limit:      Highest quality format to try. | ||||
|     outtmpl:           Template for output names. | ||||
|     restrictfilenames: Do not allow "&" and spaces in file names | ||||
|     ignoreerrors:      Do not stop on download errors. | ||||
|     ratelimit:         Download speed limit, in bytes/sec. | ||||
|     nooverwrites:      Prevent overwriting files. | ||||
|     retries:           Number of times to retry for HTTP error 5xx | ||||
|     buffersize:        Size of download buffer in bytes. | ||||
|     noresizebuffer:    Do not automatically resize the download buffer. | ||||
|     continuedl:        Try to continue downloads if possible. | ||||
|     noprogress:        Do not print the progress bar. | ||||
|     playliststart:     Playlist item to start at. | ||||
|     playlistend:       Playlist item to end at. | ||||
|     matchtitle:        Download only matching titles. | ||||
|     rejecttitle:       Reject downloads for matching titles. | ||||
|     logtostderr:       Log messages to stderr instead of stdout. | ||||
|     consoletitle:      Display progress in console window's titlebar. | ||||
|     nopart:            Do not use temporary .part files. | ||||
|     updatetime:        Use the Last-modified header to set output file timestamps. | ||||
|     writedescription:  Write the video description to a .description file | ||||
|     writeinfojson:     Write the video description to a .info.json file | ||||
|     writesubtitles:    Write the video subtitles to a .srt file | ||||
|     subtitleslang:     Language of the subtitles to download | ||||
|     test:              Download only first bytes to test the downloader. | ||||
|     keepvideo:         Keep the video file after post-processing | ||||
|     min_filesize:      Skip files smaller than this size | ||||
|     max_filesize:      Skip files larger than this size | ||||
|     """ | ||||
|  | ||||
|     params = None | ||||
|     _ies = [] | ||||
|     _pps = [] | ||||
|     _download_retcode = None | ||||
|     _num_downloads = None | ||||
|     _screen_file = None | ||||
|  | ||||
|     def __init__(self, params): | ||||
|     def __init__(self, ydl, params): | ||||
|         """Create a FileDownloader object with the given options.""" | ||||
|         self._ies = [] | ||||
|         self._pps = [] | ||||
|         self.ydl = ydl | ||||
|         self._progress_hooks = [] | ||||
|         self._download_retcode = 0 | ||||
|         self._num_downloads = 0 | ||||
|         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] | ||||
|         self.params = params | ||||
|  | ||||
|         if '%(stitle)s' in self.params['outtmpl']: | ||||
|             self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_bytes(bytes): | ||||
|         if bytes is None: | ||||
| @@ -116,7 +59,7 @@ class FileDownloader(object): | ||||
|             exponent = 0 | ||||
|         else: | ||||
|             exponent = int(math.log(bytes, 1024.0)) | ||||
|         suffix = 'bkMGTPEZY'[exponent] | ||||
|         suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent] | ||||
|         converted = float(bytes) / float(1024 ** exponent) | ||||
|         return '%.2f%s' % (converted, suffix) | ||||
|  | ||||
| @@ -170,34 +113,11 @@ class FileDownloader(object): | ||||
|         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) | ||||
|         return int(round(number * multiplier)) | ||||
|  | ||||
|     def add_info_extractor(self, ie): | ||||
|         """Add an InfoExtractor object to the end of the list.""" | ||||
|         self._ies.append(ie) | ||||
|         ie.set_downloader(self) | ||||
|  | ||||
|     def add_post_processor(self, pp): | ||||
|         """Add a PostProcessor object to the end of the chain.""" | ||||
|         self._pps.append(pp) | ||||
|         pp.set_downloader(self) | ||||
|  | ||||
|     def to_screen(self, message, skip_eol=False): | ||||
|         """Print message to stdout if not in quiet mode.""" | ||||
|         assert type(message) == type(u'') | ||||
|         if not self.params.get('quiet', False): | ||||
|             terminator = [u'\n', u''][skip_eol] | ||||
|             output = message + terminator | ||||
|             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr | ||||
|                 output = output.encode(preferredencoding(), 'ignore') | ||||
|             self._screen_file.write(output) | ||||
|             self._screen_file.flush() | ||||
|     def to_screen(self, *args, **kargs): | ||||
|         self.ydl.to_screen(*args, **kargs) | ||||
|  | ||||
|     def to_stderr(self, message): | ||||
|         """Print message to stderr.""" | ||||
|         assert type(message) == type(u'') | ||||
|         output = message + u'\n' | ||||
|         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr | ||||
|             output = output.encode(preferredencoding()) | ||||
|         sys.stderr.write(output) | ||||
|         self.ydl.to_screen(message) | ||||
|  | ||||
|     def to_cons_title(self, message): | ||||
|         """Set console/terminal window title to message.""" | ||||
| @@ -210,29 +130,14 @@ class FileDownloader(object): | ||||
|         elif 'TERM' in os.environ: | ||||
|             self.to_screen('\033]0;%s\007' % message, skip_eol=True) | ||||
|  | ||||
|     def fixed_template(self): | ||||
|         """Checks if the output template is fixed.""" | ||||
|         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None) | ||||
|     def trouble(self, *args, **kargs): | ||||
|         self.ydl.trouble(*args, **kargs) | ||||
|  | ||||
|     def trouble(self, message=None, tb=None): | ||||
|         """Determine action to take when a download problem appears. | ||||
|     def report_warning(self, *args, **kargs): | ||||
|         self.ydl.report_warning(*args, **kargs) | ||||
|  | ||||
|         Depending on if the downloader has been configured to ignore | ||||
|         download errors or not, this method may throw an exception or | ||||
|         not when errors are found, after printing the message. | ||||
|  | ||||
|         tb, if given, is additional traceback information. | ||||
|         """ | ||||
|         if message is not None: | ||||
|             self.to_stderr(message) | ||||
|         if self.params.get('verbose'): | ||||
|             if tb is None: | ||||
|                 tb_data = traceback.format_list(traceback.extract_stack()) | ||||
|                 tb = u''.join(tb_data) | ||||
|             self.to_stderr(tb) | ||||
|         if not self.params.get('ignoreerrors', False): | ||||
|             raise DownloadError(message) | ||||
|         self._download_retcode = 1 | ||||
|     def report_error(self, *args, **kargs): | ||||
|         self.ydl.report_error(*args, **kargs) | ||||
|  | ||||
|     def slow_down(self, start_time, byte_counter): | ||||
|         """Sleep if the download speed is over the rate limit.""" | ||||
| @@ -265,7 +170,7 @@ class FileDownloader(object): | ||||
|                 return | ||||
|             os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) | ||||
|         except (IOError, OSError) as err: | ||||
|             self.trouble(u'ERROR: unable to rename file') | ||||
|             self.report_error(u'unable to rename file') | ||||
|  | ||||
|     def try_utime(self, filename, last_modified_hdr): | ||||
|         """Try to set the last-modified time of the given file.""" | ||||
| @@ -279,24 +184,15 @@ class FileDownloader(object): | ||||
|         filetime = timeconvert(timestr) | ||||
|         if filetime is None: | ||||
|             return filetime | ||||
|         # Ignore obviously invalid dates | ||||
|         if filetime == 0: | ||||
|             return | ||||
|         try: | ||||
|             os.utime(filename, (time.time(), filetime)) | ||||
|         except: | ||||
|             pass | ||||
|         return filetime | ||||
|  | ||||
|     def report_writedescription(self, descfn): | ||||
|         """ Report that the description file is being written """ | ||||
|         self.to_screen(u'[info] Writing video description to: ' + descfn) | ||||
|  | ||||
|     def report_writesubtitles(self, srtfn): | ||||
|         """ Report that the subtitles file is being written """ | ||||
|         self.to_screen(u'[info] Writing video subtitles to: ' + srtfn) | ||||
|  | ||||
|     def report_writeinfojson(self, infofn): | ||||
|         """ Report that the metadata file has been written """ | ||||
|         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) | ||||
|  | ||||
|     def report_destination(self, filename): | ||||
|         """Report destination filename.""" | ||||
|         self.to_screen(u'[download] Destination: ' + filename) | ||||
| @@ -305,12 +201,13 @@ class FileDownloader(object): | ||||
|         """Report download progress.""" | ||||
|         if self.params.get('noprogress', False): | ||||
|             return | ||||
|         clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'') | ||||
|         if self.params.get('progress_with_newline', False): | ||||
|             self.to_screen(u'[download] %s of %s at %s ETA %s' % | ||||
|                 (percent_str, data_len_str, speed_str, eta_str)) | ||||
|         else: | ||||
|             self.to_screen(u'\r[download] %s of %s at %s ETA %s' % | ||||
|                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) | ||||
|             self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' % | ||||
|                 (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True) | ||||
|         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' % | ||||
|                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip())) | ||||
|  | ||||
| @@ -340,245 +237,31 @@ class FileDownloader(object): | ||||
|         else: | ||||
|             self.to_screen(u'') | ||||
|  | ||||
|     def increment_downloads(self): | ||||
|         """Increment the ordinal that assigns a number to each file.""" | ||||
|         self._num_downloads += 1 | ||||
|  | ||||
|     def prepare_filename(self, info_dict): | ||||
|         """Generate the output filename.""" | ||||
|         try: | ||||
|             template_dict = dict(info_dict) | ||||
|  | ||||
|             template_dict['epoch'] = int(time.time()) | ||||
|             template_dict['autonumber'] = u'%05d' % self._num_downloads | ||||
|  | ||||
|             sanitize = lambda k,v: sanitize_filename( | ||||
|                 u'NA' if v is None else compat_str(v), | ||||
|                 restricted=self.params.get('restrictfilenames'), | ||||
|                 is_id=(k==u'id')) | ||||
|             template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items()) | ||||
|  | ||||
|             filename = self.params['outtmpl'] % template_dict | ||||
|             return filename | ||||
|         except (ValueError, KeyError) as err: | ||||
|             self.trouble(u'ERROR: invalid system charset or erroneous output template') | ||||
|             return None | ||||
|  | ||||
|     def _match_entry(self, info_dict): | ||||
|         """ Returns None iff the file should be downloaded """ | ||||
|  | ||||
|         title = info_dict['title'] | ||||
|         matchtitle = self.params.get('matchtitle', False) | ||||
|         if matchtitle: | ||||
|             matchtitle = matchtitle.decode('utf8') | ||||
|             if not re.search(matchtitle, title, re.IGNORECASE): | ||||
|                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' | ||||
|         rejecttitle = self.params.get('rejecttitle', False) | ||||
|         if rejecttitle: | ||||
|             rejecttitle = rejecttitle.decode('utf8') | ||||
|             if re.search(rejecttitle, title, re.IGNORECASE): | ||||
|                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' | ||||
|         return None | ||||
|  | ||||
|     def process_info(self, info_dict): | ||||
|         """Process a single dictionary returned by an InfoExtractor.""" | ||||
|  | ||||
|         # Keep for backwards compatibility | ||||
|         info_dict['stitle'] = info_dict['title'] | ||||
|  | ||||
|         if not 'format' in info_dict: | ||||
|             info_dict['format'] = info_dict['ext'] | ||||
|  | ||||
|         reason = self._match_entry(info_dict) | ||||
|         if reason is not None: | ||||
|             self.to_screen(u'[download] ' + reason) | ||||
|             return | ||||
|  | ||||
|         max_downloads = self.params.get('max_downloads') | ||||
|         if max_downloads is not None: | ||||
|             if self._num_downloads > int(max_downloads): | ||||
|                 raise MaxDownloadsReached() | ||||
|  | ||||
|         filename = self.prepare_filename(info_dict) | ||||
|  | ||||
|         # Forced printings | ||||
|         if self.params.get('forcetitle', False): | ||||
|             compat_print(info_dict['title']) | ||||
|         if self.params.get('forceurl', False): | ||||
|             compat_print(info_dict['url']) | ||||
|         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: | ||||
|             compat_print(info_dict['thumbnail']) | ||||
|         if self.params.get('forcedescription', False) and 'description' in info_dict: | ||||
|             compat_print(info_dict['description']) | ||||
|         if self.params.get('forcefilename', False) and filename is not None: | ||||
|             compat_print(filename) | ||||
|         if self.params.get('forceformat', False): | ||||
|             compat_print(info_dict['format']) | ||||
|  | ||||
|         # Do nothing else if in simulate mode | ||||
|         if self.params.get('simulate', False): | ||||
|             return | ||||
|  | ||||
|         if filename is None: | ||||
|             return | ||||
|  | ||||
|         try: | ||||
|             dn = os.path.dirname(encodeFilename(filename)) | ||||
|             if dn != '' and not os.path.exists(dn): # dn is already encoded | ||||
|                 os.makedirs(dn) | ||||
|         except (OSError, IOError) as err: | ||||
|             self.trouble(u'ERROR: unable to create directory ' + compat_str(err)) | ||||
|             return | ||||
|  | ||||
|         if self.params.get('writedescription', False): | ||||
|             try: | ||||
|                 descfn = filename + u'.description' | ||||
|                 self.report_writedescription(descfn) | ||||
|                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: | ||||
|                     descfile.write(info_dict['description']) | ||||
|             except (OSError, IOError): | ||||
|                 self.trouble(u'ERROR: Cannot write description file ' + descfn) | ||||
|                 return | ||||
|  | ||||
|         if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: | ||||
|             # subtitles download errors are already managed as troubles in relevant IE | ||||
|             # that way it will silently go on when used with unsupporting IE | ||||
|             try: | ||||
|                 srtfn = filename.rsplit('.', 1)[0] + u'.srt' | ||||
|                 self.report_writesubtitles(srtfn) | ||||
|                 with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile: | ||||
|                     srtfile.write(info_dict['subtitles']) | ||||
|             except (OSError, IOError): | ||||
|                 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn) | ||||
|                 return | ||||
|  | ||||
|         if self.params.get('writeinfojson', False): | ||||
|             infofn = filename + u'.info.json' | ||||
|             self.report_writeinfojson(infofn) | ||||
|             try: | ||||
|                 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle']) | ||||
|                 write_json_file(json_info_dict, encodeFilename(infofn)) | ||||
|             except (OSError, IOError): | ||||
|                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn) | ||||
|                 return | ||||
|  | ||||
|         if not self.params.get('skip_download', False): | ||||
|             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): | ||||
|                 success = True | ||||
|             else: | ||||
|                 try: | ||||
|                     success = self._do_download(filename, info_dict) | ||||
|                 except (OSError, IOError) as err: | ||||
|                     raise UnavailableVideoError() | ||||
|                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                     self.trouble(u'ERROR: unable to download video data: %s' % str(err)) | ||||
|                     return | ||||
|                 except (ContentTooShortError, ) as err: | ||||
|                     self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) | ||||
|                     return | ||||
|  | ||||
|             if success: | ||||
|                 try: | ||||
|                     self.post_process(filename, info_dict) | ||||
|                 except (PostProcessingError) as err: | ||||
|                     self.trouble(u'ERROR: postprocessing: %s' % str(err)) | ||||
|                     return | ||||
|  | ||||
|     def download(self, url_list): | ||||
|         """Download a given list of URLs.""" | ||||
|         if len(url_list) > 1 and self.fixed_template(): | ||||
|             raise SameFileError(self.params['outtmpl']) | ||||
|  | ||||
|         for url in url_list: | ||||
|             suitable_found = False | ||||
|             for ie in self._ies: | ||||
|                 # Go to next InfoExtractor if not suitable | ||||
|                 if not ie.suitable(url): | ||||
|                     continue | ||||
|  | ||||
|                 # Warn if the _WORKING attribute is False | ||||
|                 if not ie.working(): | ||||
|                     self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, ' | ||||
|                                    u'and will probably not work. If you want to go on, use the -i option.') | ||||
|  | ||||
|                 # Suitable InfoExtractor found | ||||
|                 suitable_found = True | ||||
|  | ||||
|                 # Extract information from URL and process it | ||||
|                 try: | ||||
|                     videos = ie.extract(url) | ||||
|                 except ExtractorError as de: # An error we somewhat expected | ||||
|                     self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback()) | ||||
|                     break | ||||
|                 except Exception as e: | ||||
|                     if self.params.get('ignoreerrors', False): | ||||
|                         self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc())) | ||||
|                         break | ||||
|                     else: | ||||
|                         raise | ||||
|  | ||||
|                 if len(videos or []) > 1 and self.fixed_template(): | ||||
|                     raise SameFileError(self.params['outtmpl']) | ||||
|  | ||||
|                 for video in videos or []: | ||||
|                     video['extractor'] = ie.IE_NAME | ||||
|                     try: | ||||
|                         self.increment_downloads() | ||||
|                         self.process_info(video) | ||||
|                     except UnavailableVideoError: | ||||
|                         self.trouble(u'\nERROR: unable to download video') | ||||
|  | ||||
|                 # Suitable InfoExtractor had been found; go to next URL | ||||
|                 break | ||||
|  | ||||
|             if not suitable_found: | ||||
|                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) | ||||
|  | ||||
|         return self._download_retcode | ||||
|  | ||||
|     def post_process(self, filename, ie_info): | ||||
|         """Run all the postprocessors on the given file.""" | ||||
|         info = dict(ie_info) | ||||
|         info['filepath'] = filename | ||||
|         keep_video = None | ||||
|         for pp in self._pps: | ||||
|             try: | ||||
|                 keep_video_wish,new_info = pp.run(info) | ||||
|                 if keep_video_wish is not None: | ||||
|                     if keep_video_wish: | ||||
|                         keep_video = keep_video_wish | ||||
|                     elif keep_video is None: | ||||
|                         # No clear decision yet, let IE decide | ||||
|                         keep_video = keep_video_wish | ||||
|             except PostProcessingError as e: | ||||
|                 self.to_stderr(u'ERROR: ' + e.msg) | ||||
|         if keep_video is False and not self.params.get('keepvideo', False): | ||||
|             try: | ||||
|                 self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename) | ||||
|                 os.remove(encodeFilename(filename)) | ||||
|             except (IOError, OSError): | ||||
|                 self.to_stderr(u'WARNING: Unable to remove downloaded video file') | ||||
|  | ||||
|     def _download_with_rtmpdump(self, filename, url, player_url, page_url): | ||||
|     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url): | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|  | ||||
|         # Check for rtmpdump first | ||||
|         try: | ||||
|             subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT) | ||||
|             subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) | ||||
|         except (OSError, IOError): | ||||
|             self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run') | ||||
|             self.report_error(u'RTMP download detected but "rtmpdump" could not be run') | ||||
|             return False | ||||
|         verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet' | ||||
|  | ||||
|         # Download using rtmpdump. rtmpdump returns exit code 2 when | ||||
|         # the connection was interrumpted and resuming appears to be | ||||
|         # possible. This is part of rtmpdump's normal usage, AFAIK. | ||||
|         basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename] | ||||
|         basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename] | ||||
|         if player_url is not None: | ||||
|             basic_args += ['-W', player_url] | ||||
|             basic_args += ['--swfVfy', player_url] | ||||
|         if page_url is not None: | ||||
|             basic_args += ['--pageUrl', page_url] | ||||
|         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)] | ||||
|         if play_path is not None: | ||||
|             basic_args += ['--playpath', play_path] | ||||
|         if tc_url is not None: | ||||
|             basic_args += ['--tcUrl', url] | ||||
|         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] | ||||
|         if self.params.get('verbose', False): | ||||
|             try: | ||||
|                 import pipes | ||||
| @@ -612,9 +295,41 @@ class FileDownloader(object): | ||||
|             }) | ||||
|             return True | ||||
|         else: | ||||
|             self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) | ||||
|             self.to_stderr(u"\n") | ||||
|             self.report_error(u'rtmpdump exited with code %d' % retval) | ||||
|             return False | ||||
|  | ||||
|     def _download_with_mplayer(self, filename, url): | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|  | ||||
|         args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url] | ||||
|         # Check for mplayer first | ||||
|         try: | ||||
|             subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) | ||||
|         except (OSError, IOError): | ||||
|             self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] ) | ||||
|             return False | ||||
|  | ||||
|         # Download using mplayer.  | ||||
|         retval = subprocess.call(args) | ||||
|         if retval == 0: | ||||
|             fsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize)) | ||||
|             self.try_rename(tmpfilename, filename) | ||||
|             self._hook_progress({ | ||||
|                 'downloaded_bytes': fsize, | ||||
|                 'total_bytes': fsize, | ||||
|                 'filename': filename, | ||||
|                 'status': 'finished', | ||||
|             }) | ||||
|             return True | ||||
|         else: | ||||
|             self.to_stderr(u"\n") | ||||
|             self.report_error(u'mplayer exited with code %d' % retval) | ||||
|             return False | ||||
|  | ||||
|  | ||||
|     def _do_download(self, filename, info_dict): | ||||
|         url = info_dict['url'] | ||||
|  | ||||
| @@ -631,7 +346,13 @@ class FileDownloader(object): | ||||
|         if url.startswith('rtmp'): | ||||
|             return self._download_with_rtmpdump(filename, url, | ||||
|                                                 info_dict.get('player_url', None), | ||||
|                                                 info_dict.get('page_url', None)) | ||||
|                                                 info_dict.get('page_url', None), | ||||
|                                                 info_dict.get('play_path', None), | ||||
|                                                 info_dict.get('tc_url', None)) | ||||
|  | ||||
|         # Attempt to download using mplayer | ||||
|         if url.startswith('mms') or url.startswith('rtsp'): | ||||
|             return self._download_with_mplayer(filename, url) | ||||
|  | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|         stream = None | ||||
| @@ -712,7 +433,7 @@ class FileDownloader(object): | ||||
|                 self.report_retry(count, retries) | ||||
|  | ||||
|         if count > retries: | ||||
|             self.trouble(u'ERROR: giving up after %s retries' % retries) | ||||
|             self.report_error(u'giving up after %s retries' % retries) | ||||
|             return False | ||||
|  | ||||
|         data_len = data.info().get('Content-length', None) | ||||
| @@ -748,12 +469,13 @@ class FileDownloader(object): | ||||
|                     filename = self.undo_temp_name(tmpfilename) | ||||
|                     self.report_destination(filename) | ||||
|                 except (OSError, IOError) as err: | ||||
|                     self.trouble(u'ERROR: unable to open for writing: %s' % str(err)) | ||||
|                     self.report_error(u'unable to open for writing: %s' % str(err)) | ||||
|                     return False | ||||
|             try: | ||||
|                 stream.write(data_block) | ||||
|             except (IOError, OSError) as err: | ||||
|                 self.trouble(u'\nERROR: unable to write data: %s' % str(err)) | ||||
|                 self.to_stderr(u"\n") | ||||
|                 self.report_error(u'unable to write data: %s' % str(err)) | ||||
|                 return False | ||||
|             if not self.params.get('noresizebuffer', False): | ||||
|                 block_size = self.best_block_size(after - before, len(data_block)) | ||||
| @@ -779,7 +501,8 @@ class FileDownloader(object): | ||||
|             self.slow_down(start, byte_counter - resume_len) | ||||
|  | ||||
|         if stream is None: | ||||
|             self.trouble(u'\nERROR: Did not get any data blocks') | ||||
|             self.to_stderr(u"\n") | ||||
|             self.report_error(u'Did not get any data blocks') | ||||
|             return False | ||||
|         stream.close() | ||||
|         self.report_finish() | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,8 +1,3 @@ | ||||
| #!/usr/bin/env python | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| import os | ||||
| import subprocess | ||||
| import sys | ||||
| @@ -85,8 +80,9 @@ class FFmpegPostProcessor(PostProcessor): | ||||
|         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         stdout,stderr = p.communicate() | ||||
|         if p.returncode != 0: | ||||
|             stderr = stderr.decode('utf-8', 'replace') | ||||
|             msg = stderr.strip().split('\n')[-1] | ||||
|             raise FFmpegPostProcessorError(msg.decode('utf-8', 'replace')) | ||||
|             raise FFmpegPostProcessorError(msg) | ||||
|  | ||||
|     def _ffmpeg_filename_argument(self, fn): | ||||
|         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details | ||||
| @@ -188,6 +184,11 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|  | ||||
|         prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups | ||||
|         new_path = prefix + sep + extension | ||||
|  | ||||
|         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly. | ||||
|         if new_path == path: | ||||
|             self._nopostoverwrites = True | ||||
|  | ||||
|         try: | ||||
|             if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)): | ||||
|                 self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path) | ||||
| @@ -210,7 +211,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|                 self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file') | ||||
|  | ||||
|         information['filepath'] = new_path | ||||
|         return False,information | ||||
|         return self._nopostoverwrites,information | ||||
|  | ||||
| class FFmpegVideoConvertor(FFmpegPostProcessor): | ||||
|     def __init__(self, downloader=None,preferedformat=None): | ||||
|   | ||||
							
								
								
									
										604
									
								
								youtube_dl/YoutubeDL.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										604
									
								
								youtube_dl/YoutubeDL.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,604 @@ | ||||
| #!/usr/bin/env python | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| import io | ||||
| import os | ||||
| import re | ||||
| import shutil | ||||
| import socket | ||||
| import sys | ||||
| import time | ||||
| import traceback | ||||
|  | ||||
| from .utils import * | ||||
| from .extractor import get_info_extractor, gen_extractors | ||||
| from .FileDownloader import FileDownloader | ||||
|  | ||||
|  | ||||
| class YoutubeDL(object): | ||||
|     """YoutubeDL class. | ||||
|  | ||||
|     YoutubeDL objects are the ones responsible for downloading the | ||||
|     actual video file and writing it to disk if the user has requested | ||||
|     it, among some other tasks. In most cases there should be one per | ||||
|     program. Since, given a video URL, the downloader doesn't know how to | ||||
|     extract all the needed information (a task that InfoExtractors do), it | ||||
|     has to pass the URL to one of them. | ||||
|  | ||||
|     For this, YoutubeDL objects have a method that allows | ||||
|     InfoExtractors to be registered in a given order. When it is passed | ||||
|     a URL, the YoutubeDL object hands it to the first InfoExtractor it | ||||
|     finds that reports being able to handle it. The InfoExtractor extracts | ||||
|     all the information about the video or videos the URL refers to, and | ||||
|     YoutubeDL processes the extracted information, possibly using a File | ||||
|     Downloader to download the video. | ||||
|  | ||||
|     YoutubeDL objects accept a lot of parameters. In order not to saturate | ||||
|     the object constructor with arguments, it receives a dictionary of | ||||
|     options instead. These options are available through the params | ||||
|     attribute for the InfoExtractors to use. The YoutubeDL also | ||||
|     registers itself as the downloader in charge for the InfoExtractors | ||||
|     that are added to it, so this is a "mutual registration". | ||||
|  | ||||
|     Available options: | ||||
|  | ||||
|     username:          Username for authentication purposes. | ||||
|     password:          Password for authentication purposes. | ||||
|     videopassword:     Password for accessing a video. | ||||
|     usenetrc:          Use netrc for authentication instead. | ||||
|     verbose:           Print additional info to stdout. | ||||
|     quiet:             Do not print messages to stdout. | ||||
|     forceurl:          Force printing final URL. | ||||
|     forcetitle:        Force printing title. | ||||
|     forceid:           Force printing ID. | ||||
|     forcethumbnail:    Force printing thumbnail URL. | ||||
|     forcedescription:  Force printing description. | ||||
|     forcefilename:     Force printing final filename. | ||||
|     simulate:          Do not download the video files. | ||||
|     format:            Video format code. | ||||
|     format_limit:      Highest quality format to try. | ||||
|     outtmpl:           Template for output names. | ||||
|     restrictfilenames: Do not allow "&" and spaces in file names | ||||
|     ignoreerrors:      Do not stop on download errors. | ||||
|     nooverwrites:      Prevent overwriting files. | ||||
|     playliststart:     Playlist item to start at. | ||||
|     playlistend:       Playlist item to end at. | ||||
|     matchtitle:        Download only matching titles. | ||||
|     rejecttitle:       Reject downloads for matching titles. | ||||
|     logtostderr:       Log messages to stderr instead of stdout. | ||||
|     writedescription:  Write the video description to a .description file | ||||
|     writeinfojson:     Write the video description metadata as JSON to a .info.json file | ||||
|     writethumbnail:    Write the thumbnail image to a file | ||||
|     writesubtitles:    Write the video subtitles to a file | ||||
|     writeautomaticsub: Write the automatic subtitles to a file | ||||
|     allsubtitles:      Downloads all the subtitles of the video | ||||
|     listsubtitles:     Lists all available subtitles for the video | ||||
|     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt) | ||||
|     subtitleslang:     Language of the subtitles to download | ||||
|     keepvideo:         Keep the video file after post-processing | ||||
|     daterange:         A DateRange object, download only if the upload_date is in the range. | ||||
|     skip_download:     Skip the actual download of the video file | ||||
|      | ||||
|     The following parameters are not used by YoutubeDL itself, they are used by | ||||
|     the FileDownloader: | ||||
|     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, | ||||
|     noresizebuffer, retries, continuedl, noprogress, consoletitle | ||||
|     """ | ||||
|  | ||||
|     params = None | ||||
|     _ies = [] | ||||
|     _pps = [] | ||||
|     _download_retcode = None | ||||
|     _num_downloads = None | ||||
|     _screen_file = None | ||||
|  | ||||
|     def __init__(self, params): | ||||
|         """Create a FileDownloader object with the given options.""" | ||||
|         self._ies = [] | ||||
|         self._pps = [] | ||||
|         self._progress_hooks = [] | ||||
|         self._download_retcode = 0 | ||||
|         self._num_downloads = 0 | ||||
|         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] | ||||
|         self.params = params | ||||
|         self.fd = FileDownloader(self, self.params) | ||||
|  | ||||
|         if '%(stitle)s' in self.params['outtmpl']: | ||||
|             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') | ||||
|  | ||||
|     def add_info_extractor(self, ie): | ||||
|         """Add an InfoExtractor object to the end of the list.""" | ||||
|         self._ies.append(ie) | ||||
|         ie.set_downloader(self) | ||||
|  | ||||
|     def add_default_info_extractors(self): | ||||
|         """ | ||||
|         Add the InfoExtractors returned by gen_extractors to the end of the list | ||||
|         """ | ||||
|         for ie in gen_extractors(): | ||||
|             self.add_info_extractor(ie) | ||||
|  | ||||
|     def add_post_processor(self, pp): | ||||
|         """Add a PostProcessor object to the end of the chain.""" | ||||
|         self._pps.append(pp) | ||||
|         pp.set_downloader(self) | ||||
|  | ||||
|     def to_screen(self, message, skip_eol=False): | ||||
|         """Print message to stdout if not in quiet mode.""" | ||||
|         assert type(message) == type(u'') | ||||
|         if not self.params.get('quiet', False): | ||||
|             terminator = [u'\n', u''][skip_eol] | ||||
|             output = message + terminator | ||||
|             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr | ||||
|                 output = output.encode(preferredencoding(), 'ignore') | ||||
|             self._screen_file.write(output) | ||||
|             self._screen_file.flush() | ||||
|  | ||||
|     def to_stderr(self, message): | ||||
|         """Print message to stderr.""" | ||||
|         assert type(message) == type(u'') | ||||
|         output = message + u'\n' | ||||
|         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr | ||||
|             output = output.encode(preferredencoding()) | ||||
|         sys.stderr.write(output) | ||||
|  | ||||
|     def fixed_template(self): | ||||
|         """Checks if the output template is fixed.""" | ||||
|         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None) | ||||
|  | ||||
|     def trouble(self, message=None, tb=None): | ||||
|         """Determine action to take when a download problem appears. | ||||
|  | ||||
|         Depending on if the downloader has been configured to ignore | ||||
|         download errors or not, this method may throw an exception or | ||||
|         not when errors are found, after printing the message. | ||||
|  | ||||
|         tb, if given, is additional traceback information. | ||||
|         """ | ||||
|         if message is not None: | ||||
|             self.to_stderr(message) | ||||
|         if self.params.get('verbose'): | ||||
|             if tb is None: | ||||
|                 if sys.exc_info()[0]:  # if .trouble has been called from an except block | ||||
|                     tb = u'' | ||||
|                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: | ||||
|                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) | ||||
|                     tb += compat_str(traceback.format_exc()) | ||||
|                 else: | ||||
|                     tb_data = traceback.format_list(traceback.extract_stack()) | ||||
|                     tb = u''.join(tb_data) | ||||
|             self.to_stderr(tb) | ||||
|         if not self.params.get('ignoreerrors', False): | ||||
|             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: | ||||
|                 exc_info = sys.exc_info()[1].exc_info | ||||
|             else: | ||||
|                 exc_info = sys.exc_info() | ||||
|             raise DownloadError(message, exc_info) | ||||
|         self._download_retcode = 1 | ||||
|  | ||||
|     def report_warning(self, message): | ||||
|         ''' | ||||
|         Print the message to stderr, it will be prefixed with 'WARNING:' | ||||
|         If stderr is a tty file the 'WARNING:' will be colored | ||||
|         ''' | ||||
|         if sys.stderr.isatty() and os.name != 'nt': | ||||
|             _msg_header=u'\033[0;33mWARNING:\033[0m' | ||||
|         else: | ||||
|             _msg_header=u'WARNING:' | ||||
|         warning_message=u'%s %s' % (_msg_header,message) | ||||
|         self.to_stderr(warning_message) | ||||
|  | ||||
|     def report_error(self, message, tb=None): | ||||
|         ''' | ||||
|         Do the same as trouble, but prefixes the message with 'ERROR:', colored | ||||
|         in red if stderr is a tty file. | ||||
|         ''' | ||||
|         if sys.stderr.isatty() and os.name != 'nt': | ||||
|             _msg_header = u'\033[0;31mERROR:\033[0m' | ||||
|         else: | ||||
|             _msg_header = u'ERROR:' | ||||
|         error_message = u'%s %s' % (_msg_header, message) | ||||
|         self.trouble(error_message, tb) | ||||
|  | ||||
|     def slow_down(self, start_time, byte_counter): | ||||
|         """Sleep if the download speed is over the rate limit.""" | ||||
|         rate_limit = self.params.get('ratelimit', None) | ||||
|         if rate_limit is None or byte_counter == 0: | ||||
|             return | ||||
|         now = time.time() | ||||
|         elapsed = now - start_time | ||||
|         if elapsed <= 0.0: | ||||
|             return | ||||
|         speed = float(byte_counter) / elapsed | ||||
|         if speed > rate_limit: | ||||
|             time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) | ||||
|  | ||||
|     def report_writedescription(self, descfn): | ||||
|         """ Report that the description file is being written """ | ||||
|         self.to_screen(u'[info] Writing video description to: ' + descfn) | ||||
|  | ||||
|     def report_writesubtitles(self, sub_filename): | ||||
|         """ Report that the subtitles file is being written """ | ||||
|         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename) | ||||
|  | ||||
|     def report_writeinfojson(self, infofn): | ||||
|         """ Report that the metadata file has been written """ | ||||
|         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) | ||||
|  | ||||
|     def report_file_already_downloaded(self, file_name): | ||||
|         """Report file has already been fully downloaded.""" | ||||
|         try: | ||||
|             self.to_screen(u'[download] %s has already been downloaded' % file_name) | ||||
|         except (UnicodeEncodeError) as err: | ||||
|             self.to_screen(u'[download] The file has already been downloaded') | ||||
|  | ||||
|     def increment_downloads(self): | ||||
|         """Increment the ordinal that assigns a number to each file.""" | ||||
|         self._num_downloads += 1 | ||||
|  | ||||
|     def prepare_filename(self, info_dict): | ||||
|         """Generate the output filename.""" | ||||
|         try: | ||||
|             template_dict = dict(info_dict) | ||||
|  | ||||
|             template_dict['epoch'] = int(time.time()) | ||||
|             autonumber_size = self.params.get('autonumber_size') | ||||
|             if autonumber_size is None: | ||||
|                 autonumber_size = 5 | ||||
|             autonumber_templ = u'%0' + str(autonumber_size) + u'd' | ||||
|             template_dict['autonumber'] = autonumber_templ % self._num_downloads | ||||
|             if template_dict['playlist_index'] is not None: | ||||
|                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] | ||||
|  | ||||
|             sanitize = lambda k,v: sanitize_filename( | ||||
|                 u'NA' if v is None else compat_str(v), | ||||
|                 restricted=self.params.get('restrictfilenames'), | ||||
|                 is_id=(k==u'id')) | ||||
|             template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items()) | ||||
|  | ||||
|             filename = self.params['outtmpl'] % template_dict | ||||
|             return filename | ||||
|         except KeyError as err: | ||||
|             self.report_error(u'Erroneous output template') | ||||
|             return None | ||||
|         except ValueError as err: | ||||
|             self.report_error(u'Insufficient system charset ' + repr(preferredencoding())) | ||||
|             return None | ||||
|  | ||||
|     def _match_entry(self, info_dict): | ||||
|         """ Returns None iff the file should be downloaded """ | ||||
|  | ||||
|         title = info_dict['title'] | ||||
|         matchtitle = self.params.get('matchtitle', False) | ||||
|         if matchtitle: | ||||
|             if not re.search(matchtitle, title, re.IGNORECASE): | ||||
|                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' | ||||
|         rejecttitle = self.params.get('rejecttitle', False) | ||||
|         if rejecttitle: | ||||
|             if re.search(rejecttitle, title, re.IGNORECASE): | ||||
|                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' | ||||
|         date = info_dict.get('upload_date', None) | ||||
|         if date is not None: | ||||
|             dateRange = self.params.get('daterange', DateRange()) | ||||
|             if date not in dateRange: | ||||
|                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) | ||||
|         return None | ||||
|          | ||||
|     def extract_info(self, url, download=True, ie_key=None, extra_info={}): | ||||
|         ''' | ||||
|         Returns a list with a dictionary for each video we find. | ||||
|         If 'download', also downloads the videos. | ||||
|         extra_info is a dict containing the extra values to add to each result | ||||
|          ''' | ||||
|          | ||||
|         if ie_key: | ||||
|             ie = get_info_extractor(ie_key)() | ||||
|             ie.set_downloader(self) | ||||
|             ies = [ie] | ||||
|         else: | ||||
|             ies = self._ies | ||||
|  | ||||
|         for ie in ies: | ||||
|             if not ie.suitable(url): | ||||
|                 continue | ||||
|  | ||||
|             if not ie.working(): | ||||
|                 self.report_warning(u'The program functionality for this site has been marked as broken, ' | ||||
|                                     u'and will probably not work.') | ||||
|  | ||||
|             try: | ||||
|                 ie_result = ie.extract(url) | ||||
|                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) | ||||
|                     break | ||||
|                 if isinstance(ie_result, list): | ||||
|                     # Backwards compatibility: old IE result format | ||||
|                     for result in ie_result: | ||||
|                         result.update(extra_info) | ||||
|                     ie_result = { | ||||
|                         '_type': 'compat_list', | ||||
|                         'entries': ie_result, | ||||
|                     } | ||||
|                 else: | ||||
|                     ie_result.update(extra_info) | ||||
|                 if 'extractor' not in ie_result: | ||||
|                     ie_result['extractor'] = ie.IE_NAME | ||||
|                 return self.process_ie_result(ie_result, download=download) | ||||
|             except ExtractorError as de: # An error we somewhat expected | ||||
|                 self.report_error(compat_str(de), de.format_traceback()) | ||||
|                 break | ||||
|             except Exception as e: | ||||
|                 if self.params.get('ignoreerrors', False): | ||||
|                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc())) | ||||
|                     break | ||||
|                 else: | ||||
|                     raise | ||||
|         else: | ||||
|             self.report_error(u'no suitable InfoExtractor: %s' % url) | ||||
|          | ||||
|     def process_ie_result(self, ie_result, download=True, extra_info={}): | ||||
|         """ | ||||
|         Take the result of the ie(may be modified) and resolve all unresolved | ||||
|         references (URLs, playlist items). | ||||
|  | ||||
|         It will also download the videos if 'download'. | ||||
|         Returns the resolved ie_result. | ||||
|         """ | ||||
|  | ||||
|         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system | ||||
|         if result_type == 'video': | ||||
|             if 'playlist' not in ie_result: | ||||
|                 # It isn't part of a playlist | ||||
|                 ie_result['playlist'] = None | ||||
|                 ie_result['playlist_index'] = None | ||||
|             if download: | ||||
|                 self.process_info(ie_result) | ||||
|             return ie_result | ||||
|         elif result_type == 'url': | ||||
|             # We have to add extra_info to the results because it may be | ||||
|             # contained in a playlist | ||||
|             return self.extract_info(ie_result['url'], | ||||
|                                      download, | ||||
|                                      ie_key=ie_result.get('ie_key'), | ||||
|                                      extra_info=extra_info) | ||||
|         elif result_type == 'playlist': | ||||
|             # We process each entry in the playlist | ||||
|             playlist = ie_result.get('title', None) or ie_result.get('id', None) | ||||
|             self.to_screen(u'[download] Downloading playlist: %s'  % playlist) | ||||
|  | ||||
|             playlist_results = [] | ||||
|  | ||||
|             n_all_entries = len(ie_result['entries']) | ||||
|             playliststart = self.params.get('playliststart', 1) - 1 | ||||
|             playlistend = self.params.get('playlistend', -1) | ||||
|  | ||||
|             if playlistend == -1: | ||||
|                 entries = ie_result['entries'][playliststart:] | ||||
|             else: | ||||
|                 entries = ie_result['entries'][playliststart:playlistend] | ||||
|  | ||||
|             n_entries = len(entries) | ||||
|  | ||||
|             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" % | ||||
|                 (ie_result['extractor'], playlist, n_all_entries, n_entries)) | ||||
|  | ||||
|             for i,entry in enumerate(entries,1): | ||||
|                 self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) | ||||
|                 extra = { | ||||
|                          'playlist': playlist,  | ||||
|                          'playlist_index': i + playliststart, | ||||
|                          } | ||||
|                 if not 'extractor' in entry: | ||||
|                     # We set the extractor, if it's an url it will be set then to | ||||
|                     # the new extractor, but if it's already a video we must make | ||||
|                     # sure it's present: see issue #877 | ||||
|                     entry['extractor'] = ie_result['extractor'] | ||||
|                 entry_result = self.process_ie_result(entry, | ||||
|                                                       download=download, | ||||
|                                                       extra_info=extra) | ||||
|                 playlist_results.append(entry_result) | ||||
|             ie_result['entries'] = playlist_results | ||||
|             return ie_result | ||||
|         elif result_type == 'compat_list': | ||||
|             def _fixup(r): | ||||
|                 r.setdefault('extractor', ie_result['extractor']) | ||||
|                 return r | ||||
|             ie_result['entries'] = [ | ||||
|                 self.process_ie_result(_fixup(r), download=download) | ||||
|                 for r in ie_result['entries'] | ||||
|             ] | ||||
|             return ie_result | ||||
|         else: | ||||
|             raise Exception('Invalid result type: %s' % result_type) | ||||
|  | ||||
|     def process_info(self, info_dict): | ||||
|         """Process a single resolved IE result.""" | ||||
|  | ||||
|         assert info_dict.get('_type', 'video') == 'video' | ||||
|         #We increment the download the download count here to match the previous behaviour. | ||||
|         self.increment_downloads() | ||||
|  | ||||
|         info_dict['fulltitle'] = info_dict['title'] | ||||
|         if len(info_dict['title']) > 200: | ||||
|             info_dict['title'] = info_dict['title'][:197] + u'...' | ||||
|  | ||||
|         # Keep for backwards compatibility | ||||
|         info_dict['stitle'] = info_dict['title'] | ||||
|  | ||||
|         if not 'format' in info_dict: | ||||
|             info_dict['format'] = info_dict['ext'] | ||||
|  | ||||
|         reason = self._match_entry(info_dict) | ||||
|         if reason is not None: | ||||
|             self.to_screen(u'[download] ' + reason) | ||||
|             return | ||||
|  | ||||
|         max_downloads = self.params.get('max_downloads') | ||||
|         if max_downloads is not None: | ||||
|             if self._num_downloads > int(max_downloads): | ||||
|                 raise MaxDownloadsReached() | ||||
|  | ||||
|         filename = self.prepare_filename(info_dict) | ||||
|  | ||||
|         # Forced printings | ||||
|         if self.params.get('forcetitle', False): | ||||
|             compat_print(info_dict['title']) | ||||
|         if self.params.get('forceid', False): | ||||
|             compat_print(info_dict['id']) | ||||
|         if self.params.get('forceurl', False): | ||||
|             compat_print(info_dict['url']) | ||||
|         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: | ||||
|             compat_print(info_dict['thumbnail']) | ||||
|         if self.params.get('forcedescription', False) and 'description' in info_dict: | ||||
|             compat_print(info_dict['description']) | ||||
|         if self.params.get('forcefilename', False) and filename is not None: | ||||
|             compat_print(filename) | ||||
|         if self.params.get('forceformat', False): | ||||
|             compat_print(info_dict['format']) | ||||
|  | ||||
|         # Do nothing else if in simulate mode | ||||
|         if self.params.get('simulate', False): | ||||
|             return | ||||
|  | ||||
|         if filename is None: | ||||
|             return | ||||
|  | ||||
|         try: | ||||
|             dn = os.path.dirname(encodeFilename(filename)) | ||||
|             if dn != '' and not os.path.exists(dn): | ||||
|                 os.makedirs(dn) | ||||
|         except (OSError, IOError) as err: | ||||
|             self.report_error(u'unable to create directory ' + compat_str(err)) | ||||
|             return | ||||
|  | ||||
|         if self.params.get('writedescription', False): | ||||
|             try: | ||||
|                 descfn = filename + u'.description' | ||||
|                 self.report_writedescription(descfn) | ||||
|                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: | ||||
|                     descfile.write(info_dict['description']) | ||||
|             except (OSError, IOError): | ||||
|                 self.report_error(u'Cannot write description file ' + descfn) | ||||
|                 return | ||||
|  | ||||
|         if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']: | ||||
|             # subtitles download errors are already managed as troubles in relevant IE | ||||
|             # that way it will silently go on when used with unsupporting IE | ||||
|             subtitle = info_dict['subtitles'][0] | ||||
|             (sub_error, sub_lang, sub) = subtitle | ||||
|             sub_format = self.params.get('subtitlesformat') | ||||
|             if sub_error: | ||||
|                 self.report_warning("Some error while getting the subtitles") | ||||
|             else: | ||||
|                 try: | ||||
|                     sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format | ||||
|                     self.report_writesubtitles(sub_filename) | ||||
|                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||
|                         subfile.write(sub) | ||||
|                 except (OSError, IOError): | ||||
|                     self.report_error(u'Cannot write subtitles file ' + descfn) | ||||
|                     return | ||||
|  | ||||
|         if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: | ||||
|             subtitles = info_dict['subtitles'] | ||||
|             sub_format = self.params.get('subtitlesformat') | ||||
|             for subtitle in subtitles: | ||||
|                 (sub_error, sub_lang, sub) = subtitle | ||||
|                 if sub_error: | ||||
|                     self.report_warning("Some error while getting the subtitles") | ||||
|                 else: | ||||
|                     try: | ||||
|                         sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format | ||||
|                         self.report_writesubtitles(sub_filename) | ||||
|                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||
|                                 subfile.write(sub) | ||||
|                     except (OSError, IOError): | ||||
|                         self.report_error(u'Cannot write subtitles file ' + descfn) | ||||
|                         return | ||||
|  | ||||
|         if self.params.get('writeinfojson', False): | ||||
|             infofn = filename + u'.info.json' | ||||
|             self.report_writeinfojson(infofn) | ||||
|             try: | ||||
|                 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle']) | ||||
|                 write_json_file(json_info_dict, encodeFilename(infofn)) | ||||
|             except (OSError, IOError): | ||||
|                 self.report_error(u'Cannot write metadata to JSON file ' + infofn) | ||||
|                 return | ||||
|  | ||||
|         if self.params.get('writethumbnail', False): | ||||
|             if 'thumbnail' in info_dict: | ||||
|                 thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2] | ||||
|                 if not thumb_format: | ||||
|                     thumb_format = 'jpg' | ||||
|                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format | ||||
|                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' % | ||||
|                                (info_dict['extractor'], info_dict['id'])) | ||||
|                 uf = compat_urllib_request.urlopen(info_dict['thumbnail']) | ||||
|                 with open(thumb_filename, 'wb') as thumbf: | ||||
|                     shutil.copyfileobj(uf, thumbf) | ||||
|                 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % | ||||
|                                (info_dict['extractor'], info_dict['id'], thumb_filename)) | ||||
|  | ||||
|         if not self.params.get('skip_download', False): | ||||
|             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): | ||||
|                 success = True | ||||
|             else: | ||||
|                 try: | ||||
|                     success = self.fd._do_download(filename, info_dict) | ||||
|                 except (OSError, IOError) as err: | ||||
|                     raise UnavailableVideoError() | ||||
|                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                     self.report_error(u'unable to download video data: %s' % str(err)) | ||||
|                     return | ||||
|                 except (ContentTooShortError, ) as err: | ||||
|                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) | ||||
|                     return | ||||
|  | ||||
|             if success: | ||||
|                 try: | ||||
|                     self.post_process(filename, info_dict) | ||||
|                 except (PostProcessingError) as err: | ||||
|                     self.report_error(u'postprocessing: %s' % str(err)) | ||||
|                     return | ||||
|  | ||||
|     def download(self, url_list): | ||||
|         """Download a given list of URLs.""" | ||||
|         if len(url_list) > 1 and self.fixed_template(): | ||||
|             raise SameFileError(self.params['outtmpl']) | ||||
|  | ||||
|         for url in url_list: | ||||
|             try: | ||||
|                 #It also downloads the videos | ||||
|                 videos = self.extract_info(url) | ||||
|             except UnavailableVideoError: | ||||
|                 self.report_error(u'unable to download video') | ||||
|             except MaxDownloadsReached: | ||||
|                 self.to_screen(u'[info] Maximum number of downloaded files reached.') | ||||
|                 raise | ||||
|  | ||||
|         return self._download_retcode | ||||
|  | ||||
|     def post_process(self, filename, ie_info): | ||||
|         """Run all the postprocessors on the given file.""" | ||||
|         info = dict(ie_info) | ||||
|         info['filepath'] = filename | ||||
|         keep_video = None | ||||
|         for pp in self._pps: | ||||
|             try: | ||||
|                 keep_video_wish,new_info = pp.run(info) | ||||
|                 if keep_video_wish is not None: | ||||
|                     if keep_video_wish: | ||||
|                         keep_video = keep_video_wish | ||||
|                     elif keep_video is None: | ||||
|                         # No clear decision yet, let IE decide | ||||
|                         keep_video = keep_video_wish | ||||
|             except PostProcessingError as e: | ||||
|                 self.to_stderr(u'ERROR: ' + e.msg) | ||||
|         if keep_video is False and not self.params.get('keepvideo', False): | ||||
|             try: | ||||
|                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) | ||||
|                 os.remove(encodeFilename(filename)) | ||||
|             except (IOError, OSError): | ||||
|                 self.report_warning(u'Unable to remove downloaded video file') | ||||
| @@ -1,9 +1,6 @@ | ||||
| #!/usr/bin/env python | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| from __future__ import with_statement | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| __authors__  = ( | ||||
|     'Ricardo Garcia Gonzalez', | ||||
|     'Danny Colligan', | ||||
| @@ -24,13 +21,21 @@ __authors__  = ( | ||||
|     'Jaime Marquínez Ferrándiz', | ||||
|     'Jeff Crouse', | ||||
|     'Osama Khalid', | ||||
|     ) | ||||
|     'Michael Walter', | ||||
|     'M. Yasoob Ullah Khalid', | ||||
|     'Julien Fraichard', | ||||
|     'Johny Mo Swag', | ||||
|     'Axel Noack', | ||||
|     'Albert Kim', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
|  | ||||
| import codecs | ||||
| import getpass | ||||
| import optparse | ||||
| import os | ||||
| import random | ||||
| import re | ||||
| import shlex | ||||
| import socket | ||||
| @@ -43,10 +48,11 @@ from .utils import * | ||||
| from .update import update_self | ||||
| from .version import __version__ | ||||
| from .FileDownloader import * | ||||
| from .InfoExtractors import gen_extractors | ||||
| from .extractor import gen_extractors | ||||
| from .YoutubeDL import YoutubeDL | ||||
| from .PostProcessor import * | ||||
|  | ||||
| def parseOpts(): | ||||
| def parseOpts(overrideArguments=None): | ||||
|     def _readOptions(filename_bytes): | ||||
|         try: | ||||
|             optionf = open(filename_bytes) | ||||
| @@ -113,6 +119,7 @@ def parseOpts(): | ||||
|     selection      = optparse.OptionGroup(parser, 'Video Selection') | ||||
|     authentication = optparse.OptionGroup(parser, 'Authentication Options') | ||||
|     video_format   = optparse.OptionGroup(parser, 'Video Format Options') | ||||
|     downloader     = optparse.OptionGroup(parser, 'Download Options') | ||||
|     postproc       = optparse.OptionGroup(parser, 'Post-processing Options') | ||||
|     filesystem     = optparse.OptionGroup(parser, 'Filesystem Options') | ||||
|     verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') | ||||
| @@ -125,24 +132,23 @@ def parseOpts(): | ||||
|             action='store_true', dest='update_self', help='update this program to latest version') | ||||
|     general.add_option('-i', '--ignore-errors', | ||||
|             action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) | ||||
|     general.add_option('-r', '--rate-limit', | ||||
|             dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') | ||||
|     general.add_option('-R', '--retries', | ||||
|             dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) | ||||
|     general.add_option('--buffer-size', | ||||
|             dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024") | ||||
|     general.add_option('--no-resize-buffer', | ||||
|             action='store_true', dest='noresizebuffer', | ||||
|             help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) | ||||
|     general.add_option('--dump-user-agent', | ||||
|             action='store_true', dest='dump_user_agent', | ||||
|             help='display the current browser identification', default=False) | ||||
|     general.add_option('--user-agent', | ||||
|             dest='user_agent', help='specify a custom user agent', metavar='UA') | ||||
|     general.add_option('--referer', | ||||
|             dest='referer', help='specify a custom referer, use if the video access is restricted to one domain', | ||||
|             metavar='REF', default=None) | ||||
|     general.add_option('--list-extractors', | ||||
|             action='store_true', dest='list_extractors', | ||||
|             help='List all supported extractors and the URLs they would handle', default=False) | ||||
|     general.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) | ||||
|     general.add_option('--extractor-descriptions', | ||||
|             action='store_true', dest='list_extractor_descriptions', | ||||
|             help='Output descriptions of all supported extractors', default=False) | ||||
|     general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') | ||||
|     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') | ||||
|  | ||||
|  | ||||
|     selection.add_option('--playlist-start', | ||||
|             dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1) | ||||
| @@ -153,6 +159,9 @@ def parseOpts(): | ||||
|     selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None) | ||||
|     selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None) | ||||
|     selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None) | ||||
|     selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None) | ||||
|     selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) | ||||
|     selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) | ||||
|  | ||||
|  | ||||
|     authentication.add_option('-u', '--username', | ||||
| @@ -161,10 +170,13 @@ def parseOpts(): | ||||
|             dest='password', metavar='PASSWORD', help='account password') | ||||
|     authentication.add_option('-n', '--netrc', | ||||
|             action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) | ||||
|     authentication.add_option('--video-password', | ||||
|             dest='videopassword', metavar='PASSWORD', help='video password (vimeo only)') | ||||
|  | ||||
|  | ||||
|     video_format.add_option('-f', '--format', | ||||
|             action='store', dest='format', metavar='FORMAT', help='video format code') | ||||
|             action='store', dest='format', metavar='FORMAT', | ||||
|             help='video format code, specifiy the order of preference using slashes: "-f 22/17/18"') | ||||
|     video_format.add_option('--all-formats', | ||||
|             action='store_const', dest='format', help='download all available video formats', const='all') | ||||
|     video_format.add_option('--prefer-free-formats', | ||||
| @@ -173,12 +185,38 @@ def parseOpts(): | ||||
|             action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') | ||||
|     video_format.add_option('-F', '--list-formats', | ||||
|             action='store_true', dest='listformats', help='list all available formats (currently youtube only)') | ||||
|     video_format.add_option('--write-srt', | ||||
|     video_format.add_option('--write-sub', '--write-srt', | ||||
|             action='store_true', dest='writesubtitles', | ||||
|             help='write video closed captions to a .srt file (currently youtube only)', default=False) | ||||
|     video_format.add_option('--srt-lang', | ||||
|             help='write subtitle file (currently youtube only)', default=False) | ||||
|     video_format.add_option('--write-auto-sub', '--write-automatic-sub', | ||||
|             action='store_true', dest='writeautomaticsub', | ||||
|             help='write automatic subtitle file (currently youtube only)', default=False) | ||||
|     video_format.add_option('--only-sub', | ||||
|             action='store_true', dest='skip_download', | ||||
|             help='[deprecated] alias of --skip-download', default=False) | ||||
|     video_format.add_option('--all-subs', | ||||
|             action='store_true', dest='allsubtitles', | ||||
|             help='downloads all the available subtitles of the video (currently youtube only)', default=False) | ||||
|     video_format.add_option('--list-subs', | ||||
|             action='store_true', dest='listsubtitles', | ||||
|             help='lists all available subtitles for the video (currently youtube only)', default=False) | ||||
|     video_format.add_option('--sub-format', | ||||
|             action='store', dest='subtitlesformat', metavar='FORMAT', | ||||
|             help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt') | ||||
|     video_format.add_option('--sub-lang', '--srt-lang', | ||||
|             action='store', dest='subtitleslang', metavar='LANG', | ||||
|             help='language of the closed captions to download (optional) use IETF language tags like \'en\'') | ||||
|             help='language of the subtitles to download (optional) use IETF language tags like \'en\'') | ||||
|  | ||||
|     downloader.add_option('-r', '--rate-limit', | ||||
|             dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') | ||||
|     downloader.add_option('-R', '--retries', | ||||
|             dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) | ||||
|     downloader.add_option('--buffer-size', | ||||
|             dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024") | ||||
|     downloader.add_option('--no-resize-buffer', | ||||
|             action='store_true', dest='noresizebuffer', | ||||
|             help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) | ||||
|     downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) | ||||
|  | ||||
|     verbosity.add_option('-q', '--quiet', | ||||
|             action='store_true', dest='quiet', help='activates quiet mode', default=False) | ||||
| @@ -190,6 +228,8 @@ def parseOpts(): | ||||
|             action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) | ||||
|     verbosity.add_option('-e', '--get-title', | ||||
|             action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) | ||||
|     verbosity.add_option('--get-id', | ||||
|             action='store_true', dest='getid', help='simulate, quiet but print id', default=False) | ||||
|     verbosity.add_option('--get-thumbnail', | ||||
|             action='store_true', dest='getthumbnail', | ||||
|             help='simulate, quiet but print thumbnail URL', default=False) | ||||
| @@ -211,18 +251,33 @@ def parseOpts(): | ||||
|             help='display progress in console titlebar', default=False) | ||||
|     verbosity.add_option('-v', '--verbose', | ||||
|             action='store_true', dest='verbose', help='print various debugging information', default=False) | ||||
|     verbosity.add_option('--dump-intermediate-pages', | ||||
|             action='store_true', dest='dump_intermediate_pages', default=False, | ||||
|             help='print downloaded pages to debug problems(very verbose)') | ||||
|  | ||||
|     filesystem.add_option('-t', '--title', | ||||
|             action='store_true', dest='usetitle', help='use title in file name', default=False) | ||||
|             action='store_true', dest='usetitle', help='use title in file name (default)', default=False) | ||||
|     filesystem.add_option('--id', | ||||
|             action='store_true', dest='useid', help='use video ID in file name', default=False) | ||||
|             action='store_true', dest='useid', help='use only video ID in file name', default=False) | ||||
|     filesystem.add_option('-l', '--literal', | ||||
|             action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) | ||||
|     filesystem.add_option('-A', '--auto-number', | ||||
|             action='store_true', dest='autonumber', | ||||
|             help='number downloaded files starting from 00000', default=False) | ||||
|     filesystem.add_option('-o', '--output', | ||||
|             dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .') | ||||
|             dest='outtmpl', metavar='TEMPLATE', | ||||
|             help=('output filename template. Use %(title)s to get the title, ' | ||||
|                   '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' | ||||
|                   '%(autonumber)s to get an automatically incremented number, ' | ||||
|                   '%(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), ' | ||||
|                   '%(extractor)s for the provider (youtube, metacafe, etc), ' | ||||
|                   '%(id)s for the video id , %(playlist)s for the playlist the video is in, ' | ||||
|                   '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' | ||||
|                   'Use - to output to stdout. Can also be used to download to a different directory, ' | ||||
|                   'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) | ||||
|     filesystem.add_option('--autonumber-size', | ||||
|             dest='autonumber_size', metavar='NUMBER', | ||||
|             help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given') | ||||
|     filesystem.add_option('--restrict-filenames', | ||||
|             action='store_true', dest='restrictfilenames', | ||||
|             help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) | ||||
| @@ -248,6 +303,9 @@ def parseOpts(): | ||||
|     filesystem.add_option('--write-info-json', | ||||
|             action='store_true', dest='writeinfojson', | ||||
|             help='write video metadata to a .info.json file', default=False) | ||||
|     filesystem.add_option('--write-thumbnail', | ||||
|             action='store_true', dest='writethumbnail', | ||||
|             help='write thumbnail image to disk', default=False) | ||||
|  | ||||
|  | ||||
|     postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, | ||||
| @@ -266,24 +324,42 @@ def parseOpts(): | ||||
|  | ||||
|     parser.add_option_group(general) | ||||
|     parser.add_option_group(selection) | ||||
|     parser.add_option_group(downloader) | ||||
|     parser.add_option_group(filesystem) | ||||
|     parser.add_option_group(verbosity) | ||||
|     parser.add_option_group(video_format) | ||||
|     parser.add_option_group(authentication) | ||||
|     parser.add_option_group(postproc) | ||||
|  | ||||
|     xdg_config_home = os.environ.get('XDG_CONFIG_HOME') | ||||
|     if xdg_config_home: | ||||
|         userConf = os.path.join(xdg_config_home, 'youtube-dl.conf') | ||||
|     if overrideArguments is not None: | ||||
|         opts, args = parser.parse_args(overrideArguments) | ||||
|         if opts.verbose: | ||||
|             sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n') | ||||
|     else: | ||||
|         userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') | ||||
|     argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:] | ||||
|     opts, args = parser.parse_args(argv) | ||||
|         xdg_config_home = os.environ.get('XDG_CONFIG_HOME') | ||||
|         if xdg_config_home: | ||||
|             userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf') | ||||
|         else: | ||||
|             userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') | ||||
|         systemConf = _readOptions('/etc/youtube-dl.conf') | ||||
|         userConf = _readOptions(userConfFile) | ||||
|         commandLineConf = sys.argv[1:]  | ||||
|         argv = systemConf + userConf + commandLineConf | ||||
|         opts, args = parser.parse_args(argv) | ||||
|         if opts.verbose: | ||||
|             sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n') | ||||
|             sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n') | ||||
|             sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n') | ||||
|  | ||||
|     return parser, opts, args | ||||
|  | ||||
| def _real_main(): | ||||
|     parser, opts, args = parseOpts() | ||||
| def _real_main(argv=None): | ||||
|     # Compatibility fixes for Windows | ||||
|     if sys.platform == 'win32': | ||||
|         # https://github.com/rg3/youtube-dl/issues/820 | ||||
|         codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) | ||||
|  | ||||
|     parser, opts, args = parseOpts(argv) | ||||
|  | ||||
|     # Open appropriate CookieJar | ||||
|     if opts.cookiefile is None: | ||||
| @@ -301,10 +377,14 @@ def _real_main(): | ||||
|     # Set user agent | ||||
|     if opts.user_agent is not None: | ||||
|         std_headers['User-Agent'] = opts.user_agent | ||||
|      | ||||
|     # Set referer | ||||
|     if opts.referer is not None: | ||||
|         std_headers['Referer'] = opts.referer | ||||
|  | ||||
|     # Dump user agent | ||||
|     if opts.dump_user_agent: | ||||
|         print(std_headers['User-Agent']) | ||||
|         compat_print(std_headers['User-Agent']) | ||||
|         sys.exit(0) | ||||
|  | ||||
|     # Batch file verification | ||||
| @@ -325,27 +405,50 @@ def _real_main(): | ||||
|  | ||||
|     # General configuration | ||||
|     cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) | ||||
|     proxy_handler = compat_urllib_request.ProxyHandler() | ||||
|     opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) | ||||
|     if opts.proxy is not None: | ||||
|         if opts.proxy == '': | ||||
|             proxies = {} | ||||
|         else: | ||||
|             proxies = {'http': opts.proxy, 'https': opts.proxy} | ||||
|     else: | ||||
|         proxies = compat_urllib_request.getproxies() | ||||
|         # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) | ||||
|         if 'http' in proxies and 'https' not in proxies: | ||||
|             proxies['https'] = proxies['http'] | ||||
|     proxy_handler = compat_urllib_request.ProxyHandler(proxies) | ||||
|     https_handler = make_HTTPS_handler(opts) | ||||
|     opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) | ||||
|     compat_urllib_request.install_opener(opener) | ||||
|     socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) | ||||
|  | ||||
|     extractors = gen_extractors() | ||||
|  | ||||
|     if opts.list_extractors: | ||||
|         for ie in extractors: | ||||
|             print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) | ||||
|         for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): | ||||
|             compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) | ||||
|             matchedUrls = [url for url in all_urls if ie.suitable(url)] | ||||
|             all_urls = [url for url in all_urls if url not in matchedUrls] | ||||
|             for mu in matchedUrls: | ||||
|                 print(u'  ' + mu) | ||||
|                 compat_print(u'  ' + mu) | ||||
|         sys.exit(0) | ||||
|     if opts.list_extractor_descriptions: | ||||
|         for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): | ||||
|             if not ie._WORKING: | ||||
|                 continue | ||||
|             desc = getattr(ie, 'IE_DESC', ie.IE_NAME) | ||||
|             if hasattr(ie, 'SEARCH_KEY'): | ||||
|                 _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise') | ||||
|                 _COUNTS = (u'', u'5', u'10', u'all') | ||||
|                 desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) | ||||
|             compat_print(desc) | ||||
|         sys.exit(0) | ||||
|  | ||||
|  | ||||
|     # Conflicting, missing and erroneous options | ||||
|     if opts.usenetrc and (opts.username is not None or opts.password is not None): | ||||
|         parser.error(u'using .netrc conflicts with giving username/password') | ||||
|     if opts.password is not None and opts.username is None: | ||||
|         parser.error(u'account username missing') | ||||
|         parser.error(u' account username missing\n') | ||||
|     if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): | ||||
|         parser.error(u'using output template conflicts with using title, video ID or auto number') | ||||
|     if opts.usetitle and opts.useid: | ||||
| @@ -399,6 +502,10 @@ def _real_main(): | ||||
|     if opts.recodevideo is not None: | ||||
|         if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']: | ||||
|             parser.error(u'invalid video recode format specified') | ||||
|     if opts.date is not None: | ||||
|         date = DateRange.day(opts.date) | ||||
|     else: | ||||
|         date = DateRange(opts.dateafter, opts.datebefore) | ||||
|  | ||||
|     if sys.version_info < (3,): | ||||
|         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) | ||||
| @@ -411,25 +518,29 @@ def _real_main(): | ||||
|             or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s') | ||||
|             or (opts.useid and u'%(id)s.%(ext)s') | ||||
|             or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') | ||||
|             or u'%(id)s.%(ext)s') | ||||
|     # File downloader | ||||
|     fd = FileDownloader({ | ||||
|             or u'%(title)s-%(id)s.%(ext)s') | ||||
|  | ||||
|     # YoutubeDL | ||||
|     ydl = YoutubeDL({ | ||||
|         'usenetrc': opts.usenetrc, | ||||
|         'username': opts.username, | ||||
|         'password': opts.password, | ||||
|         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), | ||||
|         'videopassword': opts.videopassword, | ||||
|         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), | ||||
|         'forceurl': opts.geturl, | ||||
|         'forcetitle': opts.gettitle, | ||||
|         'forceid': opts.getid, | ||||
|         'forcethumbnail': opts.getthumbnail, | ||||
|         'forcedescription': opts.getdescription, | ||||
|         'forcefilename': opts.getfilename, | ||||
|         'forceformat': opts.getformat, | ||||
|         'simulate': opts.simulate, | ||||
|         'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), | ||||
|         'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), | ||||
|         'format': opts.format, | ||||
|         'format_limit': opts.format_limit, | ||||
|         'listformats': opts.listformats, | ||||
|         'outtmpl': outtmpl, | ||||
|         'autonumber_size': opts.autonumber_size, | ||||
|         'restrictfilenames': opts.restrictfilenames, | ||||
|         'ignoreerrors': opts.ignoreerrors, | ||||
|         'ratelimit': opts.ratelimit, | ||||
| @@ -448,45 +559,53 @@ def _real_main(): | ||||
|         'updatetime': opts.updatetime, | ||||
|         'writedescription': opts.writedescription, | ||||
|         'writeinfojson': opts.writeinfojson, | ||||
|         'writethumbnail': opts.writethumbnail, | ||||
|         'writesubtitles': opts.writesubtitles, | ||||
|         'writeautomaticsub': opts.writeautomaticsub, | ||||
|         'allsubtitles': opts.allsubtitles, | ||||
|         'listsubtitles': opts.listsubtitles, | ||||
|         'subtitlesformat': opts.subtitlesformat, | ||||
|         'subtitleslang': opts.subtitleslang, | ||||
|         'matchtitle': opts.matchtitle, | ||||
|         'rejecttitle': opts.rejecttitle, | ||||
|         'matchtitle': decodeOption(opts.matchtitle), | ||||
|         'rejecttitle': decodeOption(opts.rejecttitle), | ||||
|         'max_downloads': opts.max_downloads, | ||||
|         'prefer_free_formats': opts.prefer_free_formats, | ||||
|         'verbose': opts.verbose, | ||||
|         'dump_intermediate_pages': opts.dump_intermediate_pages, | ||||
|         'test': opts.test, | ||||
|         'keepvideo': opts.keepvideo, | ||||
|         'min_filesize': opts.min_filesize, | ||||
|         'max_filesize': opts.max_filesize | ||||
|         'max_filesize': opts.max_filesize, | ||||
|         'daterange': date, | ||||
|         }) | ||||
|  | ||||
|     if opts.verbose: | ||||
|         fd.to_screen(u'[debug] youtube-dl version ' + __version__) | ||||
|         ydl.to_screen(u'[debug] youtube-dl version ' + __version__) | ||||
|         try: | ||||
|             sp = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, | ||||
|                                   cwd=os.path.dirname(os.path.abspath(__file__))) | ||||
|             sp = subprocess.Popen( | ||||
|                 ['git', 'rev-parse', '--short', 'HEAD'], | ||||
|                 stdout=subprocess.PIPE, stderr=subprocess.PIPE, | ||||
|                 cwd=os.path.dirname(os.path.abspath(__file__))) | ||||
|             out, err = sp.communicate() | ||||
|             out = out.decode().strip() | ||||
|             if re.match('[0-9a-f]+', out): | ||||
|                 fd.to_screen(u'[debug] Git HEAD: ' + out) | ||||
|                 ydl.to_screen(u'[debug] Git HEAD: ' + out) | ||||
|         except: | ||||
|             pass | ||||
|         fd.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform())) | ||||
|         fd.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies)) | ||||
|             sys.exc_clear() | ||||
|         ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform())) | ||||
|         ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies)) | ||||
|  | ||||
|     for extractor in extractors: | ||||
|         fd.add_info_extractor(extractor) | ||||
|     ydl.add_default_info_extractors() | ||||
|  | ||||
|     # PostProcessors | ||||
|     if opts.extractaudio: | ||||
|         fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) | ||||
|         ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) | ||||
|     if opts.recodevideo: | ||||
|         fd.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) | ||||
|         ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) | ||||
|  | ||||
|     # Update version | ||||
|     if opts.update_self: | ||||
|         update_self(fd.to_screen, opts.verbose, sys.argv[0]) | ||||
|         update_self(ydl.to_screen, opts.verbose, sys.argv[0]) | ||||
|  | ||||
|     # Maybe do nothing | ||||
|     if len(all_urls) < 1: | ||||
| @@ -496,9 +615,9 @@ def _real_main(): | ||||
|             sys.exit() | ||||
|  | ||||
|     try: | ||||
|         retcode = fd.download(all_urls) | ||||
|         retcode = ydl.download(all_urls) | ||||
|     except MaxDownloadsReached: | ||||
|         fd.to_screen(u'--max-download limit reached, aborting.') | ||||
|         ydl.to_screen(u'--max-download limit reached, aborting.') | ||||
|         retcode = 101 | ||||
|  | ||||
|     # Dump cookie jar if requested | ||||
| @@ -510,9 +629,9 @@ def _real_main(): | ||||
|  | ||||
|     sys.exit(retcode) | ||||
|  | ||||
| def main(): | ||||
| def main(argv=None): | ||||
|     try: | ||||
|         _real_main() | ||||
|         _real_main(argv) | ||||
|     except DownloadError: | ||||
|         sys.exit(1) | ||||
|     except SameFileError: | ||||
|   | ||||
| @@ -9,7 +9,8 @@ import sys | ||||
| if __package__ is None and not hasattr(sys, "frozen"): | ||||
|     # direct call of __main__.py | ||||
|     import os.path | ||||
|     sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|     path = os.path.realpath(os.path.abspath(__file__)) | ||||
|     sys.path.append(os.path.dirname(os.path.dirname(path))) | ||||
|  | ||||
| import youtube_dl | ||||
|  | ||||
|   | ||||
							
								
								
									
										90
									
								
								youtube_dl/extractor/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								youtube_dl/extractor/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,90 @@ | ||||
|  | ||||
| from .ard import ARDIE | ||||
| from .arte import ArteTvIE | ||||
| from .auengine import AUEngineIE | ||||
| from .bandcamp import BandcampIE | ||||
| from .bliptv import BlipTVIE, BlipTVUserIE | ||||
| from .breakcom import BreakIE | ||||
| from .collegehumor import CollegeHumorIE | ||||
| from .comedycentral import ComedyCentralIE | ||||
| from .cspan import CSpanIE | ||||
| from .dailymotion import DailymotionIE | ||||
| from .depositfiles import DepositFilesIE | ||||
| from .eighttracks import EightTracksIE | ||||
| from .escapist import EscapistIE | ||||
| from .facebook import FacebookIE | ||||
| from .flickr import FlickrIE | ||||
| from .funnyordie import FunnyOrDieIE | ||||
| from .gamespot import GameSpotIE | ||||
| from .gametrailers import GametrailersIE | ||||
| from .generic import GenericIE | ||||
| from .googleplus import GooglePlusIE | ||||
| from .googlesearch import GoogleSearchIE | ||||
| from .hotnewhiphop import HotNewHipHopIE | ||||
| from .howcast import HowcastIE | ||||
| from .hypem import HypemIE | ||||
| from .ina import InaIE | ||||
| from .infoq import InfoQIE | ||||
| from .instagram import InstagramIE | ||||
| from .jukebox import JukeboxIE | ||||
| from .justintv import JustinTVIE | ||||
| from .keek import KeekIE | ||||
| from .liveleak import LiveLeakIE | ||||
| from .metacafe import MetacafeIE | ||||
| from .mixcloud import MixcloudIE | ||||
| from .mtv import MTVIE | ||||
| from .myspass import MySpassIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .nba import NBAIE | ||||
| from .photobucket import PhotobucketIE | ||||
| from .pornotube import PornotubeIE | ||||
| from .rbmaradio import RBMARadioIE | ||||
| from .redtube import RedTubeIE | ||||
| from .ringtv import RingTVIE | ||||
| from .soundcloud import SoundcloudIE, SoundcloudSetIE | ||||
| from .spiegel import SpiegelIE | ||||
| from .stanfordoc import StanfordOpenClassroomIE | ||||
| from .statigram import StatigramIE | ||||
| from .steam import SteamIE | ||||
| from .teamcoco import TeamcocoIE | ||||
| from .ted import TEDIE | ||||
| from .tf1 import TF1IE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| from .tudou import TudouIE | ||||
| from .tumblr import TumblrIE | ||||
| from .tutv import TutvIE | ||||
| from .ustream import UstreamIE | ||||
| from .vbox7 import Vbox7IE | ||||
| from .vevo import VevoIE | ||||
| from .vimeo import VimeoIE | ||||
| from .vine import VineIE | ||||
| from .wat import WatIE | ||||
| from .wimp import WimpIE | ||||
| from .worldstarhiphop import WorldStarHipHopIE | ||||
| from .xhamster import XHamsterIE | ||||
| from .xnxx import XNXXIE | ||||
| from .xvideos import XVideosIE | ||||
| from .yahoo import YahooIE, YahooSearchIE | ||||
| from .youjizz import YouJizzIE | ||||
| from .youku import YoukuIE | ||||
| from .youporn import YouPornIE | ||||
| from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE | ||||
| from .zdf import ZDFIE | ||||
|  | ||||
|  | ||||
# Every extractor class imported above (any module-level name ending in 'IE').
# The namespace is iterated in sorted-name order over a snapshot so the
# resulting order is deterministic across interpreters and runs; plain dict
# iteration order is arbitrary. GenericIE is excluded here and appended last
# so that it is only tried after every other extractor.
_ALL_CLASSES = [
    klass
    for name, klass in sorted(globals().items(), key=lambda item: item[0])
    if name.endswith('IE') and name != 'GenericIE'
]
_ALL_CLASSES.append(GenericIE)
|  | ||||
def gen_extractors():
    """ Build one fresh instance of each class listed in _ALL_CLASSES.

    The instances are returned in the same order as _ALL_CLASSES; that order
    is significant because the first extractor that matches a URL handles it.
    """
    instances = []
    for extractor_class in _ALL_CLASSES:
        instances.append(extractor_class())
    return instances
|  | ||||
def get_info_extractor(ie_name):
    """Look up an info extractor class by its short name.

    'IE' is appended to ie_name and the result is fetched from this module's
    global namespace; a KeyError is raised when no such name exists.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]
							
								
								
									
										54
									
								
								youtube_dl/extractor/ard.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								youtube_dl/extractor/ard.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
class ARDIE(InfoExtractor):
    """Information extractor for ardmediathek.de / mediathek.daserste.de."""

    _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
    _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
    _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
    _TEST = {
        u'url': u'http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640',
        u'file': u'14077640.mp4',
        u'md5': u'6ca8824255460c787376353f9e20bbd8',
        u'info_dict': {
            u"title": u"11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden"
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        """Extract the video described by url.

        Returns a one-element list holding the info dictionary ('id',
        'title', 'ext', 'url' and, for RTMP streams, 'play_path').
        Raises ExtractorError when the page has no title, no media streams,
        or no stream of the default media type.
        """
        # determine video id from url: prefer the numeric documentId query
        # parameter, fall back to the last path component
        url_match = re.match(self._VALID_URL, url)

        numid = re.search(r'documentId=([0-9]+)', url)
        if numid:
            video_id = numid.group(1)
        else:
            video_id = url_match.group('video_id')

        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)
        title_match = re.search(self._TITLE, html)
        if title_match is None:
            # Report a proper extraction error instead of failing with an
            # AttributeError on None.
            raise ExtractorError(u'Unable to extract title')
        title = title_match.group('title')

        streams = [sm.groupdict() for sm in re.finditer(self._MEDIA_STREAM, html)]
        if not streams:
            assert '"fsk"' in html
            raise ExtractorError(u'This video is only available after 8:00 pm')

        # choose default media type and highest quality for now
        default_streams = [s for s in streams if int(s["media_type"]) == 0]
        if not default_streams:
            # max() on an empty list would raise a bare ValueError
            raise ExtractorError(u'No stream of the default media type found')
        stream = max(default_streams, key=lambda s: int(s["quality"]))

        # there's two possibilities: RTMP stream or HTTP download
        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
        if stream['rtmp_url']:
            self.to_screen(u'RTMP download detected')
            assert stream['video_url'].startswith('mp4:')
            info["url"] = stream["rtmp_url"]
            info["play_path"] = stream['video_url']
        else:
            assert stream["video_url"].endswith('.mp4')
            info["url"] = stream["video_url"]
        return [info]
							
								
								
									
										133
									
								
								youtube_dl/extractor/arte.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								youtube_dl/extractor/arte.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     # This is used by the not implemented extractLiveStream method | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
class ArteTvIE(InfoExtractor):
    """
    There are two sources of video in arte.tv: videos.arte.tv and
    www.arte.tv/guide, the extraction process is different for each one.
    The videos expire in 7 days, so we can't add tests.
    """
    _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
    _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P<id>.*?).html'
    _LIVE_URL = r'index-[0-9]+\.html$'

    IE_NAME = u'arte.tv'

    @classmethod
    def suitable(cls, url):
        """Return True if url matches either the guide or the videos pattern."""
        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))

    # TODO implement Live Stream
    # def extractLiveStream(self, url):
    #     video_lang = url.split('/')[-4]
    #     info = self.grep_webpage(
    #         url,
    #         r'src="(.*?/videothek_js.*?\.js)',
    #         0,
    #         [
    #             (1, 'url', u'Invalid URL: %s' % url)
    #         ]
    #     )
    #     http_host = url.split('/')[2]
    #     next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
    #     info = self.grep_webpage(
    #         next_url,
    #         r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
    #             '(http://.*?\.swf).*?' +
    #             '(rtmp://.*?)\'',
    #         re.DOTALL,
    #         [
    #             (1, 'path',   u'could not extract video path: %s' % url),
    #             (2, 'player', u'could not extract video player: %s' % url),
    #             (3, 'url',    u'could not extract video url: %s' % url)
    #         ]
    #     )
    #     video_url = u'%s/%s' % (info.get('url'), info.get('path'))

    def _real_extract(self, url):
        """Dispatch url to the guide or the videos extraction routine.

        Raises ExtractorError for live-stream URLs (not supported yet) and
        for URLs that match none of the known patterns.
        """
        mobj = re.match(self._EMISSION_URL, url)
        if mobj is not None:
            # This is not a real id, it can be for example AJT for the news
            # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
            video_id = mobj.group('id')
            return self._extract_emission(url, video_id)

        mobj = re.match(self._VIDEOS_URL, url)
        if mobj is not None:
            return self._extract_video(url, mobj.group('id'))

        # No video id is available on this path, so the live pattern (which
        # is anchored at the end of the string) is tested against the URL.
        if re.search(self._LIVE_URL, url) is not None:
            raise ExtractorError(u'Arte live streams are not yet supported, sorry')
            # self.extractLiveStream(url)
            # return

        raise ExtractorError(u'Invalid URL: %s' % url)

    def _extract_emission(self, url, video_id):
        """Extract from www.arte.tv/guide"""
        webpage = self._download_webpage(url, video_id)
        json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')

        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
        self.report_extraction(video_id)
        info = json.loads(json_info)
        player_info = info['videoJsonPlayer']

        info_dict = {'id': player_info['VID'],
                     'title': player_info['VTI'],
                     'description': player_info['VDE'],
                     'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
                     'thumbnail': player_info['programImage'],
                     'ext': 'flv',
                     }

        # We order the formats by quality
        formats = sorted(player_info['VSR'].values(), key=lambda f: int(f['height']))
        if not formats:
            raise ExtractorError(u'No video formats found')
        # Pick the best quality
        format_info = formats[-1]
        if format_info['mediaType'] == u'rtmp':
            info_dict['url'] = format_info['streamer']
            info_dict['play_path'] = 'mp4:' + format_info['url']
        else:
            info_dict['url'] = format_info['url']

        return info_dict

    def _extract_video(self, url, video_id):
        """Extract from videos.arte.tv"""
        config_xml_url = url.replace('/videos/', '/do_delegate/videos/')
        config_xml_url = config_xml_url.replace('.html', ',view,asPlayerXml.xml')
        config_xml = self._download_webpage(config_xml_url, video_id)
        # The first document only references the real per-language config
        config_xml_url = self._html_search_regex(r'<video lang=".*?" ref="(.*?)"', config_xml, 'config xml url')
        config_xml = self._download_webpage(config_xml_url, video_id)

        video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
        if not video_urls:
            raise ExtractorError(u'No video urls found')

        def _key(m):
            # 'hd' ranks above every other quality label
            quality = m.group('quality')
            if quality == 'hd':
                return 2
            else:
                return 1
        # We pick the best quality (last entry after a stable sort)
        video_urls = sorted(video_urls, key=_key)
        video_url = video_urls[-1].group('url')

        title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title')
        thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>',
                                            config_xml, 'thumbnail')
        return {'id': video_id,
                'title': title,
                'thumbnail': thumbnail,
                'url': video_url,
                'ext': 'flv',
                }
							
								
								
									
										38
									
								
								youtube_dl/extractor/auengine.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								youtube_dl/extractor/auengine.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| import os.path | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_urlparse, | ||||
| ) | ||||
|  | ||||
class AUEngineIE(InfoExtractor):
    """Information extractor for auengine.com embed pages."""

    _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'

    def _real_extract(self, url):
        """Extract the video referenced by an auengine embed page.

        Returns a one-element list with the info dictionary. 'thumbnail' is
        None when the page links no .png image. Raises ExtractorError when
        the page contains no .mp4 link.
        """
        # Local import: this module's top-level import from ..utils does not
        # include ExtractorError.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
                webpage, u'title')
        title = title.strip()
        links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage)
        links = [compat_urllib_parse.unquote(l) for l in links]

        # Defaults so that a page lacking one kind of link cannot leave these
        # names unbound.
        thumbnail = None
        video_url = None
        ext = None
        for link in links:
            root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path)
            if pathext == '.png':
                thumbnail = link
            elif pathext == '.mp4':
                video_url = link
                ext = pathext
        if video_url is None:
            raise ExtractorError(u'Unable to find a video URL')

        # Drop the file extension from the title if the page title has one
        if title.endswith(ext):
            title = title[:-len(ext)]
        ext = ext[1:]
        return [{
            'id':        video_id,
            'url':       video_url,
            'ext':       ext,
            'title':     title,
            'thumbnail': thumbnail,
        }]
							
								
								
									
										63
									
								
								youtube_dl/extractor/bandcamp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								youtube_dl/extractor/bandcamp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,63 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BandcampIE(InfoExtractor):
    """Information extractor for free tracks on bandcamp.com."""

    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
    _TEST = {
        u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
        u'file': u'1812978515.mp3',
        u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
        u'info_dict': {
            u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
        },
        u'skip': u'There is a limit of 200 free downloads / month for the test song'
    }

    def _real_extract(self, url):
        """Extract the mp3-320 download of a free Bandcamp track.

        Returns a one-element list with the info dictionary. Raises
        ExtractorError when the track is not free or when any of the
        expected pieces of page data cannot be located.
        """
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        # We get the link to the free download page
        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
        if m_download is None:
            raise ExtractorError(u'No free songs found')

        download_link = m_download.group(1)
        m_id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
                         webpage, re.MULTILINE|re.DOTALL)
        if m_id is None:
            raise ExtractorError(u'Unable to extract track id')
        track_id = m_id.group('id')

        download_webpage = self._download_webpage(download_link, track_id,
                                                  'Downloading free downloads page')
        # We get the dictionary of the track from some javascript code
        m_items = re.search(r'items: (.*?),$',
                            download_webpage, re.MULTILINE)
        if m_items is None:
            raise ExtractorError(u'Unable to extract track information')
        info = json.loads(m_items.group(1))[0]
        # We pick mp3-320 for now, until format selection can be easily implemented.
        mp3_info = info[u'downloads'][u'mp3-320']
        # If we try to use this url it says the link has expired
        initial_url = mp3_info[u'url']
        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
        m_url = re.match(re_url, initial_url)
        if m_url is None:
            raise ExtractorError(u'Unexpected download url format')
        # We build the url we will use to get the final track url
        # This url is built in Bandcamp in the script download_bunde_*.js
        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), track_id, m_url.group('ts'))
        final_url_webpage = self._download_webpage(request_url, track_id, 'Requesting download url')
        # If we could correctly generate the .rand field the url would be
        # in the "download_url" key
        m_final = re.search(r'"retry_url":"(.*?)"', final_url_webpage)
        if m_final is None:
            raise ExtractorError(u'Unable to extract final download url')
        final_url = m_final.group(1)

        track_info = {'id': track_id,
                      'title': info[u'title'],
                      'ext': 'mp3',
                      'url': final_url,
                      'thumbnail': info[u'thumb_url'],
                      'uploader': info[u'artist']
                      }

        return [track_info]
							
								
								
									
										188
									
								
								youtube_dl/extractor/bliptv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										188
									
								
								youtube_dl/extractor/bliptv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,188 @@ | ||||
| import datetime | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| import socket | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_parse_qs, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BlipTVIE(InfoExtractor):
    """Information extractor for blip.tv"""

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
    _URL_EXT = r'^.*\.([a-z0-9]+)$'
    IE_NAME = u'blip.tv'
    _TEST = {
        u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
        u'file': u'5779306.m4v',
        u'md5': u'b2d849efcf7ee18917e4b4d9ff37cafe',
        u'info_dict': {
            u"upload_date": u"20111205",
            u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",
            u"uploader": u"Comic Book Resources - CBR TV",
            u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3"
        }
    }

    def report_direct_download(self, title):
        """Report that the URL points directly at a video file."""
        self.to_screen(u'%s: Direct download detected' % title)

    def _real_extract(self, url):
        """Extract a blip.tv video.

        Handles api.swf and /play/ URLs (resolved through a redirect and then
        re-extracted), direct video downloads, and regular pages described by
        the site's JSON skin. Returns a one-element list with the info
        dictionary. Raises ExtractorError on an invalid URL, on network
        failures and when the JSON cannot be parsed.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # See https://github.com/rg3/youtube-dl/issues/857
        api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url)
        if api_mobj is not None:
            url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
        urlp = compat_urllib_parse_urlparse(url)
        if urlp.path.startswith('/play/'):
            # The redirect target carries the file id in its URL fragment
            request = compat_urllib_request.Request(url)
            response = compat_urllib_request.urlopen(request)
            redirecturl = response.geturl()
            rurlp = compat_urllib_parse_urlparse(redirecturl)
            file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
            url = 'http://blip.tv/a/a-' + file_id
            return self._real_extract(url)


        if '?' in url:
            cchar = '&'
        else:
            cchar = '?'
        json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
        request = compat_urllib_request.Request(json_url)
        request.add_header('User-Agent', 'iTunes/10.6.1')
        self.report_extraction(mobj.group(1))
        info = None
        try:
            urlh = compat_urllib_request.urlopen(request)
            if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
                basename = url.split('/')[-1]
                title,ext = os.path.splitext(basename)
                # Only byte strings need decoding: Python 3 str has no
                # decode() method, and Python 2 unicode must not be
                # round-tripped through the ASCII codec.
                if isinstance(title, bytes):
                    title = title.decode('UTF-8')
                ext = ext.replace('.', '')
                self.report_direct_download(title)
                info = {
                    'id': title,
                    'url': url,
                    'uploader': None,
                    'upload_date': None,
                    'title': title,
                    'ext': ext,
                    'urlhandle': urlh
                }
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
        if info is None: # Regular URL
            try:
                json_code_bytes = urlh.read()
                json_code = json_code_bytes.decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))

            try:
                json_data = json.loads(json_code)
                # The payload is either wrapped in a 'Post' object or bare
                if 'Post' in json_data:
                    data = json_data['Post']
                else:
                    data = json_data

                upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
                video_url = data['media']['url']
                umobj = re.match(self._URL_EXT, video_url)
                if umobj is None:
                    raise ValueError('Can not determine filename extension')
                ext = umobj.group(1)

                info = {
                    'id': data['item_id'],
                    'url': video_url,
                    'uploader': data['display_name'],
                    'upload_date': upload_date,
                    'title': data['title'],
                    'ext': ext,
                    'format': data['media']['mimeType'],
                    'thumbnail': data['thumbnailUrl'],
                    'description': data['description'],
                    'player_url': data['embedUrl'],
                    'user_agent': 'iTunes/10.6.1',
                }
            except (ValueError,KeyError) as err:
                raise ExtractorError(u'Unable to parse video information: %s' % repr(err))

        return [info]
|  | ||||
|  | ||||
class BlipTVUserIE(InfoExtractor):
    """Information Extractor for blip.tv users.

    Collects the video paths listed for a user by the mobile site's paged
    episode-list endpoint and returns them as a playlist of URL results
    that are delegated to the 'BlipTV' extractor.
    """

    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
    _PAGE_SIZE = 12
    IE_NAME = u'blip.tv:user'

    def _real_extract(self, url):
        """Return a one-element list containing the user's playlist.

        Raises ExtractorError when the URL does not match _VALID_URL or
        when the user page carries no data-users-id attribute.
        """
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'

        page = self._download_webpage(url, username, u'Downloading user page')
        mobj = re.search(r'data-users-id="([^"]+)"', page)
        if mobj is None:
            # Fail with a readable extractor error instead of an
            # AttributeError on None when the page layout changes.
            raise ExtractorError(u'Unable to extract user id for %s' % username)
        page_base = page_base % mobj.group(1)

        # Download video ids using BlipTV Ajax calls. Result size per
        # query is limited (currently to 12 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []
        pagenum = 1

        while True:
            url = page_base + "&page=" + str(pagenum)
            page = self._download_webpage(url, username,
                                          u'Downloading video ids from page %d' % pagenum)

            # Extract video identifiers
            ids_in_page = []

            for mobj in re.finditer(r'href="/([^"]+)"', page):
                # Unescape before the membership test so that the value
                # compared is the same value that gets stored.
                video_id = unescapeHTML(mobj.group(1))
                if video_id not in ids_in_page:
                    ids_in_page.append(video_id)

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._PAGE_SIZE:
                break

            pagenum += 1

        urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
        url_entries = [self.url_result(url, 'BlipTV') for url in urls]
        return [self.playlist_result(url_entries, playlist_title = username)]
							
								
								
									
										33
									
								
								youtube_dl/extractor/breakcom.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								youtube_dl/extractor/breakcom.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class BreakIE(InfoExtractor):
    """Information extractor for break.com video pages."""

    _VALID_URL = r'(?:http://)?(?:www\.)?break\.com/video/([^/]+)'
    _TEST = {
        u'url': u'http://www.break.com/video/when-girls-act-like-guys-2468056',
        u'file': u'2468056.mp4',
        u'md5': u'a3513fb1547fba4fb6cfac1bffc6c46b',
        u'info_dict': {
            u"title": u"When Girls Act Like D-Bags"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video.

        The video id is the last dash-separated component of the URL slug.
        The required fields are looked up with _search_regex, so a missing
        field raises ExtractorError; a missing thumbnail only produces a
        warning.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1).split("-")[-1]
        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(
            r"videoPath: '(.+?)',", webpage, u'video URL')
        key = self._search_regex(r"icon: '(.+?)',", webpage, u'URL key')
        # Plain concatenation keeps the value a unicode string; str() would
        # raise UnicodeEncodeError on Python 2 for non-ASCII characters.
        final_url = video_url + u'?' + key
        # The thumbnail is an optional field, so do not abort without it.
        thumbnail_url = self._search_regex(
            r"thumbnailURL: '(.+?)'", webpage, u'thumbnail', fatal=False)
        title = self._search_regex(r"sVidTitle: '(.+)',", webpage, u'title')
        ext = video_url.split('.')[-1]
        return [{
            'id':        video_id,
            'url':       final_url,
            'ext':       ext,
            'title':     title,
            'thumbnail': thumbnail_url,
        }]
							
								
								
									
										74
									
								
								youtube_dl/extractor/collegehumor.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								youtube_dl/extractor/collegehumor.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,74 @@ | ||||
| import re | ||||
| import socket | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
class CollegeHumorIE(InfoExtractor):
    """Information extractor for collegehumor.com videos (marked broken)."""

    _WORKING = False
    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'

    def report_manifest(self, video_id):
        """Report the download of the XML manifest."""
        self.to_screen(u'%s: Downloading XML manifest' % video_id)

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video.

        Downloads the metadata XML, then the manifest it points to, and
        builds the URL of the first fragment from both.

        Raises ExtractorError on an invalid URL, on download failures and
        when either XML document lacks an expected node or attribute.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        info = {
            'id': video_id,
            'uploader': None,
            'upload_date': None,
        }

        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
        try:
            metaXml = compat_urllib_request.urlopen(xmlUrl).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))

        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
            info['description'] = videoNode.findall('./description')[0].text
            info['title'] = videoNode.findall('./caption')[0].text
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
            manifest_url = videoNode.findall('./file')[0].text
        except IndexError:
            raise ExtractorError(u'Invalid metadata XML file')
        if not manifest_url:
            # An empty <file/> element has text None; report it as bad
            # metadata rather than failing on the concatenation below.
            raise ExtractorError(u'Invalid metadata XML file')

        manifest_url += '?hdcore=2.10.3'
        self.report_manifest(video_id)
        try:
            manifestXml = compat_urllib_request.urlopen(manifest_url).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))

        adoc = xml.etree.ElementTree.fromstring(manifestXml)
        try:
            media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
            node_id = media_node.attrib['url']
            video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
        except (IndexError, KeyError):
            # KeyError: the <media> node exists but has no "url" attribute.
            raise ExtractorError(u'Invalid manifest file')

        url_pr = compat_urllib_parse_urlparse(manifest_url)
        url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'

        info['url'] = url
        info['ext'] = 'f4f'
        return [info]
							
								
								
									
										189
									
								
								youtube_dl/extractor/comedycentral.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										189
									
								
								youtube_dl/extractor/comedycentral.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,189 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ComedyCentralIE(InfoExtractor):
    """Information extractor for The Daily Show and The Colbert Report."""

    IE_DESC = u'The Daily Show / Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
    #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
    _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                      |(https?://)?(www\.)?
                          (?P<showname>thedailyshow|colbertnation)\.com/
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
                     $"""
    _TEST = {
        u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
        u'file': u'422212.mp4',
        u'md5': u'4e2f5cb088a83cd8cdb7756132f9739d',
        u'info_dict': {
            u"upload_date": u"20121214",
            u"description": u"Kristen Stewart",
            u"uploader": u"thedailyshow",
            u"title": u"thedailyshow-kristen-stewart part 1"
        }
    }

    _available_formats = ['3500', '2200', '1700', '1200', '750', '400']

    _video_extensions = {
        '3500': 'mp4',
        '2200': 'mp4',
        '1700': 'mp4',
        '1200': 'mp4',
        '750': 'mp4',
        '400': 'mp4',
    }
    _video_dimensions = {
        '3500': '1280x720',
        '2200': '960x540',
        '1700': '768x432',
        '1200': '640x360',
        '750': '512x288',
        '400': '384x216',
    }

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written in verbose syntax, so the base-class
        # implementation (which passes no flags) cannot be used.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _print_formats(self, formats):
        """Print each format id with its extension and dimensions."""
        print('Available formats:')
        for x in formats:
            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))


    def _real_extract(self, url):
        """Return one info dict per <item> of the episode/clip feed.

        Returns None (after printing the formats of the first item) when
        the 'listformats' downloader parameter is set.

        Raises ExtractorError on an invalid or unspecific (redirected)
        URL, when no media reference is found in the page, or when an
        RTMP URL does not have the expected shape.
        """
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        if mobj.group('shortname'):
            # Expand the abbreviation to the show's full-episodes page.
            if mobj.group('shortname') in ('tds', 'thedailyshow'):
                url = u'http://www.thedailyshow.com/full-episodes/'
            else:
                url = u'http://www.colbertnation.com/full-episodes/'
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            assert mobj is not None

        if mobj.group('clip'):
            if mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
            dlNewest = False
        else:
            # A full-episodes URL without an episode part means "newest".
            dlNewest = not mobj.group('episode')
            if dlNewest:
                epTitle = mobj.group('showname')
            else:
                epTitle = mobj.group('episode')

        self.report_extraction(epTitle)
        webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
        if dlNewest:
            # Use the URL we were redirected to as the episode URL.
            url = htmlHandle.geturl()
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            if mobj is None:
                raise ExtractorError(u'Invalid redirected URL: ' + url)
            # The group is '' for a bare full-episodes URL and None when
            # the redirect target is not a full-episodes URL at all; both
            # leave us without an episode title.
            if not mobj.group('episode'):
                raise ExtractorError(u'Redirected URL is still not specific: ' + url)
            epTitle = mobj.group('episode')

        mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)

        if len(mMovieParams) == 0:
            # The Colbert Report embeds the information in a data-mgid
            # attribute without a URL prefix; so extract the alternate
            # reference and then add the URL prefix manually.

            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
            if len(altMovieParams) == 0:
                raise ExtractorError(u'unable to find Flash URL in webpage ' + url)
            else:
                mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]

        uri = mMovieParams[0][1]
        indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
        indexXml = self._download_webpage(indexUrl, epTitle,
                                          u'Downloading show index',
                                          u'unable to download episode index')

        results = []

        idoc = xml.etree.ElementTree.fromstring(indexXml)
        itemEls = idoc.findall('.//item')
        for partNum,itemEl in enumerate(itemEls):
            mediaId = itemEl.findall('./guid')[0].text
            shortMediaId = mediaId.split(':')[-1]
            showId = mediaId.split(':')[-2].replace('.com', '')
            officialTitle = itemEl.findall('./title')[0].text
            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)

            configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
                        compat_urllib_parse.urlencode({'uri': mediaId}))
            configXml = self._download_webpage(configUrl, epTitle,
                                               u'Downloading configuration for %s' % shortMediaId)

            cdoc = xml.etree.ElementTree.fromstring(configXml)
            # (bitrate, rtmp url) pairs in document order.
            turls = []
            for rendition in cdoc.findall('.//rendition'):
                finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
                turls.append(finfo)

            if len(turls) == 0:
                self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')
                continue

            if self._downloader.params.get('listformats', None):
                self._print_formats([i[0] for i in turls])
                return

            # For now, just pick the highest bitrate
            format,rtmp_video_url = turls[-1]

            # Get the format arg from the arg stream
            req_format = self._downloader.params.get('format', None)

            # Select format if we can find one
            for f,v in turls:
                if f == req_format:
                    format, rtmp_video_url = f, v
                    break

            # Rewrite the RTMP URL into an HTTP URL on the mobile host.
            m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
            if not m:
                raise ExtractorError(u'Cannot transform RTMP url')
            base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
            video_url = base + m.group('finalid')

            effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
            info = {
                'id': shortMediaId,
                'url': video_url,
                'uploader': showId,
                'upload_date': officialDate,
                'title': effTitle,
                'ext': 'mp4',
                'format': format,
                'thumbnail': None,
                'description': compat_str(officialTitle),
            }
            results.append(info)

        return results
							
								
								
									
										269
									
								
								youtube_dl/extractor/common.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										269
									
								
								youtube_dl/extractor/common.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,269 @@ | ||||
| import base64 | ||||
| import os | ||||
| import re | ||||
| import socket | ||||
| import sys | ||||
|  | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
|     compat_str, | ||||
|  | ||||
|     clean_html, | ||||
|     compiled_regex_type, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
class InfoExtractor(object):
    """Information Extractor class.

    Information extractors are the classes that, given a URL, extract
    information about the video (or videos) the URL refers to. This
    information includes the real video URL, the video title, author and
    others. The information is stored in a dictionary which is then
    passed to the FileDownloader. The FileDownloader processes this
    information possibly downloading the video to the file system, among
    other possible outcomes.

    The dictionaries must include the following fields:

    id:             Video identifier.
    url:            Final video URL.
    title:          Video title, unescaped.
    ext:            Video filename extension.

    The following fields are optional:

    format:         The video format, defaults to ext (used for --get-format)
    thumbnail:      Full URL to a video thumbnail image.
    description:    One-line video description.
    uploader:       Full name of the video uploader.
    upload_date:    Video upload date (YYYYMMDD).
    uploader_id:    Nickname or id of the video uploader.
    location:       Physical location of the video.
    player_url:     SWF Player URL (used for rtmpdump).
    subtitles:      The subtitle file contents.
    view_count:     How many users have watched the video on the platform.
    urlhandle:      [internal] The urlHandle to be used to download the file,
                    like returned by urllib.request.urlopen

    The fields should all be Unicode strings.

    Subclasses of this one should re-define the _real_initialize() and
    _real_extract() methods and define a _VALID_URL regexp.
    Probably, they should also be added to the list of extractors.

    _real_extract() must return a *list* of information dictionaries as
    described above.

    Finally, the _WORKING attribute should be set to False for broken IEs
    in order to warn the users and skip the tests.
    """

    _ready = False
    _downloader = None
    _WORKING = True

    def __init__(self, downloader=None):
        """Constructor. Receives an optional downloader."""
        self._ready = False
        self.set_downloader(downloader)

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url) is not None

    @classmethod
    def working(cls):
        """Getter method for _WORKING."""
        return cls._WORKING

    def initialize(self):
        """Initializes an instance (authentication, etc)."""
        if not self._ready:
            self._real_initialize()
            self._ready = True

    def extract(self, url):
        """Extracts URL information and returns it in list of dicts."""
        self.initialize()
        return self._real_extract(url)

    def set_downloader(self, downloader):
        """Sets the downloader for this IE."""
        self._downloader = downloader

    def _real_initialize(self):
        """Real initialization process. Redefine in subclasses."""
        pass

    def _real_extract(self, url):
        """Real extraction process. Redefine in subclasses."""
        pass

    @property
    def IE_NAME(self):
        """The class name with its last two characters (the 'IE' suffix) removed."""
        return type(self).__name__[:-2]

    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
        """Open url_or_request and return the response handle.

        note is the progress message: None prints the default "Downloading
        webpage" message, False prints nothing. errnote is the prefix of
        the ExtractorError raised on network failure.
        """
        if note is None:
            self.report_download_webpage(video_id)
        elif note is not False:
            self.to_screen(u'%s: %s' % (video_id, note))
        try:
            return compat_urllib_request.urlopen(url_or_request)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            if errnote is None:
                errnote = u'Unable to download webpage'
            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])

    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
        """Return a tuple (page content as string, URL handle).

        The body is decoded with the charset from the Content-Type header
        when one is given, and as UTF-8 otherwise or when the advertised
        charset is unknown to Python. Undecodable bytes are replaced.
        """
        urlh = self._request_webpage(url_or_request, video_id, note, errnote)
        content_type = urlh.headers.get('Content-Type', '')
        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
        if m:
            encoding = m.group(1)
        else:
            encoding = 'utf-8'
        webpage_bytes = urlh.read()
        if self._downloader.params.get('dump_intermediate_pages', False):
            try:
                url = url_or_request.get_full_url()
            except AttributeError:
                # A plain URL string rather than a request object.
                url = url_or_request
            self.to_screen(u'Dumping request to ' + url)
            dump = base64.b64encode(webpage_bytes).decode('ascii')
            self._downloader.to_screen(dump)
        try:
            content = webpage_bytes.decode(encoding, 'replace')
        except LookupError:
            # The server advertised a charset Python has no codec for;
            # fall back to the default instead of aborting the extraction.
            content = webpage_bytes.decode('utf-8', 'replace')
        return (content, urlh)

    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
        """Return the data of the page as a string."""
        return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]

    def to_screen(self, msg):
        """Print msg to screen, prefixing it with '[ie_name]'"""
        self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))

    def report_extraction(self, id_or_name):
        """Report information extraction."""
        self.to_screen(u'%s: Extracting information' % id_or_name)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self.to_screen(u'%s: Downloading webpage' % video_id)

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self.to_screen(u'Confirming age')

    #Methods for following #608
    #They set the correct value of the '_type' key
    def video_result(self, video_info):
        """Returns a video"""
        video_info['_type'] = 'video'
        return video_info

    def url_result(self, url, ie=None):
        """Returns a url that points to a page that should be processed"""
        #TODO: ie should be the class used for getting the info
        video_info = {'_type': 'url',
                      'url': url,
                      'ie_key': ie}
        return video_info

    def playlist_result(self, entries, playlist_id=None, playlist_title=None):
        """Returns a playlist"""
        video_info = {'_type': 'playlist',
                      'entries': entries}
        if playlist_id:
            video_info['id'] = playlist_id
        if playlist_title:
            video_info['title'] = playlist_title
        return video_info

    def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
        """
        Perform a regex search on the given string, using a single or a list of
        patterns returning the first matching group.
        In case of failure return a default value or raise a WARNING or a
        ExtractorError, depending on fatal, specifying the field name.
        """
        if isinstance(pattern, (str, compat_str, compiled_regex_type)):
            mobj = re.search(pattern, string, flags)
        else:
            # Start from "no match" so that an empty pattern list takes the
            # failure path below instead of leaving mobj unbound.
            mobj = None
            for p in pattern:
                mobj = re.search(p, string, flags)
                if mobj:
                    break

        # Colour the field name only on a terminal that is not Windows.
        if sys.stderr.isatty() and os.name != 'nt':
            _name = u'\033[0;34m%s\033[0m' % name
        else:
            _name = name

        if mobj:
            # return the first matching group
            return next(g for g in mobj.groups() if g is not None)
        elif default is not None:
            return default
        elif fatal:
            raise ExtractorError(u'Unable to extract %s' % _name)
        else:
            self._downloader.report_warning(u'unable to extract %s; '
                u'please report this issue on http://yt-dl.org/bug' % _name)
            return None

    def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
        """
        Like _search_regex, but strips HTML tags and unescapes entities.
        """
        res = self._search_regex(pattern, string, name, default, fatal, flags)
        if res:
            return clean_html(res).strip()
        else:
            return res
|  | ||||
class SearchInfoExtractor(InfoExtractor):
    """
    Base class for paged search queries extractors.
    They accept urls in the format _SEARCH_KEY(|all|[0-9]):{query}
    Instances should define _SEARCH_KEY and _MAX_RESULTS.
    """

    @classmethod
    def _make_valid_url(cls):
        """Build the query regexp from the subclass's _SEARCH_KEY."""
        return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if it is a query for this IE."""
        return re.match(cls._make_valid_url(), url) is not None

    def _real_extract(self, query):
        """Parse the search query and delegate to _get_n_results.

        An empty prefix requests 1 result, 'all' requests _MAX_RESULTS and
        a number requests that many, capped at _MAX_RESULTS with a warning.

        Raises ExtractorError if the query does not match the expected
        format.
        """
        mobj = re.match(self._make_valid_url(), query)
        if mobj is None:
            raise ExtractorError(u'Invalid search query "%s"' % query)

        prefix = mobj.group('prefix')
        query = mobj.group('query')
        if prefix == '':
            return self._get_n_results(query, 1)
        elif prefix == 'all':
            return self._get_n_results(query, self._MAX_RESULTS)
        else:
            n = int(prefix)
            if n <= 0:
                # Defensive: the default regexp only admits positive
                # numbers, but _make_valid_url may be overridden.
                raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
            elif n > self._MAX_RESULTS:
                self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
                n = self._MAX_RESULTS
            return self._get_n_results(query, n)

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        raise NotImplementedError("This method must be implemented by subclasses")

    @property
    def SEARCH_KEY(self):
        """Read-only access to the subclass's _SEARCH_KEY."""
        return self._SEARCH_KEY
							
								
								
									
										53
									
								
								youtube_dl/extractor/cspan.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								youtube_dl/extractor/cspan.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
|  | ||||
| class CSpanIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www.c-spanvideo.org/program/(.*)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.c-spanvideo.org/program/HolderonV', | ||||
|         u'file': u'315139.flv', | ||||
|         u'md5': u'74a623266956f69e4df0068ab6c80fe4', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Attorney General Eric Holder on Voting Rights Act Decision" | ||||
|         }, | ||||
|         u'skip': u'Requires rtmpdump' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         prog_name = mobj.group(1) | ||||
|         webpage = self._download_webpage(url, prog_name) | ||||
|         video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id') | ||||
|         data = compat_urllib_parse.urlencode({'programid': video_id, | ||||
|                                               'dynamic':'1'}) | ||||
|         info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data | ||||
|         video_info = self._download_webpage(info_url, video_id, u'Downloading video info') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         title = self._html_search_regex(r'<string name="title">(.*?)</string>', | ||||
|                                         video_info, 'title') | ||||
|         description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"', | ||||
|                                               webpage, 'description', | ||||
|                                               flags=re.MULTILINE|re.DOTALL) | ||||
|         thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.*?)"', | ||||
|                                             webpage, 'thumbnail') | ||||
|  | ||||
|         url = self._search_regex(r'<string name="URL">(.*?)</string>', | ||||
|                                  video_info, 'video url') | ||||
|         url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443') | ||||
|         path = self._search_regex(r'<string name="path">(.*?)</string>', | ||||
|                             video_info, 'rtmp play path') | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': title, | ||||
|                 'ext': 'flv', | ||||
|                 'url': url, | ||||
|                 'play_path': path, | ||||
|                 'description': description, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 } | ||||
							
								
								
									
										86
									
								
								youtube_dl/extractor/dailymotion.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								youtube_dl/extractor/dailymotion.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,86 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
| class DailymotionIE(InfoExtractor): | ||||
|     """Information Extractor for Dailymotion""" | ||||
|  | ||||
|     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' | ||||
|     IE_NAME = u'dailymotion' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', | ||||
|         u'file': u'x33vw9.mp4', | ||||
|         u'md5': u'392c4b85a60a90dc4792da41ce3144eb', | ||||
|         u'info_dict': { | ||||
|             u"uploader": u"Alex and Van .",  | ||||
|             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract id and simplified title from URL | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group(1).split('_')[0].split('?')[0] | ||||
|  | ||||
|         video_extension = 'mp4' | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         request = compat_urllib_request.Request(url) | ||||
|         request.add_header('Cookie', 'family_filter=off') | ||||
|         webpage = self._download_webpage(request, video_id) | ||||
|  | ||||
|         # Extract URL, uploader and title from webpage | ||||
|         self.report_extraction(video_id) | ||||
|         mobj = re.search(r'\s*var flashvars = (.*)', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract media URL') | ||||
|         flashvars = compat_urllib_parse.unquote(mobj.group(1)) | ||||
|  | ||||
|         for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']: | ||||
|             if key in flashvars: | ||||
|                 max_quality = key | ||||
|                 self.to_screen(u'Using %s' % key) | ||||
|                 break | ||||
|         else: | ||||
|             raise ExtractorError(u'Unable to extract video URL') | ||||
|  | ||||
|         mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract video URL') | ||||
|  | ||||
|         video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/') | ||||
|  | ||||
|         # TODO: support choosing qualities | ||||
|  | ||||
|         mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract title') | ||||
|         video_title = unescapeHTML(mobj.group('title')) | ||||
|  | ||||
|         video_uploader = None | ||||
|         video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', | ||||
|                                              # Looking for official user | ||||
|                                              r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'], | ||||
|                                             webpage, 'video uploader') | ||||
|  | ||||
|         video_upload_date = None | ||||
|         mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage) | ||||
|         if mobj is not None: | ||||
|             video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date':  video_upload_date, | ||||
|             'title':    video_title, | ||||
|             'ext':      video_extension, | ||||
|         }] | ||||
							
								
								
									
										60
									
								
								youtube_dl/extractor/depositfiles.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								youtube_dl/extractor/depositfiles.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| import re | ||||
| import os | ||||
| import socket | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DepositFilesIE(InfoExtractor): | ||||
|     """Information extractor for depositfiles.com""" | ||||
|  | ||||
|     _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         file_id = url.split('/')[-1] | ||||
|         # Rebuild url in english locale | ||||
|         url = 'http://depositfiles.com/en/files/' + file_id | ||||
|  | ||||
|         # Retrieve file webpage with 'Free download' button pressed | ||||
|         free_download_indication = { 'gateway_result' : '1' } | ||||
|         request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication)) | ||||
|         try: | ||||
|             self.report_download_webpage(file_id) | ||||
|             webpage = compat_urllib_request.urlopen(request).read() | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err)) | ||||
|  | ||||
|         # Search for the real file URL | ||||
|         mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage) | ||||
|         if (mobj is None) or (mobj.group(1) is None): | ||||
|             # Try to figure out reason of the error. | ||||
|             mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) | ||||
|             if (mobj is not None) and (mobj.group(1) is not None): | ||||
|                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() | ||||
|                 raise ExtractorError(u'%s' % restriction_message) | ||||
|             else: | ||||
|                 raise ExtractorError(u'Unable to extract download URL from: %s' % url) | ||||
|  | ||||
|         file_url = mobj.group(1) | ||||
|         file_extension = os.path.splitext(file_url)[1][1:] | ||||
|  | ||||
|         # Search for file title | ||||
|         file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title') | ||||
|  | ||||
|         return [{ | ||||
|             'id':       file_id.decode('utf-8'), | ||||
|             'url':      file_url.decode('utf-8'), | ||||
|             'uploader': None, | ||||
|             'upload_date':  None, | ||||
|             'title':    file_title, | ||||
|             'ext':      file_extension.decode('utf-8'), | ||||
|         }] | ||||
							
								
								
									
										122
									
								
								youtube_dl/extractor/eighttracks.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								youtube_dl/extractor/eighttracks.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,122 @@ | ||||
| import itertools | ||||
| import json | ||||
| import random | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EightTracksIE(InfoExtractor): | ||||
|     IE_NAME = '8tracks' | ||||
|     _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$' | ||||
|     _TEST = { | ||||
|         u"name": u"EightTracks", | ||||
|         u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a", | ||||
|         u"playlist": [ | ||||
|             { | ||||
|                 u"file": u"11885610.m4a", | ||||
|                 u"md5": u"96ce57f24389fc8734ce47f4c1abcc55", | ||||
|                 u"info_dict": { | ||||
|                     u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", | ||||
|                     u"uploader_id": u"ytdl" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file": u"11885608.m4a", | ||||
|                 u"md5": u"4ab26f05c1f7291ea460a3920be8021f", | ||||
|                 u"info_dict": { | ||||
|                     u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", | ||||
|                     u"uploader_id": u"ytdl" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file": u"11885679.m4a", | ||||
|                 u"md5": u"d30b5b5f74217410f4689605c35d1fd7", | ||||
|                 u"info_dict": { | ||||
|                     u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", | ||||
|                     u"uploader_id": u"ytdl" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file": u"11885680.m4a", | ||||
|                 u"md5": u"4eb0a669317cd725f6bbd336a29f923a", | ||||
|                 u"info_dict": { | ||||
|                     u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", | ||||
|                     u"uploader_id": u"ytdl" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file": u"11885682.m4a", | ||||
|                 u"md5": u"1893e872e263a2705558d1d319ad19e8", | ||||
|                 u"info_dict": { | ||||
|                     u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", | ||||
|                     u"uploader_id": u"ytdl" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file": u"11885683.m4a", | ||||
|                 u"md5": u"b673c46f47a216ab1741ae8836af5899", | ||||
|                 u"info_dict": { | ||||
|                     u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", | ||||
|                     u"uploader_id": u"ytdl" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file": u"11885684.m4a", | ||||
|                 u"md5": u"1d74534e95df54986da7f5abf7d842b7", | ||||
|                 u"info_dict": { | ||||
|                     u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad", | ||||
|                     u"uploader_id": u"ytdl" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file": u"11885685.m4a", | ||||
|                 u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0", | ||||
|                 u"info_dict": { | ||||
|                     u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", | ||||
|                     u"uploader_id": u"ytdl" | ||||
|                 } | ||||
|             } | ||||
|         ] | ||||
|     } | ||||
|  | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         playlist_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL) | ||||
|         data = json.loads(json_like) | ||||
|  | ||||
|         session = str(random.randint(0, 1000000000)) | ||||
|         mix_id = data['id'] | ||||
|         track_count = data['tracks_count'] | ||||
|         first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id) | ||||
|         next_url = first_url | ||||
|         res = [] | ||||
|         for i in itertools.count(): | ||||
|             api_json = self._download_webpage(next_url, playlist_id, | ||||
|                 note=u'Downloading song information %s/%s' % (str(i+1), track_count), | ||||
|                 errnote=u'Failed to download song information') | ||||
|             api_data = json.loads(api_json) | ||||
|             track_data = api_data[u'set']['track'] | ||||
|             info = { | ||||
|                 'id': track_data['id'], | ||||
|                 'url': track_data['track_file_stream_url'], | ||||
|                 'title': track_data['performer'] + u' - ' + track_data['name'], | ||||
|                 'raw_title': track_data['name'], | ||||
|                 'uploader_id': data['user']['login'], | ||||
|                 'ext': 'm4a', | ||||
|             } | ||||
|             res.append(info) | ||||
|             if api_data['set']['at_last_track']: | ||||
|                 break | ||||
|             next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id']) | ||||
|         return res | ||||
							
								
								
									
										78
									
								
								youtube_dl/extractor/escapist.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								youtube_dl/extractor/escapist.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EscapistIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', | ||||
|         u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4', | ||||
|         u'md5': u'c6793dbda81388f4264c1ba18684a74d', | ||||
|         u'info_dict': { | ||||
|             u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",  | ||||
|             u"uploader": u"the-escapist-presents",  | ||||
|             u"title": u"Breaking Down Baldur's Gate" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         showName = mobj.group('showname') | ||||
|         videoId = mobj.group('episode') | ||||
|  | ||||
|         self.report_extraction(videoId) | ||||
|         webpage = self._download_webpage(url, videoId) | ||||
|  | ||||
|         videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"', | ||||
|             webpage, u'description', fatal=False) | ||||
|  | ||||
|         imgUrl = self._html_search_regex('<meta property="og:image" content="([^"]*)"', | ||||
|             webpage, u'thumbnail', fatal=False) | ||||
|  | ||||
|         playerUrl = self._html_search_regex('<meta property="og:video" content="([^"]*)"', | ||||
|             webpage, u'player url') | ||||
|  | ||||
|         title = self._html_search_regex('<meta name="title" content="([^"]*)"', | ||||
|             webpage, u'player url').split(' : ')[-1] | ||||
|  | ||||
|         configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url') | ||||
|         configUrl = compat_urllib_parse.unquote(configUrl) | ||||
|  | ||||
|         configJSON = self._download_webpage(configUrl, videoId, | ||||
|                                             u'Downloading configuration', | ||||
|                                             u'unable to download configuration') | ||||
|  | ||||
|         # Technically, it's JavaScript, not JSON | ||||
|         configJSON = configJSON.replace("'", '"') | ||||
|  | ||||
|         try: | ||||
|             config = json.loads(configJSON) | ||||
|         except (ValueError,) as err: | ||||
|             raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err)) | ||||
|  | ||||
|         playlist = config['playlist'] | ||||
|         videoUrl = playlist[1]['url'] | ||||
|  | ||||
|         info = { | ||||
|             'id': videoId, | ||||
|             'url': videoUrl, | ||||
|             'uploader': showName, | ||||
|             'upload_date': None, | ||||
|             'title': title, | ||||
|             'ext': 'mp4', | ||||
|             'thumbnail': imgUrl, | ||||
|             'description': videoDesc, | ||||
|             'player_url': playerUrl, | ||||
|         } | ||||
|  | ||||
|         return [info] | ||||
							
								
								
									
										120
									
								
								youtube_dl/extractor/facebook.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										120
									
								
								youtube_dl/extractor/facebook.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,120 @@ | ||||
| import json | ||||
| import netrc | ||||
| import re | ||||
| import socket | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FacebookIE(InfoExtractor): | ||||
|     """Information Extractor for Facebook""" | ||||
|  | ||||
|     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' | ||||
|     _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' | ||||
|     _NETRC_MACHINE = 'facebook' | ||||
|     IE_NAME = u'facebook' | ||||
|     _TEST = { | ||||
|         u'url': u'https://www.facebook.com/photo.php?v=120708114770723', | ||||
|         u'file': u'120708114770723.mp4', | ||||
|         u'md5': u'48975a41ccc4b7a581abd68651c1a5a8', | ||||
|         u'info_dict': { | ||||
|             u"duration": 279,  | ||||
|             u"title": u"PEOPLE ARE AWESOME 2013" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def report_login(self): | ||||
|         """Report attempt to log in.""" | ||||
|         self.to_screen(u'Logging in') | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         if self._downloader is None: | ||||
|             return | ||||
|  | ||||
|         useremail = None | ||||
|         password = None | ||||
|         downloader_params = self._downloader.params | ||||
|  | ||||
|         # Attempt to use provided username and password or .netrc data | ||||
|         if downloader_params.get('username', None) is not None: | ||||
|             useremail = downloader_params['username'] | ||||
|             password = downloader_params['password'] | ||||
|         elif downloader_params.get('usenetrc', False): | ||||
|             try: | ||||
|                 info = netrc.netrc().authenticators(self._NETRC_MACHINE) | ||||
|                 if info is not None: | ||||
|                     useremail = info[0] | ||||
|                     password = info[2] | ||||
|                 else: | ||||
|                     raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) | ||||
|             except (IOError, netrc.NetrcParseError) as err: | ||||
|                 self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err)) | ||||
|                 return | ||||
|  | ||||
|         if useremail is None: | ||||
|             return | ||||
|  | ||||
|         # Log in | ||||
|         login_form = { | ||||
|             'email': useremail, | ||||
|             'pass': password, | ||||
|             'login': 'Log+In' | ||||
|             } | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) | ||||
|         try: | ||||
|             self.report_login() | ||||
|             login_results = compat_urllib_request.urlopen(request).read() | ||||
|             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: | ||||
|                 self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') | ||||
|                 return | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) | ||||
|             return | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group('ID') | ||||
|  | ||||
|         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         BEFORE = '{swf.addParam(param[0], param[1]);});\n' | ||||
|         AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' | ||||
|         m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage) | ||||
|         if not m: | ||||
|             raise ExtractorError(u'Cannot parse data') | ||||
|         data = dict(json.loads(m.group(1))) | ||||
|         params_raw = compat_urllib_parse.unquote(data['params']) | ||||
|         params = json.loads(params_raw) | ||||
|         video_data = params['video_data'][0] | ||||
|         video_url = video_data.get('hd_src') | ||||
|         if not video_url: | ||||
|             video_url = video_data['sd_src'] | ||||
|         if not video_url: | ||||
|             raise ExtractorError(u'Cannot find video URL') | ||||
|         video_duration = int(video_data['video_duration']) | ||||
|         thumbnail = video_data['thumbnail_src'] | ||||
|  | ||||
|         video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>', | ||||
|             webpage, u'title') | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'duration': video_duration, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|         return [info] | ||||
							
								
								
									
										67
									
								
								youtube_dl/extractor/flickr.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								youtube_dl/extractor/flickr.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,67 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FlickrIE(InfoExtractor): | ||||
|     """Information Extractor for Flickr videos""" | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', | ||||
|         u'file': u'5645318632.mp4', | ||||
|         u'md5': u'6fdc01adbc89d72fc9c4f15b4a4ba87b', | ||||
|         u'info_dict': { | ||||
|             u"description": u"Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",  | ||||
|             u"uploader_id": u"forestwander-nature-pictures",  | ||||
|             u"title": u"Dark Hollow Waterfalls" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         video_uploader_id = mobj.group('uploader_id') | ||||
|         webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|  | ||||
|         secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret') | ||||
|  | ||||
|         first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self' | ||||
|         first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage') | ||||
|  | ||||
|         node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>', | ||||
|             first_xml, u'node_id') | ||||
|  | ||||
|         second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1' | ||||
|         second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract video url') | ||||
|         video_url = mobj.group(1) + unescapeHTML(mobj.group(2)) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')', | ||||
|             webpage, u'video title') | ||||
|  | ||||
|         video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')', | ||||
|             webpage, u'description', fatal=False) | ||||
|  | ||||
|         thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')', | ||||
|             webpage, u'thumbnail', fatal=False) | ||||
|  | ||||
|         return [{ | ||||
|             'id':          video_id, | ||||
|             'url':         video_url, | ||||
|             'ext':         'mp4', | ||||
|             'title':       video_title, | ||||
|             'description': video_description, | ||||
|             'thumbnail':   thumbnail, | ||||
|             'uploader_id': video_uploader_id, | ||||
|         }] | ||||
							
								
								
									
										40
									
								
								youtube_dl/extractor/funnyordie.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								youtube_dl/extractor/funnyordie.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class FunnyOrDieIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', | ||||
|         u'file': u'0732f586d7.mp4', | ||||
|         u'md5': u'f647e9e90064b53b6e046e75d0241fbd', | ||||
|         u'info_dict': { | ||||
|             u"description": u"Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.",  | ||||
|             u"title": u"Heart-Shaped Box: Literal Video Version" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', | ||||
|             webpage, u'video URL', flags=re.DOTALL) | ||||
|  | ||||
|         title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>", | ||||
|             r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL) | ||||
|  | ||||
|         video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"', | ||||
|             webpage, u'description', fatal=False, flags=re.DOTALL) | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': title, | ||||
|             'description': video_description, | ||||
|         } | ||||
|         return [info] | ||||
							
								
								
									
										45
									
								
								youtube_dl/extractor/gamespot.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								youtube_dl/extractor/gamespot.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,45 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
| class GameSpotIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-([^/d]+)/' | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", | ||||
|         u"file": u"6410818.mp4", | ||||
|         u"md5": u"5569d64ca98db01f0177c934fe8c1e9b", | ||||
|         u"info_dict": { | ||||
|             u"title": u"Arma III - Community Guide: SITREP I", | ||||
|             u"upload_date": u"20130627",  | ||||
|         } | ||||
|     } | ||||
|  | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(3).split("-")[-1] | ||||
|         info_url = "http://www.gamespot.com/pages/video_player/xml.php?id="+str(video_id) | ||||
|         info_xml = self._download_webpage(info_url, video_id) | ||||
|         doc = xml.etree.ElementTree.fromstring(info_xml) | ||||
|         clip_el = doc.find('./playList/clip') | ||||
|  | ||||
|         video_url = clip_el.find('./URI').text | ||||
|         title = clip_el.find('./title').text | ||||
|         ext = video_url.rpartition('.')[2] | ||||
|         thumbnail_url = clip_el.find('./screenGrabURI').text | ||||
|         view_count = int(clip_el.find('./views').text) | ||||
|         upload_date = unified_strdate(clip_el.find('./postDate').text) | ||||
|  | ||||
|         return [{ | ||||
|             'id'          : video_id, | ||||
|             'url'         : video_url, | ||||
|             'ext'         : ext, | ||||
|             'title'       : title, | ||||
|             'thumbnail'   : thumbnail_url, | ||||
|             'upload_date' : upload_date, | ||||
|             'view_count'  : view_count, | ||||
|         }] | ||||
							
								
								
									
										68
									
								
								youtube_dl/extractor/gametrailers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								youtube_dl/extractor/gametrailers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,68 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| class GametrailersIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', | ||||
|         u'file': u'zbvr8i.flv', | ||||
|         u'md5': u'c3edbc995ab4081976e16779bd96a878', | ||||
|         u'info_dict': { | ||||
|             u"title": u"E3 2013: Debut Trailer" | ||||
|         }, | ||||
|         u'skip': u'Requires rtmpdump' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_type = mobj.group('type') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         if video_type == 'full-episodes': | ||||
|             mgid_re = r'data-video="(?P<mgid>mgid:.*?)"' | ||||
|         else: | ||||
|             mgid_re = r'data-contentId=\'(?P<mgid>mgid:.*?)\'' | ||||
|         mgid = self._search_regex(mgid_re, webpage, u'mgid') | ||||
|         data = compat_urllib_parse.urlencode({'uri': mgid, 'acceptMethods': 'fms'}) | ||||
|  | ||||
|         info_page = self._download_webpage('http://www.gametrailers.com/feeds/mrss?' + data, | ||||
|                                            video_id, u'Downloading video info') | ||||
|         links_webpage = self._download_webpage('http://www.gametrailers.com/feeds/mediagen/?' + data, | ||||
|                                                video_id, u'Downloading video urls info') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.* | ||||
|                       <description><!\[CDATA\[(?P<description>.*?)\]\]></description>.* | ||||
|                       <image>.* | ||||
|                         <url>(?P<thumb>.*?)</url>.* | ||||
|                       </image>''' | ||||
|  | ||||
|         m_info = re.search(info_re, info_page, re.VERBOSE|re.DOTALL) | ||||
|         if m_info is None: | ||||
|             raise ExtractorError(u'Unable to extract video info') | ||||
|         video_title = m_info.group('title') | ||||
|         video_description = m_info.group('description') | ||||
|         video_thumb = m_info.group('thumb') | ||||
|  | ||||
|         m_urls = list(re.finditer(r'<src>(?P<url>.*)</src>', links_webpage)) | ||||
|         if m_urls is None or len(m_urls) == 0: | ||||
|             raise ExtractorError(u'Unable to extract video url') | ||||
|         # They are sorted from worst to best quality | ||||
|         video_url = m_urls[-1].group('url') | ||||
|  | ||||
|         return {'url':         video_url, | ||||
|                 'id':          video_id, | ||||
|                 'title':       video_title, | ||||
|                 # Videos are actually flv not mp4 | ||||
|                 'ext':         'flv', | ||||
|                 'thumbnail':   video_thumb, | ||||
|                 'description': video_description, | ||||
|                 } | ||||
							
								
								
									
										159
									
								
								youtube_dl/extractor/generic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										159
									
								
								youtube_dl/extractor/generic.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,159 @@ | ||||
| import os | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| # Last-resort extractor: its _VALID_URL matches every URL, and it looks for a | ||||
| # media URL in the page with a series of increasingly loose patterns. | ||||
| class GenericIE(InfoExtractor): | ||||
|     IE_DESC = u'Generic downloader that works on some sites' | ||||
|     _VALID_URL = r'.*' | ||||
|     IE_NAME = u'generic' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', | ||||
|         u'file': u'13601338388002.mp4', | ||||
|         u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', | ||||
|         u'info_dict': { | ||||
|             u"uploader": u"www.hodiho.fr",  | ||||
|             u"title": u"R\u00e9gis plante sa Jeep" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def report_download_webpage(self, video_id): | ||||
|         """Report webpage download, warning first that the generic extractor is in use. | ||||
|  | ||||
|         The warning is suppressed when the downloader's 'test' parameter is set. | ||||
|         """ | ||||
|         if not self._downloader.params.get('test', False): | ||||
|             self._downloader.report_warning(u'Falling back on generic information extractor.') | ||||
|         super(GenericIE, self).report_download_webpage(video_id) | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
|         """Report that a redirect to new_url is being followed.""" | ||||
|         self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url) | ||||
|  | ||||
|     def _test_redirect(self, url): | ||||
|         """Check if it is a redirect, like url shorteners, in case return the new url. | ||||
|  | ||||
|         A HEAD request is sent through a purpose-built opener. Returns False | ||||
|         when the final URL equals `url`, otherwise reports and returns the | ||||
|         final URL. Raises ExtractorError if the opener returns no response. | ||||
|         """ | ||||
|         # Request subclass whose HTTP method is always HEAD. | ||||
|         class HeadRequest(compat_urllib_request.Request): | ||||
|             def get_method(self): | ||||
|                 return "HEAD" | ||||
|  | ||||
|         class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler): | ||||
|             """ | ||||
|             Subclass the HTTPRedirectHandler to make it use our | ||||
|             HeadRequest also on the redirected URL | ||||
|             """ | ||||
|             def redirect_request(self, req, fp, code, msg, headers, newurl): | ||||
|                 if code in (301, 302, 303, 307): | ||||
|                     # Spaces in the target are percent-encoded and the | ||||
|                     # body-related headers are dropped before re-issuing. | ||||
|                     newurl = newurl.replace(' ', '%20') | ||||
|                     newheaders = dict((k,v) for k,v in req.headers.items() | ||||
|                                       if k.lower() not in ("content-length", "content-type")) | ||||
|                     return HeadRequest(newurl, | ||||
|                                        headers=newheaders, | ||||
|                                        origin_req_host=req.get_origin_req_host(), | ||||
|                                        unverifiable=True) | ||||
|                 else: | ||||
|                     # Any other status code is surfaced as an HTTP error. | ||||
|                     raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp) | ||||
|  | ||||
|         class HTTPMethodFallback(compat_urllib_request.BaseHandler): | ||||
|             """ | ||||
|             Fallback to GET if HEAD is not allowed (405 HTTP error) | ||||
|             """ | ||||
|             def http_error_405(self, req, fp, code, msg, headers): | ||||
|                 # Read and close the error response before retrying. | ||||
|                 fp.read() | ||||
|                 fp.close() | ||||
|  | ||||
|                 # Retry the same URL with a plain Request instead of HeadRequest. | ||||
|                 newheaders = dict((k,v) for k,v in req.headers.items() | ||||
|                                   if k.lower() not in ("content-length", "content-type")) | ||||
|                 return self.parent.open(compat_urllib_request.Request(req.get_full_url(), | ||||
|                                                  headers=newheaders, | ||||
|                                                  origin_req_host=req.get_origin_req_host(), | ||||
|                                                  unverifiable=True)) | ||||
|  | ||||
|         # Build our opener | ||||
|         opener = compat_urllib_request.OpenerDirector() | ||||
|         for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler, | ||||
|                         HTTPMethodFallback, HEADRedirectHandler, | ||||
|                         compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]: | ||||
|             opener.add_handler(handler()) | ||||
|  | ||||
|         response = opener.open(HeadRequest(url)) | ||||
|         if response is None: | ||||
|             raise ExtractorError(u'Invalid URL protocol') | ||||
|         new_url = response.geturl() | ||||
|  | ||||
|         # An unchanged URL means no redirect took place. | ||||
|         if url == new_url: | ||||
|             return False | ||||
|  | ||||
|         self.report_following_redirect(new_url) | ||||
|         return new_url | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         """Follow redirects, then search the page for a media URL. | ||||
|  | ||||
|         Returns a one-element list: a url result for the redirect target when | ||||
|         the URL redirects, otherwise the info dictionary. Raises | ||||
|         ExtractorError when no pattern yields a media URL. | ||||
|         """ | ||||
|         # A redirected URL is handed back as a url result rather than parsed here. | ||||
|         new_url = self._test_redirect(url) | ||||
|         if new_url: return [self.url_result(new_url)] | ||||
|  | ||||
|         # Provisional id: the last path component of the page URL. | ||||
|         video_id = url.split('/')[-1] | ||||
|         try: | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|         except ValueError: | ||||
|             # since this is the last-resort InfoExtractor, if | ||||
|             # this error is thrown, it'll be thrown here | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|             # Broaden the search a little bit | ||||
|             mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|             # Broaden the search a little bit: JWPlayer JS loader | ||||
|             mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|             # Try to find twitter cards info | ||||
|             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) | ||||
|         if mobj is None: | ||||
|             # We look for Open Graph info: | ||||
|             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am) | ||||
|             m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) | ||||
|             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: | ||||
|             if m_video_type is not None: | ||||
|                 mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         # It's possible that one of the regexes | ||||
|         # matched, but returned an empty group: | ||||
|         if mobj.group(1) is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         # From here on the id is derived from the media URL, not the page URL. | ||||
|         video_url = compat_urllib_parse.unquote(mobj.group(1)) | ||||
|         video_id = os.path.basename(video_url) | ||||
|  | ||||
|         # here's a fun little line of code for you: | ||||
|         # (the extension without its leading dot; the id is the basename without it) | ||||
|         video_extension = os.path.splitext(video_id)[1][1:] | ||||
|         video_id = os.path.splitext(video_id)[0] | ||||
|  | ||||
|         # it's tempting to parse this further, but you would | ||||
|         # have to take into account all the variations like | ||||
|         #   Video Title - Site Name | ||||
|         #   Site Name | Video Title | ||||
|         #   Video Title - Tagline | Site Name | ||||
|         # and so on and so forth; it's just not practical | ||||
|         video_title = self._html_search_regex(r'<title>(.*)</title>', | ||||
|             webpage, u'video title', default=u'video', flags=re.DOTALL) | ||||
|  | ||||
|         # video uploader is domain name | ||||
|         video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*', | ||||
|             url, u'video uploader') | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date':  None, | ||||
|             'title':    video_title, | ||||
|             'ext':      video_extension, | ||||
|         }] | ||||
							
								
								
									
										96
									
								
								youtube_dl/extractor/googleplus.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								youtube_dl/extractor/googleplus.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,96 @@ | ||||
| # coding: utf-8 | ||||
|  | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GooglePlusIE(InfoExtractor): | ||||
|     IE_DESC = u'Google Plus' | ||||
|     _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)' | ||||
|     IE_NAME = u'plus.google' | ||||
|     _TEST = { | ||||
|         u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH", | ||||
|         u"file": u"ZButuJc6CtH.flv", | ||||
|         u"info_dict": { | ||||
|             u"upload_date": u"20120613", | ||||
|             u"uploader": u"井上ヨシマサ", | ||||
|             u"title": u"嘆きの天使 降臨" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract id from URL | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         post_url = mobj.group(0) | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         video_extension = 'flv' | ||||
|  | ||||
|         # Step 1, Retrieve post webpage to extract further information | ||||
|         webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         # Extract update date | ||||
|         upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>', | ||||
|             webpage, u'upload date', fatal=False) | ||||
|         if upload_date: | ||||
|             # Convert timestring to a format suitable for filename | ||||
|             upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d") | ||||
|             upload_date = upload_date.strftime('%Y%m%d') | ||||
|  | ||||
|         # Extract uploader | ||||
|         uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>', | ||||
|             webpage, u'uploader', fatal=False) | ||||
|  | ||||
|         # Extract title | ||||
|         # Get the first line for title | ||||
|         video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]', | ||||
|             webpage, 'title', default=u'NA') | ||||
|  | ||||
|         # Step 2, Simulate clicking the image box to launch video | ||||
|         DOMAIN = 'https://plus.google.com' | ||||
|         video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN), | ||||
|             webpage, u'video page URL') | ||||
|         if not video_page.startswith(DOMAIN): | ||||
|             video_page = DOMAIN + video_page | ||||
|  | ||||
|         webpage = self._download_webpage(video_page, video_id, u'Downloading video page') | ||||
|  | ||||
|         # Extract video links on video page | ||||
|         """Extract video links of all sizes""" | ||||
|         pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"' | ||||
|         mobj = re.findall(pattern, webpage) | ||||
|         if len(mobj) == 0: | ||||
|             raise ExtractorError(u'Unable to extract video links') | ||||
|  | ||||
|         # Sort in resolution | ||||
|         links = sorted(mobj) | ||||
|  | ||||
|         # Choose the lowest of the sort, i.e. highest resolution | ||||
|         video_url = links[-1] | ||||
|         # Only get the url. The resolution part in the tuple has no use anymore | ||||
|         video_url = video_url[-1] | ||||
|         # Treat escaped \u0026 style hex | ||||
|         try: | ||||
|             video_url = video_url.decode("unicode_escape") | ||||
|         except AttributeError: # Python 3 | ||||
|             video_url = bytes(video_url, 'ascii').decode('unicode-escape') | ||||
|  | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'uploader': uploader, | ||||
|             'upload_date':  upload_date, | ||||
|             'title':    video_title, | ||||
|             'ext':      video_extension, | ||||
|         }] | ||||
							
								
								
									
										39
									
								
								youtube_dl/extractor/googlesearch.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								youtube_dl/extractor/googlesearch.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| import itertools | ||||
| import re | ||||
|  | ||||
| from .common import SearchInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GoogleSearchIE(SearchInfoExtractor): | ||||
|     IE_DESC = u'Google Video search' | ||||
|     _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"' | ||||
|     _MAX_RESULTS = 1000 | ||||
|     IE_NAME = u'video.google:search' | ||||
|     _SEARCH_KEY = 'gvsearch' | ||||
|  | ||||
|     def _get_n_results(self, query, n): | ||||
|         """Get a specified number of results for a query""" | ||||
|  | ||||
|         res = { | ||||
|             '_type': 'playlist', | ||||
|             'id': query, | ||||
|             'entries': [] | ||||
|         } | ||||
|  | ||||
|         for pagenum in itertools.count(1): | ||||
|             result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10) | ||||
|             webpage = self._download_webpage(result_url, u'gvsearch:' + query, | ||||
|                                              note='Downloading result page ' + str(pagenum)) | ||||
|  | ||||
|             for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage): | ||||
|                 e = { | ||||
|                     '_type': 'url', | ||||
|                     'url': mobj.group(1) | ||||
|                 } | ||||
|                 res['entries'].append(e) | ||||
|  | ||||
|             if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage): | ||||
|                 return res | ||||
							
								
								
									
										48
									
								
								youtube_dl/extractor/hotnewhiphop.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								youtube_dl/extractor/hotnewhiphop.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| import re | ||||
| import base64 | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class HotNewHipHopIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html' | ||||
|     _TEST = { | ||||
|         u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'", | ||||
|         u'file': u'1435540.mp3', | ||||
|         u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Freddie Gibbs Songs - Lay It Down" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('id') | ||||
|  | ||||
|         webpage_src = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url_base64 = self._search_regex(r'data-path="(.*?)"', | ||||
|             webpage_src, u'video URL', fatal=False) | ||||
|  | ||||
|         if video_url_base64 == None: | ||||
|             video_url = self._search_regex(r'"contentUrl" content="(.*?)"', webpage_src, | ||||
|                 u'video URL') | ||||
|             return self.url_result(video_url, ie='Youtube') | ||||
|  | ||||
|         video_url = base64.b64decode(video_url_base64).decode('utf-8') | ||||
|  | ||||
|         video_title = self._html_search_regex(r"<title>(.*)</title>", | ||||
|             webpage_src, u'title') | ||||
|          | ||||
|         # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video. | ||||
|         thumbnail = self._html_search_regex(r'"og:image" content="(.*)"', | ||||
|             webpage_src, u'thumbnail', fatal=False) | ||||
|  | ||||
|         results = [{ | ||||
|                     'id': video_id, | ||||
|                     'url' : video_url, | ||||
|                     'title' : video_title, | ||||
|                     'thumbnail' : thumbnail, | ||||
|                     'ext' : 'mp3', | ||||
|                     }] | ||||
|         return results | ||||
							
								
								
									
										46
									
								
								youtube_dl/extractor/howcast.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								youtube_dl/extractor/howcast.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,46 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class HowcastIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', | ||||
|         u'file': u'390161.mp4', | ||||
|         u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138', | ||||
|         u'info_dict': { | ||||
|             u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",  | ||||
|             u"title": u"How to Tie a Square Knot Properly" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         webpage_url = 'http://www.howcast.com/videos/' + video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)', | ||||
|             webpage, u'video URL') | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', | ||||
|             webpage, u'title') | ||||
|  | ||||
|         video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', | ||||
|             webpage, u'description', fatal=False) | ||||
|  | ||||
|         thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'', | ||||
|             webpage, u'thumbnail', fatal=False) | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'ext':      'mp4', | ||||
|             'title':    video_title, | ||||
|             'description': video_description, | ||||
|             'thumbnail': thumbnail, | ||||
|         }] | ||||
							
								
								
									
										71
									
								
								youtube_dl/extractor/hypem.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								youtube_dl/extractor/hypem.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,71 @@ | ||||
| import json | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class HypemIE(InfoExtractor): | ||||
|     """Information Extractor for hypem""" | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', | ||||
|         u'file': u'1v6ga.mp3', | ||||
|         u'md5': u'b9cc91b5af8995e9f0c1cee04c575828', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Tame" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         track_id = mobj.group(1) | ||||
|  | ||||
|         data = { 'ax': 1, 'ts': time.time() } | ||||
|         data_encoded = compat_urllib_parse.urlencode(data) | ||||
|         complete_url = url + "?" + data_encoded | ||||
|         request = compat_urllib_request.Request(complete_url) | ||||
|         response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url') | ||||
|         cookie = urlh.headers.get('Set-Cookie', '') | ||||
|  | ||||
|         self.report_extraction(track_id) | ||||
|  | ||||
|         html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>', | ||||
|             response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip() | ||||
|         try: | ||||
|             track_list = json.loads(html_tracks) | ||||
|             track = track_list[u'tracks'][0] | ||||
|         except ValueError: | ||||
|             raise ExtractorError(u'Hypemachine contained invalid JSON.') | ||||
|  | ||||
|         key = track[u"key"] | ||||
|         track_id = track[u"id"] | ||||
|         artist = track[u"artist"] | ||||
|         title = track[u"song"] | ||||
|  | ||||
|         serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key)) | ||||
|         request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'}) | ||||
|         request.add_header('cookie', cookie) | ||||
|         song_data_json = self._download_webpage(request, track_id, u'Downloading metadata') | ||||
|         try: | ||||
|             song_data = json.loads(song_data_json) | ||||
|         except ValueError: | ||||
|             raise ExtractorError(u'Hypemachine contained invalid JSON.') | ||||
|         final_url = song_data[u"url"] | ||||
|  | ||||
|         return [{ | ||||
|             'id':       track_id, | ||||
|             'url':      final_url, | ||||
|             'ext':      "mp3", | ||||
|             'title':    title, | ||||
|             'artist':   artist, | ||||
|         }] | ||||
							
								
								
									
										39
									
								
								youtube_dl/extractor/ina.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								youtube_dl/extractor/ina.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class InaIE(InfoExtractor):
    """Information Extractor for Ina.fr"""
    # The 'id' group is the notice identifier ("I" followed by digits) taken
    # from the URL path; scheme and "www." are both optional.
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
    _TEST = {
        u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
        u'file': u'I12055569.mp4',
        u'md5': u'a667021bf2b41f8dc6049479d9bb38a3',
        u'info_dict': {
            u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\""
        }
    }

    def _real_extract(self, url):
        """Return a one-element list describing the video behind ``url``.

        The notice id is read from the URL, the matching MRSS document is
        downloaded from player.ina.fr, and the media URL and title are pulled
        out of that document with regular expressions.
        """
        # NOTE(review): assumes url already matches _VALID_URL; a non-matching
        # url would make mobj None here -- confirm the caller guarantees this.
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        video_extension = 'mp4'
        webpage = self._download_webpage(mrss_url, video_id)

        self.report_extraction(video_id)

        # The dots of the host name are escaped so that only the literal
        # host "mp4.ina.fr" is accepted, not any character in their place.
        video_url = self._html_search_regex(
            r'<media:player url="(?P<mp4url>http://mp4\.ina\.fr/[^"]+\.mp4)',
            webpage, u'video URL')

        video_title = self._search_regex(
            r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
            webpage, u'title')

        return [{
            'id':       video_id,
            'url':      video_url,
            'ext':      video_extension,
            'title':    video_title,
        }]
							
								
								
									
										62
									
								
								youtube_dl/extractor/infoq.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								youtube_dl/extractor/infoq.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,62 @@ | ||||
| import base64 | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
class InfoQIE(InfoExtractor):
    """Information extractor for infoq.com presentation pages."""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
    _TEST = {
        u"name": u"InfoQ",
        u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
        u"file": u"12-jan-pythonthings.mp4",
        u"info_dict": {
            u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
            u"title": u"A Few of My Favorite [Python] Things"
        },
        u"params": {
            u"skip_download": True
        }
    }

    def _real_extract(self, url):
        """Return a one-element list describing the presentation at ``url``.

        Raises ExtractorError when the page does not contain the
        ``jsclassref`` value the stream path is derived from.
        """
        # The page URL doubles as the id shown while downloading because the
        # real id is only known once the page has been parsed.
        webpage = self._download_webpage(url, video_id=url)
        self.report_extraction(url)

        # Extract video URL: jsclassref holds the base64-encoded,
        # percent-quoted path of the stream.
        mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
        video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id

        # Extract title
        video_title = self._search_regex(r'contentTitle = "(.*?)";',
            webpage, u'title')

        # Extract description
        video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
            webpage, u'description', fatal=False)

        # Split on the LAST dot only, so a file name containing several dots
        # no longer breaks the two-name unpacking with a ValueError.
        video_filename = video_url.split('/')[-1]
        video_id, dot, extension = video_filename.rpartition('.')
        if not dot:
            # No extension in the path at all.
            # NOTE(review): 'mp4' is assumed here, following the remark on
            # the 'ext' entry below -- confirm against real streams.
            video_id, extension = video_filename, 'mp4'

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': extension, # Extension is always(?) mp4, but seems to be flv
            'thumbnail': None,
            'description': video_description,
        }

        return [info]
							
								
								
									
										42
									
								
								youtube_dl/extractor/instagram.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								youtube_dl/extractor/instagram.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
class InstagramIE(InfoExtractor):
    """Information extractor for instagram.com video posts."""
    # Group 1 is the post short code. The host dot is escaped, and https and
    # a leading "www." are accepted in addition to the plain http form.
    _VALID_URL = r'(?:https?://)?(?:www\.)?instagram\.com/p/(.*?)/'
    _TEST = {
        u'url': u'http://instagram.com/p/aye83DjauH/#',
        u'file': u'aye83DjauH.mp4',
        u'md5': u'0d2da106a9d2631273e192b372806516',
        u'info_dict': {
            u"uploader_id": u"naomipq",
            u"title": u"Video by naomipq"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list describing the video posted at ``url``.

        The video URL and the title are looked up with the default (fatal)
        search; thumbnail and uploader are looked up with ``fatal=False``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        video_url = self._html_search_regex(
            r'<meta property="og:video" content="(.+?)"',
            webpage, u'video URL')
        thumbnail_url = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)" />',
            webpage, u'thumbnail URL', fatal=False)
        html_title = self._html_search_regex(
            r'<title>(.+?)</title>',
            webpage, u'title', flags=re.DOTALL)
        # Strip the " (Video) \u2022 Instagram" suffix of the page title.
        # The parentheses use doubled backslashes: this cannot be a raw
        # string (\u2022 must stay a string escape for Python 2), and a
        # single "\(" is an invalid string escape sequence.
        title = re.sub(u'(?: *\\(Videos?\\))? \u2022 Instagram$', '', html_title).strip()
        uploader_id = self._html_search_regex(r'content="(.*?)\'s video on Instagram',
            webpage, u'uploader name', fatal=False)
        ext = 'mp4'

        return [{
            'id':        video_id,
            'url':       video_url,
            'ext':       ext,
            'title':     title,
            'thumbnail': thumbnail_url,
            'uploader_id': uploader_id,
        }]
							
								
								
									
										56
									
								
								youtube_dl/extractor/jukebox.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								youtube_dl/extractor/jukebox.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,56 @@ | ||||
| # coding: utf-8 | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
class JukeboxIE(InfoExtractor):
    """Information extractor for jukebox video pages.

    The page embeds an iframe whose document carries the player
    configuration; that configuration either points at a media file or at a
    YouTube watch URL, in which case extraction is delegated.
    """
    # The dot of the ".html" suffix is escaped so it only matches a literal
    # dot. "jukebox?" deliberately leaves the final "x" optional.
    _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
    _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
    _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
    _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
    _IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'

    def _real_extract(self, url):
        """Return the video description for ``url``.

        Returns a one-element list for natively hosted videos, or the result
        of ``self.url_result`` when the player points at YouTube.

        Raises ExtractorError when the iframe, the video URL or the title
        cannot be found, or when the iframe shows the "jkb_waiting" marker.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        html = self._download_webpage(url, video_id)

        mobj = re.search(self._IFRAME, html)
        if mobj is None:
            raise ExtractorError(u'Cannot extract iframe url')
        iframe_url = unescapeHTML(mobj.group('iframe'))

        iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
        mobj = re.search(r'class="jkb_waiting"', iframe_html)
        if mobj is not None:
            raise ExtractorError(u'Video is not available(in your country?)!')

        self.report_extraction(video_id)

        mobj = re.search(self._VIDEO_URL, iframe_html)
        if mobj is None:
            mobj = re.search(self._IS_YOUTUBE, iframe_html)
            if mobj is None:
                raise ExtractorError(u'Cannot extract video url')
            # The configuration is JSON, so slashes arrive escaped as "\/".
            # The backslash is doubled because "\/" on its own is an invalid
            # string escape sequence.
            youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\\/', '/')
            self.to_screen(u'Youtube video detected')
            return self.url_result(youtube_url, ie='Youtube')
        video_url = unescapeHTML(mobj.group('video_url')).replace('\\/', '/')
        video_ext = unescapeHTML(mobj.group('video_ext'))

        mobj = re.search(self._TITLE, html)
        if mobj is None:
            raise ExtractorError(u'Cannot extract title')
        title = unescapeHTML(mobj.group('title'))
        artist = unescapeHTML(mobj.group('artist'))

        return [{'id': video_id,
                 'url': video_url,
                 'title': artist + '-' + title,
                 'ext': video_ext
                 }]
							
								
								
									
										155
									
								
								youtube_dl/extractor/justintv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										155
									
								
								youtube_dl/extractor/justintv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,155 @@ | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     formatSeconds, | ||||
| ) | ||||
|  | ||||
|  | ||||
class JustinTVIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv"""
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?

    # The id groups exclude '#' as well as '/', so a trailing "#fragment" is
    # consumed by the final (?:\#.*)? part instead of ending up inside the
    # channel / video / chapter id.
    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?:
            (?P<channelid>[^/\#]+)|
            (?:(?:[^/]+)/b/(?P<videoid>[^/\#]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/\#]+))
        )
        /?(?:\#.*)?$
        """
    _JUSTIN_PAGE_LIMIT = 100
    IE_NAME = u'justin.tv'
    _TEST = {
        u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360',
        u'file': u'296128360.flv',
        u'md5': u'ecaa8a790c22a40770901460af191c9a',
        u'info_dict': {
            u"upload_date": u"20110927",
            u"uploader_id": 25114803,
            u"uploader": u"thegamedevhub",
            u"title": u"Beginner Series - Scripting With Python Pt.1"
        }
    }

    def report_download_page(self, channel, offset):
        """Report attempt to download a single page of videos."""
        self.to_screen(u'%s: Downloading video information from %d to %d' %
                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))

    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id):
        """Download one API page and convert its clips to info dicts.

        ``video_id`` is only used to label the download. Returns a tuple
        ``(number of clips in the response, list of info dicts)``; clips
        without a ``video_file_url`` are counted but not converted.

        Raises ExtractorError when the decoded response is not a list.
        """
        info_json = self._download_webpage(url, video_id,
                                           u'Downloading video info JSON',
                                           u'unable to download video info JSON')

        response = json.loads(info_json)
        if not isinstance(response, list):
            error_text = response.get('error', 'unknown error')
            raise ExtractorError(u'Justin.tv API: %s' % error_text)
        info = []
        for clip in response:
            video_url = clip['video_file_url']
            if video_url:
                video_extension = os.path.splitext(video_url)[1][1:]
                # Keep the first ten characters of start_time and drop the
                # dashes, e.g. "2011-09-27..." -> "20110927".
                video_date = re.sub('-', '', clip['start_time'][:10])
                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
                # A separate name keeps the video_id parameter untouched.
                clip_id = clip['id']
                video_title = clip.get('title', clip_id)
                info.append({
                    'id': clip_id,
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
                    'uploader_id': video_uploader_id,
                    'upload_date': video_date,
                    'ext': video_extension,
                })
        return (len(response), info)

    def _real_extract(self, url):
        """Return the list of info dicts for a channel, broadcast or chapter.

        Channel URLs are fetched page by page from the archives API,
        broadcast URLs ("/b/") with a single request, and chapter URLs
        ("/c/") through the chapter XML plus the twitch "kraken" metadata.

        Raises ExtractorError for a URL that does not match, or when the
        chapter's archive cannot be located.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        api_base = 'http://api.justin.tv'
        paged = False
        if mobj.group('channelid'):
            paged = True
            video_id = mobj.group('channelid')
            api = api_base + '/channel/archives/%s.json' % video_id
        elif mobj.group('chapterid'):
            chapter_id = mobj.group('chapterid')

            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if not m:
                raise ExtractorError(u'Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            chapter_info_xml = self._download_webpage(api, chapter_id,
                                             note=u'Downloading chapter information',
                                             errnote=u'Chapter information download failed')
            doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
            # for/else: the else branch only runs when no archive matched.
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
                raise ExtractorError(u'Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or u'flv'

            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
                                   note='Downloading chapter metadata',
                                   errnote='Download of chapter metadata failed')
            chapter_info = json.loads(chapter_info_json)

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            #video_url += u'?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
                                            u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
                'id': u'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
                'thumbnail': chapter_info['preview'],
                'description': chapter_info['description'],
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
            return [info]
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

        self.report_extraction(video_id)

        info = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        while True:
            if paged:
                self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
            info.extend(page_info)
            # A short page means the listing is exhausted; unpaged requests
            # only ever need the first page.
            if not paged or page_count != limit:
                break
            offset += limit
        return info
							
								
								
									
										41
									
								
								youtube_dl/extractor/keek.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								youtube_dl/extractor/keek.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,41 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class KeekIE(InfoExtractor):
    """Information extractor for keek.com video pages."""
    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
    IE_NAME = u'keek'
    _TEST = {
        u'url': u'http://www.keek.com/ytdl/keeks/NODfbab',
        u'file': u'NODfbab.mp4',
        u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
        u'info_dict': {
            u"uploader": u"ytdl",
            u"title": u"test chars: \"'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
        }
    }

    def _real_extract(self, url):
        """Return a one-element list describing the keek behind ``url``.

        The media and thumbnail URLs are built directly from the id found
        in the URL; the page itself is only downloaded for the title and,
        with ``fatal=False``, the uploader name.
        """
        video_id = re.match(self._VALID_URL, url).group('videoID')

        page = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<meta property="og:title" content="(?P<title>.*?)"',
            page, u'title')
        uploader_name = self._html_search_regex(
            r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
            page, u'uploader', fatal=False)

        return [{
            'id': video_id,
            'url': u'http://cdn.keek.com/keek/video/%s' % video_id,
            'ext': 'mp4',
            'title': title,
            'thumbnail': u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id,
            'uploader': uploader_name,
        }]
							
								
								
									
										54
									
								
								youtube_dl/extractor/liveleak.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								youtube_dl/extractor/liveleak.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
class LiveLeakIE(InfoExtractor):
    """Information extractor for liveleak.com "view" pages."""

    # "https?" accepts both http and https; the earlier "http?" made the
    # letter "p" optional instead and could never match an https URL.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'liveleak'
    _TEST = {
        u'url': u'http://www.liveleak.com/view?i=757_1364311680',
        u'file': u'757_1364311680.mp4',
        u'md5': u'0813c2430bea7a46bf13acf3406992f4',
        u'info_dict': {
            u"description": u"extremely bad day for this guy..!",
            u"uploader": u"ljfriel2",
            u"title": u"Most unlucky car accident"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list describing the video at ``url``.

        Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
        Description and uploader are looked up with ``fatal=False``.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'file: "(.*?)",',
            webpage, u'video URL')

        # The og:title carries a site prefix that is not part of the title.
        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
            webpage, u'title').replace('LiveLeak.com -', '').strip()

        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
            webpage, u'description', fatal=False)

        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
            webpage, u'uploader', fatal=False)

        info = {
            'id':  video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader
        }

        return [info]
							
								
								
									
										123
									
								
								youtube_dl/extractor/metacafe.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										123
									
								
								youtube_dl/extractor/metacafe.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,123 @@ | ||||
| import re | ||||
| import socket | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     compat_str, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com."""

    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    IE_NAME = u'metacafe'
    _TEST = {
        u"add_ie": ["Youtube"],
        u"url":  u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
        u"file":  u"_aUehQsCQtM.flv",
        u"info_dict": {
            u"upload_date": u"20090102",
            u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
            u"description": u"md5:2439a8ef6d5a70e380c22f5ad323e5a8",
            u"uploader": u"PBS",
            u"uploader_id": u"PBS"
        }
    }

    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self.to_screen(u'Retrieving disclaimer')

    def _real_initialize(self):
        """Fetch the disclaimer page, then POST the age-confirmation form.

        Raises ExtractorError when either request fails with a URL, HTTP or
        socket error.
        """
        # Retrieve disclaimer
        request = compat_urllib_request.Request(self._DISCLAIMER)
        try:
            self.report_disclaimer()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve disclaimer: %s' % compat_str(err))

        # Confirm age
        disclaimer_form = {
            'filters': '0',
            'submit': "Continue - I'm over 18",
            }
        # Request bodies must be bytes on Python 3. The urlencoded form is
        # pure ASCII, so encoding it is also harmless on Python 2.
        post_data = compat_urllib_parse.urlencode(disclaimer_form).encode('ascii')
        request = compat_urllib_request.Request(self._FILTER_POST, post_data)
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))

    def _real_extract(self, url):
        """Return a one-element list describing the video at ``url``.

        Ids of the form ``yt-<id>`` are handed over to the Youtube extractor
        through ``self.url_result``.

        Raises ExtractorError when the URL does not match or when the media
        URL, the title or the uploader cannot be found in the page.
        """
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]

        # Retrieve video webpage to extract further information
        webpage = self._download_webpage('http://www.metacafe.com/watch/%s/' % video_id, video_id)

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        if mobj is not None:
            mediaURL = compat_urllib_parse.unquote(mobj.group(1))
            video_extension = mediaURL[-3:]

            # Extract gdaKey if available
            mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
            if mobj is None:
                video_url = mediaURL
            else:
                gdaKey = mobj.group(1)
                video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
        else:
            # Fallback: the media data is embedded in the flashvars value.
            mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
            if mobj is None:
                raise ExtractorError(u'Unable to extract media URL')
            vardict = compat_parse_qs(mobj.group(1))
            if 'mediaData' not in vardict:
                raise ExtractorError(u'Unable to extract media URL')
            mobj = re.search(r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
            if mobj is None:
                raise ExtractorError(u'Unable to extract media URL')
            mediaURL = mobj.group('mediaURL').replace('\\/', '/')
            video_extension = mediaURL[-3:]
            video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))

        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1)

        mobj = re.search(r'submitter=(.*?);', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract uploader nickname')
        video_uploader = mobj.group(1)

        # Everything above is derived from text strings, so no .decode() is
        # applied: text has no such method on Python 3, and on Python 2 it
        # fails for non-ASCII characters.
        # NOTE(review): relies on _download_webpage returning text -- confirm.
        return [{
            'id':       video_id,
            'url':      video_url,
            'uploader': video_uploader,
            'upload_date':  None,
            'title':    video_title,
            'ext':      video_extension,
        }]
							
								
								
									
										115
									
								
								youtube_dl/extractor/mixcloud.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								youtube_dl/extractor/mixcloud.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,115 @@ | ||||
| import json | ||||
| import re | ||||
| import socket | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MixcloudIE(InfoExtractor): | ||||
|     _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/ | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' | ||||
|     IE_NAME = u'mixcloud' | ||||
|  | ||||
|     def report_download_json(self, file_id): | ||||
|         """Report JSON download.""" | ||||
|         self.to_screen(u'Downloading json') | ||||
|  | ||||
|     def get_urls(self, jsonData, fmt, bitrate='best'): | ||||
|         """Get urls from 'audio_formats' section in json""" | ||||
|         try: | ||||
|             bitrate_list = jsonData[fmt] | ||||
|             if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list: | ||||
|                 bitrate = max(bitrate_list) # select highest | ||||
|  | ||||
|             url_list = jsonData[fmt][bitrate] | ||||
|         except TypeError: # we have no bitrate info. | ||||
|             url_list = jsonData[fmt] | ||||
|         return url_list | ||||
|  | ||||
|     def check_urls(self, url_list): | ||||
|         """Returns 1st active url from list""" | ||||
|         for url in url_list: | ||||
|             try: | ||||
|                 compat_urllib_request.urlopen(url) | ||||
|                 return url | ||||
|             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error): | ||||
|                 url = None | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def _print_formats(self, formats): | ||||
|         print('Available formats:') | ||||
|         for fmt in formats.keys(): | ||||
|             for b in formats[fmt]: | ||||
|                 try: | ||||
|                     ext = formats[fmt][b][0] | ||||
|                     print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])) | ||||
|                 except TypeError: # we have no bitrate info | ||||
|                     ext = formats[fmt][0] | ||||
|                     print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])) | ||||
|                     break | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         # extract uploader & filename from url | ||||
|         uploader = mobj.group(1).decode('utf-8') | ||||
|         file_id = uploader + "-" + mobj.group(2).decode('utf-8') | ||||
|  | ||||
|         # construct API request | ||||
|         file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json' | ||||
|         # retrieve .json file with links to files | ||||
|         request = compat_urllib_request.Request(file_url) | ||||
|         try: | ||||
|             self.report_download_json(file_url) | ||||
|             jsonData = compat_urllib_request.urlopen(request).read() | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err)) | ||||
|  | ||||
|         # parse JSON | ||||
|         json_data = json.loads(jsonData) | ||||
|         player_url = json_data['player_swf_url'] | ||||
|         formats = dict(json_data['audio_formats']) | ||||
|  | ||||
|         req_format = self._downloader.params.get('format', None) | ||||
|  | ||||
|         if self._downloader.params.get('listformats', None): | ||||
|             self._print_formats(formats) | ||||
|             return | ||||
|  | ||||
|         if req_format is None or req_format == 'best': | ||||
|             for format_param in formats.keys(): | ||||
|                 url_list = self.get_urls(formats, format_param) | ||||
|                 # check urls | ||||
|                 file_url = self.check_urls(url_list) | ||||
|                 if file_url is not None: | ||||
|                     break # got it! | ||||
|         else: | ||||
|             if req_format not in formats: | ||||
|                 raise ExtractorError(u'Format is not available') | ||||
|  | ||||
|             url_list = self.get_urls(formats, req_format) | ||||
|             file_url = self.check_urls(url_list) | ||||
|             format_param = req_format | ||||
|  | ||||
|         return [{ | ||||
|             'id': file_id.decode('utf-8'), | ||||
|             'url': file_url.decode('utf-8'), | ||||
|             'uploader': uploader.decode('utf-8'), | ||||
|             'upload_date': None, | ||||
|             'title': json_data['name'], | ||||
|             'ext': file_url.split('.')[-1].decode('utf-8'), | ||||
|             'format': (format_param is None and u'NA' or format_param.decode('utf-8')), | ||||
|             'thumbnail': json_data['thumbnail_url'], | ||||
|             'description': json_data['description'], | ||||
|             'player_url': player_url.decode('utf-8'), | ||||
|         }] | ||||
							
								
								
									
										80
									
								
								youtube_dl/extractor/mtv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								youtube_dl/extractor/mtv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | ||||
| import re | ||||
| import socket | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$' | ||||
|     _WORKING = False | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         if not mobj.group('proto'): | ||||
|             url = 'http://' + url | ||||
|         video_id = mobj.group('videoid') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # Some videos come from Vevo.com | ||||
|         m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";', | ||||
|                            webpage, re.DOTALL) | ||||
|         if m_vevo: | ||||
|             vevo_id = m_vevo.group(1); | ||||
|             self.to_screen(u'Vevo video detected: %s' % vevo_id) | ||||
|             return self.url_result('vevo:%s' % vevo_id, ie='Vevo') | ||||
|  | ||||
|         #song_name = self._html_search_regex(r'<meta name="mtv_vt" content="([^"]+)"/>', | ||||
|         #    webpage, u'song name', fatal=False) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<meta name="mtv_an" content="([^"]+)"/>', | ||||
|             webpage, u'title') | ||||
|  | ||||
|         mtvn_uri = self._html_search_regex(r'<meta name="mtvn_uri" content="([^"]+)"/>', | ||||
|             webpage, u'mtvn_uri', fatal=False) | ||||
|  | ||||
|         content_id = self._search_regex(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', | ||||
|             webpage, u'content id', fatal=False) | ||||
|  | ||||
|         videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri | ||||
|         self.report_extraction(video_id) | ||||
|         request = compat_urllib_request.Request(videogen_url) | ||||
|         try: | ||||
|             metadataXml = compat_urllib_request.urlopen(request).read() | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError(u'Unable to download video metadata: %s' % compat_str(err)) | ||||
|  | ||||
|         mdoc = xml.etree.ElementTree.fromstring(metadataXml) | ||||
|         renditions = mdoc.findall('.//rendition') | ||||
|  | ||||
|         # For now, always pick the highest quality. | ||||
|         rendition = renditions[-1] | ||||
|  | ||||
|         try: | ||||
|             _,_,ext = rendition.attrib['type'].partition('/') | ||||
|             format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate'] | ||||
|             video_url = rendition.find('./src').text | ||||
|         except KeyError: | ||||
|             raise ExtractorError('Invalid rendition field.') | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'upload_date': None, | ||||
|             'title': video_title, | ||||
|             'ext': ext, | ||||
|             'format': format, | ||||
|         } | ||||
|  | ||||
|         return [info] | ||||
							
								
								
									
										73
									
								
								youtube_dl/extractor/myspass.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/myspass.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| import os.path | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MySpassIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www.myspass.de/.*' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', | ||||
|         u'file': u'11741.mp4', | ||||
|         u'md5': u'0b49f4844a068f8b33f4b7c88405862b', | ||||
|         u'info_dict': { | ||||
|             u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",  | ||||
|             u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s' | ||||
|  | ||||
|         # video id is the last path element of the URL | ||||
|         # usually there is a trailing slash, so also try the second but last | ||||
|         url_path = compat_urllib_parse_urlparse(url).path | ||||
|         url_parent_path, video_id = os.path.split(url_path) | ||||
|         if not video_id: | ||||
|             _, video_id = os.path.split(url_parent_path) | ||||
|  | ||||
|         # get metadata | ||||
|         metadata_url = META_DATA_URL_TEMPLATE % video_id | ||||
|         metadata_text = self._download_webpage(metadata_url, video_id) | ||||
|         metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8')) | ||||
|  | ||||
|         # extract values from metadata | ||||
|         url_flv_el = metadata.find('url_flv') | ||||
|         if url_flv_el is None: | ||||
|             raise ExtractorError(u'Unable to extract download url') | ||||
|         video_url = url_flv_el.text | ||||
|         extension = os.path.splitext(video_url)[1][1:] | ||||
|         title_el = metadata.find('title') | ||||
|         if title_el is None: | ||||
|             raise ExtractorError(u'Unable to extract title') | ||||
|         title = title_el.text | ||||
|         format_id_el = metadata.find('format_id') | ||||
|         if format_id_el is None: | ||||
|             format = 'mp4' | ||||
|         else: | ||||
|             format = format_id_el.text | ||||
|         description_el = metadata.find('description') | ||||
|         if description_el is not None: | ||||
|             description = description_el.text | ||||
|         else: | ||||
|             description = None | ||||
|         imagePreview_el = metadata.find('imagePreview') | ||||
|         if imagePreview_el is not None: | ||||
|             thumbnail = imagePreview_el.text | ||||
|         else: | ||||
|             thumbnail = None | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'ext': extension, | ||||
|             'format': format, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': description | ||||
|         } | ||||
|         return [info] | ||||
							
								
								
									
										172
									
								
								youtube_dl/extractor/myvideo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										172
									
								
								youtube_dl/extractor/myvideo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,172 @@ | ||||
| import binascii | ||||
| import base64 | ||||
| import hashlib | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_ord, | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
|  | ||||
| class MyVideoIE(InfoExtractor): | ||||
|     """Information Extractor for myvideo.de.""" | ||||
|  | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' | ||||
|     IE_NAME = u'myvideo' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', | ||||
|         u'file': u'8229274.flv', | ||||
|         u'md5': u'2d2753e8130479ba2cb7e0a37002053e', | ||||
|         u'info_dict': { | ||||
|             u"title": u"bowling-fail-or-win" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git | ||||
|     # Released into the Public Domain by Tristan Fischer on 2013-05-19 | ||||
|     # https://github.com/rg3/youtube-dl/pull/842 | ||||
|     def __rc4crypt(self,data, key): | ||||
|         x = 0 | ||||
|         box = list(range(256)) | ||||
|         for i in list(range(256)): | ||||
|             x = (x + box[i] + compat_ord(key[i % len(key)])) % 256 | ||||
|             box[i], box[x] = box[x], box[i] | ||||
|         x = 0 | ||||
|         y = 0 | ||||
|         out = '' | ||||
|         for char in data: | ||||
|             x = (x + 1) % 256 | ||||
|             y = (y + box[x]) % 256 | ||||
|             box[x], box[y] = box[y], box[x] | ||||
|             out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256]) | ||||
|         return out | ||||
|  | ||||
|     def __md5(self,s): | ||||
|         return hashlib.md5(s).hexdigest().encode() | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'invalid URL: %s' % url) | ||||
|  | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         GK = ( | ||||
|           b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt' | ||||
|           b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3' | ||||
|           b'TnpsbA0KTVRkbU1tSTRNdz09' | ||||
|         ) | ||||
|  | ||||
|         # Get video webpage | ||||
|         webpage_url = 'http://www.myvideo.de/watch/%s' % video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|  | ||||
|         mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage) | ||||
|         if mobj is not None: | ||||
|             self.report_extraction(video_id) | ||||
|             video_url = mobj.group(1) + '.flv' | ||||
|  | ||||
|             video_title = self._html_search_regex('<title>([^<]+)</title>', | ||||
|                 webpage, u'title') | ||||
|  | ||||
|             video_ext = self._search_regex('[.](.+?)$', video_url, u'extension') | ||||
|  | ||||
|             return [{ | ||||
|                 'id':       video_id, | ||||
|                 'url':      video_url, | ||||
|                 'uploader': None, | ||||
|                 'upload_date':  None, | ||||
|                 'title':    video_title, | ||||
|                 'ext':      video_ext, | ||||
|             }] | ||||
|  | ||||
|         # try encxml | ||||
|         mobj = re.search('var flashvars={(.+?)}', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract video') | ||||
|  | ||||
|         params = {} | ||||
|         encxml = '' | ||||
|         sec = mobj.group(1) | ||||
|         for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec): | ||||
|             if not a == '_encxml': | ||||
|                 params[a] = b | ||||
|             else: | ||||
|                 encxml = compat_urllib_parse.unquote(b) | ||||
|         if not params.get('domain'): | ||||
|             params['domain'] = 'www.myvideo.de' | ||||
|         xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params)) | ||||
|         if 'flash_playertype=MTV' in xmldata_url: | ||||
|             self._downloader.report_warning(u'avoiding MTV player') | ||||
|             xmldata_url = ( | ||||
|                 'http://www.myvideo.de/dynamic/get_player_video_xml.php' | ||||
|                 '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes' | ||||
|             ) % video_id | ||||
|  | ||||
|         # get enc data | ||||
|         enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1] | ||||
|         enc_data_b = binascii.unhexlify(enc_data) | ||||
|         sk = self.__md5( | ||||
|             base64.b64decode(base64.b64decode(GK)) + | ||||
|             self.__md5( | ||||
|                 str(video_id).encode('utf-8') | ||||
|             ) | ||||
|         ) | ||||
|         dec_data = self.__rc4crypt(enc_data_b, sk) | ||||
|  | ||||
|         # extracting infos | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         video_url = None | ||||
|         mobj = re.search('connectionurl=\'(.*?)\'', dec_data) | ||||
|         if mobj: | ||||
|             video_url = compat_urllib_parse.unquote(mobj.group(1)) | ||||
|             if 'myvideo2flash' in video_url: | ||||
|                 self._downloader.report_warning(u'forcing RTMPT ...') | ||||
|                 video_url = video_url.replace('rtmpe://', 'rtmpt://') | ||||
|  | ||||
|         if not video_url: | ||||
|             # extract non rtmp videos | ||||
|             mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data) | ||||
|             if mobj is None: | ||||
|                 raise ExtractorError(u'unable to extract url') | ||||
|             video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2)) | ||||
|  | ||||
|         video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file') | ||||
|         video_file = compat_urllib_parse.unquote(video_file) | ||||
|  | ||||
|         if not video_file.endswith('f4m'): | ||||
|             ppath, prefix = video_file.split('.') | ||||
|             video_playpath = '%s:%s' % (prefix, ppath) | ||||
|             video_hls_playlist = '' | ||||
|         else: | ||||
|             video_playpath = '' | ||||
|             video_hls_playlist = ( | ||||
|                 video_file | ||||
|             ).replace('.f4m', '.m3u8') | ||||
|  | ||||
|         video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj') | ||||
|         video_swfobj = compat_urllib_parse.unquote(video_swfobj) | ||||
|  | ||||
|         video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>", | ||||
|             webpage, u'title') | ||||
|  | ||||
|         return [{ | ||||
|             'id':                 video_id, | ||||
|             'url':                video_url, | ||||
|             'tc_url':             video_url, | ||||
|             'uploader':           None, | ||||
|             'upload_date':        None, | ||||
|             'title':              video_title, | ||||
|             'ext':                u'flv', | ||||
|             'play_path':          video_playpath, | ||||
|             'video_file':         video_file, | ||||
|             'video_hls_playlist': video_hls_playlist, | ||||
|             'player_url':         video_swfobj, | ||||
|         }] | ||||
|  | ||||
							
								
								
									
										49
									
								
								youtube_dl/extractor/nba.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								youtube_dl/extractor/nba.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NBAIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', | ||||
|         u'file': u'0021200253-okc-bkn-recap.nba.mp4', | ||||
|         u'md5': u'c0edcfc37607344e2ff8f13c378c88a4', | ||||
|         u'info_dict': { | ||||
|             u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",  | ||||
|             u"title": u"Thunder vs. Nets" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4' | ||||
|  | ||||
|         shortened_video_id = video_id.rpartition('/')[2] | ||||
|         title = self._html_search_regex(r'<meta property="og:title" content="(.*?)"', | ||||
|             webpage, 'title', default=shortened_video_id).replace('NBA.com: ', '') | ||||
|  | ||||
|         # It isn't there in the HTML it returns to us | ||||
|         # uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False) | ||||
|  | ||||
|         description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False) | ||||
|  | ||||
|         info = { | ||||
|             'id': shortened_video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': title, | ||||
|             # 'uploader_date': uploader_date, | ||||
|             'description': description, | ||||
|         } | ||||
|         return [info] | ||||
							
								
								
									
										76
									
								
								youtube_dl/extractor/photobucket.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								youtube_dl/extractor/photobucket.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,76 @@ | ||||
| import datetime | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| class PhotobucketIE(InfoExtractor): | ||||
|     """Information extractor for photobucket.com.""" | ||||
|  | ||||
|     # TODO: the original _VALID_URL was: | ||||
|     # r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' | ||||
|     # Check if it's necessary to keep the old extracion process | ||||
|     _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' | ||||
|     IE_NAME = u'photobucket' | ||||
|     _TEST = { | ||||
|         u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', | ||||
|         u'file': u'zpsc0c3b9fa.mp4', | ||||
|         u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20130504",  | ||||
|             u"uploader": u"rachaneronas",  | ||||
|             u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract id from URL | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_extension = mobj.group('ext') | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # Extract URL, uploader, and title from webpage | ||||
|         self.report_extraction(video_id) | ||||
|         # We try first by looking the javascript code: | ||||
|         mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage) | ||||
|         if mobj is not None: | ||||
|             info = json.loads(mobj.group('json')) | ||||
|             return [{ | ||||
|                 'id':       video_id, | ||||
|                 'url':      info[u'downloadUrl'], | ||||
|                 'uploader': info[u'username'], | ||||
|                 'upload_date':  datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'), | ||||
|                 'title':    info[u'title'], | ||||
|                 'ext':      video_extension, | ||||
|                 'thumbnail': info[u'thumbUrl'], | ||||
|             }] | ||||
|  | ||||
|         # We try looking in other parts of the webpage | ||||
|         video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />', | ||||
|             webpage, u'video URL') | ||||
|  | ||||
|         mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract title') | ||||
|         video_title = mobj.group(1).decode('utf-8') | ||||
|         video_uploader = mobj.group(2).decode('utf-8') | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id.decode('utf-8'), | ||||
|             'url':      video_url.decode('utf-8'), | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date':  None, | ||||
|             'title':    video_title, | ||||
|             'ext':      video_extension.decode('utf-8'), | ||||
|         }] | ||||
							
								
								
									
										50
									
								
								youtube_dl/extractor/pornotube.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								youtube_dl/extractor/pornotube.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class PornotubeIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', | ||||
|         u'file': u'1689755.flv', | ||||
|         u'md5': u'374dd6dcedd24234453b295209aa69b6', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20090708",  | ||||
|             u"title": u"Marilyn-Monroe-Bathing" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('videoid') | ||||
|         video_title = mobj.group('title') | ||||
|  | ||||
|         # Get webpage content | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # Get the video URL | ||||
|         VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",' | ||||
|         video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url') | ||||
|         video_url = compat_urllib_parse.unquote(video_url) | ||||
|  | ||||
|         #Get the uploaded date | ||||
|         VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by' | ||||
|         upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False) | ||||
|         if upload_date: upload_date = unified_strdate(upload_date) | ||||
|  | ||||
|         info = {'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'uploader': None, | ||||
|                 'upload_date': upload_date, | ||||
|                 'title': video_title, | ||||
|                 'ext': 'flv', | ||||
|                 'format': 'flv'} | ||||
|  | ||||
|         return [info] | ||||
							
								
								
									
										56
									
								
								youtube_dl/extractor/rbmaradio.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								youtube_dl/extractor/rbmaradio.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,56 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RBMARadioIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011', | ||||
|         u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3', | ||||
|         u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95', | ||||
|         u'info_dict': { | ||||
|             u"uploader_id": u"ford-lopatin",  | ||||
|             u"location": u"Spain",  | ||||
|             u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",  | ||||
|             u"uploader": u"Ford & Lopatin",  | ||||
|             u"title": u"Live at Primavera Sound 2011" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('videoID') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$', | ||||
|             webpage, u'json data', flags=re.MULTILINE) | ||||
|  | ||||
|         try: | ||||
|             data = json.loads(json_data) | ||||
|         except ValueError as e: | ||||
|             raise ExtractorError(u'Invalid JSON: ' + str(e)) | ||||
|  | ||||
|         video_url = data['akamai_url'] + '&cbr=256' | ||||
|         url_parts = compat_urllib_parse_urlparse(video_url) | ||||
|         video_ext = url_parts.path.rpartition('.')[2] | ||||
|         info = { | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'ext': video_ext, | ||||
|                 'title': data['title'], | ||||
|                 'description': data.get('teaser_text'), | ||||
|                 'location': data.get('country_of_origin'), | ||||
|                 'uploader': data.get('host', {}).get('name'), | ||||
|                 'uploader_id': data.get('host', {}).get('slug'), | ||||
|                 'thumbnail': data.get('image', {}).get('large_url_2x'), | ||||
|                 'duration': data.get('duration'), | ||||
|         } | ||||
|         return [info] | ||||
							
								
								
									
										37
									
								
								youtube_dl/extractor/redtube.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								youtube_dl/extractor/redtube.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class RedTubeIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.redtube.com/66418', | ||||
|         u'file': u'66418.mp4', | ||||
|         u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Sucked on a toilet" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         video_extension = 'mp4'         | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">', | ||||
|             webpage, u'video URL') | ||||
|  | ||||
|         video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>', | ||||
|             webpage, u'title') | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'ext':      video_extension, | ||||
|             'title':    video_title, | ||||
|         }] | ||||
							
								
								
									
										37
									
								
								youtube_dl/extractor/ringtv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								youtube_dl/extractor/ringtv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class RingTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/videos/video/([^/]+)' | ||||
|     _TEST = { | ||||
|         u"url": u"http://ringtv.craveonline.com/videos/video/746619-canelo-alvarez-talks-about-mayweather-showdown", | ||||
|         u"file": u"746619.mp4", | ||||
|         u"md5": u"7c46b4057d22de32e0a539f017e64ad3", | ||||
|         u"info_dict": { | ||||
|             u"title": u"Canelo Alvarez talks about Mayweather showdown", | ||||
|             u"description": u"Saul \\\"Canelo\\\" Alvarez spoke to the media about his Sept. 14 showdown with Floyd Mayweather after their kick-off presser in NYC. Canelo is motivated and confident that he will have the speed and gameplan to beat the pound-for-pound king." | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1).split('-')[0] | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._search_regex(r'<title>(.+?)</title>', | ||||
|         		webpage, 'video title').replace(' | RingTV','') | ||||
|         description = self._search_regex(r'<div class="blurb">(.+?)</div>', | ||||
|         		webpage, 'Description') | ||||
|         final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" %(str(video_id)) | ||||
|         thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" %(str(video_id)) | ||||
|         ext = final_url.split('.')[-1] | ||||
|         return [{ | ||||
|             'id'          : video_id, | ||||
|             'url'         : final_url, | ||||
|             'ext'         : ext, | ||||
|             'title'       : title, | ||||
|             'thumbnail'   : thumbnail_url, | ||||
|             'description' : description, | ||||
|         }] | ||||
|  | ||||
							
								
								
									
										204
									
								
								youtube_dl/extractor/soundcloud.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										204
									
								
								youtube_dl/extractor/soundcloud.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,204 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SoundcloudIE(InfoExtractor): | ||||
|     """Information extractor for soundcloud.com | ||||
|        To access the media, the uid of the song and a stream token | ||||
|        must be extracted from the page source and the script must make | ||||
|        a request to media.soundcloud.com/crossdomain.xml. Then | ||||
|        the media can be grabbed by requesting from an url composed | ||||
|        of the stream token and uid | ||||
|      """ | ||||
|  | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$' | ||||
|     IE_NAME = u'soundcloud' | ||||
|     _TEST = { | ||||
|         u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', | ||||
|         u'file': u'62986583.mp3', | ||||
|         u'md5': u'ebef0a451b909710ed1d7787dddbf0d7', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20121011",  | ||||
|             u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",  | ||||
|             u"uploader": u"E.T. ExTerrestrial Music",  | ||||
|             u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def report_resolve(self, video_id): | ||||
|         """Report information extraction.""" | ||||
|         self.to_screen(u'%s: Resolving id' % video_id) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         # extract uploader (which is in the url) | ||||
|         uploader = mobj.group(1) | ||||
|         # extract simple title (uploader + slug of song title) | ||||
|         slug_title =  mobj.group(2) | ||||
|         full_title = '%s/%s' % (uploader, slug_title) | ||||
|  | ||||
|         self.report_resolve(full_title) | ||||
|  | ||||
|         url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title) | ||||
|         resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|         info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON') | ||||
|  | ||||
|         info = json.loads(info_json) | ||||
|         video_id = info['id'] | ||||
|         self.report_extraction(full_title) | ||||
|  | ||||
|         streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|         stream_json = self._download_webpage(streams_url, full_title, | ||||
|                                              u'Downloading stream definitions', | ||||
|                                              u'unable to download stream definitions') | ||||
|  | ||||
|         streams = json.loads(stream_json) | ||||
|         mediaURL = streams['http_mp3_128_url'] | ||||
|         upload_date = unified_strdate(info['created_at']) | ||||
|  | ||||
|         return [{ | ||||
|             'id':       info['id'], | ||||
|             'url':      mediaURL, | ||||
|             'uploader': info['user']['username'], | ||||
|             'upload_date': upload_date, | ||||
|             'title':    info['title'], | ||||
|             'ext':      u'mp3', | ||||
|             'description': info['description'], | ||||
|         }] | ||||
|  | ||||
| class SoundcloudSetIE(InfoExtractor): | ||||
|     """Information extractor for soundcloud.com sets | ||||
|        To access the media, the uid of the song and a stream token | ||||
|        must be extracted from the page source and the script must make | ||||
|        a request to media.soundcloud.com/crossdomain.xml. Then | ||||
|        the media can be grabbed by requesting from an url composed | ||||
|        of the stream token and uid | ||||
|      """ | ||||
|  | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$' | ||||
|     IE_NAME = u'soundcloud:set' | ||||
|     _TEST = { | ||||
|         u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep", | ||||
|         u"playlist": [ | ||||
|             { | ||||
|                 u"file":"30510138.mp3", | ||||
|                 u"md5":"f9136bf103901728f29e419d2c70f55d", | ||||
|                 u"info_dict": { | ||||
|                     u"upload_date": u"20111213", | ||||
|                     u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", | ||||
|                     u"uploader": u"The Royal Concept", | ||||
|                     u"title": u"D-D-Dance" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file":"47127625.mp3", | ||||
|                 u"md5":"09b6758a018470570f8fd423c9453dd8", | ||||
|                 u"info_dict": { | ||||
|                     u"upload_date": u"20120521", | ||||
|                     u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", | ||||
|                     u"uploader": u"The Royal Concept", | ||||
|                     u"title": u"The Royal Concept - Gimme Twice" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file":"47127627.mp3", | ||||
|                 u"md5":"154abd4e418cea19c3b901f1e1306d9c", | ||||
|                 u"info_dict": { | ||||
|                     u"upload_date": u"20120521", | ||||
|                     u"uploader": u"The Royal Concept", | ||||
|                     u"title": u"Goldrushed" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file":"47127629.mp3", | ||||
|                 u"md5":"2f5471edc79ad3f33a683153e96a79c1", | ||||
|                 u"info_dict": { | ||||
|                     u"upload_date": u"20120521", | ||||
|                     u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", | ||||
|                     u"uploader": u"The Royal Concept", | ||||
|                     u"title": u"In the End" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file":"47127631.mp3", | ||||
|                 u"md5":"f9ba87aa940af7213f98949254f1c6e2", | ||||
|                 u"info_dict": { | ||||
|                     u"upload_date": u"20120521", | ||||
|                     u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com", | ||||
|                     u"uploader": u"The Royal Concept", | ||||
|                     u"title": u"Knocked Up" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file":"75206121.mp3", | ||||
|                 u"md5":"f9d1fe9406717e302980c30de4af9353", | ||||
|                 u"info_dict": { | ||||
|                     u"upload_date": u"20130116", | ||||
|                     u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central).  \r\nAs a gift to our fans we would like to offer you a free download of the track!  ", | ||||
|                     u"uploader": u"The Royal Concept", | ||||
|                     u"title": u"World On Fire" | ||||
|                 } | ||||
|             } | ||||
|         ] | ||||
|     } | ||||
|  | ||||
|     def report_resolve(self, video_id): | ||||
|         """Report information extraction.""" | ||||
|         self.to_screen(u'%s: Resolving id' % video_id) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         # extract uploader (which is in the url) | ||||
|         uploader = mobj.group(1) | ||||
|         # extract simple title (uploader + slug of song title) | ||||
|         slug_title =  mobj.group(2) | ||||
|         full_title = '%s/sets/%s' % (uploader, slug_title) | ||||
|  | ||||
|         self.report_resolve(full_title) | ||||
|  | ||||
|         url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) | ||||
|         resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|         info_json = self._download_webpage(resolv_url, full_title) | ||||
|  | ||||
|         videos = [] | ||||
|         info = json.loads(info_json) | ||||
|         if 'errors' in info: | ||||
|             for err in info['errors']: | ||||
|                 self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err['error_message'])) | ||||
|             return | ||||
|  | ||||
|         self.report_extraction(full_title) | ||||
|         for track in info['tracks']: | ||||
|             video_id = track['id'] | ||||
|  | ||||
|             streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|             stream_json = self._download_webpage(streams_url, video_id, u'Downloading track info JSON') | ||||
|  | ||||
|             self.report_extraction(video_id) | ||||
|             streams = json.loads(stream_json) | ||||
|             mediaURL = streams['http_mp3_128_url'] | ||||
|  | ||||
|             videos.append({ | ||||
|                 'id':       video_id, | ||||
|                 'url':      mediaURL, | ||||
|                 'uploader': track['user']['username'], | ||||
|                 'upload_date':  unified_strdate(track['created_at']), | ||||
|                 'title':    track['title'], | ||||
|                 'ext':      u'mp3', | ||||
|                 'description': track['description'], | ||||
|             }) | ||||
|         return videos | ||||
							
								
								
									
										45
									
								
								youtube_dl/extractor/spiegel.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								youtube_dl/extractor/spiegel.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,45 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class SpiegelIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', | ||||
|         u'file': u'1259285.mp4', | ||||
|         u'md5': u'2c2754212136f35fb4b19767d242f66e', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('videoID') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>', | ||||
|             webpage, u'title') | ||||
|  | ||||
|         xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml' | ||||
|         xml_code = self._download_webpage(xml_url, video_id, | ||||
|                     note=u'Downloading XML', errnote=u'Failed to download XML') | ||||
|  | ||||
|         idoc = xml.etree.ElementTree.fromstring(xml_code) | ||||
|         last_type = idoc[-1] | ||||
|         filename = last_type.findall('./filename')[0].text | ||||
|         duration = float(last_type.findall('./duration')[0].text) | ||||
|  | ||||
|         video_url = 'http://video2.spiegel.de/flash/' + filename | ||||
|         video_ext = filename.rpartition('.')[2] | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': video_ext, | ||||
|             'title': video_title, | ||||
|             'duration': duration, | ||||
|         } | ||||
|         return [info] | ||||
							
								
								
									
										119
									
								
								youtube_dl/extractor/stanfordoc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								youtube_dl/extractor/stanfordoc.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,119 @@ | ||||
| import re | ||||
| import socket | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
|     orderedSet, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class StanfordOpenClassroomIE(InfoExtractor): | ||||
|     IE_NAME = u'stanfordoc' | ||||
|     IE_DESC = u'Stanford Open ClassRoom' | ||||
|     _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100', | ||||
|         u'file': u'PracticalUnix_intro-environment.mp4', | ||||
|         u'md5': u'544a9468546059d4e80d76265b0443b8', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Intro Environment" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         if mobj.group('course') and mobj.group('video'): # A specific video | ||||
|             course = mobj.group('course') | ||||
|             video = mobj.group('video') | ||||
|             info = { | ||||
|                 'id': course + '_' + video, | ||||
|                 'uploader': None, | ||||
|                 'upload_date': None, | ||||
|             } | ||||
|  | ||||
|             self.report_extraction(info['id']) | ||||
|             baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/' | ||||
|             xmlUrl = baseUrl + video + '.xml' | ||||
|             try: | ||||
|                 metaXml = compat_urllib_request.urlopen(xmlUrl).read() | ||||
|             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                 raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err)) | ||||
|             mdoc = xml.etree.ElementTree.fromstring(metaXml) | ||||
|             try: | ||||
|                 info['title'] = mdoc.findall('./title')[0].text | ||||
|                 info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text | ||||
|             except IndexError: | ||||
|                 raise ExtractorError(u'Invalid metadata XML file') | ||||
|             info['ext'] = info['url'].rpartition('.')[2] | ||||
|             return [info] | ||||
|         elif mobj.group('course'): # A course page | ||||
|             course = mobj.group('course') | ||||
|             info = { | ||||
|                 'id': course, | ||||
|                 'type': 'playlist', | ||||
|                 'uploader': None, | ||||
|                 'upload_date': None, | ||||
|             } | ||||
|  | ||||
|             coursepage = self._download_webpage(url, info['id'], | ||||
|                                         note='Downloading course info page', | ||||
|                                         errnote='Unable to download course info page') | ||||
|  | ||||
|             info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id']) | ||||
|  | ||||
|             info['description'] = self._html_search_regex('<description>([^<]+)</description>', | ||||
|                 coursepage, u'description', fatal=False) | ||||
|  | ||||
|             links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage)) | ||||
|             info['list'] = [ | ||||
|                 { | ||||
|                     'type': 'reference', | ||||
|                     'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage), | ||||
|                 } | ||||
|                     for vpage in links] | ||||
|             results = [] | ||||
|             for entry in info['list']: | ||||
|                 assert entry['type'] == 'reference' | ||||
|                 results += self.extract(entry['url']) | ||||
|             return results | ||||
|         else: # Root page | ||||
|             info = { | ||||
|                 'id': 'Stanford OpenClassroom', | ||||
|                 'type': 'playlist', | ||||
|                 'uploader': None, | ||||
|                 'upload_date': None, | ||||
|             } | ||||
|  | ||||
|             self.report_download_webpage(info['id']) | ||||
|             rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php' | ||||
|             try: | ||||
|                 rootpage = compat_urllib_request.urlopen(rootURL).read() | ||||
|             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                 raise ExtractorError(u'Unable to download course info page: ' + compat_str(err)) | ||||
|  | ||||
|             info['title'] = info['id'] | ||||
|  | ||||
|             links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage)) | ||||
|             info['list'] = [ | ||||
|                 { | ||||
|                     'type': 'reference', | ||||
|                     'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage), | ||||
|                 } | ||||
|                     for cpage in links] | ||||
|  | ||||
|             results = [] | ||||
|             for entry in info['list']: | ||||
|                 assert entry['type'] == 'reference' | ||||
|                 results += self.extract(entry['url']) | ||||
|             return results | ||||
							
								
								
									
										42
									
								
								youtube_dl/extractor/statigram.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								youtube_dl/extractor/statigram.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| class StatigramIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://statigr.am/p/484091715184808010_284179915', | ||||
|         u'file': u'484091715184808010_284179915.mp4', | ||||
|         u'md5': u'deda4ff333abe2e118740321e992605b', | ||||
|         u'info_dict': { | ||||
|             u"uploader_id": u"videoseconds",  | ||||
|             u"title": u"Instagram photo by @videoseconds" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<meta property="og:video:secure_url" content="(.+?)">', | ||||
|             webpage, u'video URL') | ||||
|         thumbnail_url = self._html_search_regex( | ||||
|             r'<meta property="og:image" content="(.+?)" />', | ||||
|             webpage, u'thumbnail URL', fatal=False) | ||||
|         html_title = self._html_search_regex( | ||||
|             r'<title>(.+?)</title>', | ||||
|             webpage, u'title') | ||||
|         title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title) | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r'@([^ ]+)', title, u'uploader name', fatal=False) | ||||
|         ext = 'mp4' | ||||
|  | ||||
|         return [{ | ||||
|             'id':        video_id, | ||||
|             'url':       video_url, | ||||
|             'ext':       ext, | ||||
|             'title':     title, | ||||
|             'thumbnail': thumbnail_url, | ||||
|             'uploader_id' : uploader_id | ||||
|         }] | ||||
							
								
								
									
										83
									
								
								youtube_dl/extractor/steam.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								youtube_dl/extractor/steam.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,83 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SteamIE(InfoExtractor): | ||||
|     _VALID_URL = r"""http://store\.steampowered\.com/ | ||||
|                 (agecheck/)? | ||||
|                 (?P<urltype>video|app)/ #If the page is only for videos or for a game | ||||
|                 (?P<gameID>\d+)/? | ||||
|                 (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID | ||||
|                 """ | ||||
|     _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' | ||||
|     _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' | ||||
|     _TEST = { | ||||
|         u"url": u"http://store.steampowered.com/video/105600/", | ||||
|         u"playlist": [ | ||||
|             { | ||||
|                 u"file": u"81300.flv", | ||||
|                 u"md5": u"f870007cee7065d7c76b88f0a45ecc07", | ||||
|                 u"info_dict": { | ||||
|                         u"title": u"Terraria 1.1 Trailer" | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file": u"80859.flv", | ||||
|                 u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751", | ||||
|                 u"info_dict": { | ||||
|                     u"title": u"Terraria Trailer" | ||||
|                 } | ||||
|             } | ||||
|         ] | ||||
|     } | ||||
|  | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url, re.VERBOSE) | ||||
|         gameID = m.group('gameID') | ||||
|  | ||||
|         videourl = self._VIDEO_PAGE_TEMPLATE % gameID | ||||
|         webpage = self._download_webpage(videourl, gameID) | ||||
|  | ||||
|         if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None: | ||||
|             videourl = self._AGECHECK_TEMPLATE % gameID | ||||
|             self.report_age_confirmation() | ||||
|             webpage = self._download_webpage(videourl, gameID) | ||||
|  | ||||
|         self.report_extraction(gameID) | ||||
|         game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>', | ||||
|                                              webpage, 'game title') | ||||
|  | ||||
|         urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\}," | ||||
|         mweb = re.finditer(urlRE, webpage) | ||||
|         namesRE = r'<span class="title">(?P<videoName>.+?)</span>' | ||||
|         titles = re.finditer(namesRE, webpage) | ||||
|         thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">' | ||||
|         thumbs = re.finditer(thumbsRE, webpage) | ||||
|         videos = [] | ||||
|         for vid,vtitle,thumb in zip(mweb,titles,thumbs): | ||||
|             video_id = vid.group('videoID') | ||||
|             title = vtitle.group('videoName') | ||||
|             video_url = vid.group('videoURL') | ||||
|             video_thumb = thumb.group('thumbnail') | ||||
|             if not video_url: | ||||
|                 raise ExtractorError(u'Cannot find video url for %s' % video_id) | ||||
|             info = { | ||||
|                 'id':video_id, | ||||
|                 'url':video_url, | ||||
|                 'ext': 'flv', | ||||
|                 'title': unescapeHTML(title), | ||||
|                 'thumbnail': video_thumb | ||||
|                   } | ||||
|             videos.append(info) | ||||
|         return [self.playlist_result(videos, gameID, game_title)] | ||||
							
								
								
									
										55
									
								
								youtube_dl/extractor/teamcoco.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								youtube_dl/extractor/teamcoco.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TeamcocoIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush', | ||||
|         u'file': u'19705.mp4', | ||||
|         u'md5': u'27b6f7527da5acf534b15f21b032656e', | ||||
|         u'info_dict': { | ||||
|             u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.",  | ||||
|             u"title": u"Louis C.K. Interview Pt. 1 11/3/11" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         url_title = mobj.group('url_title') | ||||
|         webpage = self._download_webpage(url, url_title) | ||||
|  | ||||
|         video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"', | ||||
|             webpage, u'video id') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"', | ||||
|             webpage, u'title') | ||||
|  | ||||
|         thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"', | ||||
|             webpage, u'thumbnail', fatal=False) | ||||
|  | ||||
|         video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"', | ||||
|             webpage, u'description', fatal=False) | ||||
|  | ||||
|         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id | ||||
|         data = self._download_webpage(data_url, video_id, 'Downloading data webpage') | ||||
|  | ||||
|         video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>', | ||||
|             data, u'video URL') | ||||
|  | ||||
|         return [{ | ||||
|             'id':          video_id, | ||||
|             'url':         video_url, | ||||
|             'ext':         'mp4', | ||||
|             'title':       video_title, | ||||
|             'thumbnail':   thumbnail, | ||||
|             'description': video_description, | ||||
|         }] | ||||
							
								
								
									
										88
									
								
								youtube_dl/extractor/ted.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								youtube_dl/extractor/ted.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class TEDIE(InfoExtractor): | ||||
|     _VALID_URL=r'''http://www\.ted\.com/ | ||||
|                    ( | ||||
|                         ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist | ||||
|                         | | ||||
|                         ((?P<type_talk>talks)) # We have a simple talk | ||||
|                    ) | ||||
|                    (/lang/(.*?))? # The url may contain the language | ||||
|                    /(?P<name>\w+) # Here goes the name and then ".html" | ||||
|                    ''' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', | ||||
|         u'file': u'102.mp4', | ||||
|         u'md5': u'2d76ee1576672e0bd8f187513267adf6', | ||||
|         u'info_dict': { | ||||
|             u"description": u"md5:c6fa72e6eedbd938c9caf6b2702f5922",  | ||||
|             u"title": u"Dan Dennett: The illusion of consciousness" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m=re.match(self._VALID_URL, url, re.VERBOSE) | ||||
|         if m.group('type_talk'): | ||||
|             return [self._talk_info(url)] | ||||
|         else : | ||||
|             playlist_id=m.group('playlist_id') | ||||
|             name=m.group('name') | ||||
|             self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name)) | ||||
|             return [self._playlist_videos_info(url,name,playlist_id)] | ||||
|  | ||||
|     def _playlist_videos_info(self,url,name,playlist_id=0): | ||||
|         '''Returns the videos of the playlist''' | ||||
|         video_RE=r''' | ||||
|                      <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)" | ||||
|                      ([.\s]*?)data-playlist_item_id="(\d+)" | ||||
|                      ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)" | ||||
|                      ''' | ||||
|         video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>' | ||||
|         webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage') | ||||
|         m_videos=re.finditer(video_RE,webpage,re.VERBOSE) | ||||
|         m_names=re.finditer(video_name_RE,webpage) | ||||
|  | ||||
|         playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>', | ||||
|                                                  webpage, 'playlist title') | ||||
|  | ||||
|         playlist_entries = [] | ||||
|         for m_video, m_name in zip(m_videos,m_names): | ||||
|             talk_url='http://www.ted.com%s' % m_name.group('talk_url') | ||||
|             playlist_entries.append(self.url_result(talk_url, 'TED')) | ||||
|         return self.playlist_result(playlist_entries, playlist_id = playlist_id, playlist_title = playlist_title) | ||||
|  | ||||
|     def _talk_info(self, url, video_id=0): | ||||
|         """Return the video for the talk in the url""" | ||||
|         m = re.match(self._VALID_URL, url,re.VERBOSE) | ||||
|         video_name = m.group('name') | ||||
|         webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name) | ||||
|         self.report_extraction(video_name) | ||||
|         # If the url includes the language we get the title translated | ||||
|         title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>', | ||||
|                                         webpage, 'title') | ||||
|         json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>', | ||||
|                                     webpage, 'json data') | ||||
|         info = json.loads(json_data) | ||||
|         desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>', | ||||
|                                        webpage, 'description', flags = re.DOTALL) | ||||
|          | ||||
|         thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"', | ||||
|                                        webpage, 'thumbnail') | ||||
|         info = { | ||||
|                 'id': info['id'], | ||||
|                 'url': info['htmlStreams'][-1]['file'], | ||||
|                 'ext': 'mp4', | ||||
|                 'title': title, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'description': desc, | ||||
|                 } | ||||
|         return info | ||||
							
								
								
									
										35
									
								
								youtube_dl/extractor/tf1.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								youtube_dl/extractor/tf1.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| # coding: utf-8 | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| class TF1IE(InfoExtractor): | ||||
|     """ | ||||
|     TF1 uses the wat.tv player, currently it can only download videos with the | ||||
|     html5 player enabled, it cannot download HD videos. | ||||
|     """ | ||||
|     _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html' | ||||
|     _TEST = { | ||||
|         u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', | ||||
|         u'file': u'10635995.mp4', | ||||
|         u'md5': u'66789d3e91278d332f75e1feb7aea327', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle', | ||||
|             u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         id = mobj.group(1) | ||||
|         webpage = self._download_webpage(url, id) | ||||
|         embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"', | ||||
|                                 webpage, 'embed url') | ||||
|         embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page') | ||||
|         wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id') | ||||
|         wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info') | ||||
|         wat_info = json.loads(wat_info)['media'] | ||||
|         wat_url = wat_info['url'] | ||||
|         return self.url_result(wat_url, 'Wat') | ||||
							
								
								
									
										49
									
								
								youtube_dl/extractor/traileraddict.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								youtube_dl/extractor/traileraddict.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class TrailerAddictIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/trailer/([^/]+)/(?:trailer|feature-trailer)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer', | ||||
|         u'file': u'76184.mp4', | ||||
|         u'md5': u'41365557f3c8c397d091da510e73ceb4', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Prince Avalanche Trailer", | ||||
|             u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind." | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|          | ||||
|         title = self._search_regex(r'<title>(.+?)</title>', | ||||
|                 webpage, 'video title').replace(' - Trailer Addict','') | ||||
|         view_count = self._search_regex(r'Views: (.+?)<br />', | ||||
|                 webpage, 'Views Count') | ||||
|         description = self._search_regex(r'<meta property="og:description" content="(.+?)" />', | ||||
|                 webpage, 'video description') | ||||
|         video_id = self._search_regex(r'<meta property="og:video" content="(.+?)" />', | ||||
|                 webpage, 'Video id').split('=')[1] | ||||
|          | ||||
|         info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id)) | ||||
|         info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage") | ||||
|          | ||||
|         final_url = self._search_regex(r'&fileurl=(.+)', | ||||
|                 info_webpage, 'Download url').replace('%3F','?') | ||||
|         thumbnail_url = self._search_regex(r'&image=(.+?)&', | ||||
|                 info_webpage, 'thumbnail url') | ||||
|         ext = final_url.split('.')[-1].split('?')[0] | ||||
|          | ||||
|         return [{ | ||||
|             'id'          : video_id, | ||||
|             'url'         : final_url, | ||||
|             'ext'         : ext, | ||||
|             'title'       : title, | ||||
|             'thumbnail'   : thumbnail_url, | ||||
|             'description' : description, | ||||
|             'view_count'  : view_count, | ||||
|         }] | ||||
							
								
								
									
										40
									
								
								youtube_dl/extractor/tudou.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								youtube_dl/extractor/tudou.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class TudouIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', | ||||
|         u'file': u'159447792.f4v', | ||||
|         u'md5': u'ad7c358a01541e926a1e413612c6b10a', | ||||
|         u'info_dict': { | ||||
|             u"title": u"\u5361\u9a6c\u4e54\u56fd\u8db3\u5f00\u5927\u811a\u957f\u4f20\u51b2\u540a\u96c6\u9526" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(2).replace('.html','') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_id = re.search('"k":(.+?),',webpage).group(1) | ||||
|         title = re.search(",kw:\"(.+)\"",webpage) | ||||
|         if title is None: | ||||
|             title = re.search(",kw: \'(.+)\'",webpage) | ||||
|         title = title.group(1) | ||||
|         thumbnail_url = re.search(",pic: \'(.+?)\'",webpage) | ||||
|         if thumbnail_url is None: | ||||
|             thumbnail_url = re.search(",pic:\"(.+?)\"",webpage) | ||||
|         thumbnail_url = thumbnail_url.group(1) | ||||
|         info_url = "http://v2.tudou.com/f?id="+str(video_id) | ||||
|         webpage = self._download_webpage(info_url, video_id, "Opening the info webpage") | ||||
|         final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1) | ||||
|         ext = (final_url.split('?')[0]).split('.')[-1] | ||||
|         return [{ | ||||
|             'id':        video_id, | ||||
|             'url':       final_url, | ||||
|             'ext':       ext, | ||||
|             'title':     title, | ||||
|             'thumbnail': thumbnail_url, | ||||
|         }] | ||||
							
								
								
									
										49
									
								
								youtube_dl/extractor/tumblr.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								youtube_dl/extractor/tumblr.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TumblrIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', | ||||
|         u'file': u'54196191430.mp4', | ||||
|         u'md5': u'479bb068e5b16462f5176a6828829767', | ||||
|         u'info_dict': { | ||||
|             u"title": u"tatiana maslany news" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m_url = re.match(self._VALID_URL, url) | ||||
|         video_id = m_url.group('id') | ||||
|         blog = m_url.group('blog_name') | ||||
|  | ||||
|         url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id) | ||||
|         video = re.search(re_video, webpage) | ||||
|         if video is None: | ||||
|            raise ExtractorError(u'Unable to extract video') | ||||
|         video_url = video.group('video_url') | ||||
|         ext = video.group('ext') | ||||
|  | ||||
|         video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22', | ||||
|             webpage, u'thumbnail', fatal=False)  # We pick the first poster | ||||
|         if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '') | ||||
|  | ||||
|         # The only place where you can get a title, it's not complete, | ||||
|         # but searching in other places doesn't work for all videos | ||||
|         video_title = self._html_search_regex(r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>', | ||||
|             webpage, u'title', flags=re.DOTALL) | ||||
|  | ||||
|         return [{'id': video_id, | ||||
|                  'url': video_url, | ||||
|                  'title': video_title, | ||||
|                  'thumbnail': video_thumbnail, | ||||
|                  'ext': ext | ||||
|                  }] | ||||
							
								
								
									
										41
									
								
								youtube_dl/extractor/tutv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								youtube_dl/extractor/tutv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,41 @@ | ||||
| import base64 | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_parse_qs, | ||||
| ) | ||||
|  | ||||
| class TutvIE(InfoExtractor): | ||||
|     _VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc', | ||||
|         u'file': u'2742556.flv', | ||||
|         u'md5': u'5eb766671f69b82e528dc1e7769c5cb2', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Noah en pabellon cuahutemoc" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._html_search_regex( | ||||
|             r'<meta property="og:title" content="(.*?)">', webpage, u'title') | ||||
|         internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID') | ||||
|  | ||||
|         data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id) | ||||
|         data_content = self._download_webpage(data_url, video_id, note=u'Downloading video info') | ||||
|         data = compat_parse_qs(data_content) | ||||
|         video_url = base64.b64decode(data['kpt'][0]).decode('utf-8') | ||||
|         ext = video_url.partition(u'?')[0].rpartition(u'.')[2] | ||||
|  | ||||
|         info = { | ||||
|             'id': internal_id, | ||||
|             'url': video_url, | ||||
|             'ext': ext, | ||||
|             'title': title, | ||||
|         } | ||||
|         return [info] | ||||
							
								
								
									
										45
									
								
								youtube_dl/extractor/ustream.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								youtube_dl/extractor/ustream.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,45 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class UstreamIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)' | ||||
|     IE_NAME = u'ustream' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.ustream.tv/recorded/20274954', | ||||
|         u'file': u'20274954.flv', | ||||
|         u'md5': u'088f151799e8f572f84eb62f17d73e5c', | ||||
|         u'info_dict': { | ||||
|             u"uploader": u"Young Americans for Liberty",  | ||||
|             u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('videoID') | ||||
|  | ||||
|         video_url = u'http://tcdn.ustream.tv/video/%s' % video_id | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'data-title="(?P<title>.+)"', | ||||
|             webpage, u'title') | ||||
|  | ||||
|         uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>', | ||||
|             webpage, u'uploader', fatal=False, flags=re.DOTALL) | ||||
|  | ||||
|         thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"', | ||||
|             webpage, u'thumbnail', fatal=False) | ||||
|  | ||||
|         info = { | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'ext': 'flv', | ||||
|                 'title': video_title, | ||||
|                 'uploader': uploader, | ||||
|                 'thumbnail': thumbnail, | ||||
|                } | ||||
|         return info | ||||
							
								
								
									
										54
									
								
								youtube_dl/extractor/vbox7.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								youtube_dl/extractor/vbox7.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class Vbox7IE(InfoExtractor): | ||||
|     """Information Extractor for Vbox7""" | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://vbox7.com/play:249bb972c2', | ||||
|         u'file': u'249bb972c2.flv', | ||||
|         u'md5': u'9c70d6d956f888bdc08c124acc120cfe', | ||||
|         u'info_dict': { | ||||
|             u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         redirect_page, urlh = self._download_webpage_handle(url, video_id) | ||||
|         new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location') | ||||
|         redirect_url = urlh.geturl() + new_location | ||||
|         webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page') | ||||
|  | ||||
|         title = self._html_search_regex(r'<title>(.*)</title>', | ||||
|             webpage, u'title').split('/')[0].strip() | ||||
|  | ||||
|         ext = "flv" | ||||
|         info_url = "http://vbox7.com/play/magare.do" | ||||
|         data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id}) | ||||
|         info_request = compat_urllib_request.Request(info_url, data) | ||||
|         info_request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage') | ||||
|         if info_response is None: | ||||
|             raise ExtractorError(u'Unable to extract the media url') | ||||
|         (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&')) | ||||
|  | ||||
|         return [{ | ||||
|             'id':        video_id, | ||||
|             'url':       final_url, | ||||
|             'ext':       ext, | ||||
|             'title':     title, | ||||
|             'thumbnail': thumbnail_url, | ||||
|         }] | ||||
							
								
								
									
										53
									
								
								youtube_dl/extractor/vevo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								youtube_dl/extractor/vevo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| class VevoIE(InfoExtractor): | ||||
|     """ | ||||
|     Accecps urls from vevo.com or in the format 'vevo:{id}' | ||||
|     (currently used by MTVIE) | ||||
|     """ | ||||
|     _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', | ||||
|         u'file': u'GB1101300280.mp4', | ||||
|         u'md5': u'06bea460acb744eab74a9d7dcb4bfd61', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20130624",  | ||||
|             u"uploader": u"Hurts",  | ||||
|             u"title": u"Somebody To Die For" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         json_url = 'http://www.vevo.com/data/video/%s' % video_id | ||||
|         base_url = 'http://smil.lvl3.vevo.com' | ||||
|         videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower()) | ||||
|         info_json = self._download_webpage(json_url, video_id, u'Downloading json info') | ||||
|         links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         video_info = json.loads(info_json) | ||||
|         m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):(?P<url>.*?)"', links_webpage)) | ||||
|         if m_urls is None or len(m_urls) == 0: | ||||
|             raise ExtractorError(u'Unable to extract video url') | ||||
|         # They are sorted from worst to best quality | ||||
|         m_url = m_urls[-1] | ||||
|         video_url = base_url + m_url.group('url') | ||||
|         ext = m_url.group('ext') | ||||
|  | ||||
|         return {'url': video_url, | ||||
|                 'ext': ext, | ||||
|                 'id': video_id, | ||||
|                 'title': video_info['title'], | ||||
|                 'thumbnail': video_info['img'], | ||||
|                 'upload_date': video_info['launchDate'].replace('/',''), | ||||
|                 'uploader': video_info['Artists'][0]['title'], | ||||
|                 } | ||||
							
								
								
									
										150
									
								
								youtube_dl/extractor/vimeo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										150
									
								
								youtube_dl/extractor/vimeo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,150 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     clean_html, | ||||
|     get_element_by_attribute, | ||||
|     ExtractorError, | ||||
|     std_headers, | ||||
| ) | ||||
|  | ||||
| class VimeoIE(InfoExtractor): | ||||
|     """Information extractor for vimeo.com.""" | ||||
|  | ||||
|     # _VALID_URL matches Vimeo URLs | ||||
|     _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$' | ||||
|     IE_NAME = u'vimeo' | ||||
|     _TEST = { | ||||
|         u'url': u'http://vimeo.com/56015672', | ||||
|         u'file': u'56015672.mp4', | ||||
|         u'md5': u'8879b6cc097e987f02484baf890129e5', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20121220",  | ||||
|             u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",  | ||||
|             u"uploader_id": u"user7108434",  | ||||
|             u"uploader": u"Filippo Valsorda",  | ||||
|             u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _verify_video_password(self, url, video_id, webpage): | ||||
|         password = self._downloader.params.get('videopassword', None) | ||||
|         if password is None: | ||||
|             raise ExtractorError(u'This video is protected by a password, use the --video-password option') | ||||
|         token = re.search(r'xsrft: \'(.*?)\'', webpage).group(1) | ||||
|         data = compat_urllib_parse.urlencode({'password': password, | ||||
|                                               'token': token}) | ||||
|         # I didn't manage to use the password with https | ||||
|         if url.startswith('https'): | ||||
|             pass_url = url.replace('https','http') | ||||
|         else: | ||||
|             pass_url = url | ||||
|         password_request = compat_urllib_request.Request(pass_url+'/password', data) | ||||
|         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         password_request.add_header('Cookie', 'xsrft=%s' % token) | ||||
|         self._download_webpage(password_request, video_id, | ||||
|                                u'Verifying the password', | ||||
|                                u'Wrong password') | ||||
|  | ||||
|     def _real_extract(self, url, new_video=True): | ||||
|         # Extract ID from URL | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         if not mobj.group('proto'): | ||||
|             url = 'https://' + url | ||||
|         if mobj.group('direct_link') or mobj.group('pro'): | ||||
|             url = 'https://vimeo.com/' + video_id | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         request = compat_urllib_request.Request(url, None, std_headers) | ||||
|         webpage = self._download_webpage(request, video_id) | ||||
|  | ||||
|         # Now we begin extracting as much information as we can from what we | ||||
|         # retrieved. First we extract the information common to all extractors, | ||||
|         # and latter we extract those that are Vimeo specific. | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         # Extract the config JSON | ||||
|         try: | ||||
|             config = webpage.split(' = {config:')[1].split(',assets:')[0] | ||||
|             config = json.loads(config) | ||||
|         except: | ||||
|             if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): | ||||
|                 raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option') | ||||
|  | ||||
|             if re.search('If so please provide the correct password.', webpage): | ||||
|                 self._verify_video_password(url, video_id, webpage) | ||||
|                 return self._real_extract(url) | ||||
|             else: | ||||
|                 raise ExtractorError(u'Unable to extract info section') | ||||
|  | ||||
|         # Extract title | ||||
|         video_title = config["video"]["title"] | ||||
|  | ||||
|         # Extract uploader and uploader_id | ||||
|         video_uploader = config["video"]["owner"]["name"] | ||||
|         video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None | ||||
|  | ||||
|         # Extract video thumbnail | ||||
|         video_thumbnail = config["video"]["thumbnail"] | ||||
|  | ||||
|         # Extract video description | ||||
|         video_description = get_element_by_attribute("itemprop", "description", webpage) | ||||
|         if video_description: video_description = clean_html(video_description) | ||||
|         else: video_description = u'' | ||||
|  | ||||
|         # Extract upload date | ||||
|         video_upload_date = None | ||||
|         mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage) | ||||
|         if mobj is not None: | ||||
|             video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3) | ||||
|  | ||||
|         # Vimeo specific: extract request signature and timestamp | ||||
|         sig = config['request']['signature'] | ||||
|         timestamp = config['request']['timestamp'] | ||||
|  | ||||
|         # Vimeo specific: extract video codec and quality information | ||||
|         # First consider quality, then codecs, then take everything | ||||
|         # TODO bind to format param | ||||
|         codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')] | ||||
|         files = { 'hd': [], 'sd': [], 'other': []} | ||||
|         for codec_name, codec_extension in codecs: | ||||
|             if codec_name in config["video"]["files"]: | ||||
|                 if 'hd' in config["video"]["files"][codec_name]: | ||||
|                     files['hd'].append((codec_name, codec_extension, 'hd')) | ||||
|                 elif 'sd' in config["video"]["files"][codec_name]: | ||||
|                     files['sd'].append((codec_name, codec_extension, 'sd')) | ||||
|                 else: | ||||
|                     files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0])) | ||||
|  | ||||
|         for quality in ('hd', 'sd', 'other'): | ||||
|             if len(files[quality]) > 0: | ||||
|                 video_quality = files[quality][0][2] | ||||
|                 video_codec = files[quality][0][0] | ||||
|                 video_extension = files[quality][0][1] | ||||
|                 self.to_screen(u'%s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality)) | ||||
|                 break | ||||
|         else: | ||||
|             raise ExtractorError(u'No known codec found') | ||||
|  | ||||
|         video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \ | ||||
|                     %(video_id, sig, timestamp, video_quality, video_codec.upper()) | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'uploader': video_uploader, | ||||
|             'uploader_id': video_uploader_id, | ||||
|             'upload_date':  video_upload_date, | ||||
|             'title':    video_title, | ||||
|             'ext':      video_extension, | ||||
|             'thumbnail':    video_thumbnail, | ||||
|             'description':  video_description, | ||||
|         }] | ||||
							
								
								
									
										46
									
								
								youtube_dl/extractor/vine.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								youtube_dl/extractor/vine.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,46 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class VineIE(InfoExtractor):
    """Information extractor for vine.co video pages."""
    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
    _TEST = {
        u'url': u'https://vine.co/v/b9KOOWX7HUx',
        u'file': u'b9KOOWX7HUx.mp4',
        u'md5': u'2f36fed6235b16da96ce9b4dc890940d',
        u'info_dict': {
            u"uploader": u"Jack Dorsey",
            u"title": u"Chicken."
        }
    }

    def _real_extract(self, url):
        """Download the Vine page for `url` and scrape the video metadata.

        The stream URL and the title are mandatory; thumbnail and uploader
        are looked up with fatal=False. Returns a one-element list holding
        the info dictionary.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # Always fetch the canonical https page, whatever form the input had
        page = self._download_webpage('https://vine.co/v/' + video_id, video_id)

        self.report_extraction(video_id)

        info = {'id': video_id, 'ext': 'mp4'}

        info['url'] = self._html_search_regex(
            r'<meta property="twitter:player:stream" content="(.+?)"',
            page, u'video URL')

        info['title'] = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            page, u'title')

        # The optional second group swallows a query string so that it does
        # not end up in the first group.
        info['thumbnail'] = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)(\?.*?)?"',
            page, u'thumbnail', fatal=False)

        info['uploader'] = self._html_search_regex(
            r'<div class="user">.*?<h2>(.+?)</h2>',
            page, u'uploader', fatal=False, flags=re.DOTALL)

        return [info]
							
								
								
									
										84
									
								
								youtube_dl/extractor/wat.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/wat.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| # coding: utf-8 | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class WatIE(InfoExtractor):
    """Information extractor for wat.tv, including multipart videos."""
    # Dots are escaped so that they only match literal dots in the URL
    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TEST = {
        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
        u'file': u'10631273.mp4',
        u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a',
        u'info_dict': {
            u'title': u'World War Z - Philadelphia VOST',
            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
        }
    }

    def download_video_info(self, real_id):
        """Download the info JSON for `real_id` and return its 'media' entry."""
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
        info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info')
        info = json.loads(info)
        return info['media']

    def _real_extract(self, url):
        """Extract a wat.tv video, or a playlist for a multipart video.

        When the first chapter's id differs from the id found in the page,
        the video is treated as multipart and a playlist with one URL entry
        per chapter is returned. Otherwise a single info dictionary is
        returned.
        """
        def real_id_for_chapter(chapter):
            # The chapter id is the part of 'tc_start' before the first dash
            return chapter['tc_start'].split('-')[0]

        mobj = re.match(self._VALID_URL, url)
        short_id = mobj.group('shortID')
        webpage = self._download_webpage(url, short_id)
        real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')

        video_info = self.download_video_info(real_id)
        chapters = video_info['chapters']
        first_chapter = chapters[0]

        if real_id_for_chapter(first_chapter) != real_id:
            self.to_screen('Multipart video detected')
            entries = []
            for chapter in chapters:
                chapter_id = real_id_for_chapter(chapter)
                # Yes, when this chapter is processed by WatIE,
                # it will download the info again
                chapter_info = self.download_video_info(chapter_id)
                entries.append(self.url_result(chapter_info['url']))
            return self.playlist_result(entries, real_id, video_info['title'])

        # Otherwise we can continue and extract just one part, we have to use
        # the short id for getting the video url
        player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id,
                                                     'html5': '1'})
        player_info = self._download_webpage('http://www.wat.tv/player?' + player_data,
                                             real_id, u'Downloading player info')
        player = json.loads(player_info)['player']
        html5_player = self._html_search_regex(r'iframe src="(.*?)"', player,
                                               'html5 player')
        player_webpage = self._download_webpage(html5_player, real_id,
                                                u'Downloading player webpage')

        video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage,
                                       'video url')
        info = {'id': real_id,
                'url': video_url,
                'ext': 'mp4',
                'title': first_chapter['title'],
                'thumbnail': first_chapter['preview'],
                'description': first_chapter['description'],
                'view_count': video_info['views'],
                }
        # The broadcast date is optional in the chapter data
        if 'date_diffusion' in first_chapter:
            info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])

        return info
							
								
								
									
										36
									
								
								youtube_dl/extractor/wimp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								youtube_dl/extractor/wimp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,36 @@ | ||||
| import re | ||||
| import base64 | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class WimpIE(InfoExtractor):
    """Information extractor for wimp.com video pages."""
    _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
    _TEST = {
        u'url': u'http://www.wimp.com/deerfence/',
        u'file': u'deerfence.flv',
        u'md5': u'8b215e2e0168c6081a1cf84b2846a2b5',
        u'info_dict': {
            u"title": u"Watch Till End: Herd of deer jump over a fence."
        }
    }

    def _real_extract(self, url):
        """Scrape title, thumbnail and the base64-encoded media URL.

        The page stores a base64 string in `googleCode`; once decoded, the
        media URL is taken from it with a regular expression. The extension
        is whatever follows the last dot of that URL. Returns a one-element
        list holding the info dictionary.
        """
        video_id = re.match(self._VALID_URL, url).group(1)
        page = self._download_webpage(url, video_id)

        title = self._search_regex(
            r'<meta name="description" content="(.+?)" />', page, 'video title')
        thumbnail_url = self._search_regex(
            r'<meta property="og\:image" content="(.+?)" />', page, 'video thumbnail')

        encoded = self._search_regex("googleCode = '(.*?)'", page, 'file url')
        decoded = base64.b64decode(encoded).decode('ascii')
        final_url = self._search_regex('","(.*?)"', decoded, 'final video url')

        # Text after the last dot of the URL
        _, _, ext = final_url.rpartition(u'.')

        entry = {
            'id':        video_id,
            'url':       final_url,
            'ext':       ext,
            'title':     title,
            'thumbnail': thumbnail_url,
        }
        return [entry]
|  | ||||
							
								
								
									
										56
									
								
								youtube_dl/extractor/worldstarhiphop.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								youtube_dl/extractor/worldstarhiphop.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,56 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class WorldStarHipHopIE(InfoExtractor):
    """Information extractor for worldstarhiphop.com and worldstarcandy.com."""
    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
    _TEST = {
        "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
        "file": "wshh6a7q1ny0G34ZwuIO.mp4",
        "md5": "9d04de741161603bf7071bbf4e883186",
        "info_dict": {
            "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
        }
    }

    def _real_extract(self, url):
        """Extract the media URL, title and thumbnail of a WSHH video page.

        If the media URL contains 'youtube', a URL result for the Youtube
        extractor is returned instead. Otherwise a one-element list holding
        the info dictionary is returned.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id)

        video_url = self._search_regex(
            r'so\.addVariable\("file","(.*?)"\)', page, u'video URL')

        if 'youtube' in video_url:
            self.to_screen(u'Youtube video detected:')
            return self.url_result(video_url, ie='Youtube')

        # Anything that does not mention mp4 is assumed to be flv
        ext = 'mp4' if 'mp4' in video_url else 'flv'

        video_title = self._html_search_regex(
            r"<title>(.*)</title>", page, u'title')

        # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
        thumbnail = self._html_search_regex(
            r'rel="image_src" href="(.*)" />', page, u'thumbnail', fatal=False)

        if not thumbnail:
            candy_title = re.search(r"""candytitles.*>(.*)</span>""", page)
            if candy_title is not None:
                video_title = candy_title.group(1)

        return [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': thumbnail,
            'ext': ext,
        }]
							
								
								
									
										71
									
								
								youtube_dl/extractor/xhamster.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								youtube_dl/extractor/xhamster.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,71 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
    # The dot after "www" is escaped so that it only matches a literal dot
    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
    _TEST = {
        u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
        u'file': u'1509445.flv',
        u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa',
        u'info_dict': {
            u"upload_date": u"20121014",
            u"uploader_id": u"Ruseful2011",
            u"title": u"FemaleAgent Shy beauty takes the bait"
        }
    }

    def _real_extract(self, url):
        """Extract media URL, title, upload date, uploader and thumbnail.

        Returns a one-element list holding the info dictionary. Raises
        ExtractorError when the media URL cannot be found in the page; a
        missing upload date only produces a warning.
        """
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        # Fetch a normalised page URL built from the numeric id only
        page_url = 'http://xhamster.com/movies/%s/.html' % video_id
        webpage = self._download_webpage(page_url, video_id)

        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        if not mobj.group('server'):
            # No server given: 'file' is itself a percent-encoded URL
            video_url = compat_urllib_parse.unquote(mobj.group('file'))
        else:
            video_url = mobj.group('server')+'/key='+mobj.group('file')
        video_extension = video_url.split('.')[-1]

        video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
            webpage, u'title')

        # No description is extracted: it does not seem to be shown anywhere
        # in the site's UI.

        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        if mobj:
            video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
        else:
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')

        video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            webpage, u'uploader id', default=u'anonymous')

        video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id':       video_id,
            'url':      video_url,
            'ext':      video_extension,
            'title':    video_title,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail': video_thumbnail
        }]
							
								
								
									
										53
									
								
								youtube_dl/extractor/xnxx.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								youtube_dl/extractor/xnxx.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
class XNXXIE(InfoExtractor):
    """Information extractor for video.xnxx.com."""
    _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
    # Patterns applied to the downloaded video page
    VIDEO_URL_RE = r'flv_url=(.*?)&'
    VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
    VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'
    _TEST = {
        u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
        u'file': u'1135332.flv',
        u'md5': u'0831677e2b4761795f68d417e0b7b445',
        u'info_dict': {
            u"title": u"lida \u00bb Naked Funny Actress  (5)"
        }
    }

    def _real_extract(self, url):
        """Scrape the flv URL, title and thumbnail from the video page.

        Returns a one-element list holding the info dictionary; uploader,
        upload date and description are always None. Raises ExtractorError
        when `url` does not match _VALID_URL.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = url_match.group(1)

        page = self._download_webpage(url, video_id)

        # The media URL is stored percent-encoded in the page
        encoded_url = self._search_regex(self.VIDEO_URL_RE, page, u'video URL')
        media_url = compat_urllib_parse.unquote(encoded_url)

        title = self._html_search_regex(self.VIDEO_TITLE_RE, page, u'title')

        thumb = self._search_regex(
            self.VIDEO_THUMB_RE, page, u'thumbnail', fatal=False)

        entry = {
            'id': video_id,
            'url': media_url,
            'uploader': None,
            'upload_date': None,
            'title': title,
            'ext': 'flv',
            'thumbnail': thumb,
            'description': None,
        }
        return [entry]
							
								
								
									
										51
									
								
								youtube_dl/extractor/xvideos.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								youtube_dl/extractor/xvideos.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,51 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
|  | ||||
|  | ||||
class XVideosIE(InfoExtractor):
    """Information extractor for xvideos.com."""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
    _TEST = {
        u'url': u'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
        u'file': u'939581.flv',
        u'md5': u'1d0c835822f0a71a7bf011855db929d0',
        u'info_dict': {
            u"title": u"Funny Porns By >>>>S<<<<<< -1"
        }
    }

    def _real_extract(self, url):
        """Scrape the flv URL, title and thumbnail URL from the video page.

        Returns a one-element list holding the info dictionary; uploader,
        upload date and description are always None.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        # Extract video URL (stored percent-encoded in the page)
        video_url = compat_urllib_parse.unquote(self._search_regex(r'flv_url=(.+?)&',
            webpage, u'video URL'))

        # Extract title
        video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XVID',
            webpage, u'title')

        # Extract video thumbnail.
        # The capturing group spans the whole URL: the search helper returns
        # the captured group, and capturing only the file name would yield
        # an unusable thumbnail value.
        video_thumbnail = self._search_regex(r'(http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9.]+jpg)',
            webpage, u'thumbnail', fatal=False)

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': 'flv',
            'thumbnail': video_thumbnail,
            'description': None,
        }

        return [info]
							
								
								
									
										121
									
								
								youtube_dl/extractor/yahoo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								youtube_dl/extractor/yahoo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,121 @@ | ||||
| import datetime | ||||
| import itertools | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor, SearchInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
class YahooIE(InfoExtractor):
    """Information extractor for Yahoo screen videos."""
    IE_DESC = u'Yahoo screen'
    _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
    _TEST = {
        u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
        u'file': u'214727115.flv',
        u'md5': u'2e717f169c1be93d84d3794a00d4a325',
        u'info_dict': {
            u"title": u"Julian Smith & Travis Legg Watch Julian Smith"
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        """Extract stream location and metadata for a Yahoo screen video.

        Two methods are used depending on the page: if it defines a
        CONTENT_ID, the YQL JSON service is queried with that id; otherwise
        the mrss REST endpoints are queried with the id from the URL.

        Returns a single info dictionary. Raises ExtractorError when the URL
        is invalid or when the expected info cannot be found in a response.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        m_id = re.search(r'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P<new_id>.+?)";', webpage)

        if m_id is None:
            # TODO: Check which url parameters are required
            info_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
            webpage = self._download_webpage(info_url, video_id, u'Downloading info webpage')
            info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
                        <description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
                        <media:pubStart><!\[CDATA\[(?P<date>.*?)\ .*\]\]></media:pubStart>.*
                        <media:content\ medium="image"\ url="(?P<thumb>.*?)"\ name="LARGETHUMB"
                        '''
            self.report_extraction(video_id)
            m_info = re.search(info_re, webpage, re.VERBOSE|re.DOTALL)
            if m_info is None:
                raise ExtractorError(u'Unable to extract video info')
            video_title = m_info.group('title')
            video_description = m_info.group('description')
            video_thumb = m_info.group('thumb')
            video_date = m_info.group('date')
            # Convert the captured m/d/Y date to YYYYMMDD
            video_date = datetime.datetime.strptime(video_date, '%m/%d/%Y').strftime('%Y%m%d')

            # TODO: Find a way to get mp4 videos
            rest_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;element=stream;outputformat=mrss;id=%s;lmsoverride=1;bw=375;dynamicstream=1;cb=83521105;tech=flv,mp4;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
            webpage = self._download_webpage(rest_url, video_id, u'Downloading video url webpage')
            m_rest = re.search(r'<media:content url="(?P<url>.*?)" path="(?P<path>.*?)"', webpage)
            # The match must be checked before its groups are read
            if m_rest is None:
                raise ExtractorError(u'Unable to extract video url')
            video_url = m_rest.group('url')
            video_path = m_rest.group('path')

        else: # We have to use a different method if another id is defined
            long_id = m_id.group('new_id')
            info_url = 'http://video.query.yahoo.com/v1/public/yql?q=SELECT%20*%20FROM%20yahoo.media.video.streams%20WHERE%20id%3D%22' + long_id + '%22%20AND%20format%3D%22mp4%2Cflv%22%20AND%20protocol%3D%22rtmp%2Chttp%22%20AND%20plrs%3D%2286Gj0vCaSzV_Iuf6hNylf2%22%20AND%20acctid%3D%22389%22%20AND%20plidl%3D%22%22%20AND%20pspid%3D%22792700001%22%20AND%20offnetwork%3D%22false%22%20AND%20site%3D%22ivy%22%20AND%20lang%3D%22en-US%22%20AND%20region%3D%22US%22%20AND%20override%3D%22none%22%3B&env=prod&format=json&callback=YUI.Env.JSONP.yui_3_8_1_1_1368368376830_335'
            webpage = self._download_webpage(info_url, video_id, u'Downloading info json')
            # The response is JSONP: strip the callback wrapper to get the JSON
            m_json = re.search(r'YUI.Env.JSONP.yui.*?\((.*?)\);', webpage)
            if m_json is None:
                raise ExtractorError(u'Unable to extract video info')
            info = json.loads(m_json.group(1))
            res = info[u'query'][u'results'][u'mediaObj'][0]
            stream = res[u'streams'][0]
            video_path = stream[u'path']
            video_url = stream[u'host']
            meta = res[u'meta']
            video_title = meta[u'title']
            video_description = meta[u'description']
            video_thumb = meta[u'thumbnail']
            video_date = None # I can't find it

        info_dict = {
                     'id': video_id,
                     'url': video_url,
                     'play_path': video_path,
                     'title':video_title,
                     'description': video_description,
                     'thumbnail': video_thumb,
                     'upload_date': video_date,
                     'ext': 'flv',
                     }
        return info_dict
|  | ||||
class YahooSearchIE(SearchInfoExtractor):
    """Search extractor that returns Yahoo screen videos as a playlist."""
    IE_DESC = u'Yahoo screen search'
    _MAX_RESULTS = 1000
    IE_NAME = u'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are requested 30 results apart until `n` entries have
        been collected, a page comes back empty, or the response metadata
        says the last result has been reached. Results in which no Yahoo
        screen URL can be found are skipped.

        Returns a playlist dictionary whose 'entries' are URL results for
        the 'Yahoo' extractor.
        """
        entries = []
        res = {
            '_type': 'playlist',
            'id': query,
            'entries': entries,
        }
        for pagenum in itertools.count(0):
            result_url = u'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
            webpage = self._download_webpage(result_url, query,
                                             note='Downloading results page '+str(pagenum+1))
            info = json.loads(webpage)
            m = info[u'm']
            results = info[u'results']

            for r in results:
                if len(entries) >= n:
                    break
                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                if mobj is None:
                    # No Yahoo screen URL in this result; nothing to add
                    continue
                entries.append(self.url_result('http://' + mobj.group('url'), 'Yahoo'))

            # Stop on the number of collected entries rather than on a loop
            # index, which is undefined when a page has no results.
            if len(entries) >= n or not results or m[u'last'] >= (m[u'total'] - 1):
                break

        return res
							
								
								
									
										53
									
								
								youtube_dl/extractor/youjizz.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								youtube_dl/extractor/youjizz.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
class YouJizzIE(InfoExtractor):
    """Extractor for youjizz.com video pages.

    The watch page only links to an embed page; the actual media URL is
    read from the embed page's player setup script.
    """

    # Literal dots are escaped so that '.html' cannot match e.g. 'Xhtml'.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
    _TEST = {
        u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
        u'file': u'2189178.flv',
        u'md5': u'07e15fa469ba384c7693fd246905547c',
        u'info_dict': {
            u"title": u"Zeichentrick 1"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for *url*.

        Raises ExtractorError when the URL does not match _VALID_URL or
        when the embed page link cannot be found in the watch page.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Provisional id taken from the URL slug; replaced below by the
        # numeric id found in the embed link.
        video_id = mobj.group('videoid')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        # Get the video title
        video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
            webpage, u'title').strip()

        # Get the embed page
        result = re.search(r'https?://www\.youjizz\.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
        if result is None:
            raise ExtractorError(u'ERROR: unable to extract embed page')

        embed_page_url = result.group(0).strip()
        video_id = result.group('videoid')

        webpage = self._download_webpage(embed_page_url, video_id)

        # Get the video URL
        video_url = self._search_regex(r'so\.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
            webpage, u'video URL')

        info = {'id': video_id,
                'url': video_url,
                'title': video_title,
                'ext': 'flv',
                'format': 'flv',
                'player_url': embed_page_url}

        return [info]
							
								
								
									
										116
									
								
								youtube_dl/extractor/youku.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								youtube_dl/extractor/youku.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,116 @@ | ||||
| # coding: utf-8 | ||||
|  | ||||
| import json | ||||
| import math | ||||
| import random | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
class YoukuIE(InfoExtractor):
    """Extractor for v.youku.com videos, which are served as several segments."""

    _VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
    _TEST =   {
        u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
        u"file": u"XNDgyMDQ2NTQw_part00.flv",
        u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
        u"params": { u"test": False },
        u"info_dict": {
            u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
        }
    }

    def _gen_sid(self):
        """Return a session id: current time in ms followed by two random numbers."""
        now_ms = int(time.time() * 1000)
        random1 = random.randint(1000, 1998)
        random2 = random.randint(1000, 9999)

        return "%d%d%d" % (now_ms, random1, random2)

    def _get_file_ID_mix_string(self, seed):
        """Return the alphabet shuffled deterministically from *seed*.

        Each step advances a linear congruential sequence and moves the
        character it selects from the remaining alphabet to the result,
        so the returned list is a permutation of the alphabet.
        """
        mixed = []
        # The backslash is written as an explicit escape; "\:" is an invalid
        # escape sequence that only happens to yield the same two characters.
        source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890")
        seed = float(seed)
        while source:
            seed = (seed * 211 + 30031) % 65536
            index = int(math.floor(seed / 65536 * len(source)))
            # All characters are distinct, so popping by position is the
            # same as removing the selected character by value.
            mixed.append(source.pop(index))
        return mixed

    def _get_file_id(self, fileId, seed):
        """Decode a '*'-separated list of indices into the real file id."""
        mixed = self._get_file_ID_mix_string(seed)
        # Empty fields (e.g. from a trailing '*') are ignored.
        return ''.join(mixed[int(ch)] for ch in fileId.split('*') if ch)

    def _real_extract(self, url):
        """Return one info dict per video segment.

        Raises ExtractorError when the URL does not match or when the
        playlist JSON lacks the expected fields.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('ID')

        info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id

        jsondata = self._download_webpage(info_url, video_id)

        self.report_extraction(video_id)
        try:
            config = json.loads(jsondata)
            video_data = config['data'][0]

            video_title = video_data['title']
            seed = video_data['seed']

            req_format = self._downloader.params.get('format', None)
            supported_formats = list(video_data['streamfileids'].keys())

            if req_format is None or req_format == 'best':
                stream_format = 'hd2' if 'hd2' in supported_formats else 'flv'
                ext = u'flv'
            elif req_format == 'worst':
                stream_format = 'mp4'
                ext = u'mp4'
            else:
                # Any other requested format falls back to flv.
                stream_format = 'flv'
                ext = u'flv'

            fileid = video_data['streamfileids'][stream_format]
            keys = [s['k'] for s in video_data['segs'][stream_format]]
        except (UnicodeDecodeError, ValueError, KeyError, IndexError):
            # IndexError covers an empty 'data' list.
            raise ExtractorError(u'Unable to extract info section')

        files_info = []
        sid = self._gen_sid()
        fileid = self._get_file_id(fileid, seed)

        # Characters 8 and 9 (zero-based) of the file id carry the segment
        # number; they are replaced by the two-digit uppercase hex index.
        for index, key in enumerate(keys):
            temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
            download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)

            files_info.append({
                'id': '%s_part%02d' % (video_id, index),
                'url': download_url,
                'uploader': None,
                'upload_date': None,
                'title': video_title,
                'ext': ext,
            })

        return files_info
							
								
								
									
										128
									
								
								youtube_dl/extractor/youporn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										128
									
								
								youtube_dl/extractor/youporn.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,128 @@ | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| import sys | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class YouPornIE(InfoExtractor):
    """Extractor for youporn.com watch pages.

    Metadata comes from the JSON passed to the page's ``new Video(...)``
    call; the downloadable formats come from the page's download list.
    """

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
    _TEST = {
        u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
        u'file': u'505835.mp4',
        u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
        u'info_dict': {
            u"upload_date": u"20101221",
            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
            u"uploader": u"Ask Dan And Jennifer",
            u"title": u"Sex Ed: Is It Safe To Masturbate Daily?"
        }
    }

    def _print_formats(self, formats):
        """Print all available formats"""
        print(u'Available formats:')
        print(u'ext\t\tformat')
        print(u'---------------------------------')
        for fmt in formats:
            print(u'%s\t\t%s'  % (fmt['ext'], fmt['format']))

    def _specific(self, req_format, formats):
        """Return the first entry whose 'format' equals *req_format*, else None."""
        for x in formats:
            if x["format"] == req_format:
                return x
        return None

    def _real_extract(self, url):
        """Return the info dict(s) for the requested format(s).

        Returns None after printing the format table when 'listformats'
        is set. Raises ExtractorError for an unmatched URL, unparsable or
        incomplete JSON metadata, an empty download list, or a requested
        format that is not offered.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        # Get JSON parameters
        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
        try:
            params = json.loads(json_params)
        except ValueError:
            # json.loads signals malformed input with ValueError; a bare
            # except would also swallow KeyboardInterrupt/SystemExit.
            raise ExtractorError(u'Invalid JSON')

        self.report_extraction(video_id)
        try:
            video_title = params['title']
            upload_date = unified_strdate(params['release_date_f'])
            video_description = params['description']
            video_uploader = params['submitted_by']
            thumbnail = params['thumbnails'][0]['image']
        except (KeyError, IndexError) as err:
            # Format the exception instead of concatenating it to a str,
            # which raised TypeError and hid the real problem.
            raise ExtractorError(u'Missing JSON parameter: %s' % err)

        # Get all of the formats available
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
            webpage, u'download list').strip()

        # Get all of the links from the page
        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
        links = re.findall(LINK_RE, download_list_html)
        if not links:
            raise ExtractorError(u'ERROR: no known formats available for video')

        self.to_screen(u'Links found: %d' % len(links))

        formats = []
        for link in links:

            # A link looks like this:
            # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
            # A path looks like this:
            # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
            video_url = unescapeHTML(link)
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]
            # Fifth path component is "<size>_<bitrate>_<id>"; the format
            # name is "<size>-<bitrate>".
            format_name = "-".join(path.split('/')[4].split('_')[:2])

            formats.append({
                'id': video_id,
                'url': video_url,
                'uploader': video_uploader,
                'upload_date': upload_date,
                'title': video_title,
                'ext': extension,
                'format': format_name,
                'thumbnail': thumbnail,
                'description': video_description
            })

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        req_format = self._downloader.params.get('format', None)
        self.to_screen(u'Format: %s' % req_format)

        # The first link is treated as best and the last as worst.
        if req_format is None or req_format == 'best':
            return [formats[0]]
        elif req_format == 'worst':
            return [formats[-1]]
        elif req_format in ('-1', 'all'):
            return formats
        else:
            chosen = self._specific(req_format, formats)
            if chosen is None:
                raise ExtractorError(u'Requested format not available')
            return [chosen]
							
								
								
									
										866
									
								
								youtube_dl/extractor/youtube.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										866
									
								
								youtube_dl/extractor/youtube.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,866 @@ | ||||
| # coding: utf-8 | ||||
|  | ||||
| import json | ||||
| import netrc | ||||
| import re | ||||
| import socket | ||||
|  | ||||
| from .common import InfoExtractor, SearchInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     compat_str, | ||||
|  | ||||
|     clean_html, | ||||
|     get_element_by_id, | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class YoutubeIE(InfoExtractor): | ||||
|     IE_DESC = u'YouTube.com' | ||||
|     _VALID_URL = r"""^ | ||||
|                      ( | ||||
|                          (?:https?://)?                                       # http(s):// (optional) | ||||
|                          (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/| | ||||
|                             tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains | ||||
|                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls | ||||
|                          (?:                                                  # the various things that can precede the ID: | ||||
|                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ | ||||
|                              |(?:                                             # or the v= param in all its forms | ||||
|                                  (?:watch|movie(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx) | ||||
|                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #! | ||||
|                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx) | ||||
|                                  v= | ||||
|                              ) | ||||
|                          )?                                                   # optional -> youtube.com/xxxx is OK | ||||
|                      )?                                                       # all until now is optional -> you can pass the naked ID | ||||
|                      ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID | ||||
|                      (?(1).+)?                                                # if we found the ID, everything can follow | ||||
|                      $""" | ||||
|     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' | ||||
|     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' | ||||
|     _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' | ||||
|     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' | ||||
|     _NETRC_MACHINE = 'youtube' | ||||
|     # Listed in order of quality | ||||
|     _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] | ||||
|     _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13'] | ||||
|     _video_extensions = { | ||||
|         '13': '3gp', | ||||
|         '17': 'mp4', | ||||
|         '18': 'mp4', | ||||
|         '22': 'mp4', | ||||
|         '37': 'mp4', | ||||
|         '38': 'mp4', | ||||
|         '43': 'webm', | ||||
|         '44': 'webm', | ||||
|         '45': 'webm', | ||||
|         '46': 'webm', | ||||
|     } | ||||
|     _video_dimensions = { | ||||
|         '5': '240x400', | ||||
|         '6': '???', | ||||
|         '13': '???', | ||||
|         '17': '144x176', | ||||
|         '18': '360x640', | ||||
|         '22': '720x1280', | ||||
|         '34': '360x640', | ||||
|         '35': '480x854', | ||||
|         '37': '1080x1920', | ||||
|         '38': '3072x4096', | ||||
|         '43': '360x640', | ||||
|         '44': '480x854', | ||||
|         '45': '720x1280', | ||||
|         '46': '1080x1920', | ||||
|     } | ||||
|     IE_NAME = u'youtube' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc", | ||||
|             u"file":  u"BaW_jenozKc.mp4", | ||||
|             u"info_dict": { | ||||
|                 u"title": u"youtube-dl test video \"'/\\ä↭𝕐", | ||||
|                 u"uploader": u"Philipp Hagemeister", | ||||
|                 u"uploader_id": u"phihag", | ||||
|                 u"upload_date": u"20121002", | ||||
|                 u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U", | ||||
|             u"file":  u"1ltcDfZMA3U.flv", | ||||
|             u"note": u"Test VEVO video (#897)", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": u"20070518", | ||||
|                 u"title": u"Maps - It Will Find You", | ||||
|                 u"description": u"Music video by Maps performing It Will Find You.", | ||||
|                 u"uploader": u"MuteUSA", | ||||
|                 u"uploader_id": u"MuteUSA" | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY", | ||||
|             u"file":  u"UxxajLWwzqY.mp4", | ||||
|             u"note": u"Test generic use_cipher_signature video (#897)", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": u"20120506", | ||||
|                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", | ||||
|                 u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c", | ||||
|                 u"uploader": u"IconaPop", | ||||
|                 u"uploader_id": u"IconaPop" | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         if YoutubePlaylistIE.suitable(url): return False | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|  | ||||
|     def report_lang(self): | ||||
|         """Report attempt to set language.""" | ||||
|         self.to_screen(u'Setting language') | ||||
|  | ||||
|     def report_login(self): | ||||
|         """Report attempt to log in.""" | ||||
|         self.to_screen(u'Logging in') | ||||
|  | ||||
|     def report_video_webpage_download(self, video_id): | ||||
|         """Report attempt to download video webpage.""" | ||||
|         self.to_screen(u'%s: Downloading video webpage' % video_id) | ||||
|  | ||||
|     def report_video_info_webpage_download(self, video_id): | ||||
|         """Report attempt to download video info webpage.""" | ||||
|         self.to_screen(u'%s: Downloading video info webpage' % video_id) | ||||
|  | ||||
|     def report_video_subtitles_download(self, video_id): | ||||
|         """Report attempt to download video info webpage.""" | ||||
|         self.to_screen(u'%s: Checking available subtitles' % video_id) | ||||
|  | ||||
|     def report_video_subtitles_request(self, video_id, sub_lang, format): | ||||
|         """Report attempt to download video info webpage.""" | ||||
|         self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format)) | ||||
|  | ||||
|     def report_video_subtitles_available(self, video_id, sub_lang_list): | ||||
|         """Report available subtitles.""" | ||||
|         sub_lang = ",".join(list(sub_lang_list.keys())) | ||||
|         self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang)) | ||||
|  | ||||
|     def report_information_extraction(self, video_id): | ||||
|         """Report attempt to extract video information.""" | ||||
|         self.to_screen(u'%s: Extracting video information' % video_id) | ||||
|  | ||||
|     def report_unavailable_format(self, video_id, format): | ||||
|         """Report extracted video URL.""" | ||||
|         self.to_screen(u'%s: Format %s not available' % (video_id, format)) | ||||
|  | ||||
|     def report_rtmp_download(self): | ||||
|         """Indicate the download will use the RTMP protocol.""" | ||||
|         self.to_screen(u'RTMP download detected') | ||||
|  | ||||
|     def _decrypt_signature(self, s): | ||||
|         """Turn the encrypted s field into a working signature""" | ||||
|  | ||||
|         if len(s) == 88: | ||||
|             return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] | ||||
|         elif len(s) == 87: | ||||
|             return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1] | ||||
|         elif len(s) == 86: | ||||
|             return s[2:63] + s[82] + s[64:82] + s[63] | ||||
|         elif len(s) == 85: | ||||
|             return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1] | ||||
|         elif len(s) == 84: | ||||
|             return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] | ||||
|         elif len(s) == 83: | ||||
|             return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36] | ||||
|         elif len(s) == 82: | ||||
|             return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] | ||||
|  | ||||
|         else: | ||||
|             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id): | ||||
|         self.report_video_subtitles_download(video_id) | ||||
|         request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) | ||||
|         try: | ||||
|             sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             return (u'unable to download video subtitles: %s' % compat_str(err), None) | ||||
|         sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) | ||||
|         sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) | ||||
|         if not sub_lang_list: | ||||
|             return (u'video doesn\'t have subtitles', None) | ||||
|         return sub_lang_list | ||||
|  | ||||
|     def _list_available_subtitles(self, video_id): | ||||
|         sub_lang_list = self._get_available_subtitles(video_id) | ||||
|         self.report_video_subtitles_available(video_id, sub_lang_list) | ||||
|  | ||||
|     def _request_subtitle(self, sub_lang, sub_name, video_id, format): | ||||
|         """ | ||||
|         Return tuple: | ||||
|         (error_message, sub_lang, sub) | ||||
|         """ | ||||
|         self.report_video_subtitles_request(video_id, sub_lang, format) | ||||
|         params = compat_urllib_parse.urlencode({ | ||||
|             'lang': sub_lang, | ||||
|             'name': sub_name, | ||||
|             'v': video_id, | ||||
|             'fmt': format, | ||||
|         }) | ||||
|         url = 'http://www.youtube.com/api/timedtext?' + params | ||||
|         try: | ||||
|             sub = compat_urllib_request.urlopen(url).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             return (u'unable to download video subtitles: %s' % compat_str(err), None, None) | ||||
|         if not sub: | ||||
|             return (u'Did not fetch video subtitles', None, None) | ||||
|         return (None, sub_lang, sub) | ||||
|  | ||||
|     def _request_automatic_caption(self, video_id, webpage): | ||||
|         """We need the webpage for getting the captions url, pass it as an | ||||
|            argument to speed up the process.""" | ||||
|         sub_lang = self._downloader.params.get('subtitleslang') or 'en' | ||||
|         sub_format = self._downloader.params.get('subtitlesformat') | ||||
|         self.to_screen(u'%s: Looking for automatic captions' % video_id) | ||||
|         mobj = re.search(r';ytplayer.config = ({.*?});', webpage) | ||||
|         err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang | ||||
|         if mobj is None: | ||||
|             return [(err_msg, None, None)] | ||||
|         player_config = json.loads(mobj.group(1)) | ||||
|         try: | ||||
|             args = player_config[u'args'] | ||||
|             caption_url = args[u'ttsurl'] | ||||
|             timestamp = args[u'timestamp'] | ||||
|             params = compat_urllib_parse.urlencode({ | ||||
|                 'lang': 'en', | ||||
|                 'tlang': sub_lang, | ||||
|                 'fmt': sub_format, | ||||
|                 'ts': timestamp, | ||||
|                 'kind': 'asr', | ||||
|             }) | ||||
|             subtitles_url = caption_url + '&' + params | ||||
|             sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') | ||||
|             return [(None, sub_lang, sub)] | ||||
|         except KeyError: | ||||
|             return [(err_msg, None, None)] | ||||
|  | ||||
|     def _extract_subtitle(self, video_id): | ||||
|         """ | ||||
|         Return a list with a tuple: | ||||
|         [(error_message, sub_lang, sub)] | ||||
|         """ | ||||
|         sub_lang_list = self._get_available_subtitles(video_id) | ||||
|         sub_format = self._downloader.params.get('subtitlesformat') | ||||
|         if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles | ||||
|             return [(sub_lang_list[0], None, None)] | ||||
|         if self._downloader.params.get('subtitleslang', False): | ||||
|             sub_lang = self._downloader.params.get('subtitleslang') | ||||
|         elif 'en' in sub_lang_list: | ||||
|             sub_lang = 'en' | ||||
|         else: | ||||
|             sub_lang = list(sub_lang_list.keys())[0] | ||||
|         if not sub_lang in sub_lang_list: | ||||
|             return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)] | ||||
|  | ||||
|         subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) | ||||
|         return [subtitle] | ||||
|  | ||||
|     def _extract_all_subtitles(self, video_id): | ||||
|         sub_lang_list = self._get_available_subtitles(video_id) | ||||
|         sub_format = self._downloader.params.get('subtitlesformat') | ||||
|         if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles | ||||
|             return [(sub_lang_list[0], None, None)] | ||||
|         subtitles = [] | ||||
|         for sub_lang in sub_lang_list: | ||||
|             subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) | ||||
|             subtitles.append(subtitle) | ||||
|         return subtitles | ||||
|  | ||||
|     def _print_formats(self, formats): | ||||
|         print('Available formats:') | ||||
|         for x in formats: | ||||
|             print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))) | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         if self._downloader is None: | ||||
|             return | ||||
|  | ||||
|         username = None | ||||
|         password = None | ||||
|         downloader_params = self._downloader.params | ||||
|  | ||||
|         # Attempt to use provided username and password or .netrc data | ||||
|         if downloader_params.get('username', None) is not None: | ||||
|             username = downloader_params['username'] | ||||
|             password = downloader_params['password'] | ||||
|         elif downloader_params.get('usenetrc', False): | ||||
|             try: | ||||
|                 info = netrc.netrc().authenticators(self._NETRC_MACHINE) | ||||
|                 if info is not None: | ||||
|                     username = info[0] | ||||
|                     password = info[2] | ||||
|                 else: | ||||
|                     raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) | ||||
|             except (IOError, netrc.NetrcParseError) as err: | ||||
|                 self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err)) | ||||
|                 return | ||||
|  | ||||
|         # Set language | ||||
|         request = compat_urllib_request.Request(self._LANG_URL) | ||||
|         try: | ||||
|             self.report_lang() | ||||
|             compat_urllib_request.urlopen(request).read() | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to set language: %s' % compat_str(err)) | ||||
|             return | ||||
|  | ||||
|         # No authentication to be performed | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL) | ||||
|         try: | ||||
|             login_page = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err)) | ||||
|             return | ||||
|  | ||||
|         galx = None | ||||
|         dsh = None | ||||
|         match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page) | ||||
|         if match: | ||||
|           galx = match.group(1) | ||||
|  | ||||
|         match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page) | ||||
|         if match: | ||||
|           dsh = match.group(1) | ||||
|  | ||||
|         # Log in | ||||
|         login_form_strs = { | ||||
|                 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', | ||||
|                 u'Email': username, | ||||
|                 u'GALX': galx, | ||||
|                 u'Passwd': password, | ||||
|                 u'PersistentCookie': u'yes', | ||||
|                 u'_utf8': u'霱', | ||||
|                 u'bgresponse': u'js_disabled', | ||||
|                 u'checkConnection': u'', | ||||
|                 u'checkedDomains': u'youtube', | ||||
|                 u'dnConn': u'', | ||||
|                 u'dsh': dsh, | ||||
|                 u'pstMsg': u'0', | ||||
|                 u'rmShown': u'1', | ||||
|                 u'secTok': u'', | ||||
|                 u'signIn': u'Sign in', | ||||
|                 u'timeStmp': u'', | ||||
|                 u'service': u'youtube', | ||||
|                 u'uilel': u'3', | ||||
|                 u'hl': u'en_US', | ||||
|         } | ||||
|         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode | ||||
|         # chokes on unicode | ||||
|         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) | ||||
|         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, login_data) | ||||
|         try: | ||||
|             self.report_login() | ||||
|             login_results = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: | ||||
|                 self._downloader.report_warning(u'unable to log in: bad username or password') | ||||
|                 return | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) | ||||
|             return | ||||
|  | ||||
|         # Confirm age | ||||
|         age_form = { | ||||
|                 'next_url':     '/', | ||||
|                 'action_confirm':   'Confirm', | ||||
|                 } | ||||
|         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) | ||||
|         try: | ||||
|             self.report_age_confirmation() | ||||
|             compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err)) | ||||
|  | ||||
|     def _extract_id(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url, re.VERBOSE) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group(2) | ||||
|         return video_id | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url): | ||||
|             self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).') | ||||
|  | ||||
|         # Extract original video URL from URL with redirection, like age verification, using next_url parameter | ||||
|         mobj = re.search(self._NEXT_URL_RE, url) | ||||
|         if mobj: | ||||
|             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/') | ||||
|         video_id = self._extract_id(url) | ||||
|  | ||||
|         # Get video webpage | ||||
|         self.report_video_webpage_download(video_id) | ||||
|         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id | ||||
|         request = compat_urllib_request.Request(url) | ||||
|         try: | ||||
|             video_webpage_bytes = compat_urllib_request.urlopen(request).read() | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err)) | ||||
|  | ||||
|         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore') | ||||
|  | ||||
|         # Attempt to extract SWF player URL | ||||
|         mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) | ||||
|         if mobj is not None: | ||||
|             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1)) | ||||
|         else: | ||||
|             player_url = None | ||||
|  | ||||
|         # Get video info | ||||
|         self.report_video_info_webpage_download(video_id) | ||||
|         for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: | ||||
|             video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' | ||||
|                     % (video_id, el_type)) | ||||
|             video_info_webpage = self._download_webpage(video_info_url, video_id, | ||||
|                                     note=False, | ||||
|                                     errnote='unable to download video info webpage') | ||||
|             video_info = compat_parse_qs(video_info_webpage) | ||||
|             if 'token' in video_info: | ||||
|                 break | ||||
|         if 'token' not in video_info: | ||||
|             if 'reason' in video_info: | ||||
|                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True) | ||||
|             else: | ||||
|                 raise ExtractorError(u'"token" parameter not in video info for unknown reason') | ||||
|  | ||||
|         # Check for "rental" videos | ||||
|         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: | ||||
|             raise ExtractorError(u'"rental" videos not supported') | ||||
|  | ||||
|         # Start extracting information | ||||
|         self.report_information_extraction(video_id) | ||||
|  | ||||
|         # uploader | ||||
|         if 'author' not in video_info: | ||||
|             raise ExtractorError(u'Unable to extract uploader name') | ||||
|         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0]) | ||||
|  | ||||
|         # uploader_id | ||||
|         video_uploader_id = None | ||||
|         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage) | ||||
|         if mobj is not None: | ||||
|             video_uploader_id = mobj.group(1) | ||||
|         else: | ||||
|             self._downloader.report_warning(u'unable to extract uploader nickname') | ||||
|  | ||||
|         # title | ||||
|         if 'title' not in video_info: | ||||
|             raise ExtractorError(u'Unable to extract video title') | ||||
|         video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) | ||||
|  | ||||
|         # thumbnail image | ||||
|         if 'thumbnail_url' not in video_info: | ||||
|             self._downloader.report_warning(u'unable to extract video thumbnail') | ||||
|             video_thumbnail = '' | ||||
|         else:   # don't panic if we can't find it | ||||
|             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0]) | ||||
|  | ||||
|         # upload date | ||||
|         upload_date = None | ||||
|         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL) | ||||
|         if mobj is not None: | ||||
|             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|  | ||||
|         # description | ||||
|         video_description = get_element_by_id("eow-description", video_webpage) | ||||
|         if video_description: | ||||
|             video_description = clean_html(video_description) | ||||
|         else: | ||||
|             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage) | ||||
|             if fd_mobj: | ||||
|                 video_description = unescapeHTML(fd_mobj.group(1)) | ||||
|             else: | ||||
|                 video_description = u'' | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = None | ||||
|  | ||||
|         if self._downloader.params.get('writesubtitles', False): | ||||
|             video_subtitles = self._extract_subtitle(video_id) | ||||
|             if video_subtitles: | ||||
|                 (sub_error, sub_lang, sub) = video_subtitles[0] | ||||
|                 if sub_error: | ||||
|                     self._downloader.report_warning(sub_error) | ||||
|          | ||||
|         if self._downloader.params.get('writeautomaticsub', False): | ||||
|             video_subtitles = self._request_automatic_caption(video_id, video_webpage) | ||||
|             (sub_error, sub_lang, sub) = video_subtitles[0] | ||||
|             if sub_error: | ||||
|                 self._downloader.report_warning(sub_error) | ||||
|  | ||||
|         if self._downloader.params.get('allsubtitles', False): | ||||
|             video_subtitles = self._extract_all_subtitles(video_id) | ||||
|             for video_subtitle in video_subtitles: | ||||
|                 (sub_error, sub_lang, sub) = video_subtitle | ||||
|                 if sub_error: | ||||
|                     self._downloader.report_warning(sub_error) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id) | ||||
|             return | ||||
|  | ||||
|         if 'length_seconds' not in video_info: | ||||
|             self._downloader.report_warning(u'unable to extract video duration') | ||||
|             video_duration = '' | ||||
|         else: | ||||
|             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]) | ||||
|  | ||||
|         # Decide which formats to download | ||||
|         req_format = self._downloader.params.get('format', None) | ||||
|  | ||||
|         try: | ||||
|             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage) | ||||
|             if not mobj: | ||||
|                 raise ValueError('Could not find vevo ID') | ||||
|             info = json.loads(mobj.group(1)) | ||||
|             args = info['args'] | ||||
|             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map | ||||
|             # this signatures are encrypted | ||||
|             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map']) | ||||
|             if m_s is not None: | ||||
|                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id) | ||||
|                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] | ||||
|         except ValueError: | ||||
|             pass | ||||
|  | ||||
|         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): | ||||
|             self.report_rtmp_download() | ||||
|             video_url_list = [(None, video_info['conn'][0])] | ||||
|         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: | ||||
|             url_map = {} | ||||
|             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','): | ||||
|                 url_data = compat_parse_qs(url_data_str) | ||||
|                 if 'itag' in url_data and 'url' in url_data: | ||||
|                     url = url_data['url'][0] | ||||
|                     if 'sig' in url_data: | ||||
|                         url += '&signature=' + url_data['sig'][0] | ||||
|                     elif 's' in url_data: | ||||
|                         if self._downloader.params.get('verbose'): | ||||
|                             s = url_data['s'][0] | ||||
|                             player = self._search_regex(r'html5player-(.+?)\.js', video_webpage, | ||||
|                                 'html5 player', fatal=False) | ||||
|                             self.to_screen('encrypted signature length %d (%d.%d), itag %s, html5 player %s' % | ||||
|                                 (len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player)) | ||||
|                         signature = self._decrypt_signature(url_data['s'][0]) | ||||
|                         url += '&signature=' + signature | ||||
|                     if 'ratebypass' not in url: | ||||
|                         url += '&ratebypass=yes' | ||||
|                     url_map[url_data['itag'][0]] = url | ||||
|  | ||||
|             format_limit = self._downloader.params.get('format_limit', None) | ||||
|             available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats | ||||
|             if format_limit is not None and format_limit in available_formats: | ||||
|                 format_list = available_formats[available_formats.index(format_limit):] | ||||
|             else: | ||||
|                 format_list = available_formats | ||||
|             existing_formats = [x for x in format_list if x in url_map] | ||||
|             if len(existing_formats) == 0: | ||||
|                 raise ExtractorError(u'no known formats available for video') | ||||
|             if self._downloader.params.get('listformats', None): | ||||
|                 self._print_formats(existing_formats) | ||||
|                 return | ||||
|             if req_format is None or req_format == 'best': | ||||
|                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality | ||||
|             elif req_format == 'worst': | ||||
|                 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality | ||||
|             elif req_format in ('-1', 'all'): | ||||
|                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats | ||||
|             else: | ||||
|                 # Specific formats. We pick the first in a slash-delimeted sequence. | ||||
|                 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'. | ||||
|                 req_formats = req_format.split('/') | ||||
|                 video_url_list = None | ||||
|                 for rf in req_formats: | ||||
|                     if rf in url_map: | ||||
|                         video_url_list = [(rf, url_map[rf])] | ||||
|                         break | ||||
|                 if video_url_list is None: | ||||
|                     raise ExtractorError(u'requested format not available') | ||||
|         else: | ||||
|             raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info') | ||||
|  | ||||
|         results = [] | ||||
|         for format_param, video_real_url in video_url_list: | ||||
|             # Extension | ||||
|             video_extension = self._video_extensions.get(format_param, 'flv') | ||||
|  | ||||
|             video_format = '{0} - {1}'.format(format_param if format_param else video_extension, | ||||
|                                               self._video_dimensions.get(format_param, '???')) | ||||
|  | ||||
|             results.append({ | ||||
|                 'id':       video_id, | ||||
|                 'url':      video_real_url, | ||||
|                 'uploader': video_uploader, | ||||
|                 'uploader_id': video_uploader_id, | ||||
|                 'upload_date':  upload_date, | ||||
|                 'title':    video_title, | ||||
|                 'ext':      video_extension, | ||||
|                 'format':   video_format, | ||||
|                 'thumbnail':    video_thumbnail, | ||||
|                 'description':  video_description, | ||||
|                 'player_url':   player_url, | ||||
|                 'subtitles':    video_subtitles, | ||||
|                 'duration':     video_duration | ||||
|             }) | ||||
|         return results | ||||
|  | ||||
| class YoutubePlaylistIE(InfoExtractor): | ||||
|     IE_DESC = u'YouTube.com playlists' | ||||
|     _VALID_URL = r"""(?: | ||||
|                         (?:https?://)? | ||||
|                         (?:\w+\.)? | ||||
|                         youtube\.com/ | ||||
|                         (?: | ||||
|                            (?:course|view_play_list|my_playlists|artist|playlist|watch) | ||||
|                            \? (?:.*?&)*? (?:p|a|list)= | ||||
|                         |  p/ | ||||
|                         ) | ||||
|                         ((?:PL|EC|UU)?[0-9A-Za-z-_]{10,}) | ||||
|                         .* | ||||
|                      | | ||||
|                         ((?:PL|EC|UU)[0-9A-Za-z-_]{10,}) | ||||
|                      )""" | ||||
|     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none' | ||||
|     _MAX_RESULTS = 50 | ||||
|     IE_NAME = u'youtube:playlist' | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract playlist id | ||||
|         mobj = re.match(self._VALID_URL, url, re.VERBOSE) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         # Download playlist videos from API | ||||
|         playlist_id = mobj.group(1) or mobj.group(2) | ||||
|         page_num = 1 | ||||
|         videos = [] | ||||
|  | ||||
|         while True: | ||||
|             url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, self._MAX_RESULTS * (page_num - 1) + 1) | ||||
|             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) | ||||
|  | ||||
|             try: | ||||
|                 response = json.loads(page) | ||||
|             except ValueError as err: | ||||
|                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) | ||||
|  | ||||
|             if 'feed' not in response: | ||||
|                 raise ExtractorError(u'Got a malformed response from YouTube API') | ||||
|             playlist_title = response['feed']['title']['$t'] | ||||
|             if 'entry' not in response['feed']: | ||||
|                 # Number of videos is a multiple of self._MAX_RESULTS | ||||
|                 break | ||||
|  | ||||
|             for entry in response['feed']['entry']: | ||||
|                 index = entry['yt$position']['$t'] | ||||
|                 if 'media$group' in entry and 'media$player' in entry['media$group']: | ||||
|                     videos.append((index, entry['media$group']['media$player']['url'])) | ||||
|  | ||||
|             if len(response['feed']['entry']) < self._MAX_RESULTS: | ||||
|                 break | ||||
|             page_num += 1 | ||||
|  | ||||
|         videos = [v[1] for v in sorted(videos)] | ||||
|  | ||||
|         url_results = [self.url_result(url, 'Youtube') for url in videos] | ||||
|         return [self.playlist_result(url_results, playlist_id, playlist_title)] | ||||
|  | ||||
|  | ||||
| class YoutubeChannelIE(InfoExtractor): | ||||
|     IE_DESC = u'YouTube.com channels' | ||||
|     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" | ||||
|     _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en' | ||||
|     _MORE_PAGES_INDICATOR = 'yt-uix-load-more' | ||||
|     _MORE_PAGES_URL = 'http://www.youtube.com/channel_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' | ||||
|     IE_NAME = u'youtube:channel' | ||||
|  | ||||
|     def extract_videos_from_page(self, page): | ||||
|         ids_in_page = [] | ||||
|         for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page): | ||||
|             if mobj.group(1) not in ids_in_page: | ||||
|                 ids_in_page.append(mobj.group(1)) | ||||
|         return ids_in_page | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract channel id | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         # Download channel page | ||||
|         channel_id = mobj.group(1) | ||||
|         video_ids = [] | ||||
|         pagenum = 1 | ||||
|  | ||||
|         url = self._TEMPLATE_URL % (channel_id, pagenum) | ||||
|         page = self._download_webpage(url, channel_id, | ||||
|                                       u'Downloading page #%s' % pagenum) | ||||
|  | ||||
|         # Extract video identifiers | ||||
|         ids_in_page = self.extract_videos_from_page(page) | ||||
|         video_ids.extend(ids_in_page) | ||||
|  | ||||
|         # Download any subsequent channel pages using the json-based channel_ajax query | ||||
|         if self._MORE_PAGES_INDICATOR in page: | ||||
|             while True: | ||||
|                 pagenum = pagenum + 1 | ||||
|  | ||||
|                 url = self._MORE_PAGES_URL % (pagenum, channel_id) | ||||
|                 page = self._download_webpage(url, channel_id, | ||||
|                                               u'Downloading page #%s' % pagenum) | ||||
|  | ||||
|                 page = json.loads(page) | ||||
|  | ||||
|                 ids_in_page = self.extract_videos_from_page(page['content_html']) | ||||
|                 video_ids.extend(ids_in_page) | ||||
|  | ||||
|                 if self._MORE_PAGES_INDICATOR  not in page['load_more_widget_html']: | ||||
|                     break | ||||
|  | ||||
|         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) | ||||
|  | ||||
|         urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] | ||||
|         url_entries = [self.url_result(url, 'Youtube') for url in urls] | ||||
|         return [self.playlist_result(url_entries, channel_id)] | ||||
|  | ||||
|  | ||||
| class YoutubeUserIE(InfoExtractor): | ||||
|     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' | ||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)' | ||||
|     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' | ||||
|     _GDATA_PAGE_SIZE = 50 | ||||
|     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' | ||||
|     _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]' | ||||
|     IE_NAME = u'youtube:user' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract username | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         username = mobj.group(1) | ||||
|  | ||||
|         # Download video ids using YouTube Data API. Result size per | ||||
|         # query is limited (currently to 50 videos) so we need to query | ||||
|         # page by page until there are no video ids - it means we got | ||||
|         # all of them. | ||||
|  | ||||
|         video_ids = [] | ||||
|         pagenum = 0 | ||||
|  | ||||
|         while True: | ||||
|             start_index = pagenum * self._GDATA_PAGE_SIZE + 1 | ||||
|  | ||||
|             gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index) | ||||
|             page = self._download_webpage(gdata_url, username, | ||||
|                                           u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE)) | ||||
|  | ||||
|             # Extract video identifiers | ||||
|             ids_in_page = [] | ||||
|  | ||||
|             for mobj in re.finditer(self._VIDEO_INDICATOR, page): | ||||
|                 if mobj.group(1) not in ids_in_page: | ||||
|                     ids_in_page.append(mobj.group(1)) | ||||
|  | ||||
|             video_ids.extend(ids_in_page) | ||||
|  | ||||
|             # A little optimization - if current page is not | ||||
|             # "full", ie. does not contain PAGE_SIZE video ids then | ||||
|             # we can assume that this page is the last one - there | ||||
|             # are no more ids on further pages - no need to query | ||||
|             # again. | ||||
|  | ||||
|             if len(ids_in_page) < self._GDATA_PAGE_SIZE: | ||||
|                 break | ||||
|  | ||||
|             pagenum += 1 | ||||
|  | ||||
|         urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] | ||||
|         url_results = [self.url_result(url, 'Youtube') for url in urls] | ||||
|         return [self.playlist_result(url_results, playlist_title = username)] | ||||
|  | ||||
| class YoutubeSearchIE(SearchInfoExtractor): | ||||
|     IE_DESC = u'YouTube.com searches' | ||||
|     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc' | ||||
|     _MAX_RESULTS = 1000 | ||||
|     IE_NAME = u'youtube:search' | ||||
|     _SEARCH_KEY = 'ytsearch' | ||||
|  | ||||
|     def report_download_page(self, query, pagenum): | ||||
|         """Report attempt to download search page with given number.""" | ||||
|         self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) | ||||
|  | ||||
|     def _get_n_results(self, query, n): | ||||
|         """Get a specified number of results for a query""" | ||||
|  | ||||
|         video_ids = [] | ||||
|         pagenum = 0 | ||||
|         limit = n | ||||
|  | ||||
|         while (50 * pagenum) < limit: | ||||
|             self.report_download_page(query, pagenum+1) | ||||
|             result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1) | ||||
|             request = compat_urllib_request.Request(result_url) | ||||
|             try: | ||||
|                 data = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                 raise ExtractorError(u'Unable to download API page: %s' % compat_str(err)) | ||||
|             api_response = json.loads(data)['data'] | ||||
|  | ||||
|             if not 'items' in api_response: | ||||
|                 raise ExtractorError(u'[youtube] No video results') | ||||
|  | ||||
|             new_ids = list(video['id'] for video in api_response['items']) | ||||
|             video_ids += new_ids | ||||
|  | ||||
|             limit = min(n, api_response['totalItems']) | ||||
|             pagenum += 1 | ||||
|  | ||||
|         if len(video_ids) > n: | ||||
|             video_ids = video_ids[:n] | ||||
|         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] | ||||
|         return self.playlist_result(videos, query) | ||||
|  | ||||
|  | ||||
| class YoutubeShowIE(InfoExtractor): | ||||
|     IE_DESC = u'YouTube.com (multi-season) shows' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' | ||||
|     IE_NAME = u'youtube:show' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         show_name = mobj.group(1) | ||||
|         webpage = self._download_webpage(url, show_name, u'Downloading show webpage') | ||||
|         # There's one playlist for each season of the show | ||||
|         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) | ||||
|         self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons))) | ||||
|         return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] | ||||
							
								
								
									
										65
									
								
								youtube_dl/extractor/zdf.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								youtube_dl/extractor/zdf.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
| class ZDFIE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?' | ||||
|     _TITLE = r'<h1(?: class="beitragHeadline")?>(?P<title>.*)</h1>' | ||||
|     _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>' | ||||
|     _MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"' | ||||
|     _RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         html = self._download_webpage(url, video_id) | ||||
|         streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)] | ||||
|         if streams is None: | ||||
|             raise ExtractorError(u'No media url found.') | ||||
|  | ||||
|         # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url | ||||
|         # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url | ||||
|         # choose first/default media type and highest quality for now | ||||
|         for s in streams:        #find 300 - dsl1000mbit | ||||
|             if s['quality'] == '300' and s['media_type'] == 'wstreaming': | ||||
|                 stream_=s | ||||
|                 break | ||||
|         for s in streams:        #find veryhigh - dsl2000mbit | ||||
|             if s['quality'] == 'veryhigh' and s['media_type'] == 'wstreaming': # 'hstreaming' - rtsp is not working | ||||
|                 stream_=s | ||||
|                 break | ||||
|         if stream_ is None: | ||||
|             raise ExtractorError(u'No stream found.') | ||||
|  | ||||
|         media_link = self._download_webpage(stream_['video_url'], video_id,'Get stream URL') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         mobj = re.search(self._TITLE, html) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Cannot extract title') | ||||
|         title = unescapeHTML(mobj.group('title')) | ||||
|  | ||||
|         mobj = re.search(self._MMS_STREAM, media_link) | ||||
|         if mobj is None: | ||||
|             mobj = re.search(self._RTSP_STREAM, media_link) | ||||
|             if mobj is None: | ||||
|                 raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL') | ||||
|         mms_url = mobj.group('video_url') | ||||
|  | ||||
|         mobj = re.search('(.*)[.](?P<ext>[^.]+)', mms_url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Cannot extract extention') | ||||
|         ext = mobj.group('ext') | ||||
|  | ||||
|         return [{'id': video_id, | ||||
|                  'url': mms_url, | ||||
|                  'title': title, | ||||
|                  'ext': ext | ||||
|                  }] | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user