Compare commits
1000 Commits
2016.06.11
...
2016.09.24
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e6332059ac | ||
|
|
8eec691e8a | ||
|
|
24628cf7db | ||
|
|
71ad00c09f | ||
|
|
45cae3b021 | ||
|
|
4ddcb5999d | ||
|
|
628406db96 | ||
|
|
e3d6bdc8fc | ||
|
|
0a439c5c4c | ||
|
|
1978540a51 | ||
|
|
12f211d0cb | ||
|
|
3a5a18705f | ||
|
|
1ae0ae5db0 | ||
|
|
f62a77b99a | ||
|
|
4bfd294e2f | ||
|
|
e33a7253b2 | ||
|
|
c38f06818d | ||
|
|
cb57386873 | ||
|
|
59fd8f931d | ||
|
|
70b4cf9b1b | ||
|
|
cc764a6da8 | ||
|
|
d8dbf8707d | ||
|
|
a1da888d0c | ||
|
|
3acff9423d | ||
|
|
9ca93b99d1 | ||
|
|
14ae11efab | ||
|
|
190d2027d0 | ||
|
|
26394d021d | ||
|
|
30d0b549be | ||
|
|
86f4d14f81 | ||
|
|
21d21b0c72 | ||
|
|
b4c1d6e800 | ||
|
|
a0d5077c8d | ||
|
|
584d6f3457 | ||
|
|
e14c82bd6b | ||
|
|
c51a7f0b2f | ||
|
|
d05ef09d9d | ||
|
|
30d9e20938 | ||
|
|
fc86d4eed0 | ||
|
|
7d273a387a | ||
|
|
6ad0219556 | ||
|
|
98b7506e96 | ||
|
|
52dc8a9b3f | ||
|
|
9d8985a165 | ||
|
|
f5e008d134 | ||
|
|
e6bf3621e7 | ||
|
|
490b755769 | ||
|
|
1dec2c8a0e | ||
|
|
dcce092e0a | ||
|
|
32443dd346 | ||
|
|
2133565cec | ||
|
|
1da50aa34e | ||
|
|
d2522b86ac | ||
|
|
537f753399 | ||
|
|
c849836854 | ||
|
|
eb5b1fc021 | ||
|
|
95be29e1c6 | ||
|
|
c035dba19e | ||
|
|
87148bb711 | ||
|
|
797c636bcb | ||
|
|
0002962f3f | ||
|
|
3e4185c396 | ||
|
|
f6717dec8a | ||
|
|
a942d6cb48 | ||
|
|
961516bfd1 | ||
|
|
6db354a9f4 | ||
|
|
353f340e11 | ||
|
|
014b7e6b25 | ||
|
|
925194022c | ||
|
|
b690ea15eb | ||
|
|
5712c0f426 | ||
|
|
86d68f906e | ||
|
|
4875ff6847 | ||
|
|
1b6712ab23 | ||
|
|
8414c2da31 | ||
|
|
45396dd2ed | ||
|
|
7a7309219c | ||
|
|
fcba157e80 | ||
|
|
a6ccc3e518 | ||
|
|
1d16035bb4 | ||
|
|
e8bcd982cc | ||
|
|
a5ff05df1a | ||
|
|
d002e91986 | ||
|
|
546edb2efa | ||
|
|
be45730226 | ||
|
|
ee7e672eb0 | ||
|
|
0307d6fba6 | ||
|
|
fc150cba1d | ||
|
|
d667ab7fad | ||
|
|
eb87d4545a | ||
|
|
1c81476cbb | ||
|
|
bc9186c882 | ||
|
|
6599c72527 | ||
|
|
6bb05b32a9 | ||
|
|
fea74acad8 | ||
|
|
f01115c933 | ||
|
|
2cdbc06a1f | ||
|
|
2cb93afcd8 | ||
|
|
bfcda07a27 | ||
|
|
001a5fd3d7 | ||
|
|
1e35999c1e | ||
|
|
2512b17493 | ||
|
|
56c0ead4d3 | ||
|
|
7324243750 | ||
|
|
84a18e9b90 | ||
|
|
b29f842e0e | ||
|
|
f009fcac0d | ||
|
|
6c3affcb18 | ||
|
|
1e19ff2984 | ||
|
|
c6129feb7f | ||
|
|
bb5ebd4453 | ||
|
|
cb9cbd84ed | ||
|
|
4d5726b0d7 | ||
|
|
4614ad7b59 | ||
|
|
b717837190 | ||
|
|
2abad67e52 | ||
|
|
ad0e2b3359 | ||
|
|
37720844f6 | ||
|
|
6cfcb8ac36 | ||
|
|
7a979da8cb | ||
|
|
2fdc7b0e04 | ||
|
|
010d034fca | ||
|
|
02e552886f | ||
|
|
25042f7372 | ||
|
|
3f612f0767 | ||
|
|
17bf6e71cc | ||
|
|
881f35479d | ||
|
|
89f257d6e5 | ||
|
|
e78a5428b6 | ||
|
|
6656a82481 | ||
|
|
d7e794928d | ||
|
|
9c27188988 | ||
|
|
b84d311d53 | ||
|
|
f87feb4b68 | ||
|
|
2841bdcebb | ||
|
|
84b91dd4e3 | ||
|
|
92c9c2a88b | ||
|
|
9d54b02bae | ||
|
|
846d8b76a0 | ||
|
|
aa3f9fe695 | ||
|
|
8258f4457c | ||
|
|
948cd5b72d | ||
|
|
8d3737cda7 | ||
|
|
155bc674c4 | ||
|
|
c33c962adf | ||
|
|
bdcc046d12 | ||
|
|
a493f10208 | ||
|
|
f3eeaacb4e | ||
|
|
b4d6a85d60 | ||
|
|
0b36a96212 | ||
|
|
bc22a79694 | ||
|
|
340e31ca74 | ||
|
|
973dee491f | ||
|
|
1f85029d82 | ||
|
|
95be19d436 | ||
|
|
95843da529 | ||
|
|
abf2c79f95 | ||
|
|
b49ad71ce1 | ||
|
|
9127e1533d | ||
|
|
78e762d23c | ||
|
|
4809490108 | ||
|
|
8112bfeaba | ||
|
|
d9606d9b6c | ||
|
|
433af6ad30 | ||
|
|
feaa5ad787 | ||
|
|
100bd86a68 | ||
|
|
0def758782 | ||
|
|
919cf1a62f | ||
|
|
b29cd56591 | ||
|
|
622638512b | ||
|
|
37c7490ac6 | ||
|
|
091624f9da | ||
|
|
7e5dc339de | ||
|
|
4a69fa04e0 | ||
|
|
2e99cd30c3 | ||
|
|
25afc2a783 | ||
|
|
9603b66012 | ||
|
|
45aab4d30b | ||
|
|
ed2bfe93aa | ||
|
|
cdc783510b | ||
|
|
cf0efe9636 | ||
|
|
dedb177029 | ||
|
|
86c3bbbced | ||
|
|
4b3a607658 | ||
|
|
3a7d35b982 | ||
|
|
6496ccb413 | ||
|
|
3fcce30289 | ||
|
|
c2b2c7e138 | ||
|
|
dacb3a864a | ||
|
|
6066d03db0 | ||
|
|
6562d34a8c | ||
|
|
5e9e3d0f6b | ||
|
|
349fc5c705 | ||
|
|
2c3e0af93e | ||
|
|
6150502e47 | ||
|
|
b207d5ebd4 | ||
|
|
4191779dcd | ||
|
|
f97ec8bcb9 | ||
|
|
8276d3b87a | ||
|
|
af95ee94b4 | ||
|
|
8fb6af6bba | ||
|
|
f6af0f888b | ||
|
|
e816c9d158 | ||
|
|
9250181f37 | ||
|
|
f096ec2625 | ||
|
|
4c8ab6fd71 | ||
|
|
05d4612947 | ||
|
|
746a695b36 | ||
|
|
165c54e97d | ||
|
|
2896dd73bc | ||
|
|
f8fd510eb4 | ||
|
|
7a3e849f6e | ||
|
|
196c6ba067 | ||
|
|
165620e320 | ||
|
|
4fd350611c | ||
|
|
263fef43de | ||
|
|
a249ab83cb | ||
|
|
f7043ef39c | ||
|
|
64fc49aba0 | ||
|
|
245023a861 | ||
|
|
3c77a54d5d | ||
|
|
da30a20a4d | ||
|
|
1fe48afea5 | ||
|
|
42e05be867 | ||
|
|
fe45b0e060 | ||
|
|
a06e1498aa | ||
|
|
5a80e7b43a | ||
|
|
3fb2a23029 | ||
|
|
7be15d4097 | ||
|
|
cd10b3ea63 | ||
|
|
547993dcd0 | ||
|
|
6c9b71bc08 | ||
|
|
93b8404599 | ||
|
|
9ba1e1dcc0 | ||
|
|
b8079a40bc | ||
|
|
5bc8a73af6 | ||
|
|
b3eaeded12 | ||
|
|
ec65b391cb | ||
|
|
2982514072 | ||
|
|
98908bcf7c | ||
|
|
04b32c8f96 | ||
|
|
40eec6b15c | ||
|
|
39efc6e3e0 | ||
|
|
1198fe14a1 | ||
|
|
71e90766b5 | ||
|
|
d7aae610f6 | ||
|
|
92c27a0dbf | ||
|
|
d181cff685 | ||
|
|
3b4b82d4ce | ||
|
|
545ef4f531 | ||
|
|
906b87cf5f | ||
|
|
b281aad2dc | ||
|
|
6b18a24e6e | ||
|
|
c9de980106 | ||
|
|
f9b373afda | ||
|
|
298a120ab7 | ||
|
|
e3faecde30 | ||
|
|
a0f071a50d | ||
|
|
20bad91d76 | ||
|
|
b54a2da433 | ||
|
|
dc2c37f316 | ||
|
|
c1f62dd338 | ||
|
|
5a3efcd27c | ||
|
|
4c8f9c2577 | ||
|
|
f26a298247 | ||
|
|
ea01cdbf61 | ||
|
|
6a76b53355 | ||
|
|
d37708fc86 | ||
|
|
5c13c28566 | ||
|
|
f70e9229e6 | ||
|
|
30afe4aeb2 | ||
|
|
75fa990dc6 | ||
|
|
f39ffc5877 | ||
|
|
07ea9c9b05 | ||
|
|
073ac1225f | ||
|
|
0c6422cdd6 | ||
|
|
08773689f3 | ||
|
|
0c75abbb7b | ||
|
|
97653f81b2 | ||
|
|
d38b27dd9b | ||
|
|
6d94cbd2f4 | ||
|
|
30317f4887 | ||
|
|
8c3e35dd44 | ||
|
|
c86f51ee38 | ||
|
|
6e52bbb413 | ||
|
|
05bddcc512 | ||
|
|
1212e9972f | ||
|
|
ccb6570e9e | ||
|
|
18b6216150 | ||
|
|
fb009b7f53 | ||
|
|
3083e4dc07 | ||
|
|
7367bdef23 | ||
|
|
ad31642584 | ||
|
|
c7c43a93ba | ||
|
|
96229e5f95 | ||
|
|
55d119e2a1 | ||
|
|
6d2679ee26 | ||
|
|
afbab5688e | ||
|
|
3d897cc791 | ||
|
|
cf143c4d97 | ||
|
|
ad120ae1c5 | ||
|
|
d0fa172e5f | ||
|
|
f97f9f71e5 | ||
|
|
526656726b | ||
|
|
9b8c554ea7 | ||
|
|
d13bfc07b7 | ||
|
|
efe470e261 | ||
|
|
e3f6b56909 | ||
|
|
b1e676fde8 | ||
|
|
92d4cfa358 | ||
|
|
3d47ee0a9e | ||
|
|
d164a0d41b | ||
|
|
db29af6d36 | ||
|
|
2c6acdfd2d | ||
|
|
fddaa76a59 | ||
|
|
a809446750 | ||
|
|
d8f30a7e66 | ||
|
|
5b1d85754e | ||
|
|
e25586e471 | ||
|
|
292a2301bf | ||
|
|
dabe15701b | ||
|
|
4245f55880 | ||
|
|
5b9d187cc6 | ||
|
|
39e1c4f08c | ||
|
|
19f35402c5 | ||
|
|
70852b47ca | ||
|
|
a9a3b4a081 | ||
|
|
ecc90093f9 | ||
|
|
520251c093 | ||
|
|
55af45fcab | ||
|
|
b82232036a | ||
|
|
e4659b4547 | ||
|
|
9e5751b9fe | ||
|
|
bd1bcd3ea0 | ||
|
|
93a63b36f1 | ||
|
|
8b2dc4c328 | ||
|
|
850837b67a | ||
|
|
13585d7682 | ||
|
|
fd3ec986a4 | ||
|
|
b0d578ff7b | ||
|
|
b0c8f2e9c8 | ||
|
|
51815886a9 | ||
|
|
08a42f9c74 | ||
|
|
e15ad9ef09 | ||
|
|
4e9fee1015 | ||
|
|
7273e5849b | ||
|
|
b505e98784 | ||
|
|
92cd9fd565 | ||
|
|
b3d7dce429 | ||
|
|
a44694ab4e | ||
|
|
ab19b46b88 | ||
|
|
8804f10e6b | ||
|
|
6be17c0870 | ||
|
|
8652770bd2 | ||
|
|
2a1321a272 | ||
|
|
9c0fa60bf3 | ||
|
|
502d87c546 | ||
|
|
b35b0d73d8 | ||
|
|
6e7e4a6edf | ||
|
|
53fef319f1 | ||
|
|
2cabee2a7d | ||
|
|
11f502fac1 | ||
|
|
98affc1a48 | ||
|
|
70a2829fee | ||
|
|
837e56c8ee | ||
|
|
b5ddee8c77 | ||
|
|
fb64adcbd3 | ||
|
|
4f640f2890 | ||
|
|
254e64a20a | ||
|
|
818ac213eb | ||
|
|
cbef4d5c9f | ||
|
|
bf90c46790 | ||
|
|
69eb4d699f | ||
|
|
6d8ec8c3b7 | ||
|
|
760845ce99 | ||
|
|
5c2d087221 | ||
|
|
b6c4e36728 | ||
|
|
1a57b8c18c | ||
|
|
24eb13b1c6 | ||
|
|
525e0316c0 | ||
|
|
7e60ce9cf7 | ||
|
|
e811bcf8f8 | ||
|
|
6103f59095 | ||
|
|
9fa5789279 | ||
|
|
d2ac04674d | ||
|
|
1fd6e30988 | ||
|
|
884cdb6cd9 | ||
|
|
9771b1f901 | ||
|
|
2118fdd1a9 | ||
|
|
320d597c21 | ||
|
|
aaf44a2f47 | ||
|
|
fafabc0712 | ||
|
|
409760a932 | ||
|
|
097eba019d | ||
|
|
73a85620ee | ||
|
|
a560f28c98 | ||
|
|
5ec5461e1a | ||
|
|
542130a5d9 | ||
|
|
82997dad57 | ||
|
|
647a7bf5e8 | ||
|
|
77afa008dd | ||
|
|
db535435b3 | ||
|
|
c2a453b461 | ||
|
|
cd29eaab95 | ||
|
|
52aa7e7476 | ||
|
|
e97c55ee6a | ||
|
|
acfccacad5 | ||
|
|
5f2c2b7936 | ||
|
|
cb55908e51 | ||
|
|
e581224843 | ||
|
|
f50365e91c | ||
|
|
c366f8d30a | ||
|
|
6a26c5f9d5 | ||
|
|
bd6fb007de | ||
|
|
b69b2ff736 | ||
|
|
794e5dcd7e | ||
|
|
f0d3669437 | ||
|
|
98e698f1ff | ||
|
|
3cddb8d6a7 | ||
|
|
990d533ee4 | ||
|
|
b0081562d2 | ||
|
|
fff37cfd4f | ||
|
|
a3be69b7f0 | ||
|
|
0fd1b1624c | ||
|
|
367976d49f | ||
|
|
0aef0771f8 | ||
|
|
0c070681c5 | ||
|
|
30b25d382d | ||
|
|
e5f878c205 | ||
|
|
e2e84aed7e | ||
|
|
b1927f4e8a | ||
|
|
3b9323d96e | ||
|
|
7f832413d6 | ||
|
|
7f2ed47595 | ||
|
|
c3fa77bdef | ||
|
|
57ce8a6d08 | ||
|
|
69d8eeeec5 | ||
|
|
81c13222c6 | ||
|
|
b1ce2ba197 | ||
|
|
5c8411e968 | ||
|
|
cc9c8ce5df | ||
|
|
20ef4123b9 | ||
|
|
4e62d26aa2 | ||
|
|
b657816684 | ||
|
|
9778b3e7ee | ||
|
|
25dd58ca6a | ||
|
|
5e42f8a0ad | ||
|
|
1ad6b891b2 | ||
|
|
7aa589a5e1 | ||
|
|
065bc35489 | ||
|
|
3a380766d1 | ||
|
|
affaea0688 | ||
|
|
77426a087b | ||
|
|
8991844ea2 | ||
|
|
082395d0a0 | ||
|
|
e8ed7354e6 | ||
|
|
1e7f602e2a | ||
|
|
522f6c066d | ||
|
|
321b5e082a | ||
|
|
3711fa1eb2 | ||
|
|
395c74615c | ||
|
|
3dc240e8c6 | ||
|
|
a41a6c5094 | ||
|
|
d71207121d | ||
|
|
b1c6f21c74 | ||
|
|
412abb8760 | ||
|
|
f17d5f6d14 | ||
|
|
6bb801cfaf | ||
|
|
de02d1f4e9 | ||
|
|
e1f93a0a76 | ||
|
|
d21a661bb4 | ||
|
|
b2bd968f4b | ||
|
|
4a01befb34 | ||
|
|
845dfcdc40 | ||
|
|
d92cb46305 | ||
|
|
a8795327ca | ||
|
|
d34995a9e3 | ||
|
|
958849275f | ||
|
|
998f094452 | ||
|
|
aaa42cf0cf | ||
|
|
9fb64c04cd | ||
|
|
f9622868e7 | ||
|
|
37768f9242 | ||
|
|
a1aadd09a4 | ||
|
|
b47a75017b | ||
|
|
e37b54b140 | ||
|
|
c1decda58c | ||
|
|
d3f8e038fe | ||
|
|
ad152e2d95 | ||
|
|
b0af12154e | ||
|
|
d16b3c6677 | ||
|
|
c57244cdb1 | ||
|
|
a7e5f27412 | ||
|
|
089a40955c | ||
|
|
d73ebac100 | ||
|
|
e563c0d73b | ||
|
|
491c42e690 | ||
|
|
7f2339c617 | ||
|
|
8122e79fef | ||
|
|
fe3ad1d456 | ||
|
|
038a5e1a65 | ||
|
|
84bc23b41b | ||
|
|
46933a15d6 | ||
|
|
3859ebeee6 | ||
|
|
d50aca41f8 | ||
|
|
0ca057b965 | ||
|
|
5ca968d0a6 | ||
|
|
f0d31c624e | ||
|
|
08c655906c | ||
|
|
5a993e1692 | ||
|
|
a7d2953073 | ||
|
|
fdd0b8f8e0 | ||
|
|
f65dc41b72 | ||
|
|
962250f7ea | ||
|
|
7dc2a74e0a | ||
|
|
b02b960c6b | ||
|
|
4f427c4be8 | ||
|
|
8a00ea567b | ||
|
|
8895be01fc | ||
|
|
52e7fcfeb7 | ||
|
|
2396062c74 | ||
|
|
14704aeff6 | ||
|
|
3c2c3af059 | ||
|
|
1891ea2d76 | ||
|
|
1094074c04 | ||
|
|
217d5ae013 | ||
|
|
8b40854529 | ||
|
|
6bb0fbf9fb | ||
|
|
8d3b226b83 | ||
|
|
42b7a5afe0 | ||
|
|
899d2bea63 | ||
|
|
9cb0e65d7e | ||
|
|
b070564efb | ||
|
|
ce28252c48 | ||
|
|
3aa9a73554 | ||
|
|
6a9b3b61ea | ||
|
|
45408eb075 | ||
|
|
eafc66855d | ||
|
|
e03d3e6453 | ||
|
|
a70e45f80a | ||
|
|
697655a7c0 | ||
|
|
e382b953f0 | ||
|
|
116e7e0d04 | ||
|
|
cf03e34ad3 | ||
|
|
2903137292 | ||
|
|
9361f2169c | ||
|
|
35aa6c538f | ||
|
|
fa9f1d16b8 | ||
|
|
485fedf6fd | ||
|
|
da0baba5c8 | ||
|
|
bb9f3bfedf | ||
|
|
dbc0b39b91 | ||
|
|
481c5c5137 | ||
|
|
0cacae2807 | ||
|
|
d9d56deadf | ||
|
|
74ba450a81 | ||
|
|
db19df6ca0 | ||
|
|
fbdf8d15d1 | ||
|
|
94aae01548 | ||
|
|
39eef54cf0 | ||
|
|
05c8268c81 | ||
|
|
289a16b4f3 | ||
|
|
7935926baa | ||
|
|
dcbb07c35a | ||
|
|
40090e8d51 | ||
|
|
3e050d51d4 | ||
|
|
ced70c8640 | ||
|
|
9a700deea4 | ||
|
|
dc35ba0eba | ||
|
|
88bd486b9a | ||
|
|
7f8b92e3cf | ||
|
|
35f6e0ff36 | ||
|
|
326fa4e6e5 | ||
|
|
c74299a72c | ||
|
|
10a1bb3a78 | ||
|
|
4d3e543c73 | ||
|
|
05d1e7aaa9 | ||
|
|
a3aa814b77 | ||
|
|
5c32a77cad | ||
|
|
14a28e705b | ||
|
|
cc99d4f826 | ||
|
|
712c7530ff | ||
|
|
0a147785e8 | ||
|
|
59eaf69e33 | ||
|
|
e8be2943a7 | ||
|
|
8fdc538b46 | ||
|
|
9513c1eb17 | ||
|
|
ae6fff4e64 | ||
|
|
5a65668e25 | ||
|
|
f75e6890db | ||
|
|
d9cb92c840 | ||
|
|
94c04a3c79 | ||
|
|
f094834857 | ||
|
|
111de00289 | ||
|
|
b4a131e1a5 | ||
|
|
f1991ce928 | ||
|
|
6548030a17 | ||
|
|
3a8947650b | ||
|
|
1979969f91 | ||
|
|
0673741af3 | ||
|
|
c8e170b209 | ||
|
|
bbe1f3634a | ||
|
|
4671dd41b2 | ||
|
|
f164b97123 | ||
|
|
5275efe30d | ||
|
|
b13647cf3c | ||
|
|
add7d2a0e2 | ||
|
|
e298d3a08c | ||
|
|
fd8c8c7dcd | ||
|
|
9158af16cc | ||
|
|
c6668e4ad1 | ||
|
|
84e8cca48b | ||
|
|
790b06b7d4 | ||
|
|
740d7c49c2 | ||
|
|
4e51ec5f57 | ||
|
|
05087d1b4c | ||
|
|
a66a73ee90 | ||
|
|
8188b923db | ||
|
|
d993a1354d | ||
|
|
e8882e7043 | ||
|
|
1056821799 | ||
|
|
890e6d3309 | ||
|
|
246080d378 | ||
|
|
b1ea680270 | ||
|
|
45550d1039 | ||
|
|
7cdfc4c90f | ||
|
|
af21f56f98 | ||
|
|
1a8f0773b6 | ||
|
|
59cc5bd8bf | ||
|
|
49bc16b95e | ||
|
|
a2f9ca1e67 | ||
|
|
371ddb14fe | ||
|
|
998895dffa | ||
|
|
aadd3ce21f | ||
|
|
ae7b846203 | ||
|
|
21ba7d0981 | ||
|
|
691fbe7f98 | ||
|
|
2e221ca3a8 | ||
|
|
317f7ab634 | ||
|
|
23495d6a39 | ||
|
|
224db034ab | ||
|
|
ad27649be3 | ||
|
|
84571be645 | ||
|
|
7b0d333a7e | ||
|
|
342f0c3682 | ||
|
|
38e0f16a94 | ||
|
|
e910fe2fe4 | ||
|
|
233b58dec7 | ||
|
|
c39b2ed990 | ||
|
|
35ec86689c | ||
|
|
c485959034 | ||
|
|
a0560d8ab8 | ||
|
|
0385aa6199 | ||
|
|
00f4764cb7 | ||
|
|
51c2cd0b83 | ||
|
|
5f5a9d6158 | ||
|
|
2d19fb5072 | ||
|
|
9d865a1af6 | ||
|
|
41aa44259d | ||
|
|
381ff44756 | ||
|
|
7f29cf545a | ||
|
|
7d1219f3e0 | ||
|
|
f1b4af7d79 | ||
|
|
8a8590a617 | ||
|
|
4a7a5e41f7 | ||
|
|
2a49d01600 | ||
|
|
b99af8a51c | ||
|
|
8e7020daef | ||
|
|
a26bcc61c1 | ||
|
|
5c4dcf8172 | ||
|
|
e9fb6a4bbe | ||
|
|
e2dbcaa1bf | ||
|
|
ae01850165 | ||
|
|
c3baaedfc8 | ||
|
|
0b68de3cc1 | ||
|
|
39e9d524e5 | ||
|
|
865b087224 | ||
|
|
3121b25639 | ||
|
|
0286b85c79 | ||
|
|
ab52bb5137 | ||
|
|
61a98b8623 | ||
|
|
6daf34a045 | ||
|
|
c03adf90bd | ||
|
|
0ece114b7b | ||
|
|
5b6a74856b | ||
|
|
ce43100a01 | ||
|
|
8cc9b4016d | ||
|
|
31eeab9f41 | ||
|
|
9558dcec9c | ||
|
|
6e6b70d65f | ||
|
|
d417fd88d0 | ||
|
|
9e4f5dc1e9 | ||
|
|
1251565ee0 | ||
|
|
1f7258a367 | ||
|
|
0af985069b | ||
|
|
0de168f7ed | ||
|
|
95b31e266b | ||
|
|
6b3a3098b5 | ||
|
|
2de624fdd5 | ||
|
|
3fee7f636c | ||
|
|
89e2fff2b7 | ||
|
|
cedc70b292 | ||
|
|
07d7689f2e | ||
|
|
ae8cb5328d | ||
|
|
2e32ac0b9a | ||
|
|
672f01c370 | ||
|
|
e2d616dd30 | ||
|
|
0ab7f4fe2b | ||
|
|
29c4a07776 | ||
|
|
826e911e41 | ||
|
|
30d22dae8e | ||
|
|
ec3518725b | ||
|
|
5f87d845eb | ||
|
|
571808a7aa | ||
|
|
dfe5fa49ae | ||
|
|
01a0c511eb | ||
|
|
b3d30315ce | ||
|
|
882af14d7d | ||
|
|
47335a0efa | ||
|
|
34bc2d9dfd | ||
|
|
08c7af4afa | ||
|
|
f7291a0b7c | ||
|
|
c65aa4e9e1 | ||
|
|
ad213a1d74 | ||
|
|
43f1e4e41e | ||
|
|
54b0e909d5 | ||
|
|
f8752b86ac | ||
|
|
84c237fb8a | ||
|
|
ab49d7a9fa | ||
|
|
b4173f1551 | ||
|
|
2817b99cf2 | ||
|
|
001fffd004 | ||
|
|
0e94b4713d | ||
|
|
a6d3b89feb | ||
|
|
6c26815d63 | ||
|
|
73c4ac2c95 | ||
|
|
84f214d840 | ||
|
|
e3f88be7a9 | ||
|
|
31af3e35e0 | ||
|
|
94a5cff91d | ||
|
|
77082c7b9e | ||
|
|
252a1f75d2 | ||
|
|
5abf513cf8 | ||
|
|
c6054e3201 | ||
|
|
4080530624 | ||
|
|
c25f1a9b63 | ||
|
|
dfaa86b75e | ||
|
|
d9163ae3b6 | ||
|
|
dafafe7cf1 | ||
|
|
81953d1ae5 | ||
|
|
3a212ed62e | ||
|
|
195f084542 | ||
|
|
aa7a455b2e | ||
|
|
6a4e659c93 | ||
|
|
40f3666f6b | ||
|
|
dd801bbe18 | ||
|
|
38cce791c7 | ||
|
|
bf3ae6a543 | ||
|
|
bff98341d5 | ||
|
|
2644e911be | ||
|
|
a5f67895d3 | ||
|
|
15e4b6b758 | ||
|
|
2b28b892d8 | ||
|
|
7507fc98cb | ||
|
|
477b7a8474 | ||
|
|
034a884957 | ||
|
|
64436cb1a4 | ||
|
|
f138873900 | ||
|
|
e793338c88 | ||
|
|
369bb06206 | ||
|
|
2cb31d288e | ||
|
|
c723d1cd8d | ||
|
|
1f55234057 | ||
|
|
04006fae8d | ||
|
|
4cb13d0d6a | ||
|
|
a1f6f5c768 | ||
|
|
05c7feec77 | ||
|
|
bf83024826 | ||
|
|
a0cfd82dda | ||
|
|
1b734adb2d | ||
|
|
9b724d7277 | ||
|
|
c3a5dd3b5d | ||
|
|
e3755a624b | ||
|
|
95cf60e826 | ||
|
|
6b03e1e25d | ||
|
|
712b0b5b70 | ||
|
|
6a424391d9 | ||
|
|
dbf0157a26 | ||
|
|
7deef1ba67 | ||
|
|
fd6ca38262 | ||
|
|
bdafd88da0 | ||
|
|
7a1e71575e | ||
|
|
ac2d8f54d1 | ||
|
|
14ff6baa0e | ||
|
|
bb08101ec4 | ||
|
|
bc4b2d75ba | ||
|
|
35fc3021ba | ||
|
|
347227237b | ||
|
|
564dc3c6e8 | ||
|
|
9f4576a7eb | ||
|
|
f11315e8d4 | ||
|
|
0c2ac64bb8 | ||
|
|
a9eede3913 | ||
|
|
9e29ef13a3 | ||
|
|
eaaaaec042 | ||
|
|
3cb3b60064 | ||
|
|
044e3d91b5 | ||
|
|
c9e538a3b1 | ||
|
|
76dad392f5 | ||
|
|
9617b557aa | ||
|
|
bf4fa24414 | ||
|
|
20361b4f25 | ||
|
|
05a0068a76 | ||
|
|
66a42309fa | ||
|
|
fd94e2671a | ||
|
|
8ff6697861 | ||
|
|
eafa643715 | ||
|
|
049da7cb6c | ||
|
|
7dbeee7e22 | ||
|
|
93ad6c6bfa | ||
|
|
329179073b | ||
|
|
4d86d2008e | ||
|
|
ab47b6e881 | ||
|
|
df43389ade | ||
|
|
397b305cfe | ||
|
|
e496fa50cd | ||
|
|
06a96da15b | ||
|
|
70157c2c43 | ||
|
|
c58ed8563d | ||
|
|
4c7821227c | ||
|
|
42362fdb5e | ||
|
|
97124e572d | ||
|
|
32616c14cc | ||
|
|
8174d0fe95 | ||
|
|
8704778d95 | ||
|
|
c287f2bc60 | ||
|
|
9ea5c04c0d | ||
|
|
fd7a7498a4 | ||
|
|
e3a6747d8f | ||
|
|
f41ffc00d1 | ||
|
|
81fda15369 | ||
|
|
427cd050a3 | ||
|
|
b0c200f1ec | ||
|
|
92747e664a | ||
|
|
f1f336322d | ||
|
|
bf8dd79045 | ||
|
|
c6781156aa | ||
|
|
59bbe4911a | ||
|
|
4f3c5e0627 | ||
|
|
f484c5fa25 | ||
|
|
88d9f6c0c4 | ||
|
|
3c9c088f9c | ||
|
|
fc3996bfe1 | ||
|
|
5b6ad8630c | ||
|
|
30105f4ac0 | ||
|
|
1143535d76 | ||
|
|
7d52c052ef | ||
|
|
a2406fce3c | ||
|
|
3b34ab538c | ||
|
|
ac782306f1 | ||
|
|
0c00e889f3 | ||
|
|
ce96ed05f4 | ||
|
|
0463b77a1f | ||
|
|
2d185706ea | ||
|
|
b72b44318c | ||
|
|
46f59e89ea | ||
|
|
b4241e308e | ||
|
|
3d4b08dfc7 | ||
|
|
be49068d65 | ||
|
|
525cedb971 | ||
|
|
de3c7fe0d4 | ||
|
|
896cc72750 | ||
|
|
c1ff6e1ad0 | ||
|
|
fee70322d7 | ||
|
|
8065d6c55f | ||
|
|
494172d2e5 | ||
|
|
6e3c2047f8 | ||
|
|
011bd3221b | ||
|
|
b46eabecd3 | ||
|
|
0437307a41 | ||
|
|
22b7ac13ef | ||
|
|
96f88e91b7 | ||
|
|
3331a4644d | ||
|
|
adf1921dc1 | ||
|
|
97674f0419 | ||
|
|
73843ae8ac | ||
|
|
f2bb8c036a | ||
|
|
75ca6bcee2 | ||
|
|
089657ed1f | ||
|
|
b5eab86c24 | ||
|
|
c8e3e0974b | ||
|
|
dfc8f46e1c | ||
|
|
c143ddce5d | ||
|
|
169d836feb | ||
|
|
6ae938b295 | ||
|
|
cf40fdf5c1 | ||
|
|
23bdae0955 | ||
|
|
ca74c90bf5 | ||
|
|
7cfc1e2a10 | ||
|
|
1ac5705f62 | ||
|
|
e4f90ea0a7 | ||
|
|
cdfc187cd5 | ||
|
|
feef925f49 | ||
|
|
19e2d1cdea | ||
|
|
8369a4fe76 | ||
|
|
1f749b6658 | ||
|
|
819707920a | ||
|
|
43518503a6 | ||
|
|
5839d556e4 | ||
|
|
6c83e583b3 | ||
|
|
6aeb64b673 | ||
|
|
6cd64b6806 | ||
|
|
e154c65128 | ||
|
|
a50fd6e026 | ||
|
|
6a55bb66ee | ||
|
|
7c05097633 | ||
|
|
589568789f | ||
|
|
7577d849a6 | ||
|
|
cb23192bc4 | ||
|
|
41c1023300 | ||
|
|
90b6288cce | ||
|
|
c1823c8ad9 | ||
|
|
d7c6c656c5 | ||
|
|
b0b128049a | ||
|
|
e8f13f2637 | ||
|
|
b5aad37f6b | ||
|
|
6d0d4fc26d | ||
|
|
0278aa443f | ||
|
|
1f35745758 | ||
|
|
573c35272f | ||
|
|
09e3f91e40 | ||
|
|
1b6cf16be7 | ||
|
|
26264cb056 | ||
|
|
a72df5f36f | ||
|
|
c878e635de | ||
|
|
0f47cc2e92 | ||
|
|
5fc2757682 | ||
|
|
e3944c2621 | ||
|
|
667d96480b | ||
|
|
e6fe993c31 | ||
|
|
d0d93f76ea | ||
|
|
20a6a154fe | ||
|
|
f011876076 | ||
|
|
6929569403 | ||
|
|
eb451890da | ||
|
|
ded7511a70 | ||
|
|
d2161cade5 | ||
|
|
27e5fa8198 | ||
|
|
efbd1eb51a | ||
|
|
369ff75081 | ||
|
|
47212f7bcb | ||
|
|
4c93ee8d14 | ||
|
|
8bc4dbb1af | ||
|
|
6c3760292c | ||
|
|
4cef70db6c | ||
|
|
ff4af6ec59 | ||
|
|
d01fb21d4c | ||
|
|
a4ea28eee6 | ||
|
|
bc2a871f3e | ||
|
|
1759672eed | ||
|
|
fea55ef4a9 | ||
|
|
16b6bd01d2 | ||
|
|
14d0f4e0f3 | ||
|
|
778f969447 | ||
|
|
79cd8b3d8a | ||
|
|
b4663f12b1 | ||
|
|
b50e02c1e4 | ||
|
|
33b72ce64e | ||
|
|
cf2bf840ba | ||
|
|
bccdac6874 | ||
|
|
e69f9f5d68 | ||
|
|
77a9a9c295 | ||
|
|
84dcd1c4e4 | ||
|
|
971e3b7520 | ||
|
|
4e79011729 | ||
|
|
a936ac321c | ||
|
|
98960c911c | ||
|
|
329ca3bef6 | ||
|
|
2c3322e36e | ||
|
|
80ae228b34 | ||
|
|
6d28c408cf | ||
|
|
c83b35d4aa | ||
|
|
94e5d6aedb | ||
|
|
531a74968c | ||
|
|
c5edd147d1 | ||
|
|
856150d056 | ||
|
|
03ebea89b0 | ||
|
|
15d106787e | ||
|
|
7aab3696dd | ||
|
|
47787efa2b | ||
|
|
4a420119a6 | ||
|
|
33751818d3 | ||
|
|
698f127c1a | ||
|
|
fe458b6596 | ||
|
|
21ac1a8ac3 | ||
|
|
79027c0ea0 | ||
|
|
4cad2929cd | ||
|
|
62666af99f | ||
|
|
9ddc289f88 | ||
|
|
ccff2c404d |
8
.github/ISSUE_TEMPLATE.md
vendored
8
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.1**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.24**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2016.06.11.1
|
[debug] youtube-dl version 2016.09.24
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
@@ -55,4 +55,4 @@ $ youtube-dl -v <your command line>
|
|||||||
### Description of your *issue*, suggested solution and other information
|
### Description of your *issue*, suggested solution and other information
|
||||||
|
|
||||||
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
|
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
|
||||||
If work on your *issue* required an account credentials please provide them or explain how one can obtain them.
|
If work on your *issue* requires account credentials please provide them or explain how one can obtain them.
|
||||||
|
|||||||
2
.github/ISSUE_TEMPLATE_tmpl.md
vendored
2
.github/ISSUE_TEMPLATE_tmpl.md
vendored
@@ -55,4 +55,4 @@ $ youtube-dl -v <your command line>
|
|||||||
### Description of your *issue*, suggested solution and other information
|
### Description of your *issue*, suggested solution and other information
|
||||||
|
|
||||||
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
|
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
|
||||||
If work on your *issue* required an account credentials please provide them or explain how one can obtain them.
|
If work on your *issue* requires account credentials please provide them or explain how one can obtain them.
|
||||||
|
|||||||
22
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
22
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
## Please follow the guide below
|
||||||
|
|
||||||
|
- You will be asked some questions, please read them **carefully** and answer honestly
|
||||||
|
- Put an `x` into all the boxes [ ] relevant to your *pull request* (like that [x])
|
||||||
|
- Use the *Preview* tab to see what your *pull request* will actually look like
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Before submitting a *pull request* make sure you have:
|
||||||
|
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
|
||||||
|
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
|
||||||
|
|
||||||
|
### What is the purpose of your *pull request*?
|
||||||
|
- [ ] Bug fix
|
||||||
|
- [ ] New extractor
|
||||||
|
- [ ] New feature
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Description of your *pull request* and other information
|
||||||
|
|
||||||
|
Explanation of your *pull request* in arbitrary form goes here. Please make sure the description explains the purpose and effect of your *pull request* and is worded well enough to be understood. Provide as much context and examples as possible.
|
||||||
@@ -7,9 +7,6 @@ python:
|
|||||||
- "3.4"
|
- "3.4"
|
||||||
- "3.5"
|
- "3.5"
|
||||||
sudo: false
|
sudo: false
|
||||||
install:
|
|
||||||
- bash ./devscripts/install_srelay.sh
|
|
||||||
- export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6
|
|
||||||
script: nosetests test --verbose
|
script: nosetests test --verbose
|
||||||
notifications:
|
notifications:
|
||||||
email:
|
email:
|
||||||
|
|||||||
12
AUTHORS
12
AUTHORS
@@ -173,3 +173,15 @@ Kevin Deldycke
|
|||||||
inondle
|
inondle
|
||||||
Tomáš Čech
|
Tomáš Čech
|
||||||
Déstin Reed
|
Déstin Reed
|
||||||
|
Roman Tsiupa
|
||||||
|
Artur Krysiak
|
||||||
|
Jakub Adam Wieczorek
|
||||||
|
Aleksandar Topuzović
|
||||||
|
Nehal Patel
|
||||||
|
Rob van Bekkum
|
||||||
|
Petr Zvoníček
|
||||||
|
Pratyush Singh
|
||||||
|
Aleksander Nitecki
|
||||||
|
Sebastian Blunt
|
||||||
|
Matěj Cepl
|
||||||
|
Xie Yanbo
|
||||||
|
|||||||
156
CONTRIBUTING.md
156
CONTRIBUTING.md
@@ -46,7 +46,7 @@ Make sure that someone has not already opened the issue you're trying to open. S
|
|||||||
|
|
||||||
### Why are existing options not enough?
|
### Why are existing options not enough?
|
||||||
|
|
||||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||||
|
|
||||||
### Is there enough context in your bug report?
|
### Is there enough context in your bug report?
|
||||||
|
|
||||||
@@ -97,9 +97,17 @@ If you want to add support for a new site, first of all **make sure** this site
|
|||||||
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||||
|
|
||||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
2. Check out the source code with:
|
||||||
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
|
||||||
|
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
|
||||||
|
|
||||||
|
3. Start a new git branch with:
|
||||||
|
|
||||||
|
cd youtube-dl
|
||||||
|
git checkout -b yourextractor
|
||||||
|
|
||||||
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
@@ -142,17 +150,149 @@ After you have ensured this site is distributing it's content legally, you can f
|
|||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||||
8. Keep in mind that the only mandatory fields in the info dict for a successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from the aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerant** of situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into the resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
|
||||||
|
|
||||||
$ git add youtube_dl/extractor/extractors.py
|
$ git add youtube_dl/extractor/extractors.py
|
||||||
$ git add youtube_dl/extractor/yourextractor.py
|
$ git add youtube_dl/extractor/yourextractor.py
|
||||||
$ git commit -m '[yourextractor] Add new extractor'
|
$ git commit -m '[yourextractor] Add new extractor'
|
||||||
$ git push origin yourextractor
|
$ git push origin yourextractor
|
||||||
|
|
||||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||||
|
|
||||||
In any case, thank you very much for your contributions!
|
In any case, thank you very much for your contributions!
|
||||||
|
|
||||||
|
## youtube-dl coding conventions
|
||||||
|
|
||||||
|
This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
|
||||||
|
|
||||||
|
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control, and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize the code's dependency on the source's layout and even to make the code foresee potential future changes and be ready for them. This is important because it will allow the extractor not to break on minor layout changes, thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by releasing a new version of youtube-dl with the fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some may never receive an update at all, which is possible for non rolling release distros.
|
||||||
|
|
||||||
|
### Mandatory and optional metafields
|
||||||
|
|
||||||
|
For extraction to work youtube-dl relies on the metadata your extractor extracts and provides to youtube-dl, expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||||
|
|
||||||
|
- `id` (media identifier)
|
||||||
|
- `title` (media title)
|
||||||
|
- `url` (media download URL) or `formats`
|
||||||
|
|
||||||
|
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without, and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||||
|
|
||||||
|
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** of situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
meta = self._download_json(url, video_id)
|
||||||
|
```
|
||||||
|
|
||||||
|
Assume at this point `meta`'s layout is:
|
||||||
|
|
||||||
|
```python
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"summary": "some fancy summary text",
|
||||||
|
...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be ready that this key may be missing from the `meta` dict, so you should extract it like:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = meta.get('summary') # correct
|
||||||
|
```
|
||||||
|
|
||||||
|
and not like:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = meta['summary'] # incorrect
|
||||||
|
```
|
||||||
|
|
||||||
|
The latter will break the extraction process with a `KeyError` if `summary` disappears from `meta` at some later time, but with the former approach extraction will just go ahead with `description` set to `None`, which is perfectly fine (remember `None` is equivalent to the absence of data).
|
||||||
|
|
||||||
|
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = self._search_regex(
|
||||||
|
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
|
```
|
||||||
|
|
||||||
|
With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
|
||||||
|
|
||||||
|
You can also pass `default=<some fallback value>`, for example:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = self._search_regex(
|
||||||
|
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||||
|
webpage, 'description', default=None)
|
||||||
|
```
|
||||||
|
|
||||||
|
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
|
||||||
|
|
||||||
|
### Provide fallbacks
|
||||||
|
|
||||||
|
When extracting metadata try to provide several scenarios for that. For example, if `title` is present in several places/sources, try extracting from at least some of them. This would make it more future-proof in case some of the sources become unavailable.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = meta['title']
|
||||||
|
```
|
||||||
|
|
||||||
|
If `title` disappears from `meta` in the future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||||
|
|
||||||
|
Assume that you have another source you can extract `title` from, for example the `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = meta.get('title') or self._og_search_title(webpage)
|
||||||
|
```
|
||||||
|
|
||||||
|
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||||
|
|
||||||
|
### Make regular expressions flexible
|
||||||
|
|
||||||
|
When using regular expressions try to write them fuzzy and flexible.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Say you need to extract `title` from the following HTML code:
|
||||||
|
|
||||||
|
```html
|
||||||
|
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
|
||||||
|
```
|
||||||
|
|
||||||
|
The code for that task should look similar to:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
|
||||||
|
```
|
||||||
|
|
||||||
|
Or even better:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
|
||||||
|
webpage, 'title', group='title')
|
||||||
|
```
|
||||||
|
|
||||||
|
Note how you tolerate potential changes in the `style` attribute's value or a switch from using double quotes to single quotes for the `class` attribute.
|
||||||
|
|
||||||
|
The code definitely should not look like:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
|
||||||
|
webpage, 'title', group='title')
|
||||||
|
```
|
||||||
|
|
||||||
|
### Use safe conversion functions
|
||||||
|
|
||||||
|
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string-to-number conversions as well.
|
||||||
|
|
||||||
|
|||||||
710
ChangeLog
Normal file
710
ChangeLog
Normal file
@@ -0,0 +1,710 @@
|
|||||||
|
version 2016.09.24
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ Add support for watchTVeverywhere.com authentication provider based MSOs for
|
||||||
|
Adobe Pass authentication (#10709)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [soundcloud:playlist] Provide video id for early playlist entries (#10733)
|
||||||
|
+ [prosiebensat1] Add support for kabeleinsdoku (#10732)
|
||||||
|
* [cbs] Extract info from thunder videoPlayerService (#10728)
|
||||||
|
* [openload] Fix extraction (#10408)
|
||||||
|
+ [ustream] Support the new HLS streams (#10698)
|
||||||
|
+ [ooyala] Extract all HLS formats
|
||||||
|
+ [cartoonnetwork] Add support for Adobe Pass authentication
|
||||||
|
+ [soundcloud] Extract license metadata
|
||||||
|
+ [fox] Add support for Adobe Pass authentication (#8584)
|
||||||
|
+ [tbs] Add support for Adobe Pass authentication (#10642, #10222)
|
||||||
|
+ [trutv] Add support for Adobe Pass authentication (#10519)
|
||||||
|
+ [turner] Add support for Adobe Pass authentication
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.09.19
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [crunchyroll] Check if already authenticated (#10700)
|
||||||
|
- [twitch:stream] Remove fallback to profile extraction when stream is offline
|
||||||
|
* [thisav] Improve title extraction (#10682)
|
||||||
|
* [vyborymos] Improve station info extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.09.18
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ Introduce manifest_url and fragments fields in formats dictionary for
|
||||||
|
fragmented media
|
||||||
|
+ Provide manifest_url field for DASH segments, HLS and HDS
|
||||||
|
+ Provide fragments field for DASH segments
|
||||||
|
* Rework DASH segments downloader to use fragments field
|
||||||
|
+ Add helper method for Wowza Streaming Engine formats extraction
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [vyborymos] Add extractor for vybory.mos.ru (#10692)
|
||||||
|
+ [xfileshare] Add title regular expression for streamin.to (#10646)
|
||||||
|
+ [globo:article] Add support for multiple videos (#10653)
|
||||||
|
+ [thisav] Recognize HTML5 videos (#10447)
|
||||||
|
* [jwplatform] Improve JWPlayer detection
|
||||||
|
+ [mangomolo] Add support for Mangomolo embeds
|
||||||
|
+ [toutv] Add support for authentication (#10669)
|
||||||
|
* [franceinter] Fix upload date extraction
|
||||||
|
* [tv4] Fix HLS and HDS formats extraction (#10659)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.09.15
|
||||||
|
|
||||||
|
Core
|
||||||
|
* Improve _hidden_inputs
|
||||||
|
+ Introduce improved explicit Adobe Pass support
|
||||||
|
+ Add --ap-mso to provide multiple-system operator identifier
|
||||||
|
+ Add --ap-username to provide MSO account username
|
||||||
|
+ Add --ap-password to provide MSO account password
|
||||||
|
+ Add --ap-list-mso to list all supported MSOs
|
||||||
|
+ Add support for Rogers Cable multiple-system operator (#10606)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [crunchyroll] Fix authentication (#10655)
|
||||||
|
* [twitch] Fix API calls (#10654, #10660)
|
||||||
|
+ [bellmedia] Add support for more Bell Media Television sites
|
||||||
|
* [franceinter] Fix extraction (#10538, #2105)
|
||||||
|
* [kuwo] Improve error detection (#10650)
|
||||||
|
+ [go] Add support for free full episodes (#10439)
|
||||||
|
* [bilibili] Fix extraction for specific videos (#10647)
|
||||||
|
* [nhk] Fix extraction (#10633)
|
||||||
|
* [kaltura] Improve audio detection
|
||||||
|
* [kaltura] Skip chun format
|
||||||
|
+ [vimeo:ondemand] Pass Referer along with embed URL (#10624)
|
||||||
|
+ [nbc] Add support for NBC Olympics (#10361)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.09.11.1
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [tube8] Extract categories and tags (#10579)
|
||||||
|
+ [pornhub] Extract categories and tags (#10499)
|
||||||
|
* [openload] Temporary fix (#10408)
|
||||||
|
+ [foxnews] Add support for Fox News articles (#10598)
|
||||||
|
* [viafree] Improve video id extraction (#10615)
|
||||||
|
* [iwara] Fix extraction after relaunch (#10462, #3215)
|
||||||
|
+ [tfo] Add extractor for tfo.org
|
||||||
|
* [lrt] Fix audio extraction (#10566)
|
||||||
|
* [9now] Fix extraction (#10561)
|
||||||
|
+ [canalplus] Add support for c8.fr (#10577)
|
||||||
|
* [newgrounds] Fix uploader extraction (#10584)
|
||||||
|
+ [polskieradio:category] Add support for category lists (#10576)
|
||||||
|
+ [ketnet] Add extractor for ketnet.be (#10343)
|
||||||
|
+ [canvas] Add support for een.be (#10605)
|
||||||
|
+ [telequebec] Add extractor for telequebec.tv (#1999)
|
||||||
|
* [parliamentliveuk] Fix extraction (#9137)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.09.08
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [jwplatform] Extract height from format label
|
||||||
|
+ [yahoo] Extract Brightcove Legacy Studio embeds (#9345)
|
||||||
|
* [videomore] Fix extraction (#10592)
|
||||||
|
* [foxgay] Fix extraction (#10480)
|
||||||
|
+ [rmcdecouverte] Add extractor for rmcdecouverte.bfmtv.com (#9709)
|
||||||
|
* [gamestar] Fix metadata extraction (#10479)
|
||||||
|
* [puls4] Fix extraction (#10583)
|
||||||
|
+ [cctv] Add extractor for CCTV and CNTV (#8153)
|
||||||
|
+ [lci] Add extractor for lci.fr (#10573)
|
||||||
|
+ [wat] Extract DASH formats
|
||||||
|
+ [viafree] Improve video id detection (#10569)
|
||||||
|
+ [trutv] Add extractor for trutv.com (#10519)
|
||||||
|
+ [nick] Add support for nickelodeon.nl (#10559)
|
||||||
|
+ [abcotvs:clips] Add support for clips.abcotvs.com
|
||||||
|
+ [abcotvs] Add support for ABC Owned Television Stations sites (#9551)
|
||||||
|
+ [miaopai] Add extractor for miaopai.com (#10556)
|
||||||
|
* [gamestar] Fix metadata extraction (#10479)
|
||||||
|
+ [bilibili] Add support for episodes (#10190)
|
||||||
|
+ [tvnoe] Add extractor for tvnoe.cz (#10524)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.09.04.1
|
||||||
|
|
||||||
|
Core
|
||||||
|
* In DASH downloader if the first segment fails, abort the whole download
|
||||||
|
process to prevent throttling (#10497)
|
||||||
|
+ Add support for --skip-unavailable-fragments and --fragment-retries in
|
||||||
|
hlsnative downloader (#10165, #10448).
|
||||||
|
+ Add support for --skip-unavailable-fragments in DASH downloader
|
||||||
|
+ Introduce --skip-unavailable-fragments option for fragment based downloaders
|
||||||
|
that allows skipping fragments unavailable due to a HTTP error
|
||||||
|
* Fix extraction of video/audio entries with src attribute in
|
||||||
|
_parse_html5_media_entries (#10540)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [theplatform] Relax URL regular expression (#10546)
|
||||||
|
* [youtube:playlist] Extend URL regular expression
|
||||||
|
* [rottentomatoes] Delegate extraction to internetvideoarchive extractor
|
||||||
|
* [internetvideoarchive] Extract all formats
|
||||||
|
* [pornvoisines] Fix extraction (#10469)
|
||||||
|
* [rottentomatoes] Fix extraction (#10467)
|
||||||
|
* [espn] Extend URL regular expression (#10549)
|
||||||
|
* [vimple] Extend URL regular expression (#10547)
|
||||||
|
* [youtube:watchlater] Fix extraction (#10544)
|
||||||
|
* [youjizz] Fix extraction (#10437)
|
||||||
|
+ [foxnews] Add support for FoxNews Insider (#10445)
|
||||||
|
+ [fc2] Recognize Flash player URLs (#10512)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.09.03
|
||||||
|
|
||||||
|
Core
|
||||||
|
* Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in
|
||||||
|
_extract_m3u8_formats (#10522)
|
||||||
|
* Handle semicolon in mimetype2ext
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [youtube] Add support for rental videos' previews (#10532)
|
||||||
|
* [youtube:playlist] Fallback to video extraction for video/playlist URLs when
|
||||||
|
no playlist is actually served (#10537)
|
||||||
|
+ [drtv] Add support for dr.dk/nyheder (#10536)
|
||||||
|
+ [facebook:plugins:video] Add extractor (#10530)
|
||||||
|
+ [go] Add extractor for *.go.com sites
|
||||||
|
* [adobepass] Check for authz_token expiration (#10527)
|
||||||
|
* [nytimes] improve extraction
|
||||||
|
* [thestar] Fix extraction (#10465)
|
||||||
|
* [glide] Fix extraction (#10478)
|
||||||
|
- [exfm] Remove extractor (#10482)
|
||||||
|
* [youporn] Fix categories and tags extraction (#10521)
|
||||||
|
+ [curiositystream] Add extractor for app.curiositystream.com
|
||||||
|
- [thvideo] Remove extractor (#10464)
|
||||||
|
* [movingimage] Fix for the new site name (#10466)
|
||||||
|
+ [cbs] Add support for once formats (#10515)
|
||||||
|
* [limelight] Skip ism and duplicate manifests
|
||||||
|
+ [porncom] Extract categories and tags (#10510)
|
||||||
|
+ [facebook] Extract timestamp (#10508)
|
||||||
|
+ [yahoo] Extract more formats
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.31
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505)
|
||||||
|
* [bandcamp:album] Fix title extraction (#10455)
|
||||||
|
* [pyvideo] Fix extraction (#10468)
|
||||||
|
+ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016)
|
||||||
|
* [9c9media] Extract more metadata
|
||||||
|
* [9c9media] Fix multiple stacks extraction (#10016)
|
||||||
|
* [adultswim] Improve video info extraction (#10492)
|
||||||
|
* [vodplatform] Improve embed regular expression
|
||||||
|
- [played] Remove extractor (#10470)
|
||||||
|
+ [tbs] Add extractor for tbs.com and tntdrama.com (#10222)
|
||||||
|
+ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110)
|
||||||
|
* [adultswim] Rework in terms of turner extractor
|
||||||
|
* [cnn] Rework in terms of turner extractor
|
||||||
|
* [nba] Rework in terms of turner extractor
|
||||||
|
+ [turner] Add base extractor for Turner Broadcasting System based sites
|
||||||
|
* [bilibili] Fix extraction (#10375)
|
||||||
|
* [openload] Fix extraction (#10408)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.28
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ Add warning message that ffmpeg doesn't support SOCKS
|
||||||
|
* Improve thumbnail sorting
|
||||||
|
+ Extract formats from #EXT-X-MEDIA tags in _extract_m3u8_formats
|
||||||
|
* Fill IV with leading zeros for IVs shorter than 16 octets in hlsnative
|
||||||
|
+ Add ac-3 to the list of audio codecs in parse_codecs
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [periscope:user] Fix extraction (#10453)
|
||||||
|
* [douyutv] Fix extraction (#10153, #10318, #10444)
|
||||||
|
+ [nhk:vod] Add extractor for www3.nhk.or.jp on demand (#4437, #10424)
|
||||||
|
- [trutube] Remove extractor (#10438)
|
||||||
|
+ [usanetwork] Add extractor for usanetwork.com
|
||||||
|
* [crackle] Fix extraction (#10333)
|
||||||
|
* [spankbang] Fix description and uploader extraction (#10339)
|
||||||
|
* [discoverygo] Detect cable provider restricted videos (#10425)
|
||||||
|
+ [cbc] Add support for watch.cbc.ca
|
||||||
|
* [kickstarter] Silence the warning for og:description (#10415)
|
||||||
|
* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.24.1
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [pluralsight] Add support for subtitles (#9681)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.24
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix authentication (#10392)
|
||||||
|
* [openload] Fix extraction (#10408)
|
||||||
|
+ [bravotv] Add support for Adobe Pass (#10407)
|
||||||
|
* [bravotv] Fix clip info extraction (#10407)
|
||||||
|
* [eagleplatform] Improve embedded videos detection (#10409)
|
||||||
|
* [awaan] Fix extraction
|
||||||
|
* [mtvservices:embedded] Update config URL
|
||||||
|
+ [abc:iview] Add extractor (#6148)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.22
|
||||||
|
|
||||||
|
Core
|
||||||
|
* Improve formats and subtitles extension auto calculation
|
||||||
|
+ Recognize full unit names in parse_filesize
|
||||||
|
+ Add support for m3u8 manifests in HTML5 multimedia tags
|
||||||
|
* Fix octal/hexadecimal number detection in js_to_json
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [ivi] Add support for 720p and 1080p
|
||||||
|
+ [charlierose] Add new extractor (#10382)
|
||||||
|
* [1tv] Fix extraction (#9249)
|
||||||
|
* [twitch] Renew authentication
|
||||||
|
* [kaltura] Improve subtitles extension calculation
|
||||||
|
+ [zingmp3] Add support for video clips
|
||||||
|
* [zingmp3] Fix extraction (#10041)
|
||||||
|
* [kaltura] Improve subtitles extraction (#10279)
|
||||||
|
* [cultureunplugged] Fix extraction (#10330)
|
||||||
|
+ [cnn] Add support for money.cnn.com (#2797)
|
||||||
|
* [cbsnews] Fix extraction (#10362)
|
||||||
|
* [cbs] Fix extraction (#10393)
|
||||||
|
+ [litv] Support 'promo' URLs (#10385)
|
||||||
|
* [snotr] Fix extraction (#10338)
|
||||||
|
* [n-tv.de] Fix extraction (#10331)
|
||||||
|
* [globo:article] Relax URL and video id regular expressions (#10379)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.19
|
||||||
|
|
||||||
|
Core
|
||||||
|
- Remove output template description from --help
|
||||||
|
* Recognize lowercase units in parse_filesize
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [porncom] Add extractor for porn.com (#2251, #10251)
|
||||||
|
+ [generic] Add support for DBTV embeds
|
||||||
|
* [vk:wallpost] Fix audio extraction for new site layout
|
||||||
|
* [vk] Fix authentication
|
||||||
|
+ [hgtvcom:show] Add extractor for hgtv.com shows (#10365)
|
||||||
|
+ [discoverygo] Add support for other GO network sites
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.17
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ Add _get_netrc_login_info
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [mofosex] Extract all formats (#10335)
|
||||||
|
+ [generic] Add support for vbox7 embeds
|
||||||
|
+ [vbox7] Add support for embed URLs
|
||||||
|
+ [viafree] Add extractor (#10358)
|
||||||
|
+ [mtg] Add support for viafree URLs (#10358)
|
||||||
|
* [theplatform] Extract all subtitles per language
|
||||||
|
+ [xvideos] Fix HLS extraction (#10356)
|
||||||
|
+ [amcnetworks] Add extractor
|
||||||
|
+ [bbc:playlist] Add support for pagination (#10349)
|
||||||
|
+ [fxnetworks] Add extractor (#9462)
|
||||||
|
* [cbslocal] Fix extraction for SendtoNews-based videos
|
||||||
|
* [sendtonews] Fix extraction
|
||||||
|
* [jwplatform] Extract video id from JWPlayer data
|
||||||
|
- [zippcast] Remove extractor (#10332)
|
||||||
|
+ [viceland] Add extractor (#8799)
|
||||||
|
+ [adobepass] Add base extractor for Adobe Pass Authentication
|
||||||
|
* [life:embed] Improve extraction
|
||||||
|
* [vgtv] Detect geo restricted videos (#10348)
|
||||||
|
+ [uplynk] Add extractor
|
||||||
|
* [xiami] Fix extraction (#10342)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.13
|
||||||
|
|
||||||
|
Core
|
||||||
|
* Show progress for curl external downloader
|
||||||
|
* Forward more options to curl external downloader
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [pbs] Fix description extraction
|
||||||
|
* [franceculture] Fix extraction (#10324)
|
||||||
|
* [pornotube] Fix extraction (#10322)
|
||||||
|
* [4tube] Fix metadata extraction (#10321)
|
||||||
|
* [imgur] Fix width and height extraction (#10325)
|
||||||
|
* [expotv] Improve extraction
|
||||||
|
+ [vbox7] Fix extraction (#10309)
|
||||||
|
- [tapely] Remove extractor (#10323)
|
||||||
|
* [muenchentv] Fix extraction (#10313)
|
||||||
|
+ [24video] Add support for .me and .xxx TLDs
|
||||||
|
* [24video] Fix comment count extraction
|
||||||
|
* [sunporno] Add support for embed URLs
|
||||||
|
* [sunporno] Fix metadata extraction (#10316)
|
||||||
|
+ [hgtv] Add extractor for hgtv.ca (#3999)
|
||||||
|
- [pbs] Remove request to unavailable API
|
||||||
|
+ [pbs] Add support for high quality HTTP formats
|
||||||
|
+ [crunchyroll] Add support for HLS formats (#10301)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.12
|
||||||
|
|
||||||
|
Core
|
||||||
|
* Subtitles are now written as is. Newline conversions are disabled. (#10268)
|
||||||
|
+ Recognize more formats in unified_timestamp
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
- [goldenmoustache] Remove extractor (#10298)
|
||||||
|
* [drtuber] Improve title extraction
|
||||||
|
* [drtuber] Make dislike count optional (#10297)
|
||||||
|
* [chirbit] Fix extraction (#10296)
|
||||||
|
* [francetvinfo] Relax URL regular expression
|
||||||
|
* [rtlnl] Relax URL regular expression (#10282)
|
||||||
|
* [formula1] Relax URL regular expression (#10283)
|
||||||
|
* [wat] Improve extraction (#10281)
|
||||||
|
* [ctsnews] Fix extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.10
|
||||||
|
|
||||||
|
Core
|
||||||
|
* Make --metadata-from-title non fatal when title does not match the pattern
|
||||||
|
* Introduce options for randomized sleep before each download
|
||||||
|
--min-sleep-interval and --max-sleep-interval (#9930)
|
||||||
|
* Respect default in _search_json_ld
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [uol] Add extractor for uol.com.br (#4263)
|
||||||
|
* [rbmaradio] Fix extraction and extract all formats (#10242)
|
||||||
|
+ [sonyliv] Add extractor for sonyliv.com (#10258)
|
||||||
|
* [aparat] Fix extraction
|
||||||
|
* [cwtv] Extract HTTP formats
|
||||||
|
+ [rozhlas] Add extractor for prehravac.rozhlas.cz (#10253)
|
||||||
|
* [kuwo:singer] Fix extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.07
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ Add support for TV Parental Guidelines ratings in parse_age_limit
|
||||||
|
+ Add decode_png (#9706)
|
||||||
|
+ Add support for partOfTVSeries in JSON-LD
|
||||||
|
* Lower master M3U8 manifest preference for better format sorting
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [discoverygo] Add extractor (#10245)
|
||||||
|
* [flipagram] Make JSON-LD extraction non fatal
|
||||||
|
* [generic] Make JSON-LD extraction non fatal
|
||||||
|
+ [bbc] Add support for morph embeds (#10239)
|
||||||
|
* [tnaflixnetworkbase] Improve title extraction
|
||||||
|
* [tnaflix] Fix metadata extraction (#10249)
|
||||||
|
* [fox] Fix theplatform release URL query
|
||||||
|
* [openload] Fix extraction (#9706)
|
||||||
|
* [bbc] Skip duplicate manifest URLs
|
||||||
|
* [bbc] Improve format code
|
||||||
|
+ [bbc] Add support for DASH and F4M
|
||||||
|
* [bbc] Improve format sorting and listing
|
||||||
|
* [bbc] Improve playlist extraction
|
||||||
|
+ [pokemon] Add extractor (#10093)
|
||||||
|
+ [condenast] Add fallback scenario for video info extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.06
|
||||||
|
|
||||||
|
Core
|
||||||
|
* Add support for JSON-LD root list entries (#10203)
|
||||||
|
* Improve unified_timestamp
|
||||||
|
* Lower preference of RTSP formats in generic sorting
|
||||||
|
+ Add support for multiple properties in _og_search_property
|
||||||
|
* Improve password hiding from verbose output
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [adultswim] Add support for trailers (#10235)
|
||||||
|
* [archiveorg] Improve extraction (#10219)
|
||||||
|
+ [jwplatform] Add support for playlists
|
||||||
|
+ [jwplatform] Add support for relative URLs
|
||||||
|
* [jwplatform] Improve audio detection
|
||||||
|
+ [tvplay] Capture and output native error message
|
||||||
|
+ [tvplay] Extract series metadata
|
||||||
|
+ [tvplay] Add support for subtitles (#10194)
|
||||||
|
* [tvp] Improve extraction (#7799)
|
||||||
|
* [cbslocal] Fix timestamp parsing (#10213)
|
||||||
|
+ [naver] Add support for subtitles (#8096)
|
||||||
|
* [naver] Improve extraction
|
||||||
|
* [condenast] Improve extraction
|
||||||
|
* [engadget] Relax URL regular expression
|
||||||
|
* [5min] Fix extraction
|
||||||
|
+ [nationalgeographic] Add support for Episode Guide
|
||||||
|
+ [kaltura] Add support for subtitles
|
||||||
|
* [kaltura] Optimize network requests
|
||||||
|
+ [vodplatform] Add extractor for vod-platform.net
|
||||||
|
- [gamekings] Remove extractor
|
||||||
|
* [limelight] Extract HTTP formats
|
||||||
|
* [ntvru] Fix extraction
|
||||||
|
+ [comedycentral] Re-add :tds and :thedailyshow shortnames
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.01
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- [yandexmusic:track] Adapt to changes in track location JSON (#10193)
|
||||||
|
- [bloomberg] Support another form of player (#10187)
|
||||||
|
- [limelight] Skip DRM protected videos
|
||||||
|
- [safari] Relax regular expressions for URL matching (#10202)
|
||||||
|
- [cwtv] Add support for cwtvpr.com (#10196)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.30
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- [twitch:clips] Sort formats
|
||||||
|
- [tv2] Use m3u8_native
|
||||||
|
- [tv2:article] Fix video detection (#10188)
|
||||||
|
- rtve (#10076)
|
||||||
|
- [dailymotion:playlist] Optimize download archive processing (#10180)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.28
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- shared (#10170)
|
||||||
|
- soundcloud (#10179)
|
||||||
|
- twitch (#9767)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.26.2
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- smotri
|
||||||
|
- camdemy
|
||||||
|
- mtv
|
||||||
|
- comedycentral
|
||||||
|
- cmt
|
||||||
|
- cbc
|
||||||
|
- mgtv
|
||||||
|
- orf
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.24
|
||||||
|
|
||||||
|
New extractors
|
||||||
|
- arkena (#8682)
|
||||||
|
- lcp (#8682)
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- facebook (#10151)
|
||||||
|
- dailymail
|
||||||
|
- telegraaf
|
||||||
|
- dcn
|
||||||
|
- onet
|
||||||
|
- tvp
|
||||||
|
|
||||||
|
Miscellaneous
|
||||||
|
- Support $Time$ in DASH manifests
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.22
|
||||||
|
|
||||||
|
New extractors
|
||||||
|
- odatv (#9285)
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- bbc
|
||||||
|
- youjizz (#10131)
|
||||||
|
- youtube (#10140)
|
||||||
|
- pornhub (#10138)
|
||||||
|
- eporner (#10139)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.17
|
||||||
|
|
||||||
|
New extractors
|
||||||
|
- nintendo (#9986)
|
||||||
|
- streamable (#9122)
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- ard (#10095)
|
||||||
|
- mtv
|
||||||
|
- comedycentral (#10101)
|
||||||
|
- viki (#10098)
|
||||||
|
- spike (#10106)
|
||||||
|
|
||||||
|
Miscellaneous
|
||||||
|
- Improved twitter player detection (#10090)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.16
|
||||||
|
|
||||||
|
New extractors
|
||||||
|
- ninenow (#5181)
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- rtve (#10076)
|
||||||
|
- brightcove
|
||||||
|
- 3qsdn
|
||||||
|
- syfy (#9087, #3820, #2388)
|
||||||
|
- youtube (#10083)
|
||||||
|
|
||||||
|
Miscellaneous
|
||||||
|
- Fix subtitle embedding for video-only and audio-only files (#10081)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.13
|
||||||
|
|
||||||
|
New extractors
|
||||||
|
- rudo
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- biobiochiletv
|
||||||
|
- tvplay
|
||||||
|
- dbtv
|
||||||
|
- brightcove
|
||||||
|
- tmz
|
||||||
|
- youtube (#10059)
|
||||||
|
- shahid (#10062)
|
||||||
|
- vk
|
||||||
|
- ellentv (#10067)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.11
|
||||||
|
|
||||||
|
New Extractors
|
||||||
|
- roosterteeth (#9864)
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- miomio (#9605)
|
||||||
|
- vuclip
|
||||||
|
- youtube
|
||||||
|
- vidzi (#10058)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.09.2
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- vimeo (#1638)
|
||||||
|
- facebook (#10048)
|
||||||
|
- lynda (#10047)
|
||||||
|
- animeondemand
|
||||||
|
|
||||||
|
Fixed/improved features
|
||||||
|
- Embedding subtitles no longer throws an error with problematic inputs (#9063)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.09.1
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- youtube
|
||||||
|
- ard
|
||||||
|
- srmediatek (#9373)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.09
|
||||||
|
|
||||||
|
New extractors
|
||||||
|
- Flipagram (#9898)
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- telecinco
|
||||||
|
- toutv
|
||||||
|
- radiocanada
|
||||||
|
- tweakers (#9516)
|
||||||
|
- lynda
|
||||||
|
- nick (#7542)
|
||||||
|
- polskieradio (#10028)
|
||||||
|
- le
|
||||||
|
- facebook (#9851)
|
||||||
|
- mgtv
|
||||||
|
- animeondemand (#10031)
|
||||||
|
|
||||||
|
Fixed/improved features
|
||||||
|
- `--postprocessor-args` and `--downloader-args` now accept non-ASCII inputs
|
||||||
|
on non-Windows systems
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.07
|
||||||
|
|
||||||
|
New extractors
|
||||||
|
- kamcord (#10001)
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- spiegel (#10018)
|
||||||
|
- metacafe (#8539, #3253)
|
||||||
|
- onet (#9950)
|
||||||
|
- francetv (#9955)
|
||||||
|
- brightcove (#9965)
|
||||||
|
- daum (#9972)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.06
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- youtube (#10007, #10009)
|
||||||
|
- xuite
|
||||||
|
- stitcher
|
||||||
|
- spiegel
|
||||||
|
- slideshare
|
||||||
|
- sandia
|
||||||
|
- rtvnh
|
||||||
|
- prosiebensat1
|
||||||
|
- onionstudios
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.05
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- brightcove
|
||||||
|
- yahoo (#9995)
|
||||||
|
- pornhub (#9997)
|
||||||
|
- iqiyi
|
||||||
|
- kaltura (#5557)
|
||||||
|
- la7
|
||||||
|
Changed features
|
||||||
|
- Rename --cn-verification-proxy to --geo-verification-proxy
|
||||||
|
Miscellaneous
|
||||||
|
- Add script for displaying downloads statistics
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.03.1
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- theplatform
|
||||||
|
- aenetworks
|
||||||
|
- nationalgeographic
|
||||||
|
- hrti (#9482)
|
||||||
|
- facebook (#5701)
|
||||||
|
- buzzfeed (#5701)
|
||||||
|
- rai (#8617, #9157, #9232, #8552, #8551)
|
||||||
|
- nationalgeographic (#9991)
|
||||||
|
- iqiyi
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.03
|
||||||
|
|
||||||
|
New extractors
|
||||||
|
- hrti (#9482)
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- vk (#9981)
|
||||||
|
- facebook (#9938)
|
||||||
|
- xtube (#9953, #9961)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.02
|
||||||
|
|
||||||
|
New extractors
|
||||||
|
- fusion (#9958)
|
||||||
|
|
||||||
|
Fixed/improved extractors
|
||||||
|
- twitch (#9975)
|
||||||
|
- vine (#9970)
|
||||||
|
- periscope (#9967)
|
||||||
|
- pornhub (#8696)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.07.01
|
||||||
|
|
||||||
|
New extractors
|
||||||
|
- 9c9media
|
||||||
|
- ctvnews (#2156)
|
||||||
|
- ctv (#4077)
|
||||||
|
|
||||||
|
Fixed/Improved extractors
|
||||||
|
- rds
|
||||||
|
- meta (#8789)
|
||||||
|
- pornhub (#9964)
|
||||||
|
- sixplay (#2183)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Accept quoted strings across multiple lines (#9940)
|
||||||
6
Makefile
6
Makefile
@@ -1,7 +1,7 @@
|
|||||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||||
find . -name "*.pyc" -delete
|
find . -name "*.pyc" -delete
|
||||||
find . -name "*.class" -delete
|
find . -name "*.class" -delete
|
||||||
|
|
||||||
@@ -94,7 +94,7 @@ _EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'la
|
|||||||
youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
||||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||||
|
|
||||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog
|
||||||
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
||||||
--exclude '*.DS_Store' \
|
--exclude '*.DS_Store' \
|
||||||
--exclude '*.kate-swp' \
|
--exclude '*.kate-swp' \
|
||||||
@@ -107,7 +107,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
|||||||
--exclude 'docs/_build' \
|
--exclude 'docs/_build' \
|
||||||
-- \
|
-- \
|
||||||
bin devscripts test youtube_dl docs \
|
bin devscripts test youtube_dl docs \
|
||||||
LICENSE README.md README.txt \
|
ChangeLog LICENSE README.md README.txt \
|
||||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
|
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
|
||||||
youtube-dl.zsh youtube-dl.fish setup.py \
|
youtube-dl.zsh youtube-dl.fish setup.py \
|
||||||
youtube-dl
|
youtube-dl
|
||||||
|
|||||||
282
README.md
282
README.md
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
|||||||
|
|
||||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||||
|
|
||||||
sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||||
|
|
||||||
If you do not have curl, you can alternatively use a recent wget:
|
If you do not have curl, you can alternatively use a recent wget:
|
||||||
@@ -44,7 +44,7 @@ Or with [MacPorts](https://www.macports.org/):
|
|||||||
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
|
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
|
||||||
|
|
||||||
# DESCRIPTION
|
# DESCRIPTION
|
||||||
**youtube-dl** is a small command-line program to download videos from
|
**youtube-dl** is a command-line program to download videos from
|
||||||
YouTube.com and a few more sites. It requires the Python interpreter, version
|
YouTube.com and a few more sites. It requires the Python interpreter, version
|
||||||
2.6, 2.7, or 3.2+, and it is not platform specific. It should work on
|
2.6, 2.7, or 3.2+, and it is not platform specific. It should work on
|
||||||
your Unix box, on Windows or on Mac OS X. It is released to the public domain,
|
your Unix box, on Windows or on Mac OS X. It is released to the public domain,
|
||||||
@@ -89,6 +89,8 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--mark-watched Mark videos watched (YouTube only)
|
--mark-watched Mark videos watched (YouTube only)
|
||||||
--no-mark-watched Do not mark videos watched (YouTube only)
|
--no-mark-watched Do not mark videos watched (YouTube only)
|
||||||
--no-color Do not emit color codes in output
|
--no-color Do not emit color codes in output
|
||||||
|
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||||
|
available
|
||||||
|
|
||||||
## Network Options:
|
## Network Options:
|
||||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||||
@@ -103,9 +105,9 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
(experimental)
|
(experimental)
|
||||||
-6, --force-ipv6 Make all connections via IPv6
|
-6, --force-ipv6 Make all connections via IPv6
|
||||||
(experimental)
|
(experimental)
|
||||||
--cn-verification-proxy URL Use this proxy to verify the IP address for
|
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||||
some Chinese sites. The default proxy
|
some geo-restricted sites. The default
|
||||||
specified by --proxy (or none, if the
|
proxy specified by --proxy (or none, if the
|
||||||
option is not present) is used for the
|
option is not present) is used for the
|
||||||
actual downloading. (experimental)
|
actual downloading. (experimental)
|
||||||
|
|
||||||
@@ -173,7 +175,10 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
-R, --retries RETRIES Number of retries (default is 10), or
|
-R, --retries RETRIES Number of retries (default is 10), or
|
||||||
"infinite".
|
"infinite".
|
||||||
--fragment-retries RETRIES Number of retries for a fragment (default
|
--fragment-retries RETRIES Number of retries for a fragment (default
|
||||||
is 10), or "infinite" (DASH only)
|
is 10), or "infinite" (DASH and hlsnative
|
||||||
|
only)
|
||||||
|
--skip-unavailable-fragments Skip unavailable fragments (DASH and
|
||||||
|
hlsnative only)
|
||||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||||
(default is 1024)
|
(default is 1024)
|
||||||
--no-resize-buffer Do not automatically adjust the buffer
|
--no-resize-buffer Do not automatically adjust the buffer
|
||||||
@@ -201,32 +206,8 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
-a, --batch-file FILE File containing URLs to download ('-' for
|
-a, --batch-file FILE File containing URLs to download ('-' for
|
||||||
stdin)
|
stdin)
|
||||||
--id Use only video ID in file name
|
--id Use only video ID in file name
|
||||||
-o, --output TEMPLATE Output filename template. Use %(title)s to
|
-o, --output TEMPLATE Output filename template, see the "OUTPUT
|
||||||
get the title, %(uploader)s for the
|
TEMPLATE" for all the info
|
||||||
uploader name, %(uploader_id)s for the
|
|
||||||
uploader nickname if different,
|
|
||||||
%(autonumber)s to get an automatically
|
|
||||||
incremented number, %(ext)s for the
|
|
||||||
filename extension, %(format)s for the
|
|
||||||
format description (like "22 - 1280x720" or
|
|
||||||
"HD"), %(format_id)s for the unique id of
|
|
||||||
the format (like YouTube's itags: "137"),
|
|
||||||
%(upload_date)s for the upload date
|
|
||||||
(YYYYMMDD), %(extractor)s for the provider
|
|
||||||
(youtube, metacafe, etc), %(id)s for the
|
|
||||||
video id, %(playlist_title)s,
|
|
||||||
%(playlist_id)s, or %(playlist)s (=title if
|
|
||||||
present, ID otherwise) for the playlist the
|
|
||||||
video is in, %(playlist_index)s for the
|
|
||||||
position in the playlist. %(height)s and
|
|
||||||
%(width)s for the width and height of the
|
|
||||||
video format. %(resolution)s for a textual
|
|
||||||
description of the resolution of the video
|
|
||||||
format. %% for a literal percent. Use - to
|
|
||||||
output to stdout. Can also be used to
|
|
||||||
download to a different directory, for
|
|
||||||
example with -o '/my/downloads/%(uploader)s
|
|
||||||
/%(title)s-%(id)s.%(ext)s' .
|
|
||||||
--autonumber-size NUMBER Specify the number of digits in
|
--autonumber-size NUMBER Specify the number of digits in
|
||||||
%(autonumber)s when it is present in output
|
%(autonumber)s when it is present in output
|
||||||
filename template or --auto-number option
|
filename template or --auto-number option
|
||||||
@@ -330,7 +311,15 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
bidirectional text support. Requires bidiv
|
bidirectional text support. Requires bidiv
|
||||||
or fribidi executable in PATH
|
or fribidi executable in PATH
|
||||||
--sleep-interval SECONDS Number of seconds to sleep before each
|
--sleep-interval SECONDS Number of seconds to sleep before each
|
||||||
download.
|
download when used alone or a lower bound
|
||||||
|
of a range for randomized sleep before each
|
||||||
|
download (minimum possible number of
|
||||||
|
seconds to sleep) when used along with
|
||||||
|
--max-sleep-interval.
|
||||||
|
--max-sleep-interval SECONDS Upper bound of a range for randomized sleep
|
||||||
|
before each download (maximum possible
|
||||||
|
number of seconds to sleep). Must only be
|
||||||
|
used along with --min-sleep-interval.
|
||||||
|
|
||||||
## Video Format Options:
|
## Video Format Options:
|
||||||
-f, --format FORMAT Video format code, see the "FORMAT
|
-f, --format FORMAT Video format code, see the "FORMAT
|
||||||
@@ -369,6 +358,17 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
-n, --netrc Use .netrc authentication data
|
-n, --netrc Use .netrc authentication data
|
||||||
--video-password PASSWORD Video password (vimeo, smotri, youku)
|
--video-password PASSWORD Video password (vimeo, smotri, youku)
|
||||||
|
|
||||||
|
## Adobe Pass Options:
|
||||||
|
--ap-mso MSO Adobe Pass multiple-system operator (TV
|
||||||
|
provider) identifier, use --ap-list-mso for
|
||||||
|
a list of available MSOs
|
||||||
|
--ap-username USERNAME Multiple-system operator account login
|
||||||
|
--ap-password PASSWORD Multiple-system operator account password.
|
||||||
|
If this option is left out, youtube-dl will
|
||||||
|
ask interactively.
|
||||||
|
--ap-list-mso List all supported multiple-system
|
||||||
|
operators
|
||||||
|
|
||||||
## Post-processing Options:
|
## Post-processing Options:
|
||||||
-x, --extract-audio Convert video files to audio-only files
|
-x, --extract-audio Convert video files to audio-only files
|
||||||
(requires ffmpeg or avconv and ffprobe or
|
(requires ffmpeg or avconv and ffprobe or
|
||||||
@@ -424,13 +424,22 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
|
|
||||||
# CONFIGURATION
|
# CONFIGURATION
|
||||||
|
|
||||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
|
You can configure youtube-dl by placing any supported command line option in a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default the configuration file may not exist, so you may need to create it yourself.
|
||||||
|
|
||||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||||
```
|
```
|
||||||
|
# Lines starting with # are comments
|
||||||
|
|
||||||
|
# Always extract audio
|
||||||
-x
|
-x
|
||||||
|
|
||||||
|
# Do not copy the mtime
|
||||||
--no-mtime
|
--no-mtime
|
||||||
|
|
||||||
|
# Use this proxy
|
||||||
--proxy 127.0.0.1:3128
|
--proxy 127.0.0.1:3128
|
||||||
|
|
||||||
|
# Save all videos under Movies directory in your home directory
|
||||||
-o ~/Movies/%(title)s.%(ext)s
|
-o ~/Movies/%(title)s.%(ext)s
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -511,6 +520,9 @@ The basic usage is not to set any template arguments when downloading a single f
|
|||||||
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
||||||
- `playlist`: Name or id of the playlist that contains the video
|
- `playlist`: Name or id of the playlist that contains the video
|
||||||
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||||
|
- `playlist_id`: Playlist identifier
|
||||||
|
- `playlist_title`: Playlist title
|
||||||
|
|
||||||
|
|
||||||
Available for the video that belongs to some logical chapter or section:
|
Available for the video that belongs to some logical chapter or section:
|
||||||
- `chapter`: Name or title of the chapter the video belongs to
|
- `chapter`: Name or title of the chapter the video belongs to
|
||||||
@@ -550,6 +562,10 @@ The current default template is `%(title)s-%(id)s.%(ext)s`.
|
|||||||
|
|
||||||
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||||
|
|
||||||
|
#### Output template and Windows batch files
|
||||||
|
|
||||||
|
If you are using an output template inside a Windows batch file, you must escape plain percent characters (`%`) by doubling them, so that `-o "%(title)s-%(id)s.%(ext)s"` becomes `-o "%%(title)s-%%(id)s.%%(ext)s"`. However, you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
|
||||||
|
|
||||||
#### Output template examples
|
#### Output template examples
|
||||||
|
|
||||||
Note on Windows you may need to use double quotes instead of single.
|
Note on Windows you may need to use double quotes instead of single.
|
||||||
@@ -653,7 +669,11 @@ $ youtube-dl -f 'best[filesize<50M]'
|
|||||||
|
|
||||||
# Download best format available via direct link over HTTP/HTTPS protocol
|
# Download best format available via direct link over HTTP/HTTPS protocol
|
||||||
$ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]'
|
$ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]'
|
||||||
|
|
||||||
|
# Download the best video format and the best audio format without merging them
|
||||||
|
$ youtube-dl -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s'
|
||||||
```
|
```
|
||||||
|
Note that in the last example, an output template is recommended as bestvideo and bestaudio may have the same file name.
|
||||||
|
|
||||||
|
|
||||||
# VIDEO SELECTION
|
# VIDEO SELECTION
|
||||||
@@ -734,7 +754,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
|
|||||||
|
|
||||||
### I have downloaded a video but how can I play it?
|
### I have downloaded a video but how can I play it?
|
||||||
|
|
||||||
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
|
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
|
||||||
|
|
||||||
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
|
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
|
||||||
|
|
||||||
@@ -816,10 +836,42 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
|
|||||||
|
|
||||||
### How do I pass cookies to youtube-dl?
|
### How do I pass cookies to youtube-dl?
|
||||||
|
|
||||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||||
|
|
||||||
|
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||||
|
|
||||||
|
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||||
|
|
||||||
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) that some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) that some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
||||||
|
|
||||||
|
### How do I stream directly to media player?
|
||||||
|
|
||||||
|
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
|
||||||
|
|
||||||
|
youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
|
||||||
|
|
||||||
|
### How do I download only new videos from a playlist?
|
||||||
|
|
||||||
|
Use the download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file.
|
||||||
|
|
||||||
|
For example, at first,
|
||||||
|
|
||||||
|
youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
|
||||||
|
|
||||||
|
will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. Each subsequent run will only download new videos if any:
|
||||||
|
|
||||||
|
youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
|
||||||
|
|
||||||
|
### Should I add `--hls-prefer-native` into my config?
|
||||||
|
|
||||||
|
When youtube-dl detects an HLS video, it can download it either with the built-in downloader or ffmpeg. Since many HLS streams are slightly invalid and ffmpeg/youtube-dl each handle some invalid cases better than the other, there is an option to switch the downloader if needed.
|
||||||
|
|
||||||
|
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
|
||||||
|
|
||||||
|
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
|
||||||
|
|
||||||
|
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
|
||||||
|
|
||||||
### Can you add support for this anime video site, or site which shows current movies for free?
|
### Can you add support for this anime video site, or site which shows current movies for free?
|
||||||
|
|
||||||
As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
|
As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
|
||||||
@@ -883,9 +935,17 @@ If you want to add support for a new site, first of all **make sure** this site
|
|||||||
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||||
|
|
||||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
2. Check out the source code with:
|
||||||
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
|
||||||
|
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
|
||||||
|
|
||||||
|
3. Start a new git branch with
|
||||||
|
|
||||||
|
cd youtube-dl
|
||||||
|
git checkout -b yourextractor
|
||||||
|
|
||||||
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
@@ -928,20 +988,152 @@ After you have ensured this site is distributing it's content legally, you can f
|
|||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||||
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
|
||||||
|
|
||||||
$ git add youtube_dl/extractor/extractors.py
|
$ git add youtube_dl/extractor/extractors.py
|
||||||
$ git add youtube_dl/extractor/yourextractor.py
|
$ git add youtube_dl/extractor/yourextractor.py
|
||||||
$ git commit -m '[yourextractor] Add new extractor'
|
$ git commit -m '[yourextractor] Add new extractor'
|
||||||
$ git push origin yourextractor
|
$ git push origin yourextractor
|
||||||
|
|
||||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||||
|
|
||||||
In any case, thank you very much for your contributions!
|
In any case, thank you very much for your contributions!
|
||||||
|
|
||||||
|
## youtube-dl coding conventions
|
||||||
|
|
||||||
|
This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
|
||||||
|
|
||||||
|
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
|
||||||
|
|
||||||
|
### Mandatory and optional metafields
|
||||||
|
|
||||||
|
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
|
||||||
|
|
||||||
|
- `id` (media identifier)
|
||||||
|
- `title` (media title)
|
||||||
|
- `url` (media download URL) or `formats`
|
||||||
|
|
||||||
|
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||||
|
|
||||||
|
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
meta = self._download_json(url, video_id)
|
||||||
|
```
|
||||||
|
|
||||||
|
Assume at this point `meta`'s layout is:
|
||||||
|
|
||||||
|
```python
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"summary": "some fancy summary text",
|
||||||
|
...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be prepared for this key to be missing from the `meta` dict, so you should extract it like:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = meta.get('summary') # correct
|
||||||
|
```
|
||||||
|
|
||||||
|
and not like:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = meta['summary'] # incorrect
|
||||||
|
```
|
||||||
|
|
||||||
|
The latter will break the extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
|
||||||
|
|
||||||
|
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = self._search_regex(
|
||||||
|
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
|
```
|
||||||
|
|
||||||
|
With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
|
||||||
|
|
||||||
|
You can also pass `default=<some fallback value>`, for example:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = self._search_regex(
|
||||||
|
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||||
|
webpage, 'description', default=None)
|
||||||
|
```
|
||||||
|
|
||||||
|
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
|
||||||
|
|
||||||
|
### Provide fallbacks
|
||||||
|
|
||||||
|
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources become unavailable.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = meta['title']
|
||||||
|
```
|
||||||
|
|
||||||
|
If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||||
|
|
||||||
|
Assume that you have another source you can extract `title` from, for example the `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = meta.get('title') or self._og_search_title(webpage)
|
||||||
|
```
|
||||||
|
|
||||||
|
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||||
|
|
||||||
|
### Make regular expressions flexible
|
||||||
|
|
||||||
|
When using regular expressions try to write them fuzzy and flexible.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Say you need to extract `title` from the following HTML code:
|
||||||
|
|
||||||
|
```html
|
||||||
|
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
|
||||||
|
```
|
||||||
|
|
||||||
|
The code for that task should look similar to:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
|
||||||
|
```
|
||||||
|
|
||||||
|
Or even better:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
|
||||||
|
webpage, 'title', group='title')
|
||||||
|
```
|
||||||
|
|
||||||
|
Note how you tolerate potential changes in the `style` attribute's value or a switch from using double quotes to single quotes for the `class` attribute.
|
||||||
|
|
||||||
|
The code definitely should not look like:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
|
||||||
|
webpage, 'title', group='title')
|
||||||
|
```
|
||||||
|
|
||||||
|
### Use safe conversion functions
|
||||||
|
|
||||||
|
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||||
|
|
||||||
# EMBEDDING YOUTUBE-DL
|
# EMBEDDING YOUTUBE-DL
|
||||||
|
|
||||||
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/rg3/youtube-dl/issues/new).
|
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/rg3/youtube-dl/issues/new).
|
||||||
@@ -957,7 +1149,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
|||||||
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||||
```
|
```
|
||||||
|
|
||||||
Most likely, you'll want to use various options. For a list of what can be done, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L121-L269). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L128-L278). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||||
|
|
||||||
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||||
|
|
||||||
@@ -1048,7 +1240,7 @@ Make sure that someone has not already opened the issue you're trying to open. S
|
|||||||
|
|
||||||
### Why are existing options not enough?
|
### Why are existing options not enough?
|
||||||
|
|
||||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||||
|
|
||||||
### Is there enough context in your bug report?
|
### Is there enough context in your bug report?
|
||||||
|
|
||||||
|
|||||||
@@ -15,13 +15,9 @@ data = urllib.request.urlopen(URL).read()
|
|||||||
with open('download.html.in', 'r', encoding='utf-8') as tmplf:
|
with open('download.html.in', 'r', encoding='utf-8') as tmplf:
|
||||||
template = tmplf.read()
|
template = tmplf.read()
|
||||||
|
|
||||||
md5sum = hashlib.md5(data).hexdigest()
|
|
||||||
sha1sum = hashlib.sha1(data).hexdigest()
|
|
||||||
sha256sum = hashlib.sha256(data).hexdigest()
|
sha256sum = hashlib.sha256(data).hexdigest()
|
||||||
template = template.replace('@PROGRAM_VERSION@', version)
|
template = template.replace('@PROGRAM_VERSION@', version)
|
||||||
template = template.replace('@PROGRAM_URL@', URL)
|
template = template.replace('@PROGRAM_URL@', URL)
|
||||||
template = template.replace('@PROGRAM_MD5SUM@', md5sum)
|
|
||||||
template = template.replace('@PROGRAM_SHA1SUM@', sha1sum)
|
|
||||||
template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
|
template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
|
||||||
template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
|
template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
|
||||||
template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
|
template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
mkdir -p tmp && cd tmp
|
|
||||||
wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz
|
|
||||||
tar zxvf srelay-0.4.8b6.tar.gz
|
|
||||||
cd srelay-0.4.8b6
|
|
||||||
./configure
|
|
||||||
make
|
|
||||||
@@ -14,15 +14,17 @@ if os.path.exists(lazy_extractors_filename):
|
|||||||
os.remove(lazy_extractors_filename)
|
os.remove(lazy_extractors_filename)
|
||||||
|
|
||||||
from youtube_dl.extractor import _ALL_CLASSES
|
from youtube_dl.extractor import _ALL_CLASSES
|
||||||
from youtube_dl.extractor.common import InfoExtractor
|
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||||
|
|
||||||
with open('devscripts/lazy_load_template.py', 'rt') as f:
|
with open('devscripts/lazy_load_template.py', 'rt') as f:
|
||||||
module_template = f.read()
|
module_template = f.read()
|
||||||
|
|
||||||
module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)]
|
module_contents = [
|
||||||
|
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
|
||||||
|
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
|
||||||
|
|
||||||
ie_template = '''
|
ie_template = '''
|
||||||
class {name}(LazyLoadExtractor):
|
class {name}({bases}):
|
||||||
_VALID_URL = {valid_url!r}
|
_VALID_URL = {valid_url!r}
|
||||||
_module = '{module}'
|
_module = '{module}'
|
||||||
'''
|
'''
|
||||||
@@ -34,10 +36,20 @@ make_valid_template = '''
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
def get_base_name(base):
|
||||||
|
if base is InfoExtractor:
|
||||||
|
return 'LazyLoadExtractor'
|
||||||
|
elif base is SearchInfoExtractor:
|
||||||
|
return 'LazyLoadSearchExtractor'
|
||||||
|
else:
|
||||||
|
return base.__name__
|
||||||
|
|
||||||
|
|
||||||
def build_lazy_ie(ie, name):
|
def build_lazy_ie(ie, name):
|
||||||
valid_url = getattr(ie, '_VALID_URL', None)
|
valid_url = getattr(ie, '_VALID_URL', None)
|
||||||
s = ie_template.format(
|
s = ie_template.format(
|
||||||
name=name,
|
name=name,
|
||||||
|
bases=', '.join(map(get_base_name, ie.__bases__)),
|
||||||
valid_url=valid_url,
|
valid_url=valid_url,
|
||||||
module=ie.__module__)
|
module=ie.__module__)
|
||||||
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
|
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
|
||||||
@@ -47,12 +59,35 @@ def build_lazy_ie(ie, name):
|
|||||||
s += make_valid_template.format(valid_url=ie._make_valid_url())
|
s += make_valid_template.format(valid_url=ie._make_valid_url())
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
# find the correct sorting and add the required base classes so that subclasses
|
||||||
|
# can be correctly created
|
||||||
|
classes = _ALL_CLASSES[:-1]
|
||||||
|
ordered_cls = []
|
||||||
|
while classes:
|
||||||
|
for c in classes[:]:
|
||||||
|
bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor))
|
||||||
|
stop = False
|
||||||
|
for b in bases:
|
||||||
|
if b not in classes and b not in ordered_cls:
|
||||||
|
if b.__name__ == 'GenericIE':
|
||||||
|
exit()
|
||||||
|
classes.insert(0, b)
|
||||||
|
stop = True
|
||||||
|
if stop:
|
||||||
|
break
|
||||||
|
if all(b in ordered_cls for b in bases):
|
||||||
|
ordered_cls.append(c)
|
||||||
|
classes.remove(c)
|
||||||
|
break
|
||||||
|
ordered_cls.append(_ALL_CLASSES[-1])
|
||||||
|
|
||||||
names = []
|
names = []
|
||||||
for ie in list(sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key())) + _ALL_CLASSES[-1:]:
|
for ie in ordered_cls:
|
||||||
name = ie.ie_key() + 'IE'
|
name = ie.__name__
|
||||||
src = build_lazy_ie(ie, name)
|
src = build_lazy_ie(ie, name)
|
||||||
module_contents.append(src)
|
module_contents.append(src)
|
||||||
names.append(name)
|
if ie in _ALL_CLASSES:
|
||||||
|
names.append(name)
|
||||||
|
|
||||||
module_contents.append(
|
module_contents.append(
|
||||||
'_ALL_CLASSES = [{0}]'.format(', '.join(names)))
|
'_ALL_CLASSES = [{0}]'.format(', '.join(names)))
|
||||||
|
|||||||
@@ -54,17 +54,21 @@ def filter_options(readme):
|
|||||||
|
|
||||||
if in_options:
|
if in_options:
|
||||||
if line.lstrip().startswith('-'):
|
if line.lstrip().startswith('-'):
|
||||||
option, description = re.split(r'\s{2,}', line.lstrip())
|
split = re.split(r'\s{2,}', line.lstrip())
|
||||||
split_option = option.split(' ')
|
# Description string may start with `-` as well. If there is
|
||||||
|
# only one piece then it's a description bit not an option.
|
||||||
|
if len(split) > 1:
|
||||||
|
option, description = split
|
||||||
|
split_option = option.split(' ')
|
||||||
|
|
||||||
if not split_option[-1].startswith('-'): # metavar
|
if not split_option[-1].startswith('-'): # metavar
|
||||||
option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
|
option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
|
||||||
|
|
||||||
# Pandoc's definition_lists. See http://pandoc.org/README.html
|
# Pandoc's definition_lists. See http://pandoc.org/README.html
|
||||||
# for more information.
|
# for more information.
|
||||||
ret += '\n%s\n: %s\n' % (option, description)
|
ret += '\n%s\n: %s\n' % (option, description)
|
||||||
else:
|
continue
|
||||||
ret += line.lstrip() + '\n'
|
ret += line.lstrip() + '\n'
|
||||||
else:
|
else:
|
||||||
ret += line + '\n'
|
ret += line + '\n'
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
skip_tests=true
|
skip_tests=true
|
||||||
|
gpg_sign_commits=""
|
||||||
buildserver='localhost:8142'
|
buildserver='localhost:8142'
|
||||||
|
|
||||||
while true
|
while true
|
||||||
@@ -24,6 +25,10 @@ case "$1" in
|
|||||||
skip_tests=false
|
skip_tests=false
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
|
--gpg-sign-commits|-S)
|
||||||
|
gpg_sign_commits="-S"
|
||||||
|
shift
|
||||||
|
;;
|
||||||
--buildserver)
|
--buildserver)
|
||||||
buildserver="$2"
|
buildserver="$2"
|
||||||
shift 2
|
shift 2
|
||||||
@@ -55,6 +60,9 @@ if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; e
|
|||||||
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
||||||
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
||||||
|
|
||||||
|
read -p "Is ChangeLog up to date? (y/n) " -n 1
|
||||||
|
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
||||||
|
|
||||||
/bin/echo -e "\n### First of all, testing..."
|
/bin/echo -e "\n### First of all, testing..."
|
||||||
make clean
|
make clean
|
||||||
if $skip_tests ; then
|
if $skip_tests ; then
|
||||||
@@ -66,10 +74,13 @@ fi
|
|||||||
/bin/echo -e "\n### Changing version in version.py..."
|
/bin/echo -e "\n### Changing version in version.py..."
|
||||||
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
||||||
|
|
||||||
|
/bin/echo -e "\n### Changing version in ChangeLog..."
|
||||||
|
sed -i "s/<unreleased>/$version/" ChangeLog
|
||||||
|
|
||||||
/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
|
/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
|
||||||
make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites
|
make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites
|
||||||
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py
|
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py ChangeLog
|
||||||
git commit -m "release $version"
|
git commit $gpg_sign_commits -m "release $version"
|
||||||
|
|
||||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
||||||
git tag -s -m "Release $version" "$version"
|
git tag -s -m "Release $version" "$version"
|
||||||
@@ -116,7 +127,7 @@ git clone --branch gh-pages --single-branch . build/gh-pages
|
|||||||
"$ROOT/devscripts/gh-pages/update-copyright.py"
|
"$ROOT/devscripts/gh-pages/update-copyright.py"
|
||||||
"$ROOT/devscripts/gh-pages/update-sites.py"
|
"$ROOT/devscripts/gh-pages/update-sites.py"
|
||||||
git add *.html *.html.in update
|
git add *.html *.html.in update
|
||||||
git commit -m "release $version"
|
git commit $gpg_sign_commits -m "release $version"
|
||||||
git push "$ROOT" gh-pages
|
git push "$ROOT" gh-pages
|
||||||
git push "$ORIGIN_URL" gh-pages
|
git push "$ORIGIN_URL" gh-pages
|
||||||
)
|
)
|
||||||
|
|||||||
47
devscripts/show-downloads-statistics.py
Normal file
47
devscripts/show-downloads-statistics.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_print,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
from youtube_dl.utils import format_bytes
|
||||||
|
|
||||||
|
|
||||||
|
def format_size(bytes):
|
||||||
|
return '%s (%d bytes)' % (format_bytes(bytes), bytes)
|
||||||
|
|
||||||
|
|
||||||
|
total_bytes = 0
|
||||||
|
|
||||||
|
for page in itertools.count(1):
|
||||||
|
releases = json.loads(compat_urllib_request.urlopen(
|
||||||
|
'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page
|
||||||
|
).read().decode('utf-8'))
|
||||||
|
|
||||||
|
if not releases:
|
||||||
|
break
|
||||||
|
|
||||||
|
for release in releases:
|
||||||
|
compat_print(release['name'])
|
||||||
|
for asset in release['assets']:
|
||||||
|
asset_name = asset['name']
|
||||||
|
total_bytes += asset['download_count'] * asset['size']
|
||||||
|
if all(not re.match(p, asset_name) for p in (
|
||||||
|
r'^youtube-dl$',
|
||||||
|
r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
|
||||||
|
r'^youtube-dl\.exe$')):
|
||||||
|
continue
|
||||||
|
compat_print(
|
||||||
|
' %s size: %s downloads: %d'
|
||||||
|
% (asset_name, format_size(asset['size']), asset['download_count']))
|
||||||
|
|
||||||
|
compat_print('total downloads traffic: %s' % format_size(total_bytes))
|
||||||
@@ -13,11 +13,16 @@
|
|||||||
- **5min**
|
- **5min**
|
||||||
- **8tracks**
|
- **8tracks**
|
||||||
- **91porn**
|
- **91porn**
|
||||||
|
- **9c9media**
|
||||||
|
- **9c9media:stack**
|
||||||
- **9gag**
|
- **9gag**
|
||||||
|
- **9now.com.au**
|
||||||
- **abc.net.au**
|
- **abc.net.au**
|
||||||
- **Abc7News**
|
- **abc.net.au:iview**
|
||||||
- **abcnews**
|
- **abcnews**
|
||||||
- **abcnews:video**
|
- **abcnews:video**
|
||||||
|
- **abcotvs**: ABC Owned Television Stations
|
||||||
|
- **abcotvs:clips**
|
||||||
- **AcademicEarth:Course**
|
- **AcademicEarth:Course**
|
||||||
- **acast**
|
- **acast**
|
||||||
- **acast:channel**
|
- **acast:channel**
|
||||||
@@ -34,6 +39,7 @@
|
|||||||
- **AlJazeera**
|
- **AlJazeera**
|
||||||
- **Allocine**
|
- **Allocine**
|
||||||
- **AlphaPorno**
|
- **AlphaPorno**
|
||||||
|
- **AMCNetworks**
|
||||||
- **AnimeOnDemand**
|
- **AnimeOnDemand**
|
||||||
- **anitube.se**
|
- **anitube.se**
|
||||||
- **AnySex**
|
- **AnySex**
|
||||||
@@ -45,7 +51,7 @@
|
|||||||
- **archive.org**: archive.org videos
|
- **archive.org**: archive.org videos
|
||||||
- **ARD**
|
- **ARD**
|
||||||
- **ARD:mediathek**
|
- **ARD:mediathek**
|
||||||
- **ARD:mediathek**: Saarländischer Rundfunk
|
- **Arkena**
|
||||||
- **arte.tv**
|
- **arte.tv**
|
||||||
- **arte.tv:+7**
|
- **arte.tv:+7**
|
||||||
- **arte.tv:cinema**
|
- **arte.tv:cinema**
|
||||||
@@ -64,6 +70,10 @@
|
|||||||
- **audiomack**
|
- **audiomack**
|
||||||
- **audiomack:album**
|
- **audiomack:album**
|
||||||
- **auroravid**: AuroraVid
|
- **auroravid**: AuroraVid
|
||||||
|
- **AWAAN**
|
||||||
|
- **awaan:live**
|
||||||
|
- **awaan:season**
|
||||||
|
- **awaan:video**
|
||||||
- **Azubu**
|
- **Azubu**
|
||||||
- **AzubuLive**
|
- **AzubuLive**
|
||||||
- **BaiduVideo**: 百度视频
|
- **BaiduVideo**: 百度视频
|
||||||
@@ -74,9 +84,12 @@
|
|||||||
- **bbc**: BBC
|
- **bbc**: BBC
|
||||||
- **bbc.co.uk**: BBC iPlayer
|
- **bbc.co.uk**: BBC iPlayer
|
||||||
- **bbc.co.uk:article**: BBC articles
|
- **bbc.co.uk:article**: BBC articles
|
||||||
|
- **bbc.co.uk:iplayer:playlist**
|
||||||
|
- **bbc.co.uk:playlist**
|
||||||
- **BeatportPro**
|
- **BeatportPro**
|
||||||
- **Beeg**
|
- **Beeg**
|
||||||
- **BehindKink**
|
- **BehindKink**
|
||||||
|
- **BellMedia**
|
||||||
- **Bet**
|
- **Bet**
|
||||||
- **Bigflix**
|
- **Bigflix**
|
||||||
- **Bild**: Bild.de
|
- **Bild**: Bild.de
|
||||||
@@ -104,17 +117,24 @@
|
|||||||
- **canalc2.tv**
|
- **canalc2.tv**
|
||||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||||
- **Canvas**
|
- **Canvas**
|
||||||
- **CBC**
|
- **CarambaTV**
|
||||||
- **CBCPlayer**
|
- **CarambaTVPage**
|
||||||
|
- **CartoonNetwork**
|
||||||
|
- **cbc.ca**
|
||||||
|
- **cbc.ca:player**
|
||||||
|
- **cbc.ca:watch**
|
||||||
|
- **cbc.ca:watch:video**
|
||||||
- **CBS**
|
- **CBS**
|
||||||
- **CBSInteractive**
|
- **CBSInteractive**
|
||||||
- **CBSLocal**
|
- **CBSLocal**
|
||||||
- **CBSNews**: CBS News
|
- **CBSNews**: CBS News
|
||||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||||
- **CBSSports**
|
- **CBSSports**
|
||||||
|
- **CCTV**
|
||||||
- **CDA**
|
- **CDA**
|
||||||
- **CeskaTelevize**
|
- **CeskaTelevize**
|
||||||
- **channel9**: Channel 9
|
- **channel9**: Channel 9
|
||||||
|
- **CharlieRose**
|
||||||
- **Chaturbate**
|
- **Chaturbate**
|
||||||
- **Chilloutzone**
|
- **Chilloutzone**
|
||||||
- **chirbit**
|
- **chirbit**
|
||||||
@@ -124,6 +144,7 @@
|
|||||||
- **cliphunter**
|
- **cliphunter**
|
||||||
- **ClipRs**
|
- **ClipRs**
|
||||||
- **Clipsyndicate**
|
- **Clipsyndicate**
|
||||||
|
- **CloserToTruth**
|
||||||
- **cloudtime**: CloudTime
|
- **cloudtime**: CloudTime
|
||||||
- **Cloudy**
|
- **Cloudy**
|
||||||
- **Clubic**
|
- **Clubic**
|
||||||
@@ -136,7 +157,8 @@
|
|||||||
- **CollegeRama**
|
- **CollegeRama**
|
||||||
- **ComCarCoff**
|
- **ComCarCoff**
|
||||||
- **ComedyCentral**
|
- **ComedyCentral**
|
||||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
- **ComedyCentralShortname**
|
||||||
|
- **ComedyCentralTV**
|
||||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||||
- **Coub**
|
- **Coub**
|
||||||
- **Cracked**
|
- **Cracked**
|
||||||
@@ -148,8 +170,11 @@
|
|||||||
- **CSNNE**
|
- **CSNNE**
|
||||||
- **CSpan**: C-SPAN
|
- **CSpan**: C-SPAN
|
||||||
- **CtsNews**: 華視新聞
|
- **CtsNews**: 華視新聞
|
||||||
|
- **CTVNews**
|
||||||
- **culturebox.francetvinfo.fr**
|
- **culturebox.francetvinfo.fr**
|
||||||
- **CultureUnplugged**
|
- **CultureUnplugged**
|
||||||
|
- **curiositystream**
|
||||||
|
- **curiositystream:collection**
|
||||||
- **CWTV**
|
- **CWTV**
|
||||||
- **DailyMail**
|
- **DailyMail**
|
||||||
- **dailymotion**
|
- **dailymotion**
|
||||||
@@ -161,10 +186,6 @@
|
|||||||
- **daum.net:playlist**
|
- **daum.net:playlist**
|
||||||
- **daum.net:user**
|
- **daum.net:user**
|
||||||
- **DBTV**
|
- **DBTV**
|
||||||
- **DCN**
|
|
||||||
- **dcn:live**
|
|
||||||
- **dcn:season**
|
|
||||||
- **dcn:video**
|
|
||||||
- **DctpTv**
|
- **DctpTv**
|
||||||
- **DeezerPlaylist**
|
- **DeezerPlaylist**
|
||||||
- **defense.gouv.fr**
|
- **defense.gouv.fr**
|
||||||
@@ -173,6 +194,7 @@
|
|||||||
- **DigitallySpeaking**
|
- **DigitallySpeaking**
|
||||||
- **Digiteka**
|
- **Digiteka**
|
||||||
- **Discovery**
|
- **Discovery**
|
||||||
|
- **DiscoveryGo**
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
- **DouyuTV**: 斗鱼
|
- **DouyuTV**: 斗鱼
|
||||||
- **DPlay**
|
- **DPlay**
|
||||||
@@ -205,29 +227,32 @@
|
|||||||
- **EsriVideo**
|
- **EsriVideo**
|
||||||
- **Europa**
|
- **Europa**
|
||||||
- **EveryonesMixtape**
|
- **EveryonesMixtape**
|
||||||
- **exfm**: ex.fm
|
|
||||||
- **ExpoTV**
|
- **ExpoTV**
|
||||||
- **ExtremeTube**
|
- **ExtremeTube**
|
||||||
- **EyedoTV**
|
- **EyedoTV**
|
||||||
- **facebook**
|
- **facebook**
|
||||||
|
- **FacebookPluginsVideo**
|
||||||
- **faz.net**
|
- **faz.net**
|
||||||
- **fc2**
|
- **fc2**
|
||||||
|
- **fc2:embed**
|
||||||
- **Fczenit**
|
- **Fczenit**
|
||||||
- **features.aol.com**
|
- **features.aol.com**
|
||||||
- **fernsehkritik.tv**
|
- **fernsehkritik.tv**
|
||||||
- **Firstpost**
|
- **Firstpost**
|
||||||
- **FiveTV**
|
- **FiveTV**
|
||||||
- **Flickr**
|
- **Flickr**
|
||||||
|
- **Flipagram**
|
||||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||||
- **FootyRoom**
|
- **FootyRoom**
|
||||||
- **Formula1**
|
- **Formula1**
|
||||||
- **FOX**
|
- **FOX**
|
||||||
- **Foxgay**
|
- **Foxgay**
|
||||||
- **FoxNews**: Fox News and Fox Business Video
|
- **foxnews**: Fox News and Fox Business Video
|
||||||
|
- **foxnews:article**
|
||||||
|
- **foxnews:insider**
|
||||||
- **FoxSports**
|
- **FoxSports**
|
||||||
- **france2.fr:generation-quoi**
|
- **france2.fr:generation-quoi**
|
||||||
- **FranceCulture**
|
- **FranceCulture**
|
||||||
- **FranceCultureEmission**
|
|
||||||
- **FranceInter**
|
- **FranceInter**
|
||||||
- **francetv**: France 2, 3, 4, 5 and Ô
|
- **francetv**: France 2, 3, 4, 5 and Ô
|
||||||
- **francetvinfo.fr**
|
- **francetvinfo.fr**
|
||||||
@@ -236,14 +261,14 @@
|
|||||||
- **FreeVideo**
|
- **FreeVideo**
|
||||||
- **Funimation**
|
- **Funimation**
|
||||||
- **FunnyOrDie**
|
- **FunnyOrDie**
|
||||||
|
- **Fusion**
|
||||||
|
- **FXNetworks**
|
||||||
- **GameInformer**
|
- **GameInformer**
|
||||||
- **Gamekings**
|
|
||||||
- **GameOne**
|
- **GameOne**
|
||||||
- **gameone:playlist**
|
- **gameone:playlist**
|
||||||
- **Gamersyde**
|
- **Gamersyde**
|
||||||
- **GameSpot**
|
- **GameSpot**
|
||||||
- **GameStar**
|
- **GameStar**
|
||||||
- **Gametrailers**
|
|
||||||
- **Gazeta**
|
- **Gazeta**
|
||||||
- **GDCVault**
|
- **GDCVault**
|
||||||
- **generic**: Generic downloader that works on some sites
|
- **generic**: Generic downloader that works on some sites
|
||||||
@@ -253,9 +278,9 @@
|
|||||||
- **Glide**: Glide mobile video messages (glide.me)
|
- **Glide**: Glide mobile video messages (glide.me)
|
||||||
- **Globo**
|
- **Globo**
|
||||||
- **GloboArticle**
|
- **GloboArticle**
|
||||||
|
- **Go**
|
||||||
- **GodTube**
|
- **GodTube**
|
||||||
- **GodTV**
|
- **GodTV**
|
||||||
- **GoldenMoustache**
|
|
||||||
- **Golem**
|
- **Golem**
|
||||||
- **GoogleDrive**
|
- **GoogleDrive**
|
||||||
- **Goshgay**
|
- **Goshgay**
|
||||||
@@ -268,7 +293,10 @@
|
|||||||
- **HellPorno**
|
- **HellPorno**
|
||||||
- **Helsinki**: helsinki.fi
|
- **Helsinki**: helsinki.fi
|
||||||
- **HentaiStigma**
|
- **HentaiStigma**
|
||||||
|
- **HGTV**
|
||||||
|
- **hgtv.com:show**
|
||||||
- **HistoricFilms**
|
- **HistoricFilms**
|
||||||
|
- **history:topic**: History.com Topic
|
||||||
- **hitbox**
|
- **hitbox**
|
||||||
- **hitbox:live**
|
- **hitbox:live**
|
||||||
- **HornBunny**
|
- **HornBunny**
|
||||||
@@ -276,6 +304,8 @@
|
|||||||
- **HotStar**
|
- **HotStar**
|
||||||
- **Howcast**
|
- **Howcast**
|
||||||
- **HowStuffWorks**
|
- **HowStuffWorks**
|
||||||
|
- **HRTi**
|
||||||
|
- **HRTiPlaylist**
|
||||||
- **HuffPost**: Huffington Post
|
- **HuffPost**: Huffington Post
|
||||||
- **Hypem**
|
- **Hypem**
|
||||||
- **Iconosquare**
|
- **Iconosquare**
|
||||||
@@ -297,18 +327,21 @@
|
|||||||
- **ivi**: ivi.ru
|
- **ivi**: ivi.ru
|
||||||
- **ivi:compilation**: ivi.ru compilations
|
- **ivi:compilation**: ivi.ru compilations
|
||||||
- **ivideon**: Ivideon TV
|
- **ivideon**: Ivideon TV
|
||||||
|
- **Iwara**
|
||||||
- **Izlesene**
|
- **Izlesene**
|
||||||
- **JeuxVideo**
|
- **JeuxVideo**
|
||||||
- **Jove**
|
- **Jove**
|
||||||
- **jpopsuki.tv**
|
- **jpopsuki.tv**
|
||||||
- **JWPlatform**
|
- **JWPlatform**
|
||||||
- **Kaltura**
|
- **Kaltura**
|
||||||
|
- **Kamcord**
|
||||||
- **KanalPlay**: Kanal 5/9/11 Play
|
- **KanalPlay**: Kanal 5/9/11 Play
|
||||||
- **Kankan**
|
- **Kankan**
|
||||||
- **Karaoketv**
|
- **Karaoketv**
|
||||||
- **KarriereVideos**
|
- **KarriereVideos**
|
||||||
- **keek**
|
- **keek**
|
||||||
- **KeezMovies**
|
- **KeezMovies**
|
||||||
|
- **Ketnet**
|
||||||
- **KhanAcademy**
|
- **KhanAcademy**
|
||||||
- **KickStarter**
|
- **KickStarter**
|
||||||
- **KonserthusetPlay**
|
- **KonserthusetPlay**
|
||||||
@@ -322,8 +355,11 @@
|
|||||||
- **kuwo:mv**: 酷我音乐 - MV
|
- **kuwo:mv**: 酷我音乐 - MV
|
||||||
- **kuwo:singer**: 酷我音乐 - 歌手
|
- **kuwo:singer**: 酷我音乐 - 歌手
|
||||||
- **kuwo:song**: 酷我音乐
|
- **kuwo:song**: 酷我音乐
|
||||||
- **la7.tv**
|
- **la7.it**
|
||||||
- **Laola1Tv**
|
- **Laola1Tv**
|
||||||
|
- **LCI**
|
||||||
|
- **Lcp**
|
||||||
|
- **LcpPlay**
|
||||||
- **Le**: 乐视网
|
- **Le**: 乐视网
|
||||||
- **Learnr**
|
- **Learnr**
|
||||||
- **Lecture2Go**
|
- **Lecture2Go**
|
||||||
@@ -352,13 +388,17 @@
|
|||||||
- **mailru**: Видео@Mail.Ru
|
- **mailru**: Видео@Mail.Ru
|
||||||
- **MakersChannel**
|
- **MakersChannel**
|
||||||
- **MakerTV**
|
- **MakerTV**
|
||||||
|
- **mangomolo:live**
|
||||||
|
- **mangomolo:video**
|
||||||
- **MatchTV**
|
- **MatchTV**
|
||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
|
- **META**
|
||||||
- **metacafe**
|
- **metacafe**
|
||||||
- **Metacritic**
|
- **Metacritic**
|
||||||
- **Mgoon**
|
- **Mgoon**
|
||||||
- **MGTV**: 芒果TV
|
- **MGTV**: 芒果TV
|
||||||
|
- **MiaoPai**
|
||||||
- **Minhateca**
|
- **Minhateca**
|
||||||
- **MinistryGrid**
|
- **MinistryGrid**
|
||||||
- **Minoto**
|
- **Minoto**
|
||||||
@@ -380,11 +420,12 @@
|
|||||||
- **MovieClips**
|
- **MovieClips**
|
||||||
- **MovieFap**
|
- **MovieFap**
|
||||||
- **Moviezine**
|
- **Moviezine**
|
||||||
|
- **MovingImage**
|
||||||
- **MPORA**
|
- **MPORA**
|
||||||
- **MSNBC**
|
- **MSN**
|
||||||
|
- **mtg**: MTG services
|
||||||
- **MTV**
|
- **MTV**
|
||||||
- **mtv.de**
|
- **mtv.de**
|
||||||
- **mtviggy.com**
|
|
||||||
- **mtvservices:embedded**
|
- **mtvservices:embedded**
|
||||||
- **MuenchenTV**: münchen.tv
|
- **MuenchenTV**: münchen.tv
|
||||||
- **MusicPlayOn**
|
- **MusicPlayOn**
|
||||||
@@ -400,11 +441,13 @@
|
|||||||
- **MyVidster**
|
- **MyVidster**
|
||||||
- **n-tv.de**
|
- **n-tv.de**
|
||||||
- **natgeo**
|
- **natgeo**
|
||||||
- **natgeo:channel**
|
- **natgeo:episodeguide**
|
||||||
|
- **natgeo:video**
|
||||||
- **Naver**
|
- **Naver**
|
||||||
- **NBA**
|
- **NBA**
|
||||||
- **NBC**
|
- **NBC**
|
||||||
- **NBCNews**
|
- **NBCNews**
|
||||||
|
- **NBCOlympics**
|
||||||
- **NBCSports**
|
- **NBCSports**
|
||||||
- **NBCSportsVPlayer**
|
- **NBCSportsVPlayer**
|
||||||
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||||
@@ -424,16 +467,18 @@
|
|||||||
- **Newstube**
|
- **Newstube**
|
||||||
- **NextMedia**: 蘋果日報
|
- **NextMedia**: 蘋果日報
|
||||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||||
- **nextmovie.com**
|
|
||||||
- **nfb**: National Film Board of Canada
|
- **nfb**: National Film Board of Canada
|
||||||
- **nfl.com**
|
- **nfl.com**
|
||||||
|
- **NhkVod**
|
||||||
- **nhl.com**
|
- **nhl.com**
|
||||||
- **nhl.com:news**: NHL news
|
- **nhl.com:news**: NHL news
|
||||||
- **nhl.com:videocenter**
|
- **nhl.com:videocenter**
|
||||||
- **nhl.com:videocenter:category**: NHL videocenter category
|
- **nhl.com:videocenter:category**: NHL videocenter category
|
||||||
- **nick.com**
|
- **nick.com**
|
||||||
|
- **nick.de**
|
||||||
- **niconico**: ニコニコ動画
|
- **niconico**: ニコニコ動画
|
||||||
- **NiconicoPlaylist**
|
- **NiconicoPlaylist**
|
||||||
|
- **Nintendo**
|
||||||
- **njoy**: N-JOY
|
- **njoy**: N-JOY
|
||||||
- **njoy:embed**
|
- **njoy:embed**
|
||||||
- **Noco**
|
- **Noco**
|
||||||
@@ -461,9 +506,12 @@
|
|||||||
- **NYTimes**
|
- **NYTimes**
|
||||||
- **NYTimesArticle**
|
- **NYTimesArticle**
|
||||||
- **ocw.mit.edu**
|
- **ocw.mit.edu**
|
||||||
|
- **OdaTV**
|
||||||
- **Odnoklassniki**
|
- **Odnoklassniki**
|
||||||
- **OktoberfestTV**
|
- **OktoberfestTV**
|
||||||
- **on.aol.com**
|
- **on.aol.com**
|
||||||
|
- **onet.tv**
|
||||||
|
- **onet.tv:channel**
|
||||||
- **OnionStudios**
|
- **OnionStudios**
|
||||||
- **Ooyala**
|
- **Ooyala**
|
||||||
- **OoyalaExternal**
|
- **OoyalaExternal**
|
||||||
@@ -487,7 +535,6 @@
|
|||||||
- **Pinkbike**
|
- **Pinkbike**
|
||||||
- **Pladform**
|
- **Pladform**
|
||||||
- **play.fm**
|
- **play.fm**
|
||||||
- **played.to**
|
|
||||||
- **PlaysTV**
|
- **PlaysTV**
|
||||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||||
- **Playvid**
|
- **Playvid**
|
||||||
@@ -497,8 +544,12 @@
|
|||||||
- **plus.google**: Google Plus
|
- **plus.google**: Google Plus
|
||||||
- **pluzz.francetv.fr**
|
- **pluzz.francetv.fr**
|
||||||
- **podomatic**
|
- **podomatic**
|
||||||
|
- **Pokemon**
|
||||||
|
- **PolskieRadio**
|
||||||
|
- **PolskieRadioCategory**
|
||||||
|
- **PornCom**
|
||||||
- **PornHd**
|
- **PornHd**
|
||||||
- **PornHub**
|
- **PornHub**: PornHub and Thumbzilla
|
||||||
- **PornHubPlaylist**
|
- **PornHubPlaylist**
|
||||||
- **PornHubUserVideos**
|
- **PornHubUserVideos**
|
||||||
- **Pornotube**
|
- **Pornotube**
|
||||||
@@ -516,6 +567,7 @@
|
|||||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||||
- **R7**
|
- **R7**
|
||||||
|
- **R7Article**
|
||||||
- **radio.de**
|
- **radio.de**
|
||||||
- **radiobremen**
|
- **radiobremen**
|
||||||
- **radiocanada**
|
- **radiocanada**
|
||||||
@@ -535,8 +587,12 @@
|
|||||||
- **revision3:embed**
|
- **revision3:embed**
|
||||||
- **RICE**
|
- **RICE**
|
||||||
- **RingTV**
|
- **RingTV**
|
||||||
|
- **RMCDecouverte**
|
||||||
|
- **RockstarGames**
|
||||||
|
- **RoosterTeeth**
|
||||||
- **RottenTomatoes**
|
- **RottenTomatoes**
|
||||||
- **Roxwel**
|
- **Roxwel**
|
||||||
|
- **Rozhlas**
|
||||||
- **RTBF**
|
- **RTBF**
|
||||||
- **rte**: Raidió Teilifís Éireann TV
|
- **rte**: Raidió Teilifís Éireann TV
|
||||||
- **rte:radio**: Raidió Teilifís Éireann radio
|
- **rte:radio**: Raidió Teilifís Éireann radio
|
||||||
@@ -547,7 +603,9 @@
|
|||||||
- **rtve.es:alacarta**: RTVE a la carta
|
- **rtve.es:alacarta**: RTVE a la carta
|
||||||
- **rtve.es:infantil**: RTVE infantil
|
- **rtve.es:infantil**: RTVE infantil
|
||||||
- **rtve.es:live**: RTVE.es live streams
|
- **rtve.es:live**: RTVE.es live streams
|
||||||
|
- **rtve.es:television**
|
||||||
- **RTVNH**
|
- **RTVNH**
|
||||||
|
- **Rudo**
|
||||||
- **RUHD**
|
- **RUHD**
|
||||||
- **RulePorn**
|
- **RulePorn**
|
||||||
- **rutube**: Rutube videos
|
- **rutube**: Rutube videos
|
||||||
@@ -580,8 +638,10 @@
|
|||||||
- **Shared**: shared.sx and vivo.sx
|
- **Shared**: shared.sx and vivo.sx
|
||||||
- **ShareSix**
|
- **ShareSix**
|
||||||
- **Sina**
|
- **Sina**
|
||||||
|
- **SixPlay**
|
||||||
|
- **skynewsarabia:article**
|
||||||
- **skynewsarabia:video**
|
- **skynewsarabia:video**
|
||||||
- **skynewsarabia:video**
|
- **SkySports**
|
||||||
- **Slideshare**
|
- **Slideshare**
|
||||||
- **Slutload**
|
- **Slutload**
|
||||||
- **smotri**: Smotri.com
|
- **smotri**: Smotri.com
|
||||||
@@ -590,6 +650,7 @@
|
|||||||
- **smotri:user**: Smotri.com user videos
|
- **smotri:user**: Smotri.com user videos
|
||||||
- **Snotr**
|
- **Snotr**
|
||||||
- **Sohu**
|
- **Sohu**
|
||||||
|
- **SonyLIV**
|
||||||
- **soundcloud**
|
- **soundcloud**
|
||||||
- **soundcloud:playlist**
|
- **soundcloud:playlist**
|
||||||
- **soundcloud:search**: Soundcloud search
|
- **soundcloud:search**: Soundcloud search
|
||||||
@@ -613,12 +674,13 @@
|
|||||||
- **SportBoxEmbed**
|
- **SportBoxEmbed**
|
||||||
- **SportDeutschland**
|
- **SportDeutschland**
|
||||||
- **Sportschau**
|
- **Sportschau**
|
||||||
|
- **sr:mediathek**: Saarländischer Rundfunk
|
||||||
- **SRGSSR**
|
- **SRGSSR**
|
||||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||||
- **SSA**
|
|
||||||
- **stanfordoc**: Stanford Open ClassRoom
|
- **stanfordoc**: Stanford Open ClassRoom
|
||||||
- **Steam**
|
- **Steam**
|
||||||
- **Stitcher**
|
- **Stitcher**
|
||||||
|
- **Streamable**
|
||||||
- **streamcloud.eu**
|
- **streamcloud.eu**
|
||||||
- **StreamCZ**
|
- **StreamCZ**
|
||||||
- **StreetVoice**
|
- **StreetVoice**
|
||||||
@@ -630,8 +692,8 @@
|
|||||||
- **SztvHu**
|
- **SztvHu**
|
||||||
- **Tagesschau**
|
- **Tagesschau**
|
||||||
- **tagesschau:player**
|
- **tagesschau:player**
|
||||||
- **Tapely**
|
|
||||||
- **Tass**
|
- **Tass**
|
||||||
|
- **TBS**
|
||||||
- **TDSLifeway**
|
- **TDSLifeway**
|
||||||
- **teachertube**: teachertube.com videos
|
- **teachertube**: teachertube.com videos
|
||||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||||
@@ -646,8 +708,11 @@
|
|||||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||||
- **Telegraaf**
|
- **Telegraaf**
|
||||||
- **TeleMB**
|
- **TeleMB**
|
||||||
|
- **TeleQuebec**
|
||||||
- **TeleTask**
|
- **TeleTask**
|
||||||
|
- **Telewebion**
|
||||||
- **TF1**
|
- **TF1**
|
||||||
|
- **TFO**
|
||||||
- **TheIntercept**
|
- **TheIntercept**
|
||||||
- **ThePlatform**
|
- **ThePlatform**
|
||||||
- **ThePlatformFeed**
|
- **ThePlatformFeed**
|
||||||
@@ -656,8 +721,6 @@
|
|||||||
- **TheStar**
|
- **TheStar**
|
||||||
- **ThisAmericanLife**
|
- **ThisAmericanLife**
|
||||||
- **ThisAV**
|
- **ThisAV**
|
||||||
- **THVideo**
|
|
||||||
- **THVideoPlaylist**
|
|
||||||
- **tinypic**: tinypic.com videos
|
- **tinypic**: tinypic.com videos
|
||||||
- **tlc.de**
|
- **tlc.de**
|
||||||
- **TMZ**
|
- **TMZ**
|
||||||
@@ -665,13 +728,13 @@
|
|||||||
- **TNAFlix**
|
- **TNAFlix**
|
||||||
- **TNAFlixNetworkEmbed**
|
- **TNAFlixNetworkEmbed**
|
||||||
- **toggle**
|
- **toggle**
|
||||||
|
- **Tosh**: Tosh.0
|
||||||
- **tou.tv**
|
- **tou.tv**
|
||||||
- **Toypics**: Toypics user profile
|
- **Toypics**: Toypics user profile
|
||||||
- **ToypicsUser**: Toypics user profile
|
- **ToypicsUser**: Toypics user profile
|
||||||
- **TrailerAddict** (Currently broken)
|
- **TrailerAddict** (Currently broken)
|
||||||
- **Trilulilu**
|
- **Trilulilu**
|
||||||
- **trollvids**
|
- **TruTV**
|
||||||
- **TruTube**
|
|
||||||
- **Tube8**
|
- **Tube8**
|
||||||
- **TubiTv**
|
- **TubiTv**
|
||||||
- **tudou**
|
- **tudou**
|
||||||
@@ -693,11 +756,13 @@
|
|||||||
- **TVCArticle**
|
- **TVCArticle**
|
||||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
- **tvland.com**
|
- **tvland.com**
|
||||||
|
- **TVNoe**
|
||||||
- **tvp**: Telewizja Polska
|
- **tvp**: Telewizja Polska
|
||||||
|
- **tvp:embed**: Telewizja Polska
|
||||||
- **tvp:series**
|
- **tvp:series**
|
||||||
- **TVPlay**: TV3Play and related services
|
|
||||||
- **Tweakers**
|
- **Tweakers**
|
||||||
- **twitch:chapter**
|
- **twitch:chapter**
|
||||||
|
- **twitch:clips**
|
||||||
- **twitch:past_broadcasts**
|
- **twitch:past_broadcasts**
|
||||||
- **twitch:profile**
|
- **twitch:profile**
|
||||||
- **twitch:stream**
|
- **twitch:stream**
|
||||||
@@ -710,7 +775,12 @@
|
|||||||
- **udemy:course**
|
- **udemy:course**
|
||||||
- **UDNEmbed**: 聯合影音
|
- **UDNEmbed**: 聯合影音
|
||||||
- **Unistra**
|
- **Unistra**
|
||||||
|
- **uol.com.br**
|
||||||
|
- **uplynk**
|
||||||
|
- **uplynk:preplay**
|
||||||
- **Urort**: NRK P3 Urørt
|
- **Urort**: NRK P3 Urørt
|
||||||
|
- **URPlay**
|
||||||
|
- **USANetwork**
|
||||||
- **USAToday**
|
- **USAToday**
|
||||||
- **ustream**
|
- **ustream**
|
||||||
- **ustream:channel**
|
- **ustream:channel**
|
||||||
@@ -726,8 +796,11 @@
|
|||||||
- **VevoPlaylist**
|
- **VevoPlaylist**
|
||||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||||
- **vh1.com**
|
- **vh1.com**
|
||||||
|
- **Viafree**
|
||||||
- **Vice**
|
- **Vice**
|
||||||
|
- **Viceland**
|
||||||
- **ViceShow**
|
- **ViceShow**
|
||||||
|
- **Vidbit**
|
||||||
- **Viddler**
|
- **Viddler**
|
||||||
- **video.google:search**: Google Video search
|
- **video.google:search**: Google Video search
|
||||||
- **video.mit.edu**
|
- **video.mit.edu**
|
||||||
@@ -767,8 +840,10 @@
|
|||||||
- **vine:user**
|
- **vine:user**
|
||||||
- **vk**: VK
|
- **vk**: VK
|
||||||
- **vk:uservideos**: VK - User's Videos
|
- **vk:uservideos**: VK - User's Videos
|
||||||
|
- **vk:wallpost**
|
||||||
- **vlive**
|
- **vlive**
|
||||||
- **Vodlocker**
|
- **Vodlocker**
|
||||||
|
- **VODPlatform**
|
||||||
- **VoiceRepublic**
|
- **VoiceRepublic**
|
||||||
- **VoxMedia**
|
- **VoxMedia**
|
||||||
- **Vporn**
|
- **Vporn**
|
||||||
@@ -776,6 +851,7 @@
|
|||||||
- **VRT**
|
- **VRT**
|
||||||
- **vube**: Vube.com
|
- **vube**: Vube.com
|
||||||
- **VuClip**
|
- **VuClip**
|
||||||
|
- **VyboryMos**
|
||||||
- **Walla**
|
- **Walla**
|
||||||
- **washingtonpost**
|
- **washingtonpost**
|
||||||
- **washingtonpost:article**
|
- **washingtonpost:article**
|
||||||
@@ -792,10 +868,11 @@
|
|||||||
- **WNL**
|
- **WNL**
|
||||||
- **WorldStarHipHop**
|
- **WorldStarHipHop**
|
||||||
- **wrzuta.pl**
|
- **wrzuta.pl**
|
||||||
|
- **wrzuta.pl:playlist**
|
||||||
- **WSJ**: Wall Street Journal
|
- **WSJ**: Wall Street Journal
|
||||||
- **XBef**
|
- **XBef**
|
||||||
- **XboxClips**
|
- **XboxClips**
|
||||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To
|
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE
|
||||||
- **XHamster**
|
- **XHamster**
|
||||||
- **XHamsterEmbed**
|
- **XHamsterEmbed**
|
||||||
- **xiami:album**: 虾米音乐 - 专辑
|
- **xiami:album**: 虾米音乐 - 专辑
|
||||||
@@ -834,6 +911,7 @@
|
|||||||
- **youtube:search**: YouTube.com searches
|
- **youtube:search**: YouTube.com searches
|
||||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||||
- **youtube:search_url**: YouTube.com search URLs
|
- **youtube:search_url**: YouTube.com search URLs
|
||||||
|
- **youtube:shared**
|
||||||
- **youtube:show**: YouTube.com (multi-season) shows
|
- **youtube:show**: YouTube.com (multi-season) shows
|
||||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||||
@@ -841,6 +919,4 @@
|
|||||||
- **Zapiks**
|
- **Zapiks**
|
||||||
- **ZDF**
|
- **ZDF**
|
||||||
- **ZDFChannel**
|
- **ZDFChannel**
|
||||||
- **zingmp3:album**: mp3.zing.vn albums
|
- **zingmp3**: mp3.zing.vn
|
||||||
- **zingmp3:song**: mp3.zing.vn songs
|
|
||||||
- **ZippCast**
|
|
||||||
|
|||||||
62
setup.py
62
setup.py
@@ -21,25 +21,37 @@ try:
|
|||||||
import py2exe
|
import py2exe
|
||||||
except ImportError:
|
except ImportError:
|
||||||
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
||||||
print("Cannot import py2exe", file=sys.stderr)
|
print('Cannot import py2exe', file=sys.stderr)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
py2exe_options = {
|
py2exe_options = {
|
||||||
"bundle_files": 1,
|
'bundle_files': 1,
|
||||||
"compressed": 1,
|
'compressed': 1,
|
||||||
"optimize": 2,
|
'optimize': 2,
|
||||||
"dist_dir": '.',
|
'dist_dir': '.',
|
||||||
"dll_excludes": ['w9xpopen.exe', 'crypt32.dll'],
|
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Get the version from youtube_dl/version.py without importing the package
|
||||||
|
exec(compile(open('youtube_dl/version.py').read(),
|
||||||
|
'youtube_dl/version.py', 'exec'))
|
||||||
|
|
||||||
|
DESCRIPTION = 'YouTube video downloader'
|
||||||
|
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
|
||||||
|
|
||||||
py2exe_console = [{
|
py2exe_console = [{
|
||||||
"script": "./youtube_dl/__main__.py",
|
'script': './youtube_dl/__main__.py',
|
||||||
"dest_base": "youtube-dl",
|
'dest_base': 'youtube-dl',
|
||||||
|
'version': __version__,
|
||||||
|
'description': DESCRIPTION,
|
||||||
|
'comments': LONG_DESCRIPTION,
|
||||||
|
'product_name': 'youtube-dl',
|
||||||
|
'product_version': __version__,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
py2exe_params = {
|
py2exe_params = {
|
||||||
'console': py2exe_console,
|
'console': py2exe_console,
|
||||||
'options': {"py2exe": py2exe_options},
|
'options': {'py2exe': py2exe_options},
|
||||||
'zipfile': None
|
'zipfile': None
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,7 +84,7 @@ else:
|
|||||||
params['scripts'] = ['bin/youtube-dl']
|
params['scripts'] = ['bin/youtube-dl']
|
||||||
|
|
||||||
class build_lazy_extractors(Command):
|
class build_lazy_extractors(Command):
|
||||||
description = "Build the extractor lazy loading module"
|
description = 'Build the extractor lazy loading module'
|
||||||
user_options = []
|
user_options = []
|
||||||
|
|
||||||
def initialize_options(self):
|
def initialize_options(self):
|
||||||
@@ -87,16 +99,11 @@ class build_lazy_extractors(Command):
|
|||||||
dry_run=self.dry_run,
|
dry_run=self.dry_run,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get the version from youtube_dl/version.py without importing the package
|
|
||||||
exec(compile(open('youtube_dl/version.py').read(),
|
|
||||||
'youtube_dl/version.py', 'exec'))
|
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='youtube_dl',
|
name='youtube_dl',
|
||||||
version=__version__,
|
version=__version__,
|
||||||
description='YouTube video downloader',
|
description=DESCRIPTION,
|
||||||
long_description='Small command-line program to download videos from'
|
long_description=LONG_DESCRIPTION,
|
||||||
' YouTube.com and other video sites.',
|
|
||||||
url='https://github.com/rg3/youtube-dl',
|
url='https://github.com/rg3/youtube-dl',
|
||||||
author='Ricardo Garcia',
|
author='Ricardo Garcia',
|
||||||
author_email='ytdl@yt-dl.org',
|
author_email='ytdl@yt-dl.org',
|
||||||
@@ -112,16 +119,17 @@ setup(
|
|||||||
# test_requires = ['nosetest'],
|
# test_requires = ['nosetest'],
|
||||||
|
|
||||||
classifiers=[
|
classifiers=[
|
||||||
"Topic :: Multimedia :: Video",
|
'Topic :: Multimedia :: Video',
|
||||||
"Development Status :: 5 - Production/Stable",
|
'Development Status :: 5 - Production/Stable',
|
||||||
"Environment :: Console",
|
'Environment :: Console',
|
||||||
"License :: Public Domain",
|
'License :: Public Domain',
|
||||||
"Programming Language :: Python :: 2.6",
|
'Programming Language :: Python :: 2.6',
|
||||||
"Programming Language :: Python :: 2.7",
|
'Programming Language :: Python :: 2.7',
|
||||||
"Programming Language :: Python :: 3",
|
'Programming Language :: Python :: 3',
|
||||||
"Programming Language :: Python :: 3.2",
|
'Programming Language :: Python :: 3.2',
|
||||||
"Programming Language :: Python :: 3.3",
|
'Programming Language :: Python :: 3.3',
|
||||||
"Programming Language :: Python :: 3.4",
|
'Programming Language :: Python :: 3.4',
|
||||||
|
'Programming Language :: Python :: 3.5',
|
||||||
],
|
],
|
||||||
|
|
||||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
from test.helper import FakeYDL
|
from test.helper import FakeYDL
|
||||||
from youtube_dl.extractor.common import InfoExtractor
|
from youtube_dl.extractor.common import InfoExtractor
|
||||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError
|
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||||
|
|
||||||
|
|
||||||
class TestIE(InfoExtractor):
|
class TestIE(InfoExtractor):
|
||||||
@@ -48,6 +48,9 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
||||||
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
||||||
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
||||||
|
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
||||||
|
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
||||||
|
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
||||||
|
|
||||||
def test_html_search_meta(self):
|
def test_html_search_meta(self):
|
||||||
ie = self.ie
|
ie = self.ie
|
||||||
@@ -66,6 +69,11 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
self.assertEqual(ie._html_search_meta('d', html), '4')
|
self.assertEqual(ie._html_search_meta('d', html), '4')
|
||||||
self.assertEqual(ie._html_search_meta('e', html), '5')
|
self.assertEqual(ie._html_search_meta('e', html), '5')
|
||||||
self.assertEqual(ie._html_search_meta('f', html), '6')
|
self.assertEqual(ie._html_search_meta('f', html), '6')
|
||||||
|
self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1')
|
||||||
|
self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3')
|
||||||
|
self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3')
|
||||||
|
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
|
||||||
|
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
|
||||||
|
|
||||||
def test_download_json(self):
|
def test_download_json(self):
|
||||||
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
|
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
|
||||||
|
|||||||
@@ -335,6 +335,40 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
||||||
|
|
||||||
|
def test_audio_only_extractor_format_selection(self):
|
||||||
|
# For extractors with incomplete formats (all formats are audio-only or
|
||||||
|
# video-only) best and worst should fallback to corresponding best/worst
|
||||||
|
# video-only or audio-only formats (as per
|
||||||
|
# https://github.com/rg3/youtube-dl/pull/5556)
|
||||||
|
formats = [
|
||||||
|
{'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
|
||||||
|
{'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
|
||||||
|
]
|
||||||
|
info_dict = _make_result(formats)
|
||||||
|
|
||||||
|
ydl = YDL({'format': 'best'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'high')
|
||||||
|
|
||||||
|
ydl = YDL({'format': 'worst'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'low')
|
||||||
|
|
||||||
|
def test_format_not_available(self):
|
||||||
|
formats = [
|
||||||
|
{'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL},
|
||||||
|
{'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
|
||||||
|
]
|
||||||
|
info_dict = _make_result(formats)
|
||||||
|
|
||||||
|
# This must fail since complete video-audio format does not match filter
|
||||||
|
# and extractor does not provide incomplete only formats (i.e. only
|
||||||
|
# video-only or audio-only).
|
||||||
|
ydl = YDL({'format': 'best[height>360]'})
|
||||||
|
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||||
|
|
||||||
def test_invalid_format_specs(self):
|
def test_invalid_format_specs(self):
|
||||||
def assert_syntax_error(format_spec):
|
def assert_syntax_error(format_spec):
|
||||||
ydl = YDL({'format': format_spec})
|
ydl = YDL({'format': format_spec})
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from __future__ import unicode_literals
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
import collections
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
@@ -100,8 +101,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||||
self.assertMatch(':ythistory', ['youtube:history'])
|
self.assertMatch(':ythistory', ['youtube:history'])
|
||||||
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
|
|
||||||
self.assertMatch(':tds', ['ComedyCentralShows'])
|
|
||||||
|
|
||||||
def test_vimeo_matching(self):
|
def test_vimeo_matching(self):
|
||||||
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
|
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||||
@@ -130,6 +129,15 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
|
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
|
||||||
['Yahoo'])
|
['Yahoo'])
|
||||||
|
|
||||||
|
def test_no_duplicated_ie_names(self):
|
||||||
|
name_accu = collections.defaultdict(list)
|
||||||
|
for ie in self.ies:
|
||||||
|
name_accu[ie.IE_NAME.lower()].append(type(ie).__name__)
|
||||||
|
for (ie_name, ie_list) in name_accu.items():
|
||||||
|
self.assertEqual(
|
||||||
|
len(ie_list), 1,
|
||||||
|
'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list)))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -87,6 +87,8 @@ class TestCompat(unittest.TestCase):
|
|||||||
|
|
||||||
def test_compat_shlex_split(self):
|
def test_compat_shlex_split(self):
|
||||||
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
|
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
|
||||||
|
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
|
||||||
|
self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
|
||||||
|
|
||||||
def test_compat_etree_fromstring(self):
|
def test_compat_etree_fromstring(self):
|
||||||
xml = '''
|
xml = '''
|
||||||
|
|||||||
@@ -138,27 +138,27 @@ class TestProxy(unittest.TestCase):
|
|||||||
self.proxy_thread.daemon = True
|
self.proxy_thread.daemon = True
|
||||||
self.proxy_thread.start()
|
self.proxy_thread.start()
|
||||||
|
|
||||||
self.cn_proxy = compat_http_server.HTTPServer(
|
self.geo_proxy = compat_http_server.HTTPServer(
|
||||||
('localhost', 0), _build_proxy_handler('cn'))
|
('localhost', 0), _build_proxy_handler('geo'))
|
||||||
self.cn_port = http_server_port(self.cn_proxy)
|
self.geo_port = http_server_port(self.geo_proxy)
|
||||||
self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
|
self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
|
||||||
self.cn_proxy_thread.daemon = True
|
self.geo_proxy_thread.daemon = True
|
||||||
self.cn_proxy_thread.start()
|
self.geo_proxy_thread.start()
|
||||||
|
|
||||||
def test_proxy(self):
|
def test_proxy(self):
|
||||||
cn_proxy = 'localhost:{0}'.format(self.cn_port)
|
geo_proxy = 'localhost:{0}'.format(self.geo_port)
|
||||||
ydl = YoutubeDL({
|
ydl = YoutubeDL({
|
||||||
'proxy': 'localhost:{0}'.format(self.port),
|
'proxy': 'localhost:{0}'.format(self.port),
|
||||||
'cn_verification_proxy': cn_proxy,
|
'geo_verification_proxy': geo_proxy,
|
||||||
})
|
})
|
||||||
url = 'http://foo.com/bar'
|
url = 'http://foo.com/bar'
|
||||||
response = ydl.urlopen(url).read().decode('utf-8')
|
response = ydl.urlopen(url).read().decode('utf-8')
|
||||||
self.assertEqual(response, 'normal: {0}'.format(url))
|
self.assertEqual(response, 'normal: {0}'.format(url))
|
||||||
|
|
||||||
req = compat_urllib_request.Request(url)
|
req = compat_urllib_request.Request(url)
|
||||||
req.add_header('Ytdl-request-proxy', cn_proxy)
|
req.add_header('Ytdl-request-proxy', geo_proxy)
|
||||||
response = ydl.urlopen(req).read().decode('utf-8')
|
response = ydl.urlopen(req).read().decode('utf-8')
|
||||||
self.assertEqual(response, 'cn: {0}'.format(url))
|
self.assertEqual(response, 'geo: {0}'.format(url))
|
||||||
|
|
||||||
def test_proxy_with_idn(self):
|
def test_proxy_with_idn(self):
|
||||||
ydl = YoutubeDL({
|
ydl = YoutubeDL({
|
||||||
|
|||||||
@@ -33,14 +33,18 @@ from youtube_dl.utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
|
get_element_by_class,
|
||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
is_html,
|
is_html,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
limit_length,
|
limit_length,
|
||||||
|
mimetype2ext,
|
||||||
|
month_by_name,
|
||||||
ohdave_rsa_encrypt,
|
ohdave_rsa_encrypt,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_age_limit,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
parse_count,
|
parse_count,
|
||||||
@@ -60,11 +64,13 @@ from youtube_dl.utils import (
|
|||||||
timeconvert,
|
timeconvert,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
uppercase_escape,
|
uppercase_escape,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
url_basename,
|
url_basename,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
urshift,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
version_tuple,
|
version_tuple,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
@@ -78,6 +84,7 @@ from youtube_dl.utils import (
|
|||||||
cli_option,
|
cli_option,
|
||||||
cli_valueless_option,
|
cli_valueless_option,
|
||||||
cli_bool_option,
|
cli_bool_option,
|
||||||
|
parse_codecs,
|
||||||
)
|
)
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
@@ -283,8 +290,29 @@ class TestUtil(unittest.TestCase):
|
|||||||
'20150202')
|
'20150202')
|
||||||
self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
|
self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
|
||||||
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
||||||
|
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
|
||||||
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
||||||
|
|
||||||
|
def test_unified_timestamps(self):
|
||||||
|
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
||||||
|
self.assertEqual(unified_timestamp('8/7/2009'), 1247011200)
|
||||||
|
self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200)
|
||||||
|
self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598)
|
||||||
|
self.assertEqual(unified_timestamp('1968 12 10'), -33436800)
|
||||||
|
self.assertEqual(unified_timestamp('1968-12-10'), -33436800)
|
||||||
|
self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200)
|
||||||
|
self.assertEqual(
|
||||||
|
unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False),
|
||||||
|
1417001400)
|
||||||
|
self.assertEqual(
|
||||||
|
unified_timestamp('2/2/2015 6:47:40 PM', day_first=False),
|
||||||
|
1422902860)
|
||||||
|
self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900)
|
||||||
|
self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
|
||||||
|
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
|
||||||
|
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
||||||
|
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||||
|
|
||||||
def test_determine_ext(self):
|
def test_determine_ext(self):
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
|
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
|
||||||
@@ -383,6 +411,12 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(res_url, url)
|
self.assertEqual(res_url, url)
|
||||||
self.assertEqual(res_data, None)
|
self.assertEqual(res_data, None)
|
||||||
|
|
||||||
|
smug_url = smuggle_url(url, {'a': 'b'})
|
||||||
|
smug_smug_url = smuggle_url(smug_url, {'c': 'd'})
|
||||||
|
res_url, res_data = unsmuggle_url(smug_smug_url)
|
||||||
|
self.assertEqual(res_url, url)
|
||||||
|
self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
|
||||||
|
|
||||||
def test_shell_quote(self):
|
def test_shell_quote(self):
|
||||||
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
|
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
|
||||||
self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
|
self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
|
||||||
@@ -401,6 +435,20 @@ class TestUtil(unittest.TestCase):
|
|||||||
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
||||||
'trailer.mp4')
|
'trailer.mp4')
|
||||||
|
|
||||||
|
def test_parse_age_limit(self):
|
||||||
|
self.assertEqual(parse_age_limit(None), None)
|
||||||
|
self.assertEqual(parse_age_limit(False), None)
|
||||||
|
self.assertEqual(parse_age_limit('invalid'), None)
|
||||||
|
self.assertEqual(parse_age_limit(0), 0)
|
||||||
|
self.assertEqual(parse_age_limit(18), 18)
|
||||||
|
self.assertEqual(parse_age_limit(21), 21)
|
||||||
|
self.assertEqual(parse_age_limit(22), None)
|
||||||
|
self.assertEqual(parse_age_limit('18'), 18)
|
||||||
|
self.assertEqual(parse_age_limit('18+'), 18)
|
||||||
|
self.assertEqual(parse_age_limit('PG-13'), 13)
|
||||||
|
self.assertEqual(parse_age_limit('TV-14'), 14)
|
||||||
|
self.assertEqual(parse_age_limit('TV-MA'), 17)
|
||||||
|
|
||||||
def test_parse_duration(self):
|
def test_parse_duration(self):
|
||||||
self.assertEqual(parse_duration(None), None)
|
self.assertEqual(parse_duration(None), None)
|
||||||
self.assertEqual(parse_duration(False), None)
|
self.assertEqual(parse_duration(False), None)
|
||||||
@@ -579,6 +627,45 @@ class TestUtil(unittest.TestCase):
|
|||||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||||
|
|
||||||
|
def test_mimetype2ext(self):
|
||||||
|
self.assertEqual(mimetype2ext(None), None)
|
||||||
|
self.assertEqual(mimetype2ext('video/x-flv'), 'flv')
|
||||||
|
self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8')
|
||||||
|
self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
|
||||||
|
self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
|
||||||
|
self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
|
||||||
|
|
||||||
|
def test_month_by_name(self):
|
||||||
|
self.assertEqual(month_by_name(None), None)
|
||||||
|
self.assertEqual(month_by_name('December', 'en'), 12)
|
||||||
|
self.assertEqual(month_by_name('décembre', 'fr'), 12)
|
||||||
|
self.assertEqual(month_by_name('December'), 12)
|
||||||
|
self.assertEqual(month_by_name('décembre'), None)
|
||||||
|
self.assertEqual(month_by_name('Unknown', 'unknown'), None)
|
||||||
|
|
||||||
|
def test_parse_codecs(self):
|
||||||
|
self.assertEqual(parse_codecs(''), {})
|
||||||
|
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
|
||||||
|
'vcodec': 'avc1.77.30',
|
||||||
|
'acodec': 'mp4a.40.2',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs('mp4a.40.2'), {
|
||||||
|
'vcodec': 'none',
|
||||||
|
'acodec': 'mp4a.40.2',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
|
||||||
|
'vcodec': 'avc1.42001e',
|
||||||
|
'acodec': 'mp4a.40.5',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs('avc3.640028'), {
|
||||||
|
'vcodec': 'avc3.640028',
|
||||||
|
'acodec': 'none',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
|
||||||
|
'vcodec': 'h264',
|
||||||
|
'acodec': 'aac',
|
||||||
|
})
|
||||||
|
|
||||||
def test_escape_rfc3986(self):
|
def test_escape_rfc3986(self):
|
||||||
reserved = "!*'();:@&=+$,/?#[]"
|
reserved = "!*'();:@&=+$,/?#[]"
|
||||||
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
||||||
@@ -640,6 +727,12 @@ class TestUtil(unittest.TestCase):
|
|||||||
"1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
|
"1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
|
||||||
}''')
|
}''')
|
||||||
|
|
||||||
|
inp = '''{"foo":101}'''
|
||||||
|
self.assertEqual(js_to_json(inp), '''{"foo":101}''')
|
||||||
|
|
||||||
|
inp = '''{"duration": "00:01:07"}'''
|
||||||
|
self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
|
||||||
|
|
||||||
def test_js_to_json_edgecases(self):
|
def test_js_to_json_edgecases(self):
|
||||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||||
@@ -745,7 +838,10 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_filesize('2 MiB'), 2097152)
|
self.assertEqual(parse_filesize('2 MiB'), 2097152)
|
||||||
self.assertEqual(parse_filesize('5 GB'), 5000000000)
|
self.assertEqual(parse_filesize('5 GB'), 5000000000)
|
||||||
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
|
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
|
||||||
|
self.assertEqual(parse_filesize('1.2tb'), 1200000000000)
|
||||||
self.assertEqual(parse_filesize('1,24 KB'), 1240)
|
self.assertEqual(parse_filesize('1,24 KB'), 1240)
|
||||||
|
self.assertEqual(parse_filesize('1,24 kb'), 1240)
|
||||||
|
self.assertEqual(parse_filesize('8.5 megabytes'), 8500000)
|
||||||
|
|
||||||
def test_parse_count(self):
|
def test_parse_count(self):
|
||||||
self.assertEqual(parse_count(None), None)
|
self.assertEqual(parse_count(None), None)
|
||||||
@@ -896,6 +992,7 @@ The first line
|
|||||||
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
||||||
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
||||||
self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
|
self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
|
||||||
|
self.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10'])
|
||||||
|
|
||||||
def test_cli_valueless_option(self):
|
def test_cli_valueless_option(self):
|
||||||
self.assertEqual(cli_valueless_option(
|
self.assertEqual(cli_valueless_option(
|
||||||
@@ -956,5 +1053,17 @@ The first line
|
|||||||
self.assertRaises(ValueError, encode_base_n, 0, 70)
|
self.assertRaises(ValueError, encode_base_n, 0, 70)
|
||||||
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
|
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
|
||||||
|
|
||||||
|
def test_urshift(self):
|
||||||
|
self.assertEqual(urshift(3, 1), 1)
|
||||||
|
self.assertEqual(urshift(-3, 1), 2147483646)
|
||||||
|
|
||||||
|
def test_get_element_by_class(self):
|
||||||
|
html = '''
|
||||||
|
<span class="foo bar">nice</span>
|
||||||
|
'''
|
||||||
|
|
||||||
|
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
||||||
|
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
70
test/test_verbose_output.py
Normal file
70
test/test_verbose_output.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
|
||||||
|
class TestVerboseOutput(unittest.TestCase):
|
||||||
|
def test_private_info_arg(self):
|
||||||
|
outp = subprocess.Popen(
|
||||||
|
[
|
||||||
|
sys.executable, 'youtube_dl/__main__.py', '-v',
|
||||||
|
'--username', 'johnsmith@gmail.com',
|
||||||
|
'--password', 'secret',
|
||||||
|
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
sout, serr = outp.communicate()
|
||||||
|
self.assertTrue(b'--username' in serr)
|
||||||
|
self.assertTrue(b'johnsmith' not in serr)
|
||||||
|
self.assertTrue(b'--password' in serr)
|
||||||
|
self.assertTrue(b'secret' not in serr)
|
||||||
|
|
||||||
|
def test_private_info_shortarg(self):
|
||||||
|
outp = subprocess.Popen(
|
||||||
|
[
|
||||||
|
sys.executable, 'youtube_dl/__main__.py', '-v',
|
||||||
|
'-u', 'johnsmith@gmail.com',
|
||||||
|
'-p', 'secret',
|
||||||
|
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
sout, serr = outp.communicate()
|
||||||
|
self.assertTrue(b'-u' in serr)
|
||||||
|
self.assertTrue(b'johnsmith' not in serr)
|
||||||
|
self.assertTrue(b'-p' in serr)
|
||||||
|
self.assertTrue(b'secret' not in serr)
|
||||||
|
|
||||||
|
def test_private_info_eq(self):
|
||||||
|
outp = subprocess.Popen(
|
||||||
|
[
|
||||||
|
sys.executable, 'youtube_dl/__main__.py', '-v',
|
||||||
|
'--username=johnsmith@gmail.com',
|
||||||
|
'--password=secret',
|
||||||
|
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
sout, serr = outp.communicate()
|
||||||
|
self.assertTrue(b'--username' in serr)
|
||||||
|
self.assertTrue(b'johnsmith' not in serr)
|
||||||
|
self.assertTrue(b'--password' in serr)
|
||||||
|
self.assertTrue(b'secret' not in serr)
|
||||||
|
|
||||||
|
def test_private_info_shortarg_eq(self):
|
||||||
|
outp = subprocess.Popen(
|
||||||
|
[
|
||||||
|
sys.executable, 'youtube_dl/__main__.py', '-v',
|
||||||
|
'-u=johnsmith@gmail.com',
|
||||||
|
'-p=secret',
|
||||||
|
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
sout, serr = outp.communicate()
|
||||||
|
self.assertTrue(b'-u' in serr)
|
||||||
|
self.assertTrue(b'johnsmith' not in serr)
|
||||||
|
self.assertTrue(b'-p' in serr)
|
||||||
|
self.assertTrue(b'secret' not in serr)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
@@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals
|
|||||||
|
|
||||||
import collections
|
import collections
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
import errno
|
import errno
|
||||||
import fileinput
|
import fileinput
|
||||||
@@ -130,6 +131,9 @@ class YoutubeDL(object):
|
|||||||
username: Username for authentication purposes.
|
username: Username for authentication purposes.
|
||||||
password: Password for authentication purposes.
|
password: Password for authentication purposes.
|
||||||
videopassword: Password for accessing a video.
|
videopassword: Password for accessing a video.
|
||||||
|
ap_mso: Adobe Pass multiple-system operator identifier.
|
||||||
|
ap_username: Multiple-system operator account username.
|
||||||
|
ap_password: Multiple-system operator account password.
|
||||||
usenetrc: Use netrc for authentication instead.
|
usenetrc: Use netrc for authentication instead.
|
||||||
verbose: Print additional info to stdout.
|
verbose: Print additional info to stdout.
|
||||||
quiet: Do not print messages to stdout.
|
quiet: Do not print messages to stdout.
|
||||||
@@ -196,8 +200,8 @@ class YoutubeDL(object):
|
|||||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||||
At the moment, this is only supported by YouTube.
|
At the moment, this is only supported by YouTube.
|
||||||
proxy: URL of the proxy server to use
|
proxy: URL of the proxy server to use
|
||||||
cn_verification_proxy: URL of the proxy to use for IP address verification
|
geo_verification_proxy: URL of the proxy to use for IP address verification
|
||||||
on Chinese sites. (Experimental)
|
on geo-restricted sites. (Experimental)
|
||||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||||
support, using fridibi
|
support, using fridibi
|
||||||
@@ -248,7 +252,16 @@ class YoutubeDL(object):
|
|||||||
source_address: (Experimental) Client-side IP address to bind to.
|
source_address: (Experimental) Client-side IP address to bind to.
|
||||||
call_home: Boolean, true iff we are allowed to contact the
|
call_home: Boolean, true iff we are allowed to contact the
|
||||||
youtube-dl servers for debugging.
|
youtube-dl servers for debugging.
|
||||||
sleep_interval: Number of seconds to sleep before each download.
|
sleep_interval: Number of seconds to sleep before each download when
|
||||||
|
used alone or a lower bound of a range for randomized
|
||||||
|
sleep before each download (minimum possible number
|
||||||
|
of seconds to sleep) when used along with
|
||||||
|
max_sleep_interval.
|
||||||
|
max_sleep_interval:Upper bound of a range for randomized sleep before each
|
||||||
|
download (maximum possible number of seconds to sleep).
|
||||||
|
Must only be used along with sleep_interval.
|
||||||
|
Actual sleep time will be a random float from range
|
||||||
|
[sleep_interval; max_sleep_interval].
|
||||||
listformats: Print an overview of available video formats and exit.
|
listformats: Print an overview of available video formats and exit.
|
||||||
list_thumbnails: Print a table of all thumbnails and exit.
|
list_thumbnails: Print a table of all thumbnails and exit.
|
||||||
match_filter: A function that gets called with the info_dict of
|
match_filter: A function that gets called with the info_dict of
|
||||||
@@ -304,6 +317,11 @@ class YoutubeDL(object):
|
|||||||
self.params.update(params)
|
self.params.update(params)
|
||||||
self.cache = Cache(self)
|
self.cache = Cache(self)
|
||||||
|
|
||||||
|
if self.params.get('cn_verification_proxy') is not None:
|
||||||
|
self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
|
||||||
|
if self.params.get('geo_verification_proxy') is None:
|
||||||
|
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
|
||||||
|
|
||||||
if params.get('bidi_workaround', False):
|
if params.get('bidi_workaround', False):
|
||||||
try:
|
try:
|
||||||
import pty
|
import pty
|
||||||
@@ -1046,9 +1064,9 @@ class YoutubeDL(object):
|
|||||||
if isinstance(selector, list):
|
if isinstance(selector, list):
|
||||||
fs = [_build_selector_function(s) for s in selector]
|
fs = [_build_selector_function(s) for s in selector]
|
||||||
|
|
||||||
def selector_function(formats):
|
def selector_function(ctx):
|
||||||
for f in fs:
|
for f in fs:
|
||||||
for format in f(formats):
|
for format in f(ctx):
|
||||||
yield format
|
yield format
|
||||||
return selector_function
|
return selector_function
|
||||||
elif selector.type == GROUP:
|
elif selector.type == GROUP:
|
||||||
@@ -1056,17 +1074,17 @@ class YoutubeDL(object):
|
|||||||
elif selector.type == PICKFIRST:
|
elif selector.type == PICKFIRST:
|
||||||
fs = [_build_selector_function(s) for s in selector.selector]
|
fs = [_build_selector_function(s) for s in selector.selector]
|
||||||
|
|
||||||
def selector_function(formats):
|
def selector_function(ctx):
|
||||||
for f in fs:
|
for f in fs:
|
||||||
picked_formats = list(f(formats))
|
picked_formats = list(f(ctx))
|
||||||
if picked_formats:
|
if picked_formats:
|
||||||
return picked_formats
|
return picked_formats
|
||||||
return []
|
return []
|
||||||
elif selector.type == SINGLE:
|
elif selector.type == SINGLE:
|
||||||
format_spec = selector.selector
|
format_spec = selector.selector
|
||||||
|
|
||||||
def selector_function(formats):
|
def selector_function(ctx):
|
||||||
formats = list(formats)
|
formats = list(ctx['formats'])
|
||||||
if not formats:
|
if not formats:
|
||||||
return
|
return
|
||||||
if format_spec == 'all':
|
if format_spec == 'all':
|
||||||
@@ -1079,9 +1097,10 @@ class YoutubeDL(object):
|
|||||||
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
||||||
if audiovideo_formats:
|
if audiovideo_formats:
|
||||||
yield audiovideo_formats[format_idx]
|
yield audiovideo_formats[format_idx]
|
||||||
# for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
|
# for extractors with incomplete formats (audio only (soundcloud)
|
||||||
elif (all(f.get('acodec') != 'none' for f in formats) or
|
# or video only (imgur)) we will fallback to best/worst
|
||||||
all(f.get('vcodec') != 'none' for f in formats)):
|
# {video,audio}-only format
|
||||||
|
elif ctx['incomplete_formats']:
|
||||||
yield formats[format_idx]
|
yield formats[format_idx]
|
||||||
elif format_spec == 'bestaudio':
|
elif format_spec == 'bestaudio':
|
||||||
audio_formats = [
|
audio_formats = [
|
||||||
@@ -1155,17 +1174,18 @@ class YoutubeDL(object):
|
|||||||
}
|
}
|
||||||
video_selector, audio_selector = map(_build_selector_function, selector.selector)
|
video_selector, audio_selector = map(_build_selector_function, selector.selector)
|
||||||
|
|
||||||
def selector_function(formats):
|
def selector_function(ctx):
|
||||||
formats = list(formats)
|
for pair in itertools.product(
|
||||||
for pair in itertools.product(video_selector(formats), audio_selector(formats)):
|
video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
|
||||||
yield _merge(pair)
|
yield _merge(pair)
|
||||||
|
|
||||||
filters = [self._build_format_filter(f) for f in selector.filters]
|
filters = [self._build_format_filter(f) for f in selector.filters]
|
||||||
|
|
||||||
def final_selector(formats):
|
def final_selector(ctx):
|
||||||
|
ctx_copy = copy.deepcopy(ctx)
|
||||||
for _filter in filters:
|
for _filter in filters:
|
||||||
formats = list(filter(_filter, formats))
|
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
|
||||||
return selector_function(formats)
|
return selector_function(ctx_copy)
|
||||||
return final_selector
|
return final_selector
|
||||||
|
|
||||||
stream = io.BytesIO(format_spec.encode('utf-8'))
|
stream = io.BytesIO(format_spec.encode('utf-8'))
|
||||||
@@ -1239,8 +1259,10 @@ class YoutubeDL(object):
|
|||||||
info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
|
info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
|
||||||
if thumbnails:
|
if thumbnails:
|
||||||
thumbnails.sort(key=lambda t: (
|
thumbnails.sort(key=lambda t: (
|
||||||
t.get('preference'), t.get('width'), t.get('height'),
|
t.get('preference') if t.get('preference') is not None else -1,
|
||||||
t.get('id'), t.get('url')))
|
t.get('width') if t.get('width') is not None else -1,
|
||||||
|
t.get('height') if t.get('height') is not None else -1,
|
||||||
|
t.get('id') if t.get('id') is not None else '', t.get('url')))
|
||||||
for i, t in enumerate(thumbnails):
|
for i, t in enumerate(thumbnails):
|
||||||
t['url'] = sanitize_url(t['url'])
|
t['url'] = sanitize_url(t['url'])
|
||||||
if t.get('width') and t.get('height'):
|
if t.get('width') and t.get('height'):
|
||||||
@@ -1282,7 +1304,7 @@ class YoutubeDL(object):
|
|||||||
for subtitle_format in subtitle:
|
for subtitle_format in subtitle:
|
||||||
if subtitle_format.get('url'):
|
if subtitle_format.get('url'):
|
||||||
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
||||||
if 'ext' not in subtitle_format:
|
if subtitle_format.get('ext') is None:
|
||||||
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
||||||
|
|
||||||
if self.params.get('listsubtitles', False):
|
if self.params.get('listsubtitles', False):
|
||||||
@@ -1337,7 +1359,7 @@ class YoutubeDL(object):
|
|||||||
note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
||||||
)
|
)
|
||||||
# Automatically determine file extension if missing
|
# Automatically determine file extension if missing
|
||||||
if 'ext' not in format:
|
if format.get('ext') is None:
|
||||||
format['ext'] = determine_ext(format['url']).lower()
|
format['ext'] = determine_ext(format['url']).lower()
|
||||||
# Automatically determine protocol if missing (useful for format
|
# Automatically determine protocol if missing (useful for format
|
||||||
# selection purposes)
|
# selection purposes)
|
||||||
@@ -1372,7 +1394,34 @@ class YoutubeDL(object):
|
|||||||
req_format_list.append('best')
|
req_format_list.append('best')
|
||||||
req_format = '/'.join(req_format_list)
|
req_format = '/'.join(req_format_list)
|
||||||
format_selector = self.build_format_selector(req_format)
|
format_selector = self.build_format_selector(req_format)
|
||||||
formats_to_download = list(format_selector(formats))
|
|
||||||
|
# While in format selection we may need to have an access to the original
|
||||||
|
# format set in order to calculate some metrics or do some processing.
|
||||||
|
# For now we need to be able to guess whether original formats provided
|
||||||
|
# by extractor are incomplete or not (i.e. whether extractor provides only
|
||||||
|
# video-only or audio-only formats) for proper formats selection for
|
||||||
|
# extractors with such incomplete formats (see
|
||||||
|
# https://github.com/rg3/youtube-dl/pull/5556).
|
||||||
|
# Since formats may be filtered during format selection and may not match
|
||||||
|
# the original formats the results may be incorrect. Thus original formats
|
||||||
|
# or pre-calculated metrics should be passed to format selection routines
|
||||||
|
# as well.
|
||||||
|
# We will pass a context object containing all necessary additional data
|
||||||
|
# instead of just formats.
|
||||||
|
# This fixes incorrect format selection issue (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/10083).
|
||||||
|
incomplete_formats = (
|
||||||
|
# All formats are video-only or
|
||||||
|
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
|
||||||
|
# all formats are audio-only
|
||||||
|
all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
|
||||||
|
|
||||||
|
ctx = {
|
||||||
|
'formats': formats,
|
||||||
|
'incomplete_formats': incomplete_formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
formats_to_download = list(format_selector(ctx))
|
||||||
if not formats_to_download:
|
if not formats_to_download:
|
||||||
raise ExtractorError('requested format not available',
|
raise ExtractorError('requested format not available',
|
||||||
expected=True)
|
expected=True)
|
||||||
@@ -1559,7 +1608,9 @@ class YoutubeDL(object):
|
|||||||
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
||||||
else:
|
else:
|
||||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
# Use newline='' to prevent conversion of newline characters
|
||||||
|
# See https://github.com/rg3/youtube-dl/issues/10268
|
||||||
|
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||||
subfile.write(sub_data)
|
subfile.write(sub_data)
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||||
|
|||||||
@@ -34,12 +34,14 @@ from .utils import (
|
|||||||
setproctitle,
|
setproctitle,
|
||||||
std_headers,
|
std_headers,
|
||||||
write_string,
|
write_string,
|
||||||
|
render_table,
|
||||||
)
|
)
|
||||||
from .update import update_self
|
from .update import update_self
|
||||||
from .downloader import (
|
from .downloader import (
|
||||||
FileDownloader,
|
FileDownloader,
|
||||||
)
|
)
|
||||||
from .extractor import gen_extractors, list_extractors
|
from .extractor import gen_extractors, list_extractors
|
||||||
|
from .extractor.adobepass import MSO_INFO
|
||||||
from .YoutubeDL import YoutubeDL
|
from .YoutubeDL import YoutubeDL
|
||||||
|
|
||||||
|
|
||||||
@@ -118,18 +120,26 @@ def _real_main(argv=None):
|
|||||||
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
||||||
write_string(desc + '\n', out=sys.stdout)
|
write_string(desc + '\n', out=sys.stdout)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
if opts.ap_list_mso:
|
||||||
|
table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
|
||||||
|
write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
# Conflicting, missing and erroneous options
|
# Conflicting, missing and erroneous options
|
||||||
if opts.usenetrc and (opts.username is not None or opts.password is not None):
|
if opts.usenetrc and (opts.username is not None or opts.password is not None):
|
||||||
parser.error('using .netrc conflicts with giving username/password')
|
parser.error('using .netrc conflicts with giving username/password')
|
||||||
if opts.password is not None and opts.username is None:
|
if opts.password is not None and opts.username is None:
|
||||||
parser.error('account username missing\n')
|
parser.error('account username missing\n')
|
||||||
|
if opts.ap_password is not None and opts.ap_username is None:
|
||||||
|
parser.error('TV Provider account username missing\n')
|
||||||
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
|
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
|
||||||
parser.error('using output template conflicts with using title, video ID or auto number')
|
parser.error('using output template conflicts with using title, video ID or auto number')
|
||||||
if opts.usetitle and opts.useid:
|
if opts.usetitle and opts.useid:
|
||||||
parser.error('using title conflicts with using video ID')
|
parser.error('using title conflicts with using video ID')
|
||||||
if opts.username is not None and opts.password is None:
|
if opts.username is not None and opts.password is None:
|
||||||
opts.password = compat_getpass('Type account password and press [Return]: ')
|
opts.password = compat_getpass('Type account password and press [Return]: ')
|
||||||
|
if opts.ap_username is not None and opts.ap_password is None:
|
||||||
|
opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
|
||||||
if opts.ratelimit is not None:
|
if opts.ratelimit is not None:
|
||||||
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
|
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
|
||||||
if numeric_limit is None:
|
if numeric_limit is None:
|
||||||
@@ -145,6 +155,18 @@ def _real_main(argv=None):
|
|||||||
if numeric_limit is None:
|
if numeric_limit is None:
|
||||||
parser.error('invalid max_filesize specified')
|
parser.error('invalid max_filesize specified')
|
||||||
opts.max_filesize = numeric_limit
|
opts.max_filesize = numeric_limit
|
||||||
|
if opts.sleep_interval is not None:
|
||||||
|
if opts.sleep_interval < 0:
|
||||||
|
parser.error('sleep interval must be positive or 0')
|
||||||
|
if opts.max_sleep_interval is not None:
|
||||||
|
if opts.max_sleep_interval < 0:
|
||||||
|
parser.error('max sleep interval must be positive or 0')
|
||||||
|
if opts.max_sleep_interval < opts.sleep_interval:
|
||||||
|
parser.error('max sleep interval must be greater than or equal to min sleep interval')
|
||||||
|
else:
|
||||||
|
opts.max_sleep_interval = opts.sleep_interval
|
||||||
|
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
|
||||||
|
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
|
||||||
|
|
||||||
def parse_retries(retries):
|
def parse_retries(retries):
|
||||||
if retries in ('inf', 'infinite'):
|
if retries in ('inf', 'infinite'):
|
||||||
@@ -283,6 +305,9 @@ def _real_main(argv=None):
|
|||||||
'password': opts.password,
|
'password': opts.password,
|
||||||
'twofactor': opts.twofactor,
|
'twofactor': opts.twofactor,
|
||||||
'videopassword': opts.videopassword,
|
'videopassword': opts.videopassword,
|
||||||
|
'ap_mso': opts.ap_mso,
|
||||||
|
'ap_username': opts.ap_username,
|
||||||
|
'ap_password': opts.ap_password,
|
||||||
'quiet': (opts.quiet or any_getting or any_printing),
|
'quiet': (opts.quiet or any_getting or any_printing),
|
||||||
'no_warnings': opts.no_warnings,
|
'no_warnings': opts.no_warnings,
|
||||||
'forceurl': opts.geturl,
|
'forceurl': opts.geturl,
|
||||||
@@ -308,6 +333,7 @@ def _real_main(argv=None):
|
|||||||
'nooverwrites': opts.nooverwrites,
|
'nooverwrites': opts.nooverwrites,
|
||||||
'retries': opts.retries,
|
'retries': opts.retries,
|
||||||
'fragment_retries': opts.fragment_retries,
|
'fragment_retries': opts.fragment_retries,
|
||||||
|
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
|
||||||
'buffersize': opts.buffersize,
|
'buffersize': opts.buffersize,
|
||||||
'noresizebuffer': opts.noresizebuffer,
|
'noresizebuffer': opts.noresizebuffer,
|
||||||
'continuedl': opts.continue_dl,
|
'continuedl': opts.continue_dl,
|
||||||
@@ -370,6 +396,7 @@ def _real_main(argv=None):
|
|||||||
'source_address': opts.source_address,
|
'source_address': opts.source_address,
|
||||||
'call_home': opts.call_home,
|
'call_home': opts.call_home,
|
||||||
'sleep_interval': opts.sleep_interval,
|
'sleep_interval': opts.sleep_interval,
|
||||||
|
'max_sleep_interval': opts.max_sleep_interval,
|
||||||
'external_downloader': opts.external_downloader,
|
'external_downloader': opts.external_downloader,
|
||||||
'list_thumbnails': opts.list_thumbnails,
|
'list_thumbnails': opts.list_thumbnails,
|
||||||
'playlist_items': opts.playlist_items,
|
'playlist_items': opts.playlist_items,
|
||||||
@@ -382,6 +409,8 @@ def _real_main(argv=None):
|
|||||||
'external_downloader_args': external_downloader_args,
|
'external_downloader_args': external_downloader_args,
|
||||||
'postprocessor_args': postprocessor_args,
|
'postprocessor_args': postprocessor_args,
|
||||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||||
|
'geo_verification_proxy': opts.geo_verification_proxy,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import binascii
|
import binascii
|
||||||
@@ -2594,15 +2595,19 @@ except ImportError: # Python < 3.3
|
|||||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info >= (2, 7, 3):
|
try:
|
||||||
|
args = shlex.split('中文')
|
||||||
|
assert (isinstance(args, list) and
|
||||||
|
isinstance(args[0], compat_str) and
|
||||||
|
args[0] == '中文')
|
||||||
compat_shlex_split = shlex.split
|
compat_shlex_split = shlex.split
|
||||||
else:
|
except (AssertionError, UnicodeEncodeError):
|
||||||
# Working around shlex issue with unicode strings on some python 2
|
# Working around shlex issue with unicode strings on some python 2
|
||||||
# versions (see http://bugs.python.org/issue1548891)
|
# versions (see http://bugs.python.org/issue1548891)
|
||||||
def compat_shlex_split(s, comments=False, posix=True):
|
def compat_shlex_split(s, comments=False, posix=True):
|
||||||
if isinstance(s, compat_str):
|
if isinstance(s, compat_str):
|
||||||
s = s.encode('utf-8')
|
s = s.encode('utf-8')
|
||||||
return shlex.split(s, comments, posix)
|
return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
|
||||||
|
|
||||||
|
|
||||||
def compat_ord(c):
|
def compat_ord(c):
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import random
|
||||||
|
|
||||||
from ..compat import compat_os_name
|
from ..compat import compat_os_name
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -342,8 +343,11 @@ class FileDownloader(object):
|
|||||||
})
|
})
|
||||||
return True
|
return True
|
||||||
|
|
||||||
sleep_interval = self.params.get('sleep_interval')
|
min_sleep_interval = self.params.get('sleep_interval')
|
||||||
if sleep_interval:
|
if min_sleep_interval:
|
||||||
|
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
||||||
|
print(min_sleep_interval, max_sleep_interval)
|
||||||
|
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||||
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
|
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
|
||||||
time.sleep(sleep_interval)
|
time.sleep(sleep_interval)
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
|
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
from ..compat import compat_urllib_error
|
from ..compat import compat_urllib_error
|
||||||
@@ -19,32 +18,32 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
FD_NAME = 'dashsegments'
|
FD_NAME = 'dashsegments'
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
base_url = info_dict['url']
|
segments = info_dict['fragments'][:1] if self.params.get(
|
||||||
segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls']
|
'test', False) else info_dict['fragments']
|
||||||
initialization_url = info_dict.get('initialization_url')
|
|
||||||
|
|
||||||
ctx = {
|
ctx = {
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'total_frags': len(segment_urls) + (1 if initialization_url else 0),
|
'total_frags': len(segments),
|
||||||
}
|
}
|
||||||
|
|
||||||
self._prepare_and_start_frag_download(ctx)
|
self._prepare_and_start_frag_download(ctx)
|
||||||
|
|
||||||
def combine_url(base_url, target_url):
|
|
||||||
if re.match(r'^https?://', target_url):
|
|
||||||
return target_url
|
|
||||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
|
||||||
|
|
||||||
segments_filenames = []
|
segments_filenames = []
|
||||||
|
|
||||||
fragment_retries = self.params.get('fragment_retries', 0)
|
fragment_retries = self.params.get('fragment_retries', 0)
|
||||||
|
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||||
|
|
||||||
def append_url_to_file(target_url, tmp_filename, segment_name):
|
def process_segment(segment, tmp_filename, num):
|
||||||
|
segment_url = segment['url']
|
||||||
|
segment_name = 'Frag%d' % num
|
||||||
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
||||||
|
# In DASH, the first segment contains necessary headers to
|
||||||
|
# generate a valid MP4 file, so always abort for the first segment
|
||||||
|
fatal = num == 0 or not skip_unavailable_fragments
|
||||||
count = 0
|
count = 0
|
||||||
while count <= fragment_retries:
|
while count <= fragment_retries:
|
||||||
try:
|
try:
|
||||||
success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
|
success = ctx['dl'].download(target_filename, {'url': segment_url})
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||||
@@ -52,26 +51,27 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
down.close()
|
down.close()
|
||||||
segments_filenames.append(target_sanitized)
|
segments_filenames.append(target_sanitized)
|
||||||
break
|
break
|
||||||
except (compat_urllib_error.HTTPError, ) as err:
|
except compat_urllib_error.HTTPError as err:
|
||||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||||
# whole download to fail. However if the same fragment is immediately
|
# whole download to fail. However if the same fragment is immediately
|
||||||
# retried with the same request data this usually succeeds (1-2 attemps
|
# retried with the same request data this usually succeeds (1-2 attemps
|
||||||
# is usually enough) thus allowing to download the whole file successfully.
|
# is usually enough) thus allowing to download the whole file successfully.
|
||||||
# So, we will retry all fragments that fail with 404 HTTP error for now.
|
# To be future-proof we will retry all fragments that fail with any
|
||||||
if err.code != 404:
|
# HTTP error.
|
||||||
raise
|
|
||||||
# Retry fragment
|
|
||||||
count += 1
|
count += 1
|
||||||
if count <= fragment_retries:
|
if count <= fragment_retries:
|
||||||
self.report_retry_fragment(segment_name, count, fragment_retries)
|
self.report_retry_fragment(err, segment_name, count, fragment_retries)
|
||||||
if count > fragment_retries:
|
if count > fragment_retries:
|
||||||
|
if not fatal:
|
||||||
|
self.report_skip_fragment(segment_name)
|
||||||
|
return True
|
||||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||||
return False
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
if initialization_url:
|
for i, segment in enumerate(segments):
|
||||||
append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init')
|
if not process_segment(segment, ctx['tmpfilename'], i):
|
||||||
for i, segment_url in enumerate(segment_urls):
|
return False
|
||||||
append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i)
|
|
||||||
|
|
||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
|
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ class ExternalFD(FileDownloader):
|
|||||||
cmd, stderr=subprocess.PIPE)
|
cmd, stderr=subprocess.PIPE)
|
||||||
_, stderr = p.communicate()
|
_, stderr = p.communicate()
|
||||||
if p.returncode != 0:
|
if p.returncode != 0:
|
||||||
self.to_stderr(stderr)
|
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
||||||
return p.returncode
|
return p.returncode
|
||||||
|
|
||||||
|
|
||||||
@@ -96,6 +96,12 @@ class CurlFD(ExternalFD):
|
|||||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
||||||
|
cmd += self._valueless_option('--silent', 'noprogress')
|
||||||
|
cmd += self._valueless_option('--verbose', 'verbose')
|
||||||
|
cmd += self._option('--limit-rate', 'ratelimit')
|
||||||
|
cmd += self._option('--retry', 'retries')
|
||||||
|
cmd += self._option('--max-filesize', 'max_filesize')
|
||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
cmd += self._option('--proxy', 'proxy')
|
cmd += self._option('--proxy', 'proxy')
|
||||||
cmd += self._valueless_option('--insecure', 'nocheckcertificate')
|
cmd += self._valueless_option('--insecure', 'nocheckcertificate')
|
||||||
@@ -103,6 +109,16 @@ class CurlFD(ExternalFD):
|
|||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
def _call_downloader(self, tmpfilename, info_dict):
|
||||||
|
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
||||||
|
|
||||||
|
self._debug_cmd(cmd)
|
||||||
|
|
||||||
|
# curl writes the progress to stderr so don't capture it.
|
||||||
|
p = subprocess.Popen(cmd)
|
||||||
|
p.communicate()
|
||||||
|
return p.returncode
|
||||||
|
|
||||||
|
|
||||||
class AxelFD(ExternalFD):
|
class AxelFD(ExternalFD):
|
||||||
AVAILABLE_OPT = '-V'
|
AVAILABLE_OPT = '-V'
|
||||||
@@ -204,6 +220,12 @@ class FFmpegFD(ExternalFD):
|
|||||||
if proxy:
|
if proxy:
|
||||||
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
||||||
proxy = 'http://%s' % proxy
|
proxy = 'http://%s' % proxy
|
||||||
|
|
||||||
|
if proxy.startswith('socks'):
|
||||||
|
self.report_warning(
|
||||||
|
'%s does not support SOCKS proxies. Downloading is likely to fail. '
|
||||||
|
'Consider adding --hls-prefer-native to your command.' % self.get_basename())
|
||||||
|
|
||||||
# Since December 2015 ffmpeg supports -http_proxy option (see
|
# Since December 2015 ffmpeg supports -http_proxy option (see
|
||||||
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
|
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
|
||||||
# We could switch to the following code if we are able to detect version properly
|
# We could switch to the following code if we are able to detect version properly
|
||||||
|
|||||||
@@ -196,6 +196,11 @@ def build_fragments_list(boot_info):
|
|||||||
first_frag_number = fragment_run_entry_table[0]['first']
|
first_frag_number = fragment_run_entry_table[0]['first']
|
||||||
fragments_counter = itertools.count(first_frag_number)
|
fragments_counter = itertools.count(first_frag_number)
|
||||||
for segment, fragments_count in segment_run_table['segment_run']:
|
for segment, fragments_count in segment_run_table['segment_run']:
|
||||||
|
# In some live HDS streams (for example Rai), `fragments_count` is
|
||||||
|
# abnormal and causing out-of-memory errors. It's OK to change the
|
||||||
|
# number of fragments for live streams as they are updated periodically
|
||||||
|
if fragments_count == 4294967295 and boot_info['live']:
|
||||||
|
fragments_count = 2
|
||||||
for _ in range(fragments_count):
|
for _ in range(fragments_count):
|
||||||
res.append((segment, next(fragments_counter)))
|
res.append((segment, next(fragments_counter)))
|
||||||
|
|
||||||
@@ -329,7 +334,11 @@ class F4mFD(FragmentFD):
|
|||||||
|
|
||||||
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
||||||
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
||||||
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
|
# From Adobe F4M 3.0 spec:
|
||||||
|
# The <baseURL> element SHALL be the base URL for all relative
|
||||||
|
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
|
||||||
|
# URLs should be relative to the location of the containing document.
|
||||||
|
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
|
||||||
live = boot_info['live']
|
live = boot_info['live']
|
||||||
metadata_node = media.find(_add_ns('metadata'))
|
metadata_node = media.find(_add_ns('metadata'))
|
||||||
if metadata_node is not None:
|
if metadata_node is not None:
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import time
|
|||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from .http import HttpFD
|
from .http import HttpFD
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
error_to_compat_str,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
sanitize_open,
|
sanitize_open,
|
||||||
)
|
)
|
||||||
@@ -22,13 +23,19 @@ class FragmentFD(FileDownloader):
|
|||||||
|
|
||||||
Available options:
|
Available options:
|
||||||
|
|
||||||
fragment_retries: Number of times to retry a fragment for HTTP error (DASH only)
|
fragment_retries: Number of times to retry a fragment for HTTP error (DASH
|
||||||
|
and hlsnative only)
|
||||||
|
skip_unavailable_fragments:
|
||||||
|
Skip unavailable fragments (DASH and hlsnative only)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def report_retry_fragment(self, fragment_name, count, retries):
|
def report_retry_fragment(self, err, fragment_name, count, retries):
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...'
|
'[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...'
|
||||||
% (fragment_name, count, self.format_retries(retries)))
|
% (error_to_compat_str(err), fragment_name, count, self.format_retries(retries)))
|
||||||
|
|
||||||
|
def report_skip_fragment(self, fragment_name):
|
||||||
|
self.to_screen('[download] Skipping fragment %s...' % fragment_name)
|
||||||
|
|
||||||
def _prepare_and_start_frag_download(self, ctx):
|
def _prepare_and_start_frag_download(self, ctx):
|
||||||
self._prepare_frag_download(ctx)
|
self._prepare_frag_download(ctx)
|
||||||
|
|||||||
@@ -2,14 +2,26 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
|
import binascii
|
||||||
|
try:
|
||||||
|
from Crypto.Cipher import AES
|
||||||
|
can_decrypt_frag = True
|
||||||
|
except ImportError:
|
||||||
|
can_decrypt_frag = False
|
||||||
|
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
from .external import FFmpegFD
|
from .external import FFmpegFD
|
||||||
|
|
||||||
from ..compat import compat_urlparse
|
from ..compat import (
|
||||||
|
compat_urllib_error,
|
||||||
|
compat_urlparse,
|
||||||
|
compat_struct_pack,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
sanitize_open,
|
sanitize_open,
|
||||||
|
parse_m3u8_attributes,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -21,7 +33,7 @@ class HlsFD(FragmentFD):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def can_download(manifest):
|
def can_download(manifest):
|
||||||
UNSUPPORTED_FEATURES = (
|
UNSUPPORTED_FEATURES = (
|
||||||
r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1]
|
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||||
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||||
|
|
||||||
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
||||||
@@ -39,7 +51,9 @@ class HlsFD(FragmentFD):
|
|||||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
||||||
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
||||||
)
|
)
|
||||||
return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
|
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||||
|
check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
|
||||||
|
return all(check_results)
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
man_url = info_dict['url']
|
man_url = info_dict['url']
|
||||||
@@ -57,36 +71,94 @@ class HlsFD(FragmentFD):
|
|||||||
fd.add_progress_hook(ph)
|
fd.add_progress_hook(ph)
|
||||||
return fd.real_download(filename, info_dict)
|
return fd.real_download(filename, info_dict)
|
||||||
|
|
||||||
fragment_urls = []
|
total_frags = 0
|
||||||
for line in s.splitlines():
|
for line in s.splitlines():
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line and not line.startswith('#'):
|
if line and not line.startswith('#'):
|
||||||
segment_url = (
|
total_frags += 1
|
||||||
line
|
|
||||||
if re.match(r'^https?://', line)
|
|
||||||
else compat_urlparse.urljoin(man_url, line))
|
|
||||||
fragment_urls.append(segment_url)
|
|
||||||
# We only download the first fragment during the test
|
|
||||||
if self.params.get('test', False):
|
|
||||||
break
|
|
||||||
|
|
||||||
ctx = {
|
ctx = {
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'total_frags': len(fragment_urls),
|
'total_frags': total_frags,
|
||||||
}
|
}
|
||||||
|
|
||||||
self._prepare_and_start_frag_download(ctx)
|
self._prepare_and_start_frag_download(ctx)
|
||||||
|
|
||||||
|
fragment_retries = self.params.get('fragment_retries', 0)
|
||||||
|
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||||
|
test = self.params.get('test', False)
|
||||||
|
|
||||||
|
extra_query = None
|
||||||
|
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||||
|
if extra_param_to_segment_url:
|
||||||
|
extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
|
||||||
|
i = 0
|
||||||
|
media_sequence = 0
|
||||||
|
decrypt_info = {'METHOD': 'NONE'}
|
||||||
frags_filenames = []
|
frags_filenames = []
|
||||||
for i, frag_url in enumerate(fragment_urls):
|
for line in s.splitlines():
|
||||||
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
line = line.strip()
|
||||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
if line:
|
||||||
if not success:
|
if not line.startswith('#'):
|
||||||
return False
|
frag_url = (
|
||||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
line
|
||||||
ctx['dest_stream'].write(down.read())
|
if re.match(r'^https?://', line)
|
||||||
down.close()
|
else compat_urlparse.urljoin(man_url, line))
|
||||||
frags_filenames.append(frag_sanitized)
|
frag_name = 'Frag%d' % i
|
||||||
|
frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)
|
||||||
|
if extra_query:
|
||||||
|
frag_url = update_url_query(frag_url, extra_query)
|
||||||
|
count = 0
|
||||||
|
while count <= fragment_retries:
|
||||||
|
try:
|
||||||
|
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||||
|
if not success:
|
||||||
|
return False
|
||||||
|
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||||
|
frag_content = down.read()
|
||||||
|
down.close()
|
||||||
|
break
|
||||||
|
except compat_urllib_error.HTTPError as err:
|
||||||
|
# Unavailable (possibly temporary) fragments may be served.
|
||||||
|
# First we try to retry then either skip or abort.
|
||||||
|
# See https://github.com/rg3/youtube-dl/issues/10165,
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/10448).
|
||||||
|
count += 1
|
||||||
|
if count <= fragment_retries:
|
||||||
|
self.report_retry_fragment(err, frag_name, count, fragment_retries)
|
||||||
|
if count > fragment_retries:
|
||||||
|
if skip_unavailable_fragments:
|
||||||
|
i += 1
|
||||||
|
media_sequence += 1
|
||||||
|
self.report_skip_fragment(frag_name)
|
||||||
|
continue
|
||||||
|
self.report_error(
|
||||||
|
'giving up after %s fragment retries' % fragment_retries)
|
||||||
|
return False
|
||||||
|
if decrypt_info['METHOD'] == 'AES-128':
|
||||||
|
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||||
|
frag_content = AES.new(
|
||||||
|
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||||
|
ctx['dest_stream'].write(frag_content)
|
||||||
|
frags_filenames.append(frag_sanitized)
|
||||||
|
# We only download the first fragment during the test
|
||||||
|
if test:
|
||||||
|
break
|
||||||
|
i += 1
|
||||||
|
media_sequence += 1
|
||||||
|
elif line.startswith('#EXT-X-KEY'):
|
||||||
|
decrypt_info = parse_m3u8_attributes(line[11:])
|
||||||
|
if decrypt_info['METHOD'] == 'AES-128':
|
||||||
|
if 'IV' in decrypt_info:
|
||||||
|
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
|
||||||
|
if not re.match(r'^https?://', decrypt_info['URI']):
|
||||||
|
decrypt_info['URI'] = compat_urlparse.urljoin(
|
||||||
|
man_url, decrypt_info['URI'])
|
||||||
|
if extra_query:
|
||||||
|
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||||
|
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||||
|
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||||
|
media_sequence = int(line[22:])
|
||||||
|
|
||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
|
|
||||||
|
|||||||
@@ -7,12 +7,13 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ABCIE(InfoExtractor):
|
class ABCIE(InfoExtractor):
|
||||||
IE_NAME = 'abc.net.au'
|
IE_NAME = 'abc.net.au'
|
||||||
_VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
||||||
@@ -93,3 +94,59 @@ class ABCIE(InfoExtractor):
|
|||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ABCIViewIE(InfoExtractor):
|
||||||
|
IE_NAME = 'abc.net.au:iview'
|
||||||
|
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
|
||||||
|
|
||||||
|
# ABC iview programs are normally available for 14 days only.
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00',
|
||||||
|
'md5': '979d10b2939101f0d27a06b79edad536',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'FA1505V024S00',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Series 27 Ep 24',
|
||||||
|
'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d',
|
||||||
|
'upload_date': '20160820',
|
||||||
|
'uploader_id': 'abc1',
|
||||||
|
'timestamp': 1471719600,
|
||||||
|
},
|
||||||
|
'skip': 'Video gone',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_params = self._parse_json(self._search_regex(
|
||||||
|
r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
|
||||||
|
title = video_params['title']
|
||||||
|
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
||||||
|
|
||||||
|
formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
src_vtt = stream.get('captions', {}).get('src-vtt')
|
||||||
|
if src_vtt:
|
||||||
|
subtitles['en'] = [{
|
||||||
|
'url': src_vtt,
|
||||||
|
'ext': 'vtt',
|
||||||
|
}]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||||
|
'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
|
||||||
|
'duration': int_or_none(video_params.get('eventDuration')),
|
||||||
|
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||||
|
'series': video_params.get('seriesTitle'),
|
||||||
|
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||||
|
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)),
|
||||||
|
'episode': self._html_search_meta('episode_title', webpage),
|
||||||
|
'uploader_id': video_params.get('channel'),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from ..compat import compat_urlparse
|
|||||||
|
|
||||||
class AbcNewsVideoIE(AMPIE):
|
class AbcNewsVideoIE(AMPIE):
|
||||||
IE_NAME = 'abcnews:video'
|
IE_NAME = 'abcnews:video'
|
||||||
_VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
_VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
||||||
@@ -49,7 +49,7 @@ class AbcNewsVideoIE(AMPIE):
|
|||||||
|
|
||||||
class AbcNewsIE(InfoExtractor):
|
class AbcNewsIE(InfoExtractor):
|
||||||
IE_NAME = 'abcnews'
|
IE_NAME = 'abcnews'
|
||||||
_VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
||||||
|
|||||||
@@ -1,13 +1,19 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import parse_iso8601
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class Abc7NewsIE(InfoExtractor):
|
class ABCOTVSIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
|
IE_NAME = 'abcotvs'
|
||||||
|
IE_DESC = 'ABC Owned Television Stations'
|
||||||
|
_VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
|
'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
|
||||||
@@ -15,7 +21,7 @@ class Abc7NewsIE(InfoExtractor):
|
|||||||
'id': '472581',
|
'id': '472581',
|
||||||
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'East Bay museum celebrates history of synthesized music',
|
'title': 'East Bay museum celebrates vintage synthesizers',
|
||||||
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
|
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'timestamp': 1421123075,
|
'timestamp': 1421123075,
|
||||||
@@ -41,7 +47,7 @@ class Abc7NewsIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
m3u8 = self._html_search_meta(
|
m3u8 = self._html_search_meta(
|
||||||
'contentURL', webpage, 'm3u8 url', fatal=True)
|
'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0]
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
|
formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
@@ -66,3 +72,41 @@ class Abc7NewsIE(InfoExtractor):
|
|||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ABCOTVSClipsIE(InfoExtractor):
|
||||||
|
IE_NAME = 'abcotvs:clips'
|
||||||
|
_VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://clips.abcotvs.com/kabc/video/214814',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '214814',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'SpaceX launch pad explosion destroys rocket, satellite',
|
||||||
|
'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
|
||||||
|
'upload_date': '20160901',
|
||||||
|
'timestamp': 1472756695,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0]
|
||||||
|
title = video_data['title']
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
video_data['videoURL'].split('?')[0], video_id, 'mp4')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': video_data.get('description'),
|
||||||
|
'thumbnail': video_data.get('thumbnailURL'),
|
||||||
|
'duration': int_or_none(video_data.get('duration')),
|
||||||
|
'timestamp': int_or_none(video_data.get('pubDate')),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
1428
youtube_dl/extractor/adobepass.py
Normal file
1428
youtube_dl/extractor/adobepass.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -156,7 +156,10 @@ class AdobeTVVideoIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
video_data = self._download_json(url + '?format=json', video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_data = self._parse_json(self._search_regex(
|
||||||
|
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
|
'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
|
||||||
|
|||||||
@@ -3,16 +3,14 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .turner import TurnerBaseIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
int_or_none,
|
||||||
xpath_text,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AdultSwimIE(InfoExtractor):
|
class AdultSwimIE(TurnerBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
|
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -83,6 +81,21 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# heroMetadata.trailer
|
||||||
|
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Decker - Inside Decker: A New Hero',
|
||||||
|
'description': 'md5:c916df071d425d62d70c86d4399d3ee0',
|
||||||
|
'duration': 249.008,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -133,79 +146,56 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
if video_info is None:
|
if video_info is None:
|
||||||
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||||
video_info = bootstrapped_data['slugged_video']
|
video_info = bootstrapped_data['slugged_video']
|
||||||
else:
|
if not video_info:
|
||||||
raise ExtractorError('Unable to find video info')
|
video_info = bootstrapped_data.get(
|
||||||
|
'heroMetadata', {}).get('trailer', {}).get('video')
|
||||||
|
if not video_info:
|
||||||
|
video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
|
||||||
|
if not video_info:
|
||||||
|
raise ExtractorError('Unable to find video info')
|
||||||
|
|
||||||
show = bootstrapped_data['show']
|
show = bootstrapped_data['show']
|
||||||
show_title = show['title']
|
show_title = show['title']
|
||||||
stream = video_info.get('stream')
|
stream = video_info.get('stream')
|
||||||
clips = [stream] if stream else video_info.get('clips')
|
if stream and stream.get('videoPlaybackID'):
|
||||||
if not clips:
|
segment_ids = [stream['videoPlaybackID']]
|
||||||
raise ExtractorError(
|
elif video_info.get('clips'):
|
||||||
'This video is only available via cable service provider subscription that'
|
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||||
' is not currently supported. You may want to use --cookies.'
|
elif video_info.get('videoPlaybackID'):
|
||||||
if video_info.get('auth') is True else 'Unable to find stream or clips',
|
segment_ids = [video_info['videoPlaybackID']]
|
||||||
expected=True)
|
else:
|
||||||
segment_ids = [clip['videoPlaybackID'] for clip in clips]
|
if video_info.get('auth') is True:
|
||||||
|
raise ExtractorError(
|
||||||
|
'This video is only available via cable service provider subscription that'
|
||||||
|
' is not currently supported. You may want to use --cookies.', expected=True)
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Unable to find stream or clips')
|
||||||
|
|
||||||
episode_id = video_info['id']
|
episode_id = video_info['id']
|
||||||
episode_title = video_info['title']
|
episode_title = video_info['title']
|
||||||
episode_description = video_info['description']
|
episode_description = video_info.get('description')
|
||||||
episode_duration = video_info.get('duration')
|
episode_duration = int_or_none(video_info.get('duration'))
|
||||||
|
view_count = int_or_none(video_info.get('views'))
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for part_num, segment_id in enumerate(segment_ids):
|
for part_num, segment_id in enumerate(segment_ids):
|
||||||
segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id
|
segement_info = self._extract_cvp_info(
|
||||||
|
'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
|
||||||
|
segment_id, {
|
||||||
|
'secure': {
|
||||||
|
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
|
||||||
|
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
|
||||||
|
},
|
||||||
|
})
|
||||||
segment_title = '%s - %s' % (show_title, episode_title)
|
segment_title = '%s - %s' % (show_title, episode_title)
|
||||||
if len(segment_ids) > 1:
|
if len(segment_ids) > 1:
|
||||||
segment_title += ' Part %d' % (part_num + 1)
|
segment_title += ' Part %d' % (part_num + 1)
|
||||||
|
segement_info.update({
|
||||||
idoc = self._download_xml(
|
|
||||||
segment_url, segment_title,
|
|
||||||
'Downloading segment information', 'Unable to download segment information')
|
|
||||||
|
|
||||||
segment_duration = float_or_none(
|
|
||||||
xpath_text(idoc, './/trt', 'segment duration').strip())
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
file_els = idoc.findall('.//files/file') or idoc.findall('./files/file')
|
|
||||||
|
|
||||||
unique_urls = []
|
|
||||||
unique_file_els = []
|
|
||||||
for file_el in file_els:
|
|
||||||
media_url = file_el.text
|
|
||||||
if not media_url or determine_ext(media_url) == 'f4m':
|
|
||||||
continue
|
|
||||||
if file_el.text not in unique_urls:
|
|
||||||
unique_urls.append(file_el.text)
|
|
||||||
unique_file_els.append(file_el)
|
|
||||||
|
|
||||||
for file_el in unique_file_els:
|
|
||||||
bitrate = file_el.attrib.get('bitrate')
|
|
||||||
ftype = file_el.attrib.get('type')
|
|
||||||
media_url = file_el.text
|
|
||||||
if determine_ext(media_url) == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
media_url, segment_title, 'mp4', preference=0,
|
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'format_id': '%s_%s' % (bitrate, ftype),
|
|
||||||
'url': file_el.text.strip(),
|
|
||||||
# The bitrate may not be a number (for example: 'iphone')
|
|
||||||
'tbr': int(bitrate) if bitrate.isdigit() else None,
|
|
||||||
})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
entries.append({
|
|
||||||
'id': segment_id,
|
'id': segment_id,
|
||||||
'title': segment_title,
|
'title': segment_title,
|
||||||
'formats': formats,
|
'description': episode_description,
|
||||||
'duration': segment_duration,
|
|
||||||
'description': episode_description
|
|
||||||
})
|
})
|
||||||
|
entries.append(segement_info)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
@@ -214,5 +204,6 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
'entries': entries,
|
'entries': entries,
|
||||||
'title': '%s - %s' % (show_title, episode_title),
|
'title': '%s - %s' % (show_title, episode_title),
|
||||||
'description': episode_description,
|
'description': episode_description,
|
||||||
'duration': episode_duration
|
'duration': episode_duration,
|
||||||
|
'view_count': view_count,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,23 +2,140 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .theplatform import ThePlatformIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
extract_attributes,
|
||||||
|
get_element_by_attribute,
|
||||||
|
)
|
||||||
|
from ..compat import (
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AENetworksIE(InfoExtractor):
|
class AENetworksBaseIE(ThePlatformIE):
|
||||||
|
_THEPLATFORM_KEY = 'crazyjava'
|
||||||
|
_THEPLATFORM_SECRET = 's3cr3t'
|
||||||
|
|
||||||
|
|
||||||
|
class AENetworksIE(AENetworksBaseIE):
|
||||||
IE_NAME = 'aenetworks'
|
IE_NAME = 'aenetworks'
|
||||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
|
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||||
|
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '22253814',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Winter Is Coming',
|
||||||
|
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||||
|
'timestamp': 1338306241,
|
||||||
|
'upload_date': '20120529',
|
||||||
|
'uploader': 'AENE-NEW',
|
||||||
|
},
|
||||||
|
'add_ie': ['ThePlatform'],
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '71889446852',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'SERIES4317',
|
||||||
|
'title': 'Atlanta Plastic',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 2,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||||
|
'only_matching': True
|
||||||
|
}]
|
||||||
|
_DOMAIN_TO_REQUESTOR_ID = {
|
||||||
|
'history.com': 'HISTORY',
|
||||||
|
'aetv.com': 'AETV',
|
||||||
|
'mylifetime.com': 'LIFETIME',
|
||||||
|
'fyi.tv': 'FYI',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
domain, show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
display_id = show_path or movie_display_id
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
if show_path:
|
||||||
|
url_parts = show_path.split('/')
|
||||||
|
url_parts_len = len(url_parts)
|
||||||
|
if url_parts_len == 1:
|
||||||
|
entries = []
|
||||||
|
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
|
||||||
|
entries.append(self.url_result(
|
||||||
|
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, self._html_search_meta('aetn:SeriesId', webpage),
|
||||||
|
self._html_search_meta('aetn:SeriesTitle', webpage))
|
||||||
|
elif url_parts_len == 2:
|
||||||
|
entries = []
|
||||||
|
for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
|
||||||
|
episode_attributes = extract_attributes(episode_item)
|
||||||
|
episode_url = compat_urlparse.urljoin(
|
||||||
|
url, episode_attributes['data-canonical'])
|
||||||
|
entries.append(self.url_result(
|
||||||
|
episode_url, 'AENetworks',
|
||||||
|
episode_attributes['data-videoid']))
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, self._html_search_meta('aetn:SeasonId', webpage))
|
||||||
|
|
||||||
|
query = {
|
||||||
|
'mbr': 'true',
|
||||||
|
'assetTypes': 'medium_video_s3'
|
||||||
|
}
|
||||||
|
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||||
|
media_url = self._search_regex(
|
||||||
|
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
|
||||||
|
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||||
|
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||||
|
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||||
|
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||||
|
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||||
|
resource = self._get_mvpd_resource(
|
||||||
|
requestor_id, theplatform_metadata['title'],
|
||||||
|
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||||
|
theplatform_metadata['ratings'][0]['rating'])
|
||||||
|
query['auth'] = self._extract_mvpd_auth(
|
||||||
|
url, video_id, requestor_id, resource)
|
||||||
|
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
||||||
|
media_url = update_url_query(media_url, query)
|
||||||
|
media_url = self._sign_url(media_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
|
||||||
|
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info.update({
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class HistoryTopicIE(AENetworksBaseIE):
|
||||||
|
IE_NAME = 'history:topic'
|
||||||
|
IE_DESC = 'History.com Topic'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)(?:/[^/]+(?:/(?P<video_display_id>[^/?#]+))?)?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
|
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'g12m5Gyt3fdR',
|
'id': '40700995724',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Bet You Didn't Know: Valentine's Day",
|
'title': "Bet You Didn't Know: Valentine's Day",
|
||||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||||
@@ -31,57 +148,61 @@ class AENetworksIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
'expected_warnings': ['JSON-LD'],
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos',
|
||||||
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
|
'info_dict':
|
||||||
'info_dict': {
|
{
|
||||||
'id': 'eg47EERs_JsZ',
|
'id': 'world-war-i-history',
|
||||||
'ext': 'mp4',
|
'title': 'World War I History',
|
||||||
'title': 'Winter Is Coming',
|
|
||||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
|
||||||
'timestamp': 1338306241,
|
|
||||||
'upload_date': '20120529',
|
|
||||||
'uploader': 'AENE-NEW',
|
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'playlist_mincount': 24,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
|
'url': 'http://www.history.com/topics/world-war-i-history/videos',
|
||||||
'only_matching': True
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
|
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history',
|
||||||
'only_matching': True
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
|
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/speeches',
|
||||||
'only_matching': True
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def theplatform_url_result(self, theplatform_url, video_id, query):
|
||||||
page_type, video_id = re.match(self._VALID_URL, url).groups()
|
return {
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_url_re = [
|
|
||||||
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
|
|
||||||
r"media_url\s*=\s*'([^']+)'"
|
|
||||||
]
|
|
||||||
video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
|
|
||||||
query = {'mbr': 'true'}
|
|
||||||
if page_type == 'shows':
|
|
||||||
query['assetTypes'] = 'medium_video_s3'
|
|
||||||
if 'switch=hds' in video_url:
|
|
||||||
query['switch'] = 'hls'
|
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, fatal=False)
|
|
||||||
info.update({
|
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
|
'id': video_id,
|
||||||
'url': smuggle_url(
|
'url': smuggle_url(
|
||||||
update_url_query(video_url, query),
|
update_url_query(theplatform_url, query),
|
||||||
{
|
{
|
||||||
'sig': {
|
'sig': {
|
||||||
'key': 'crazyjava',
|
'key': self._THEPLATFORM_KEY,
|
||||||
'secret': 's3cr3t'},
|
'secret': self._THEPLATFORM_SECRET,
|
||||||
|
},
|
||||||
'force_smil_url': True
|
'force_smil_url': True
|
||||||
}),
|
}),
|
||||||
})
|
'ie_key': 'ThePlatform',
|
||||||
return info
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
topic_id, video_display_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
if video_display_id:
|
||||||
|
webpage = self._download_webpage(url, video_display_id)
|
||||||
|
release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups()
|
||||||
|
release_url = unescapeHTML(release_url)
|
||||||
|
|
||||||
|
return self.theplatform_url_result(
|
||||||
|
release_url, video_id, {
|
||||||
|
'mbr': 'true',
|
||||||
|
'switch': 'hls'
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
webpage = self._download_webpage(url, topic_id)
|
||||||
|
entries = []
|
||||||
|
for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage):
|
||||||
|
video_attributes = extract_attributes(episode_item)
|
||||||
|
entries.append(self.theplatform_url_result(
|
||||||
|
video_attributes['data-release-url'], video_attributes['data-id'], {
|
||||||
|
'mbr': 'true',
|
||||||
|
'switch': 'hls'
|
||||||
|
}))
|
||||||
|
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))
|
||||||
|
|||||||
@@ -24,10 +24,10 @@ class AftonbladetIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
# find internal video meta data
|
# find internal video meta data
|
||||||
meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
|
meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json'
|
||||||
player_config = self._parse_json(self._html_search_regex(
|
player_config = self._parse_json(self._html_search_regex(
|
||||||
r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
|
r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
|
||||||
internal_meta_id = player_config['videoId']
|
internal_meta_id = player_config['aptomaVideoId']
|
||||||
internal_meta_url = meta_url % internal_meta_id
|
internal_meta_url = meta_url % internal_meta_id
|
||||||
internal_meta_json = self._download_json(
|
internal_meta_json = self._download_json(
|
||||||
internal_meta_url, video_id, 'Downloading video meta data')
|
internal_meta_url, video_id, 'Downloading video meta data')
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class AlJazeeraIE(InfoExtractor):
|
class AlJazeeraIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||||
|
|||||||
91
youtube_dl/extractor/amcnetworks.py
Normal file
91
youtube_dl/extractor/amcnetworks.py
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .theplatform import ThePlatformIE
|
||||||
|
from ..utils import (
|
||||||
|
update_url_query,
|
||||||
|
parse_age_limit,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AMCNetworksIE(ThePlatformIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||||
|
'md5': '',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 's3MX01Nl4vPH',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Maron - Season 4 - Step 1',
|
||||||
|
'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
|
||||||
|
'age_limit': 17,
|
||||||
|
'upload_date': '20160505',
|
||||||
|
'timestamp': 1462468831,
|
||||||
|
'uploader': 'AMCN',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ifc.com/movies/chaos',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
query = {
|
||||||
|
'mbr': 'true',
|
||||||
|
'manifest': 'm3u',
|
||||||
|
}
|
||||||
|
media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
|
||||||
|
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||||
|
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
|
||||||
|
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||||
|
video_id = theplatform_metadata['pid']
|
||||||
|
title = theplatform_metadata['title']
|
||||||
|
rating = theplatform_metadata['ratings'][0]['rating']
|
||||||
|
auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
|
||||||
|
if auth_required == 'true':
|
||||||
|
requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
|
||||||
|
resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
|
||||||
|
query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||||
|
media_url = update_url_query(media_url, query)
|
||||||
|
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info.update({
|
||||||
|
'id': video_id,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'formats': formats,
|
||||||
|
'age_limit': parse_age_limit(parse_age_limit(rating)),
|
||||||
|
})
|
||||||
|
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
|
||||||
|
if ns_keys:
|
||||||
|
ns = list(ns_keys)[0]
|
||||||
|
series = theplatform_metadata.get(ns + '$show')
|
||||||
|
season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
|
||||||
|
episode = theplatform_metadata.get(ns + '$episodeTitle')
|
||||||
|
episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
|
||||||
|
if season_number:
|
||||||
|
title = 'Season %d - %s' % (season_number, title)
|
||||||
|
if series:
|
||||||
|
title = '%s - %s' % (series, title)
|
||||||
|
info.update({
|
||||||
|
'title': title,
|
||||||
|
'series': series,
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode': episode,
|
||||||
|
'episode_number': episode_number,
|
||||||
|
})
|
||||||
|
return info
|
||||||
@@ -5,6 +5,8 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
mimetype2ext,
|
||||||
|
determine_ext,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -50,21 +52,25 @@ class AMPIE(InfoExtractor):
|
|||||||
if isinstance(media_content, dict):
|
if isinstance(media_content, dict):
|
||||||
media_content = [media_content]
|
media_content = [media_content]
|
||||||
for media_data in media_content:
|
for media_data in media_content:
|
||||||
media = media_data['@attributes']
|
media = media_data.get('@attributes', {})
|
||||||
media_type = media['type']
|
media_url = media.get('url')
|
||||||
if media_type in ('video/f4m', 'application/f4m+xml'):
|
if not media_url:
|
||||||
|
continue
|
||||||
|
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
|
||||||
|
if ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||||
video_id, f4m_id='hds', fatal=False))
|
video_id, f4m_id='hds', fatal=False))
|
||||||
elif media_type == 'application/x-mpegURL':
|
elif ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||||
'url': media['url'],
|
'url': media['url'],
|
||||||
'tbr': int_or_none(media.get('bitrate')),
|
'tbr': int_or_none(media.get('bitrate')),
|
||||||
'filesize': int_or_none(media.get('fileSize')),
|
'filesize': int_or_none(media.get('fileSize')),
|
||||||
|
'ext': ext,
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||||
_NETRC_MACHINE = 'animeondemand'
|
_NETRC_MACHINE = 'animeondemand'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# jap, OmU
|
||||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '161',
|
'id': '161',
|
||||||
@@ -30,17 +31,21 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 4,
|
'playlist_mincount': 4,
|
||||||
}, {
|
}, {
|
||||||
# Film wording is used instead of Episode
|
# Film wording is used instead of Episode, ger/jap, Dub/OmU
|
||||||
'url': 'https://www.anime-on-demand.de/anime/39',
|
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# Episodes without titles
|
# Episodes without titles, jap, OmU
|
||||||
'url': 'https://www.anime-on-demand.de/anime/162',
|
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# ger/jap, Dub/OmU, account required
|
# ger/jap, Dub/OmU, account required
|
||||||
'url': 'https://www.anime-on-demand.de/anime/169',
|
'url': 'https://www.anime-on-demand.de/anime/169',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||||
|
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
@@ -110,35 +115,12 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
for num, episode_html in enumerate(re.findall(
|
def extract_info(html, video_id, num=None):
|
||||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
|
title, description = [None] * 2
|
||||||
episodebox_title = self._search_regex(
|
|
||||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
|
||||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
|
||||||
episode_html, 'episodebox title', default=None, group='title')
|
|
||||||
if not episodebox_title:
|
|
||||||
continue
|
|
||||||
|
|
||||||
episode_number = int(self._search_regex(
|
|
||||||
r'(?:Episode|Film)\s*(\d+)',
|
|
||||||
episodebox_title, 'episode number', default=num))
|
|
||||||
episode_title = self._search_regex(
|
|
||||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
|
||||||
episodebox_title, 'episode title', default=None)
|
|
||||||
|
|
||||||
video_id = 'episode-%d' % episode_number
|
|
||||||
|
|
||||||
common_info = {
|
|
||||||
'id': video_id,
|
|
||||||
'series': anime_title,
|
|
||||||
'episode': episode_title,
|
|
||||||
'episode_number': episode_number,
|
|
||||||
}
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
for input_ in re.findall(
|
for input_ in re.findall(
|
||||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
|
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
|
||||||
attributes = extract_attributes(input_)
|
attributes = extract_attributes(input_)
|
||||||
playlist_urls = []
|
playlist_urls = []
|
||||||
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
||||||
@@ -161,7 +143,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
format_id_list.append(lang)
|
format_id_list.append(lang)
|
||||||
if kind:
|
if kind:
|
||||||
format_id_list.append(kind)
|
format_id_list.append(kind)
|
||||||
if not format_id_list:
|
if not format_id_list and num is not None:
|
||||||
format_id_list.append(compat_str(num))
|
format_id_list.append(compat_str(num))
|
||||||
format_id = '-'.join(format_id_list)
|
format_id = '-'.join(format_id_list)
|
||||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||||
@@ -215,28 +197,74 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
formats.extend(file_formats)
|
formats.extend(file_formats)
|
||||||
|
|
||||||
if formats:
|
return {
|
||||||
self._sort_formats(formats)
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
def extract_entries(html, video_id, common_info, num=None):
|
||||||
|
info = extract_info(html, video_id, num)
|
||||||
|
|
||||||
|
if info['formats']:
|
||||||
|
self._sort_formats(info['formats'])
|
||||||
f = common_info.copy()
|
f = common_info.copy()
|
||||||
f.update({
|
f.update(info)
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'formats': formats,
|
|
||||||
})
|
|
||||||
entries.append(f)
|
entries.append(f)
|
||||||
|
|
||||||
# Extract teaser only when full episode is not available
|
# Extract teaser/trailer only when full episode is not available
|
||||||
if not formats:
|
if not info['formats']:
|
||||||
m = re.search(
|
m = re.search(
|
||||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
|
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
|
||||||
episode_html)
|
html)
|
||||||
if m:
|
if m:
|
||||||
f = common_info.copy()
|
f = common_info.copy()
|
||||||
f.update({
|
f.update({
|
||||||
'id': '%s-teaser' % f['id'],
|
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||||
'title': m.group('title'),
|
'title': m.group('title'),
|
||||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
'url': compat_urlparse.urljoin(url, m.group('href')),
|
||||||
})
|
})
|
||||||
entries.append(f)
|
entries.append(f)
|
||||||
|
|
||||||
|
def extract_episodes(html):
|
||||||
|
for num, episode_html in enumerate(re.findall(
|
||||||
|
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
|
||||||
|
episodebox_title = self._search_regex(
|
||||||
|
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||||
|
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||||
|
episode_html, 'episodebox title', default=None, group='title')
|
||||||
|
if not episodebox_title:
|
||||||
|
continue
|
||||||
|
|
||||||
|
episode_number = int(self._search_regex(
|
||||||
|
r'(?:Episode|Film)\s*(\d+)',
|
||||||
|
episodebox_title, 'episode number', default=num))
|
||||||
|
episode_title = self._search_regex(
|
||||||
|
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||||
|
episodebox_title, 'episode title', default=None)
|
||||||
|
|
||||||
|
video_id = 'episode-%d' % episode_number
|
||||||
|
|
||||||
|
common_info = {
|
||||||
|
'id': video_id,
|
||||||
|
'series': anime_title,
|
||||||
|
'episode': episode_title,
|
||||||
|
'episode_number': episode_number,
|
||||||
|
}
|
||||||
|
|
||||||
|
extract_entries(episode_html, video_id, common_info)
|
||||||
|
|
||||||
|
def extract_film(html, video_id):
|
||||||
|
common_info = {
|
||||||
|
'id': anime_id,
|
||||||
|
'title': anime_title,
|
||||||
|
'description': anime_description,
|
||||||
|
}
|
||||||
|
extract_entries(html, video_id, common_info)
|
||||||
|
|
||||||
|
extract_episodes(webpage)
|
||||||
|
|
||||||
|
if not entries:
|
||||||
|
extract_film(webpage, anime_id)
|
||||||
|
|
||||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
||||||
|
|||||||
@@ -123,6 +123,10 @@ class AolFeaturesIE(InfoExtractor):
|
|||||||
'title': 'What To Watch - February 17, 2016',
|
'title': 'What To Watch - February 17, 2016',
|
||||||
},
|
},
|
||||||
'add_ie': ['FiveMin'],
|
'add_ie': ['FiveMin'],
|
||||||
|
'params': {
|
||||||
|
# encrypted m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@@ -15,7 +13,7 @@ class AparatIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.aparat.com/v/wP8On',
|
'url': 'http://www.aparat.com/v/wP8On',
|
||||||
'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1',
|
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'wP8On',
|
'id': 'wP8On',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -31,13 +29,13 @@ class AparatIE(InfoExtractor):
|
|||||||
# Note: There is an easier-to-parse configuration at
|
# Note: There is an easier-to-parse configuration at
|
||||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||||
# but the URL in there does not work
|
# but the URL in there does not work
|
||||||
embed_url = ('http://www.aparat.com/video/video/embed/videohash/' +
|
embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
|
||||||
video_id + '/vt/frame')
|
|
||||||
webpage = self._download_webpage(embed_url, video_id)
|
webpage = self._download_webpage(embed_url, video_id)
|
||||||
|
|
||||||
video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
|
file_list = self._parse_json(self._search_regex(
|
||||||
r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
|
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
|
||||||
for i, video_url in enumerate(video_urls):
|
for i, item in enumerate(file_list[0]):
|
||||||
|
video_url = item['file']
|
||||||
req = HEADRequest(video_url)
|
req = HEADRequest(video_url)
|
||||||
res = self._request_webpage(
|
res = self._request_webpage(
|
||||||
req, video_id, note='Testing video URL %d' % i, errnote=False)
|
req, video_id, note='Testing video URL %d' % i, errnote=False)
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ from .common import InfoExtractor
|
|||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
|
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'manofsteel',
|
'id': '5111',
|
||||||
|
'title': 'Man of Steel',
|
||||||
},
|
},
|
||||||
'playlist': [
|
'playlist': [
|
||||||
{
|
{
|
||||||
@@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
'id': 'blackthorn',
|
'id': 'blackthorn',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
|
'expected_warnings': ['Unable to download JSON metadata'],
|
||||||
|
}, {
|
||||||
|
# json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
|
||||||
|
'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '15881',
|
||||||
|
'title': 'Kung Fu Panda 3',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 4,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
|
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
movie = mobj.group('movie')
|
movie = mobj.group('movie')
|
||||||
uploader_id = mobj.group('company')
|
uploader_id = mobj.group('company')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, movie)
|
||||||
|
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
|
||||||
|
film_data = self._download_json(
|
||||||
|
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
|
||||||
|
film_id, fatal=False)
|
||||||
|
|
||||||
|
if film_data:
|
||||||
|
entries = []
|
||||||
|
for clip in film_data.get('clips', []):
|
||||||
|
clip_title = clip['title']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for version, version_data in clip.get('versions', {}).items():
|
||||||
|
for size, size_data in version_data.get('sizes', {}).items():
|
||||||
|
src = size_data.get('src')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'format_id': '%s-%s' % (version, size),
|
||||||
|
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
|
||||||
|
'width': int_or_none(size_data.get('width')),
|
||||||
|
'height': int_or_none(size_data.get('height')),
|
||||||
|
'language': version[:2],
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
entries.append({
|
||||||
|
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
|
||||||
|
'formats': formats,
|
||||||
|
'title': clip_title,
|
||||||
|
'thumbnail': clip.get('screen') or clip.get('thumb'),
|
||||||
|
'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
|
||||||
|
'upload_date': unified_strdate(clip.get('posted')),
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
})
|
||||||
|
|
||||||
|
page_data = film_data.get('page', {})
|
||||||
|
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
|
||||||
|
|
||||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||||
|
|
||||||
def fix_html(s):
|
def fix_html(s):
|
||||||
|
|||||||
@@ -1,67 +1,65 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .jwplatform import JWPlatformBaseIE
|
||||||
from ..utils import unified_strdate
|
from ..utils import (
|
||||||
|
unified_strdate,
|
||||||
|
clean_html,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ArchiveOrgIE(InfoExtractor):
|
class ArchiveOrgIE(JWPlatformBaseIE):
|
||||||
IE_NAME = 'archive.org'
|
IE_NAME = 'archive.org'
|
||||||
IE_DESC = 'archive.org videos'
|
IE_DESC = 'archive.org videos'
|
||||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||||
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||||
'ext': 'ogv',
|
'ext': 'ogg',
|
||||||
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
||||||
'description': 'md5:1780b464abaca9991d8968c877bb53ed',
|
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
|
||||||
'upload_date': '19681210',
|
'upload_date': '19681210',
|
||||||
'uploader': 'SRI International'
|
'uploader': 'SRI International'
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://archive.org/details/Cops1922',
|
'url': 'https://archive.org/details/Cops1922',
|
||||||
'md5': '18f2a19e6d89af8425671da1cf3d4e04',
|
'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Cops1922',
|
'id': 'Cops1922',
|
||||||
'ext': 'ogv',
|
'ext': 'mp4',
|
||||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||||
'description': 'md5:70f72ee70882f713d4578725461ffcc3',
|
'description': 'md5:b4544662605877edd99df22f9620d858',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://archive.org/embed/' + video_id, video_id)
|
||||||
|
jwplayer_playlist = self._parse_json(self._search_regex(
|
||||||
|
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\);",
|
||||||
|
webpage, 'jwplayer playlist'), video_id)
|
||||||
|
info = self._parse_jwplayer_data(
|
||||||
|
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||||
|
|
||||||
json_url = url + ('&' if '?' in url else '?') + 'output=json'
|
def get_optional(metadata, field):
|
||||||
data = self._download_json(json_url, video_id)
|
return metadata.get(field, [None])[0]
|
||||||
|
|
||||||
def get_optional(data_dict, field):
|
metadata = self._download_json(
|
||||||
return data_dict['metadata'].get(field, [None])[0]
|
'http://archive.org/details/' + video_id, video_id, query={
|
||||||
|
'output': 'json',
|
||||||
title = get_optional(data, 'title')
|
})['metadata']
|
||||||
description = get_optional(data, 'description')
|
info.update({
|
||||||
uploader = get_optional(data, 'creator')
|
'title': get_optional(metadata, 'title') or info.get('title'),
|
||||||
upload_date = unified_strdate(get_optional(data, 'date'))
|
'description': clean_html(get_optional(metadata, 'description')),
|
||||||
|
})
|
||||||
formats = [
|
if info.get('_type') != 'playlist':
|
||||||
{
|
info.update({
|
||||||
'format': fdata['format'],
|
'uploader': get_optional(metadata, 'creator'),
|
||||||
'url': 'http://' + data['server'] + data['dir'] + fn,
|
'upload_date': unified_strdate(get_optional(metadata, 'date')),
|
||||||
'file_size': int(fdata['size']),
|
})
|
||||||
}
|
return info
|
||||||
for fn, fdata in data['files'].items()
|
|
||||||
if 'Video' in fdata['format']]
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'video',
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
'description': description,
|
|
||||||
'uploader': uploader,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'thumbnail': data.get('misc', {}).get('image'),
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -8,19 +8,19 @@ from .generic import GenericIE
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
get_element_by_attribute,
|
|
||||||
qualities,
|
qualities,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
from ..compat import compat_etree_fromstring
|
from ..compat import compat_etree_fromstring
|
||||||
|
|
||||||
|
|
||||||
class ARDMediathekIE(InfoExtractor):
|
class ARDMediathekIE(InfoExtractor):
|
||||||
IE_NAME = 'ARD:mediathek'
|
IE_NAME = 'ARD:mediathek'
|
||||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
||||||
@@ -35,6 +35,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||||
@@ -45,6 +46,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||||
'duration': 5252,
|
'duration': 5252,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
# audio
|
# audio
|
||||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||||
@@ -56,9 +58,22 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||||
'duration': 3240,
|
'duration': 3240,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# audio
|
||||||
|
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||||
|
'md5': '4e8f00631aac0395fee17368ac0e9867',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '30796318',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Vor dem Fest',
|
||||||
|
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
|
||||||
|
'duration': 3287,
|
||||||
|
},
|
||||||
|
'skip': 'Video is no longer available',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||||
@@ -114,11 +129,14 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
if ext == 'f4m':
|
if ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
|
update_url_query(stream_url, {
|
||||||
video_id, preference=-1, f4m_id='hds', fatal=False))
|
'hdcore': '3.1.1',
|
||||||
|
'plugin': 'aasp-3.1.1.69.124'
|
||||||
|
}),
|
||||||
|
video_id, f4m_id='hds', fatal=False))
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False))
|
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
else:
|
else:
|
||||||
if server and server.startswith('rtmp'):
|
if server and server.startswith('rtmp'):
|
||||||
f = {
|
f = {
|
||||||
@@ -220,7 +238,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class ARDIE(InfoExtractor):
|
class ARDIE(InfoExtractor):
|
||||||
_VALID_URL = '(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||||
'md5': 'd216c3a86493f9322545e045ddc3eb35',
|
'md5': 'd216c3a86493f9322545e045ddc3eb35',
|
||||||
@@ -232,7 +250,8 @@ class ARDIE(InfoExtractor):
|
|||||||
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
||||||
'upload_date': '20140804',
|
'upload_date': '20140804',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -274,41 +293,3 @@ class ARDIE(InfoExtractor):
|
|||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class SportschauIE(ARDMediathekIE):
|
|
||||||
IE_NAME = 'Sportschau'
|
|
||||||
_VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
|
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
|
||||||
'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
base_url = mobj.group('baseurl')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
title = get_element_by_attribute('class', 'headline', webpage)
|
|
||||||
description = self._html_search_meta('description', webpage, 'description')
|
|
||||||
|
|
||||||
info = self._extract_media_info(
|
|
||||||
base_url + '-mc_defaultQuality-h.json', webpage, video_id)
|
|
||||||
|
|
||||||
info.update({
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
})
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
|||||||
115
youtube_dl/extractor/arkena.py
Normal file
115
youtube_dl/extractor/arkena.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
|
parse_iso8601,
|
||||||
|
strip_jsonp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ArkenaIE(InfoExtractor):
    """Extractor for media played through play.arkena.com embed/config URLs."""

    _VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)'
    _TESTS = [{
        'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
        'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
        'info_dict': {
            'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'description': 'Royalty free test video',
            'timestamp': 1432816365,
            'upload_date': '20150528',
            'is_live': False,
        },
    }, {
        'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
        'only_matching': True,
    }, {
        'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
        'only_matching': True,
    }, {
        'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the first Arkena embed iframe URL in *webpage*, or None."""
        # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
        iframe = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
            webpage)
        if not iframe:
            return None
        return iframe.group('url')

    def _real_extract(self, url):
        """Download the player config for *url* and build the info dict."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        account_id = url_match.group('account_id')

        # The config endpoint answers with JSONP; strip_jsonp unwraps it.
        config_url = (
            'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
            % (video_id, account_id))
        config = self._download_json(
            config_url, video_id, transform_source=strip_jsonp)
        playlist = config['Playlist'][0]

        media_info = playlist['MediaInfo']
        title = media_info['Title']
        media_files = playlist['MediaFiles']

        # NOTE: is_live is reassigned for every usable format, so the value
        # returned reflects the last format that was processed.
        is_live = False
        formats = []
        for kind_case, kind_formats in media_files.items():
            kind = kind_case.lower()
            for media_file in kind_formats:
                media_url = media_file.get('Url')
                if not media_url:
                    continue
                is_live = media_file.get('Live') == 'true'
                exts = (
                    mimetype2ext(media_file.get('Type')),
                    determine_ext(media_url, None),
                )
                if kind == 'm3u8' or 'm3u8' in exts:
                    protocol = 'm3u8' if is_live else 'm3u8_native'
                    formats.extend(self._extract_m3u8_formats(
                        media_url, video_id, 'mp4',
                        entry_protocol=protocol,
                        m3u8_id=kind, fatal=False, live=is_live))
                elif kind == 'flash' or 'f4m' in exts:
                    formats.extend(self._extract_f4m_formats(
                        media_url, video_id, f4m_id=kind, fatal=False))
                elif kind == 'dash' or 'mpd' in exts:
                    formats.extend(self._extract_mpd_formats(
                        media_url, video_id, mpd_id=kind, fatal=False))
                elif kind == 'silverlight':
                    # TODO: process when ism is supported (see
                    # https://github.com/rg3/youtube-dl/issues/8118)
                    continue
                else:
                    # Plain progressive file: label it with its bitrate
                    # when one is available.
                    tbr = float_or_none(media_file.get('Bitrate'), 1000)
                    if tbr:
                        format_id = '%s-%d' % (kind, tbr)
                    else:
                        format_id = kind
                    formats.append({
                        'url': media_url,
                        'format_id': format_id,
                        'tbr': tbr,
                    })
        self._sort_formats(formats)

        description = media_info.get('Description')
        video_id = media_info.get('VideoId') or video_id
        timestamp = parse_iso8601(media_info.get('PublishDate'))

        thumbnails = []
        for poster in (media_info.get('Poster') or []):
            if not poster.get('Url'):
                continue
            thumbnails.append({
                'url': poster['Url'],
                'width': int_or_none(poster.get('Size')),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'is_live': is_live,
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
||||||
@@ -180,11 +180,14 @@ class ArteTVBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
class ArteTVPlus7IE(ArteTVBaseIE):
|
||||||
IE_NAME = 'arte.tv:+7'
|
IE_NAME = 'arte.tv:+7'
|
||||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
|
'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -240,10 +243,10 @@ class ArteTVPlus7IE(ArteTVBaseIE):
|
|||||||
return self._extract_from_json_url(json_url, video_id, lang, title=title)
|
return self._extract_from_json_url(json_url, video_id, lang, title=title)
|
||||||
# Different kind of embed URL (e.g.
|
# Different kind of embed URL (e.g.
|
||||||
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
|
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
|
||||||
embed_url = self._search_regex(
|
entries = [
|
||||||
r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
|
self.url_result(url)
|
||||||
webpage, 'embed url', group='url')
|
for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)]
|
||||||
return self.url_result(embed_url)
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
|
||||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||||
@@ -252,22 +255,17 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
|
|||||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '72176',
|
'id': '057405-001-A',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Folge 2 - Corporate Design',
|
'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)',
|
||||||
'upload_date': '20131004',
|
'upload_date': '20150716',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
|
'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
|
||||||
'info_dict': {
|
'playlist_count': 11,
|
||||||
'id': '160676',
|
'add_ie': ['Youtube'],
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Monty Python live (mostly)',
|
|
||||||
'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
|
|
||||||
'upload_date': '20140805',
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde',
|
'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -349,14 +347,13 @@ class ArteTVCinemaIE(ArteTVPlus7IE):
|
|||||||
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
|
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://cinema.arte.tv/de/node/38291',
|
'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck',
|
||||||
'md5': '6b275511a5107c60bacbeeda368c3aa1',
|
'md5': 'a5b9dd5575a11d93daf0e3f404f45438',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '055876-000_PWA12025-D',
|
'id': '062494-000-A',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Tod auf dem Nil',
|
'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck',
|
||||||
'upload_date': '20160122',
|
'upload_date': '20150807',
|
||||||
'description': 'md5:7f749bbb77d800ef2be11d54529b96bc',
|
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@@ -422,6 +419,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'PL-013263',
|
'id': 'PL-013263',
|
||||||
'title': 'Areva & Uramin',
|
'title': 'Areva & Uramin',
|
||||||
|
'description': 'md5:a1dc0312ce357c262259139cfd48c9bf',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 6,
|
||||||
}, {
|
}, {
|
||||||
|
|||||||
184
youtube_dl/extractor/awaan.py
Normal file
184
youtube_dl/extractor/awaan.py
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import base64
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse_urlencode,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
smuggle_url,
|
||||||
|
unsmuggle_url,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AWAANIE(InfoExtractor):
    """Dispatcher for awaan.ae / dcndigital.ae "show" URLs.

    Hands the URL over to the video or season extractor depending on which
    ids are present in it.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'

    def _real_extract(self, url):
        """Return a url_result pointing at the matching AWAAN extractor."""
        show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()

        # A positive video id takes precedence over the season id.
        if video_id and int(video_id) > 0:
            media_url = 'http://awaan.ae/media/%s' % video_id
            return self.url_result(media_url, 'AWAANVideo')

        if season_id and int(season_id) > 0:
            # Pass the show id along so the season extractor has it.
            season_url = smuggle_url(
                'http://awaan.ae/program/season/%s' % season_id,
                {'show_id': show_id})
            return self.url_result(season_url, 'AWAANSeason')

        program_url = 'http://awaan.ae/program/%s' % show_id
        return self.url_result(program_url, 'AWAANSeason')
|
||||||
|
|
||||||
|
|
||||||
|
class AWAANBaseIE(InfoExtractor):
    """Shared metadata parsing for the AWAAN video and live extractors."""

    def _parse_video_data(self, video_data, video_id, is_live):
        """Build the common info-dict fields from a metadata dict.

        The English title/description is preferred, with the Arabic one as
        fallback; 'title_ar' is required when 'title_en' is missing or empty.
        """
        title = video_data.get('title_en') or video_data['title_ar']
        img = video_data.get('img')

        if is_live:
            display_title = self._live_title(title)
        else:
            display_title = title

        description = (
            video_data.get('description_en')
            or video_data.get('description_ar'))

        if img:
            thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img
        else:
            thumbnail = None

        return {
            'id': video_id,
            'title': display_title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
            'is_live': is_live,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class AWAANVideoIE(AWAANBaseIE):
    """Extractor for single AWAAN videos (video, media and catchup URLs)."""

    IE_NAME = 'awaan:video'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
        'md5': '5f61c33bfc7794315c671a62d43116aa',
        'info_dict': {
            'id': '17375',
            'ext': 'mp4',
            'title': 'رحلة العمر : الحلقة 1',
            'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
            'duration': 2041,
            'timestamp': 1227504126,
            'upload_date': '20081124',
        },
    }, {
        'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the video metadata and delegate playback to MangomoloVideo."""
        video_id = self._match_id(url)

        metadata_url = (
            'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s'
            % video_id)
        video_data = self._download_json(
            metadata_url, video_id, headers={'Origin': 'http://awaan.ae'})
        info = self._parse_video_data(video_data, video_id, False)

        # Query string for the embed page that carries the actual media.
        query = {
            'id': video_data['id'],
            'user_id': video_data['user_id'],
            'signature': video_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        }
        embed_url = (
            'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?'
            + compat_urllib_parse_urlencode(query))

        # url_transparent: the metadata gathered here is merged into the
        # result of the delegated extractor.
        info.update({
            '_type': 'url_transparent',
            'url': embed_url,
            'ie_key': 'MangomoloVideo',
        })
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class AWAANLiveIE(AWAANBaseIE):
    """Extractor for AWAAN live channels."""

    IE_NAME = 'awaan:live'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
    _TEST = {
        'url': 'http://awaan.ae/live/6/dubai-tv',
        'info_dict': {
            'id': '6',
            'ext': 'mp4',
            'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'upload_date': '20150107',
            'timestamp': 1420588800,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the channel metadata and delegate playback to MangomoloLive."""
        channel_id = self._match_id(url)

        details_url = (
            'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s'
            % channel_id)
        channel_data = self._download_json(
            details_url, channel_id, headers={'Origin': 'http://awaan.ae'})
        info = self._parse_video_data(channel_data, channel_id, True)

        # The embed page takes the user and channel ids base64-encoded.
        encoded_user_id = base64.b64encode(
            channel_data['user_id'].encode()).decode()
        encoded_channel_id = base64.b64encode(
            channel_data['id'].encode()).decode()
        query = {
            'id': encoded_user_id,
            'channelid': encoded_channel_id,
            'signature': channel_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        }
        embed_url = (
            'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?'
            + compat_urllib_parse_urlencode(query))

        # url_transparent: the metadata gathered here is merged into the
        # result of the delegated extractor.
        info.update({
            '_type': 'url_transparent',
            'url': embed_url,
            'ie_key': 'MangomoloLive',
        })
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class AWAANSeasonIE(InfoExtractor):
    """Extractor that turns an AWAAN program or season into a playlist."""

    IE_NAME = 'awaan:season'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
    _TEST = {
        'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
        'info_dict': {
            'id': '7910',
            'title': 'محاضرات الشيخ الشعراوي',
        },
        'playlist_mincount': 27,
    }

    def _real_extract(self, url):
        """Return a playlist of AWAANVideo entries for the show/season.

        The URL carries either a show id or a season id. For season URLs the
        show id is taken from smuggled data when present, otherwise it is
        looked up through the season_info endpoint.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        show_id, season_id = re.match(self._VALID_URL, url).groups()

        data = {}
        if season_id:
            data['season'] = season_id
            show_id = smuggled_data.get('show_id')
            if show_id is None:
                season = self._download_json(
                    'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
                    season_id, headers={'Origin': 'http://awaan.ae'})
                show_id = season['id']
        data['show_id'] = show_id

        show = self._download_json(
            'http://admin.mangomolo.com/analytics/index.php/plus/show',
            show_id, data=urlencode_postdata(data), headers={
                'Origin': 'http://awaan.ae',
                'Content-Type': 'application/x-www-form-urlencoded'
            })
        if not season_id:
            season_id = show['default_season']

        # Default the title so that a season id missing from show['seasons']
        # does not leave `title` unassigned or make the method fall through
        # without returning a playlist.
        # NOTE(review): assumes playlist_result accepts a None title - confirm.
        title = None
        for season in show['seasons']:
            if season['id'] == season_id:
                title = season.get('title_en') or season['title_ar']
                break

        entries = []
        for video in show['videos']:
            video_id = compat_str(video['id'])
            entries.append(self.url_result(
                'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))

        return self.playlist_result(entries, season_id, title)
|
||||||
@@ -46,6 +46,7 @@ class AzubuIE(InfoExtractor):
|
|||||||
'uploader_id': 272749,
|
'uploader_id': 272749,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
|
'skip': 'Channel offline',
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -56,22 +57,26 @@ class AzubuIE(InfoExtractor):
|
|||||||
'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
|
'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
|
||||||
|
|
||||||
title = data['title'].strip()
|
title = data['title'].strip()
|
||||||
description = data['description']
|
description = data.get('description')
|
||||||
thumbnail = data['thumbnail']
|
thumbnail = data.get('thumbnail')
|
||||||
view_count = data['view_count']
|
view_count = data.get('view_count')
|
||||||
uploader = data['user']['username']
|
user = data.get('user', {})
|
||||||
uploader_id = data['user']['id']
|
uploader = user.get('username')
|
||||||
|
uploader_id = user.get('id')
|
||||||
|
|
||||||
stream_params = json.loads(data['stream_params'])
|
stream_params = json.loads(data['stream_params'])
|
||||||
|
|
||||||
timestamp = float_or_none(stream_params['creationDate'], 1000)
|
timestamp = float_or_none(stream_params.get('creationDate'), 1000)
|
||||||
duration = float_or_none(stream_params['length'], 1000)
|
duration = float_or_none(stream_params.get('length'), 1000)
|
||||||
|
|
||||||
renditions = stream_params.get('renditions') or []
|
renditions = stream_params.get('renditions') or []
|
||||||
video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
|
video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
|
||||||
if video:
|
if video:
|
||||||
renditions.append(video)
|
renditions.append(video)
|
||||||
|
|
||||||
|
if not renditions and not user.get('channel', {}).get('is_live', True):
|
||||||
|
raise ExtractorError('%s said: channel is offline.' % self.IE_NAME, expected=True)
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': fmt['url'],
|
'url': fmt['url'],
|
||||||
'width': fmt['frameWidth'],
|
'width': fmt['frameWidth'],
|
||||||
@@ -98,7 +103,7 @@ class AzubuIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class AzubuLiveIE(InfoExtractor):
|
class AzubuLiveIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www.azubu.tv/(?P<id>[^/]+)$'
|
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P<id>[^/]+)$'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
||||||
|
|||||||
@@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'uploader_id': 'dotscale',
|
'uploader_id': 'dotscale',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 7,
|
||||||
|
}, {
|
||||||
|
# with escaped quote in title
|
||||||
|
'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
|
||||||
|
'info_dict': {
|
||||||
|
'title': '"Entropy" EP',
|
||||||
|
'uploader_id': 'jstrecords',
|
||||||
|
'id': 'entropy-ep',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 3,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
entries = [
|
entries = [
|
||||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||||
for t_path in tracks_paths]
|
for t_path in tracks_paths]
|
||||||
title = self._search_regex(
|
title = self._html_search_regex(
|
||||||
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
|
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||||
|
webpage, 'title', fatal=False)
|
||||||
|
if title:
|
||||||
|
title = title.replace(r'\"', '"')
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
|
|||||||
@@ -2,19 +2,23 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -31,7 +35,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
music/clips[/#]|
|
music/clips[/#]|
|
||||||
radio/player/
|
radio/player/
|
||||||
)
|
)
|
||||||
(?P<id>%s)
|
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||||
''' % _ID_REGEX
|
''' % _ID_REGEX
|
||||||
|
|
||||||
_MEDIASELECTOR_URLS = [
|
_MEDIASELECTOR_URLS = [
|
||||||
@@ -192,6 +196,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Now it\'s really geo-restricted',
|
||||||
}, {
|
}, {
|
||||||
# compact player (https://github.com/rg3/youtube-dl/issues/8147)
|
# compact player (https://github.com/rg3/youtube-dl/issues/8147)
|
||||||
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
|
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
|
||||||
@@ -228,51 +233,6 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||||
|
|
||||||
def _extract_connection(self, connection, programme_id):
|
|
||||||
formats = []
|
|
||||||
kind = connection.get('kind')
|
|
||||||
protocol = connection.get('protocol')
|
|
||||||
supplier = connection.get('supplier')
|
|
||||||
if protocol == 'http':
|
|
||||||
href = connection.get('href')
|
|
||||||
transfer_format = connection.get('transferFormat')
|
|
||||||
# ASX playlist
|
|
||||||
if supplier == 'asx':
|
|
||||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
|
||||||
formats.append({
|
|
||||||
'url': ref,
|
|
||||||
'format_id': 'ref%s_%s' % (i, supplier),
|
|
||||||
})
|
|
||||||
# Skip DASH until supported
|
|
||||||
elif transfer_format == 'dash':
|
|
||||||
pass
|
|
||||||
elif transfer_format == 'hls':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id=supplier, fatal=False))
|
|
||||||
# Direct link
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': href,
|
|
||||||
'format_id': supplier or kind or protocol,
|
|
||||||
})
|
|
||||||
elif protocol == 'rtmp':
|
|
||||||
application = connection.get('application', 'ondemand')
|
|
||||||
auth_string = connection.get('authString')
|
|
||||||
identifier = connection.get('identifier')
|
|
||||||
server = connection.get('server')
|
|
||||||
formats.append({
|
|
||||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
|
||||||
'play_path': identifier,
|
|
||||||
'app': '%s?%s' % (application, auth_string),
|
|
||||||
'page_url': 'http://www.bbc.co.uk',
|
|
||||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
|
||||||
'rtmp_live': False,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': supplier,
|
|
||||||
})
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_items(self, playlist):
|
def _extract_items(self, playlist):
|
||||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||||
|
|
||||||
@@ -293,46 +253,6 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
def _extract_connections(self, media):
|
def _extract_connections(self, media):
|
||||||
return self._findall_ns(media, './{%s}connection')
|
return self._findall_ns(media, './{%s}connection')
|
||||||
|
|
||||||
def _extract_video(self, media, programme_id):
|
|
||||||
formats = []
|
|
||||||
vbr = int_or_none(media.get('bitrate'))
|
|
||||||
vcodec = media.get('encoding')
|
|
||||||
service = media.get('service')
|
|
||||||
width = int_or_none(media.get('width'))
|
|
||||||
height = int_or_none(media.get('height'))
|
|
||||||
file_size = int_or_none(media.get('media_file_size'))
|
|
||||||
for connection in self._extract_connections(media):
|
|
||||||
conn_formats = self._extract_connection(connection, programme_id)
|
|
||||||
for format in conn_formats:
|
|
||||||
format.update({
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'vbr': vbr,
|
|
||||||
'vcodec': vcodec,
|
|
||||||
'filesize': file_size,
|
|
||||||
})
|
|
||||||
if service:
|
|
||||||
format['format_id'] = '%s_%s' % (service, format['format_id'])
|
|
||||||
formats.extend(conn_formats)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_audio(self, media, programme_id):
|
|
||||||
formats = []
|
|
||||||
abr = int_or_none(media.get('bitrate'))
|
|
||||||
acodec = media.get('encoding')
|
|
||||||
service = media.get('service')
|
|
||||||
for connection in self._extract_connections(media):
|
|
||||||
conn_formats = self._extract_connection(connection, programme_id)
|
|
||||||
for format in conn_formats:
|
|
||||||
format.update({
|
|
||||||
'format_id': '%s_%s' % (service, format['format_id']),
|
|
||||||
'abr': abr,
|
|
||||||
'acodec': acodec,
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
formats.extend(conn_formats)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _get_subtitles(self, media, programme_id):
|
def _get_subtitles(self, media, programme_id):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for connection in self._extract_connections(media):
|
for connection in self._extract_connections(media):
|
||||||
@@ -378,13 +298,87 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
def _process_media_selector(self, media_selection, programme_id):
|
def _process_media_selector(self, media_selection, programme_id):
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = None
|
subtitles = None
|
||||||
|
urls = []
|
||||||
|
|
||||||
for media in self._extract_medias(media_selection):
|
for media in self._extract_medias(media_selection):
|
||||||
kind = media.get('kind')
|
kind = media.get('kind')
|
||||||
if kind == 'audio':
|
if kind in ('video', 'audio'):
|
||||||
formats.extend(self._extract_audio(media, programme_id))
|
bitrate = int_or_none(media.get('bitrate'))
|
||||||
elif kind == 'video':
|
encoding = media.get('encoding')
|
||||||
formats.extend(self._extract_video(media, programme_id))
|
service = media.get('service')
|
||||||
|
width = int_or_none(media.get('width'))
|
||||||
|
height = int_or_none(media.get('height'))
|
||||||
|
file_size = int_or_none(media.get('media_file_size'))
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
href = connection.get('href')
|
||||||
|
if href in urls:
|
||||||
|
continue
|
||||||
|
if href:
|
||||||
|
urls.append(href)
|
||||||
|
conn_kind = connection.get('kind')
|
||||||
|
protocol = connection.get('protocol')
|
||||||
|
supplier = connection.get('supplier')
|
||||||
|
transfer_format = connection.get('transferFormat')
|
||||||
|
format_id = supplier or conn_kind or protocol
|
||||||
|
if service:
|
||||||
|
format_id = '%s_%s' % (service, format_id)
|
||||||
|
# ASX playlist
|
||||||
|
if supplier == 'asx':
|
||||||
|
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||||
|
formats.append({
|
||||||
|
'url': ref,
|
||||||
|
'format_id': 'ref%s_%s' % (i, format_id),
|
||||||
|
})
|
||||||
|
elif transfer_format == 'dash':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
href, programme_id, mpd_id=format_id, fatal=False))
|
||||||
|
elif transfer_format == 'hls':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False))
|
||||||
|
elif transfer_format == 'hds':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
href, programme_id, f4m_id=format_id, fatal=False))
|
||||||
|
else:
|
||||||
|
if not service and not supplier and bitrate:
|
||||||
|
format_id += '-%d' % bitrate
|
||||||
|
fmt = {
|
||||||
|
'format_id': format_id,
|
||||||
|
'filesize': file_size,
|
||||||
|
}
|
||||||
|
if kind == 'video':
|
||||||
|
fmt.update({
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'vbr': bitrate,
|
||||||
|
'vcodec': encoding,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
fmt.update({
|
||||||
|
'abr': bitrate,
|
||||||
|
'acodec': encoding,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
if protocol == 'http':
|
||||||
|
# Direct link
|
||||||
|
fmt.update({
|
||||||
|
'url': href,
|
||||||
|
})
|
||||||
|
elif protocol == 'rtmp':
|
||||||
|
application = connection.get('application', 'ondemand')
|
||||||
|
auth_string = connection.get('authString')
|
||||||
|
identifier = connection.get('identifier')
|
||||||
|
server = connection.get('server')
|
||||||
|
fmt.update({
|
||||||
|
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||||
|
'play_path': identifier,
|
||||||
|
'app': '%s?%s' % (application, auth_string),
|
||||||
|
'page_url': 'http://www.bbc.co.uk',
|
||||||
|
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||||
|
'rtmp_live': False,
|
||||||
|
'ext': 'flv',
|
||||||
|
})
|
||||||
|
formats.append(fmt)
|
||||||
elif kind == 'captions':
|
elif kind == 'captions':
|
||||||
subtitles = self.extract_subtitles(media, programme_id)
|
subtitles = self.extract_subtitles(media, programme_id)
|
||||||
return formats, subtitles
|
return formats, subtitles
|
||||||
@@ -589,6 +583,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'id': '150615_telabyad_kentin_cogu',
|
'id': '150615_telabyad_kentin_cogu',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||||
|
'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
|
||||||
'timestamp': 1434397334,
|
'timestamp': 1434397334,
|
||||||
'upload_date': '20150615',
|
'upload_date': '20150615',
|
||||||
},
|
},
|
||||||
@@ -602,6 +597,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||||
|
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
||||||
'timestamp': 1434713142,
|
'timestamp': 1434713142,
|
||||||
'upload_date': '20150619',
|
'upload_date': '20150619',
|
||||||
},
|
},
|
||||||
@@ -651,6 +647,23 @@ class BBCIE(BBCCoUkIE):
|
|||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# single video embedded with Morph
|
||||||
|
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p041vhd0',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Nigeria v Japan - Men's First Round",
|
||||||
|
'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
|
||||||
|
'duration': 7980,
|
||||||
|
'uploader': 'BBC Sport',
|
||||||
|
'uploader_id': 'bbc_sport',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Georestricted to UK',
|
||||||
}, {
|
}, {
|
||||||
# single video with playlist.sxml URL in playlist param
|
# single video with playlist.sxml URL in playlist param
|
||||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||||
@@ -698,7 +711,9 @@ class BBCIE(BBCCoUkIE):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if BBCCoUkIE.suitable(url) or BBCCoUkArticleIE.suitable(url) else super(BBCIE, cls).suitable(url)
|
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
|
||||||
|
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
|
||||||
|
else super(BBCIE, cls).suitable(url))
|
||||||
|
|
||||||
def _extract_from_media_meta(self, media_meta, video_id):
|
def _extract_from_media_meta(self, media_meta, video_id):
|
||||||
# Direct links to media in media metadata (e.g.
|
# Direct links to media in media metadata (e.g.
|
||||||
@@ -746,7 +761,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
|
json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
|
||||||
timestamp = json_ld_info.get('timestamp')
|
timestamp = json_ld_info.get('timestamp')
|
||||||
|
|
||||||
playlist_title = json_ld_info.get('title')
|
playlist_title = json_ld_info.get('title')
|
||||||
@@ -815,8 +830,29 @@ class BBCIE(BBCCoUkIE):
|
|||||||
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
||||||
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
||||||
if playlist:
|
if playlist:
|
||||||
entries.append(self._extract_from_playlist_sxml(
|
entry = None
|
||||||
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
|
for key in ('streaming', 'progressiveDownload'):
|
||||||
|
playlist_url = playlist.get('%sUrl' % key)
|
||||||
|
if not playlist_url:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
info = self._extract_from_playlist_sxml(
|
||||||
|
playlist_url, playlist_id, timestamp)
|
||||||
|
if not entry:
|
||||||
|
entry = info
|
||||||
|
else:
|
||||||
|
entry['title'] = info['title']
|
||||||
|
entry['formats'].extend(info['formats'])
|
||||||
|
except Exception as e:
|
||||||
|
# Some playlist URL may fail with 500, at the same time
|
||||||
|
# the other one may work fine (e.g.
|
||||||
|
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||||
|
continue
|
||||||
|
raise
|
||||||
|
if entry:
|
||||||
|
self._sort_formats(entry['formats'])
|
||||||
|
entries.append(entry)
|
||||||
|
|
||||||
if entries:
|
if entries:
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
@@ -849,6 +885,50 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||||
|
# There are several setPayload calls may be present but the video
|
||||||
|
# seems to be always related to the first one
|
||||||
|
morph_payload = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
||||||
|
webpage, 'morph payload', default='{}'),
|
||||||
|
playlist_id, fatal=False)
|
||||||
|
if morph_payload:
|
||||||
|
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
||||||
|
for component in components:
|
||||||
|
if not isinstance(component, dict):
|
||||||
|
continue
|
||||||
|
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
||||||
|
if not lead_media:
|
||||||
|
continue
|
||||||
|
identifiers = lead_media.get('identifiers')
|
||||||
|
if not identifiers or not isinstance(identifiers, dict):
|
||||||
|
continue
|
||||||
|
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
||||||
|
if not programme_id:
|
||||||
|
continue
|
||||||
|
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||||
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
description = lead_media.get('summary')
|
||||||
|
uploader = lead_media.get('masterBrand')
|
||||||
|
uploader_id = lead_media.get('mid')
|
||||||
|
duration = None
|
||||||
|
duration_d = lead_media.get('duration')
|
||||||
|
if isinstance(duration_d, dict):
|
||||||
|
duration = parse_duration(dict_get(
|
||||||
|
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||||
|
return {
|
||||||
|
'id': programme_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|
||||||
def extract_all(pattern):
|
def extract_all(pattern):
|
||||||
return list(filter(None, map(
|
return list(filter(None, map(
|
||||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||||
@@ -866,7 +946,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
||||||
if entries:
|
if entries:
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
[self.url_result(entry, 'BBCCoUk') for entry in entries],
|
[self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
|
||||||
playlist_id, playlist_title, playlist_description)
|
playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
# Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
|
# Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
|
||||||
@@ -948,7 +1028,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
|
|
||||||
|
|
||||||
class BBCCoUkArticleIE(InfoExtractor):
|
class BBCCoUkArticleIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
|
||||||
IE_NAME = 'bbc.co.uk:article'
|
IE_NAME = 'bbc.co.uk:article'
|
||||||
IE_DESC = 'BBC articles'
|
IE_DESC = 'BBC articles'
|
||||||
|
|
||||||
@@ -975,3 +1055,116 @@ class BBCCoUkArticleIE(InfoExtractor):
|
|||||||
r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
|
r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|
||||||
|
|
||||||
|
class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||||
|
def _entries(self, webpage, url, playlist_id):
|
||||||
|
single_page = 'page' in compat_urlparse.parse_qs(
|
||||||
|
compat_urlparse.urlparse(url).query)
|
||||||
|
for page_num in itertools.count(2):
|
||||||
|
for video_id in re.findall(
|
||||||
|
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
|
||||||
|
yield self.url_result(
|
||||||
|
self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
|
||||||
|
if single_page:
|
||||||
|
return
|
||||||
|
next_page = self._search_regex(
|
||||||
|
r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
|
||||||
|
webpage, 'next page url', default=None, group='url')
|
||||||
|
if not next_page:
|
||||||
|
break
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
compat_urlparse.urljoin(url, next_page), playlist_id,
|
||||||
|
'Downloading page %d' % page_num, page_num)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
title, description = self._extract_title_and_description(webpage)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
self._entries(webpage, url, playlist_id),
|
||||||
|
playlist_id, title, description)
|
||||||
|
|
||||||
|
|
||||||
|
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||||
|
IE_NAME = 'bbc.co.uk:iplayer:playlist'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||||
|
_URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
|
||||||
|
_VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b05rcz9v',
|
||||||
|
'title': 'The Disappearance',
|
||||||
|
'description': 'French thriller serial about a missing teenager.',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 6,
|
||||||
|
'skip': 'This programme is not currently available on BBC iPlayer',
|
||||||
|
}, {
|
||||||
|
# Available for over a year unlike 30 days for most other programmes
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p02tcc32',
|
||||||
|
'title': 'Bohemian Icons',
|
||||||
|
'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 10,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_title_and_description(self, webpage):
|
||||||
|
title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||||
|
description = self._search_regex(
|
||||||
|
r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
|
||||||
|
webpage, 'description', fatal=False, group='value')
|
||||||
|
return title, description
|
||||||
|
|
||||||
|
|
||||||
|
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||||
|
IE_NAME = 'bbc.co.uk:playlist'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
|
||||||
|
_URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
|
||||||
|
_VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b05rcz9v',
|
||||||
|
'title': 'The Disappearance - Clips - BBC Four',
|
||||||
|
'description': 'French thriller serial about a missing teenager.',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 7,
|
||||||
|
}, {
|
||||||
|
# multipage playlist, explicit page
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b00mfl7n',
|
||||||
|
'title': 'Frozen Planet - Clips - BBC One',
|
||||||
|
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 24,
|
||||||
|
}, {
|
||||||
|
# multipage playlist, all pages
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b00mfl7n',
|
||||||
|
'title': 'Frozen Planet - Clips - BBC One',
|
||||||
|
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 142,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_title_and_description(self, webpage):
|
||||||
|
title = self._og_search_title(webpage, fatal=False)
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
return title, description
|
||||||
|
|||||||
75
youtube_dl/extractor/bellmedia.py
Normal file
75
youtube_dl/extractor/bellmedia.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class BellMediaIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
||||||
|
(?P<domain>
|
||||||
|
(?:
|
||||||
|
ctv|
|
||||||
|
tsn|
|
||||||
|
bnn|
|
||||||
|
thecomedynetwork|
|
||||||
|
discovery|
|
||||||
|
discoveryvelocity|
|
||||||
|
sciencechannel|
|
||||||
|
investigationdiscovery|
|
||||||
|
animalplanet|
|
||||||
|
bravo|
|
||||||
|
mtv|
|
||||||
|
space
|
||||||
|
)\.ca|
|
||||||
|
much\.com
|
||||||
|
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||||
|
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '706966',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
|
||||||
|
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
|
||||||
|
'upload_date': '20150919',
|
||||||
|
'timestamp': 1442624700,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['HTTP Error 404'],
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_DOMAINS = {
|
||||||
|
'thecomedynetwork': 'comedy',
|
||||||
|
'discoveryvelocity': 'discvel',
|
||||||
|
'sciencechannel': 'discsci',
|
||||||
|
'investigationdiscovery': 'invdisc',
|
||||||
|
'animalplanet': 'aniplan',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
domain = domain.split('.')[0]
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'id': video_id,
|
||||||
|
'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
|
||||||
|
'ie_key': 'NineCNineMedia',
|
||||||
|
}
|
||||||
@@ -1,31 +1,26 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..utils import unified_strdate
|
||||||
from ..utils import (
|
|
||||||
xpath_text,
|
|
||||||
xpath_with_ns,
|
|
||||||
int_or_none,
|
|
||||||
parse_iso8601,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BetIE(InfoExtractor):
|
class BetIE(MTVServicesInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
|
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
|
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'news/national/2014/a-conversation-with-president-obama',
|
'id': '07e96bd3-8850-3051-b856-271b457f0ab8',
|
||||||
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
|
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'A Conversation With President Obama',
|
'title': 'A Conversation With President Obama',
|
||||||
'description': 'md5:699d0652a350cf3e491cd15cc745b5da',
|
'description': 'President Obama urges persistence in confronting racism and bias.',
|
||||||
'duration': 1534,
|
'duration': 1534,
|
||||||
'timestamp': 1418075340,
|
|
||||||
'upload_date': '20141208',
|
'upload_date': '20141208',
|
||||||
'uploader': 'admin',
|
|
||||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||||
|
'subtitles': {
|
||||||
|
'en': 'mincount:2',
|
||||||
|
}
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
@@ -35,16 +30,17 @@ class BetIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
|
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'news/national/2014/justice-for-ferguson-a-community-reacts',
|
'id': '9f516bf1-7543-39c4-8076-dd441b459ba9',
|
||||||
'display_id': 'justice-for-ferguson-a-community-reacts',
|
'display_id': 'justice-for-ferguson-a-community-reacts',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Justice for Ferguson: A Community Reacts',
|
'title': 'Justice for Ferguson: A Community Reacts',
|
||||||
'description': 'A BET News special.',
|
'description': 'A BET News special.',
|
||||||
'duration': 1696,
|
'duration': 1696,
|
||||||
'timestamp': 1416942360,
|
|
||||||
'upload_date': '20141125',
|
'upload_date': '20141125',
|
||||||
'uploader': 'admin',
|
|
||||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||||
|
'subtitles': {
|
||||||
|
'en': 'mincount:2',
|
||||||
|
}
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
@@ -53,57 +49,32 @@ class BetIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
|
||||||
|
|
||||||
|
def _get_feed_query(self, uri):
|
||||||
|
return {
|
||||||
|
'uuid': uri,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_mgid(self, webpage):
|
||||||
|
return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
mgid = self._extract_mgid(webpage)
|
||||||
|
videos_info = self._get_videos_info(mgid)
|
||||||
|
|
||||||
media_url = compat_urllib_parse_unquote(self._search_regex(
|
info_dict = videos_info['entries'][0]
|
||||||
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
|
|
||||||
webpage, 'media URL'))
|
|
||||||
|
|
||||||
video_id = self._search_regex(
|
upload_date = unified_strdate(self._html_search_meta('date', webpage))
|
||||||
r'/video/(.*)/_jcr_content/', media_url, 'video id')
|
description = self._html_search_meta('description', webpage)
|
||||||
|
|
||||||
mrss = self._download_xml(media_url, display_id)
|
info_dict.update({
|
||||||
|
|
||||||
item = mrss.find('./channel/item')
|
|
||||||
|
|
||||||
NS_MAP = {
|
|
||||||
'dc': 'http://purl.org/dc/elements/1.1/',
|
|
||||||
'media': 'http://search.yahoo.com/mrss/',
|
|
||||||
'ka': 'http://kickapps.com/karss',
|
|
||||||
}
|
|
||||||
|
|
||||||
title = xpath_text(item, './title', 'title')
|
|
||||||
description = xpath_text(
|
|
||||||
item, './description', 'description', fatal=False)
|
|
||||||
|
|
||||||
timestamp = parse_iso8601(xpath_text(
|
|
||||||
item, xpath_with_ns('./dc:date', NS_MAP),
|
|
||||||
'upload date', fatal=False))
|
|
||||||
uploader = xpath_text(
|
|
||||||
item, xpath_with_ns('./dc:creator', NS_MAP),
|
|
||||||
'uploader', fatal=False)
|
|
||||||
|
|
||||||
media_content = item.find(
|
|
||||||
xpath_with_ns('./media:content', NS_MAP))
|
|
||||||
duration = int_or_none(media_content.get('duration'))
|
|
||||||
smil_url = media_content.get('url')
|
|
||||||
|
|
||||||
thumbnail = media_content.find(
|
|
||||||
xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
|
|
||||||
|
|
||||||
formats = self._extract_smil_formats(smil_url, display_id)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'upload_date': upload_date,
|
||||||
'timestamp': timestamp,
|
})
|
||||||
'uploader': uploader,
|
|
||||||
'duration': duration,
|
return info_dict
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -11,22 +11,13 @@ from ..compat import compat_urllib_parse_unquote
|
|||||||
class BigflixIE(InfoExtractor):
|
class BigflixIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
|
|
||||||
'md5': 'ec76aa9b1129e2e5b301a474e54fab74',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '16537',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Singham Returns',
|
|
||||||
'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# 2 formats
|
# 2 formats
|
||||||
'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
|
'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '16070',
|
'id': '16070',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Madarasapatinam',
|
'title': 'Madarasapatinam',
|
||||||
'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca',
|
'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
|
||||||
'formats': 'mincount:2',
|
'formats': 'mincount:2',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
|||||||
@@ -1,205 +1,101 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import calendar
|
import hashlib
|
||||||
import datetime
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_parse_qs
|
||||||
compat_etree_fromstring,
|
|
||||||
compat_str,
|
|
||||||
compat_parse_qs,
|
|
||||||
compat_xml_parse_error,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
xpath_text,
|
unified_timestamp,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BiliBiliIE(InfoExtractor):
|
class BiliBiliIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TEST = {
|
||||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1554319',
|
'id': '1074402',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': '【金坷垃】金泡沫',
|
'title': '【金坷垃】金泡沫',
|
||||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||||
'duration': 308.067,
|
'duration': 308.315,
|
||||||
'timestamp': 1398012660,
|
'timestamp': 1398012660,
|
||||||
'upload_date': '20140420',
|
'upload_date': '20140420',
|
||||||
'thumbnail': 're:^https?://.+\.jpg',
|
'thumbnail': 're:^https?://.+\.jpg',
|
||||||
'uploader': '菊子桑',
|
'uploader': '菊子桑',
|
||||||
'uploader_id': '156160',
|
'uploader_id': '156160',
|
||||||
},
|
},
|
||||||
}, {
|
}
|
||||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1041170',
|
|
||||||
'title': '【BD1080P】刀语【诸神&异域】',
|
|
||||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
|
||||||
},
|
|
||||||
'playlist_count': 9,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.bilibili.com/video/av4808130/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4808130',
|
|
||||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
|
||||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
|
||||||
},
|
|
||||||
'playlist': [{
|
|
||||||
'md5': '55cdadedf3254caaa0d5d27cf20a8f9c',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4808130_part1',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
|
||||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
|
||||||
'timestamp': 1464564180,
|
|
||||||
'upload_date': '20160529',
|
|
||||||
'uploader': '喜欢拉面',
|
|
||||||
'uploader_id': '151066',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'md5': '926f9f67d0c482091872fbd8eca7ea3d',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4808130_part2',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
|
||||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
|
||||||
'timestamp': 1464564180,
|
|
||||||
'upload_date': '20160529',
|
|
||||||
'uploader': '喜欢拉面',
|
|
||||||
'uploader_id': '151066',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'md5': '4b7b225b968402d7c32348c646f1fd83',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4808130_part3',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
|
||||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
|
||||||
'timestamp': 1464564180,
|
|
||||||
'upload_date': '20160529',
|
|
||||||
'uploader': '喜欢拉面',
|
|
||||||
'uploader_id': '151066',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'md5': '7b795e214166501e9141139eea236e91',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4808130_part4',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
|
||||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
|
||||||
'timestamp': 1464564180,
|
|
||||||
'upload_date': '20160529',
|
|
||||||
'uploader': '喜欢拉面',
|
|
||||||
'uploader_id': '151066',
|
|
||||||
},
|
|
||||||
}],
|
|
||||||
}, {
|
|
||||||
# Missing upload time
|
|
||||||
'url': 'http://www.bilibili.com/video/av1867637/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2880301',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
|
||||||
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
|
||||||
'uploader': '黑夜为猫',
|
|
||||||
'uploader_id': '610729',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# Just to test metadata extraction
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'expected_warnings': ['upload time'],
|
|
||||||
}]
|
|
||||||
|
|
||||||
# BiliBili blocks keys from time to time. The current key is extracted from
|
_APP_KEY = '6f90a59ac58a4123'
|
||||||
# the Android client
|
_BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
|
||||||
# TODO: find the sign algorithm used in the flash player
|
|
||||||
_APP_KEY = '86385cdc024c0f6c'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
params = compat_parse_qs(self._search_regex(
|
if 'anime/v' not in url:
|
||||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
cid = compat_parse_qs(self._search_regex(
|
||||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||||
webpage, 'player parameters'))
|
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||||
cid = params['cid'][0]
|
webpage, 'player parameters'))['cid'][0]
|
||||||
|
|
||||||
info_xml_str = self._download_webpage(
|
|
||||||
'http://interface.bilibili.com/v_cdn_play',
|
|
||||||
cid, query={'appkey': self._APP_KEY, 'cid': cid},
|
|
||||||
note='Downloading video info page')
|
|
||||||
|
|
||||||
err_msg = None
|
|
||||||
durls = None
|
|
||||||
info_xml = None
|
|
||||||
try:
|
|
||||||
info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
|
|
||||||
except compat_xml_parse_error:
|
|
||||||
info_json = self._parse_json(info_xml_str, video_id, fatal=False)
|
|
||||||
err_msg = (info_json or {}).get('error_text')
|
|
||||||
else:
|
else:
|
||||||
err_msg = xpath_text(info_xml, './message')
|
js = self._download_json(
|
||||||
|
'http://bangumi.bilibili.com/web_api/get_source', video_id,
|
||||||
|
data=urlencode_postdata({'episode_id': video_id}),
|
||||||
|
headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||||
|
cid = js['result']['cid']
|
||||||
|
|
||||||
if info_xml is not None:
|
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||||
durls = info_xml.findall('./durl')
|
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||||
if not durls:
|
|
||||||
if err_msg:
|
video_info = self._download_json(
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
|
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||||
else:
|
video_id, note='Downloading video info page')
|
||||||
raise ExtractorError('No videos found!')
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
for durl in durls:
|
for idx, durl in enumerate(video_info['durl']):
|
||||||
size = xpath_text(durl, ['./filesize', './size'])
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': durl.find('./url').text,
|
'url': durl['url'],
|
||||||
'filesize': int_or_none(size),
|
'filesize': int_or_none(durl['size']),
|
||||||
}]
|
}]
|
||||||
for backup_url in durl.findall('./backup_url/url'):
|
for backup_url in durl.get('backup_url', []):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': backup_url.text,
|
'url': backup_url,
|
||||||
# backup URLs have lower priorities
|
# backup URLs have lower priorities
|
||||||
'preference': -2 if 'hd.mp4' in backup_url.text else -3,
|
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
|
'id': '%s_part%s' % (video_id, idx),
|
||||||
'duration': int_or_none(xpath_text(durl, './length'), 1000),
|
'duration': float_or_none(durl.get('length'), 1000),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
})
|
})
|
||||||
|
|
||||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||||
description = self._html_search_meta('description', webpage)
|
description = self._html_search_meta('description', webpage)
|
||||||
datetime_str = self._html_search_regex(
|
timestamp = unified_timestamp(self._html_search_regex(
|
||||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
|
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
|
||||||
timestamp = None
|
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
|
||||||
if datetime_str:
|
|
||||||
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
|
|
||||||
|
|
||||||
# TODO 'view_count' requires deobfuscating Javascript
|
# TODO 'view_count' requires deobfuscating Javascript
|
||||||
info = {
|
info = {
|
||||||
'id': compat_str(cid),
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
|
'thumbnail': thumbnail,
|
||||||
'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
|
'duration': float_or_none(video_info.get('timelength'), scale=1000),
|
||||||
}
|
}
|
||||||
|
|
||||||
uploader_mobj = re.search(
|
uploader_mobj = re.search(
|
||||||
|
|||||||
@@ -2,11 +2,15 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import remove_end
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
from .rudo import RudoIE
|
||||||
|
|
||||||
|
|
||||||
class BioBioChileTVIE(InfoExtractor):
|
class BioBioChileTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
_VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
||||||
@@ -18,6 +22,7 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Fernando Atria',
|
'uploader': 'Fernando Atria',
|
||||||
},
|
},
|
||||||
|
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||||
}, {
|
}, {
|
||||||
# different uploader layout
|
# different uploader layout
|
||||||
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
||||||
@@ -32,6 +37,16 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': '(none)',
|
||||||
|
'upload_date': '20160708',
|
||||||
|
'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -45,42 +60,22 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
rudo_url = RudoIE._extract_url(webpage)
|
||||||
|
if not rudo_url:
|
||||||
|
raise ExtractorError('No videos found')
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
||||||
|
|
||||||
file_url = self._search_regex(
|
|
||||||
r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
|
|
||||||
webpage, 'file url', group='url')
|
|
||||||
|
|
||||||
base_url = self._search_regex(
|
|
||||||
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
|
|
||||||
'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
|
|
||||||
group='url')
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
'%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
|
||||||
f = {
|
|
||||||
'url': '%s%s' % (base_url, file_url),
|
|
||||||
'format_id': 'http',
|
|
||||||
'protocol': 'http',
|
|
||||||
'preference': 1,
|
|
||||||
}
|
|
||||||
if formats:
|
|
||||||
f_copy = formats[-1].copy()
|
|
||||||
f_copy.update(f)
|
|
||||||
f = f_copy
|
|
||||||
formats.append(f)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
|
r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': rudo_url,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'formats': formats,
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,7 +24,8 @@ class BIQLEIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ребенок в шоке от автоматической мойки',
|
'title': 'Ребенок в шоке от автоматической мойки',
|
||||||
'uploader': 'Dmitry Kotov',
|
'uploader': 'Dmitry Kotov',
|
||||||
}
|
},
|
||||||
|
'skip': ' This video was marked as adult. Embedding adult videos on external sites is prohibited.',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@@ -20,6 +21,18 @@ class BloombergIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'format': 'best[format_id^=hds]',
|
'format': 'best[format_id^=hds]',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# video ID in BPlayer(...)
|
||||||
|
'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Meet the Real-Life Tech Wizards of Middle Earth',
|
||||||
|
'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'best[format_id^=hds]',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -33,7 +46,11 @@ class BloombergIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1',
|
r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1',
|
||||||
webpage, 'id', group='url')
|
webpage, 'id', group='url', default=None)
|
||||||
|
if not video_id:
|
||||||
|
bplayer_data = self._parse_json(self._search_regex(
|
||||||
|
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
|
||||||
|
video_id = bplayer_data['id']
|
||||||
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
||||||
|
|
||||||
embed_info = self._download_json(
|
embed_info = self._download_json(
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class BpbIE(InfoExtractor):
|
class BpbIE(InfoExtractor):
|
||||||
IE_DESC = 'Bundeszentrale für politische Bildung'
|
IE_DESC = 'Bundeszentrale für politische Bildung'
|
||||||
_VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||||
|
|||||||
@@ -29,7 +29,8 @@ class BRIE(InfoExtractor):
|
|||||||
'duration': 180,
|
'duration': 180,
|
||||||
'uploader': 'Reinhard Weber',
|
'uploader': 'Reinhard Weber',
|
||||||
'upload_date': '20150422',
|
'upload_date': '20150422',
|
||||||
}
|
},
|
||||||
|
'skip': '404 not found',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
|
'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
|
||||||
@@ -40,7 +41,8 @@ class BRIE(InfoExtractor):
|
|||||||
'title': 'Manfred Schreiber ist tot',
|
'title': 'Manfred Schreiber ist tot',
|
||||||
'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
|
'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
|
||||||
'duration': 26,
|
'duration': 26,
|
||||||
}
|
},
|
||||||
|
'skip': '404 not found',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
|
'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
|
||||||
@@ -51,7 +53,8 @@ class BRIE(InfoExtractor):
|
|||||||
'title': 'Kurzweilig und sehr bewegend',
|
'title': 'Kurzweilig und sehr bewegend',
|
||||||
'description': 'md5:0351996e3283d64adeb38ede91fac54e',
|
'description': 'md5:0351996e3283d64adeb38ede91fac54e',
|
||||||
'duration': 296,
|
'duration': 296,
|
||||||
}
|
},
|
||||||
|
'skip': '404 not found',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
|
'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
|
||||||
|
|||||||
@@ -1,31 +1,74 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .adobepass import AdobePassIE
|
||||||
from ..utils import smuggle_url
|
from ..utils import (
|
||||||
|
smuggle_url,
|
||||||
|
update_url_query,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class BravoTVIE(InfoExtractor):
|
class BravoTVIE(AdobePassIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
|
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
|
'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
|
||||||
'md5': 'd60cdf68904e854fac669bd26cccf801',
|
'md5': '9086d0b7ef0ea2aabc4781d75f4e5863',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'LitrBdX64qLn',
|
'id': 'zHyk1_HU_mPy',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Last Chance Kitchen Returns',
|
'title': 'LCK Ep 12: Fishy Finale',
|
||||||
'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
|
'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.',
|
||||||
'timestamp': 1448926740,
|
|
||||||
'upload_date': '20151130',
|
|
||||||
'uploader': 'NBCU-BRAV',
|
'uploader': 'NBCU-BRAV',
|
||||||
|
'upload_date': '20160302',
|
||||||
|
'timestamp': 1456945320,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid')
|
settings = self._parse_json(self._search_regex(
|
||||||
release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid')
|
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
|
||||||
return self.url_result(smuggle_url(
|
display_id)
|
||||||
'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid),
|
info = {}
|
||||||
{'force_smil_url': True}), 'ThePlatform', release_pid)
|
query = {
|
||||||
|
'mbr': 'true',
|
||||||
|
}
|
||||||
|
account_pid, release_pid = [None] * 2
|
||||||
|
tve = settings.get('sharedTVE')
|
||||||
|
if tve:
|
||||||
|
query['manifest'] = 'm3u'
|
||||||
|
account_pid = 'HNK2IC'
|
||||||
|
release_pid = tve['release_pid']
|
||||||
|
if tve.get('entitlement') == 'auth':
|
||||||
|
adobe_pass = settings.get('adobePass', {})
|
||||||
|
resource = self._get_mvpd_resource(
|
||||||
|
adobe_pass.get('adobePassResourceId', 'bravo'),
|
||||||
|
tve['title'], release_pid, tve.get('rating'))
|
||||||
|
query['auth'] = self._extract_mvpd_auth(
|
||||||
|
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
|
||||||
|
else:
|
||||||
|
shared_playlist = settings['shared_playlist']
|
||||||
|
account_pid = shared_playlist['account_pid']
|
||||||
|
metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
|
||||||
|
release_pid = metadata['release_pid']
|
||||||
|
info.update({
|
||||||
|
'title': metadata['title'],
|
||||||
|
'description': metadata.get('description'),
|
||||||
|
'season_number': int_or_none(metadata.get('season_num')),
|
||||||
|
'episode_number': int_or_none(metadata.get('episode_num')),
|
||||||
|
})
|
||||||
|
query['switch'] = 'progressive'
|
||||||
|
info.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'id': release_pid,
|
||||||
|
'url': smuggle_url(update_url_query(
|
||||||
|
'http://link.theplatform.com/s/%s/%s' % (account_pid, release_pid),
|
||||||
|
query), {'force_smil_url': True}),
|
||||||
|
'ie_key': 'ThePlatform',
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|||||||
@@ -26,6 +26,8 @@ from ..utils import (
|
|||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
clean_html,
|
||||||
|
mimetype2ext,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -90,6 +92,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'description': 'md5:363109c02998fee92ec02211bd8000df',
|
'description': 'md5:363109c02998fee92ec02211bd8000df',
|
||||||
'uploader': 'National Ballet of Canada',
|
'uploader': 'National Ballet of Canada',
|
||||||
},
|
},
|
||||||
|
'skip': 'Video gone',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# test flv videos served by akamaihd.net
|
# test flv videos served by akamaihd.net
|
||||||
@@ -108,7 +111,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# playlist test
|
# playlist with 'videoList'
|
||||||
# from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
|
# from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
|
||||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -117,6 +120,15 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 7,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# playlist with 'playlistTab' (https://github.com/rg3/youtube-dl/issues/9965)
|
||||||
|
'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1522758701001',
|
||||||
|
'title': 'Lesson 08',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 10,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
FLV_VCODECS = {
|
FLV_VCODECS = {
|
||||||
1: 'SORENSON',
|
1: 'SORENSON',
|
||||||
@@ -298,13 +310,19 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
info_url, player_key, 'Downloading playlist information')
|
info_url, player_key, 'Downloading playlist information')
|
||||||
|
|
||||||
json_data = json.loads(playlist_info)
|
json_data = json.loads(playlist_info)
|
||||||
if 'videoList' not in json_data:
|
if 'videoList' in json_data:
|
||||||
|
playlist_info = json_data['videoList']
|
||||||
|
playlist_dto = playlist_info['mediaCollectionDTO']
|
||||||
|
elif 'playlistTabs' in json_data:
|
||||||
|
playlist_info = json_data['playlistTabs']
|
||||||
|
playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
|
||||||
|
else:
|
||||||
raise ExtractorError('Empty playlist')
|
raise ExtractorError('Empty playlist')
|
||||||
playlist_info = json_data['videoList']
|
|
||||||
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
|
||||||
|
|
||||||
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
||||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
playlist_title=playlist_dto['displayName'])
|
||||||
|
|
||||||
def _extract_video_info(self, video_info):
|
def _extract_video_info(self, video_info):
|
||||||
video_id = compat_str(video_info['id'])
|
video_id = compat_str(video_info['id'])
|
||||||
@@ -528,14 +546,16 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for source in json_data.get('sources', []):
|
for source in json_data.get('sources', []):
|
||||||
container = source.get('container')
|
container = source.get('container')
|
||||||
source_type = source.get('type')
|
ext = mimetype2ext(source.get('type'))
|
||||||
src = source.get('src')
|
src = source.get('src')
|
||||||
if source_type == 'application/x-mpegURL' or container == 'M2TS':
|
if ext == 'ism':
|
||||||
|
continue
|
||||||
|
elif ext == 'm3u8' or container == 'M2TS':
|
||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||||
elif source_type == 'application/dash+xml':
|
elif ext == 'mpd':
|
||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
|
formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
|
||||||
@@ -551,7 +571,7 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'filesize': int_or_none(source.get('size')),
|
'filesize': int_or_none(source.get('size')),
|
||||||
'container': container,
|
'container': container,
|
||||||
'ext': container.lower(),
|
'ext': ext or container.lower(),
|
||||||
}
|
}
|
||||||
if width == 0 and height == 0:
|
if width == 0 and height == 0:
|
||||||
f.update({
|
f.update({
|
||||||
@@ -585,6 +605,13 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
'format_id': build_format_id('rtmp'),
|
'format_id': build_format_id('rtmp'),
|
||||||
})
|
})
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
|
errors = json_data.get('errors')
|
||||||
|
if not formats and errors:
|
||||||
|
error = errors[0]
|
||||||
|
raise ExtractorError(
|
||||||
|
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
@@ -597,7 +624,7 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': json_data.get('description'),
|
'description': clean_html(json_data.get('description')),
|
||||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||||
'duration': float_or_none(json_data.get('duration'), 1000),
|
'duration': float_or_none(json_data.get('duration'), 1000),
|
||||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .facebook import FacebookIE
|
||||||
|
|
||||||
|
|
||||||
class BuzzFeedIE(InfoExtractor):
|
class BuzzFeedIE(InfoExtractor):
|
||||||
@@ -20,11 +21,11 @@ class BuzzFeedIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'aVCR29aE_OQ',
|
'id': 'aVCR29aE_OQ',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'title': 'Angry Ram destroys a punching bag..',
|
||||||
|
'description': 'md5:c59533190ef23fd4458a5e8c8c872345',
|
||||||
'upload_date': '20141024',
|
'upload_date': '20141024',
|
||||||
'uploader_id': 'Buddhanz1',
|
'uploader_id': 'Buddhanz1',
|
||||||
'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl',
|
'uploader': 'Angry Ram',
|
||||||
'uploader': 'Buddhanz',
|
|
||||||
'title': 'Angry Ram destroys a punching bag',
|
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
}, {
|
}, {
|
||||||
@@ -41,13 +42,30 @@ class BuzzFeedIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mVmBL8B-In0',
|
'id': 'mVmBL8B-In0',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||||
|
'description': 'md5:28faab95cda6e361bcff06ec12fc21d8',
|
||||||
'upload_date': '20141124',
|
'upload_date': '20141124',
|
||||||
'uploader_id': 'CindysMunchkin',
|
'uploader_id': 'CindysMunchkin',
|
||||||
'description': 're:© 2014 Munchkin the',
|
|
||||||
'uploader': 're:^Munchkin the',
|
'uploader': 're:^Munchkin the',
|
||||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'the-most-adorable-crash-landing-ever',
|
||||||
|
'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing',
|
||||||
|
'description': 'This gosling knows how to stick a landing.',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'md5': '763ca415512f91ca62e4621086900a23',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '971793786185728',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...',
|
||||||
|
'uploader': 'Calgary Outdoor Centre-University of Calgary',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'add_ie': ['Facebook'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -66,6 +84,10 @@ class BuzzFeedIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
entries.append(self.url_result(video['url']))
|
entries.append(self.url_result(video['url']))
|
||||||
|
|
||||||
|
facebook_url = FacebookIE._extract_url(webpage)
|
||||||
|
if facebook_url:
|
||||||
|
entries.append(self.url_result(facebook_url))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'id': playlist_id,
|
'id': playlist_id,
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -10,8 +9,10 @@ from ..compat import (
|
|||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_iso8601,
|
clean_html,
|
||||||
|
parse_duration,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -26,14 +27,14 @@ class CamdemyIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
|
'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'description': '',
|
|
||||||
'creator': 'ss11spring',
|
'creator': 'ss11spring',
|
||||||
|
'duration': 1591,
|
||||||
'upload_date': '20130114',
|
'upload_date': '20130114',
|
||||||
'timestamp': 1358154556,
|
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# With non-empty description
|
# With non-empty description
|
||||||
|
# webpage returns "No permission or not login"
|
||||||
'url': 'http://www.camdemy.com/media/13885',
|
'url': 'http://www.camdemy.com/media/13885',
|
||||||
'md5': '4576a3bb2581f86c61044822adbd1249',
|
'md5': '4576a3bb2581f86c61044822adbd1249',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -41,70 +42,77 @@ class CamdemyIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'EverCam + Camdemy QuickStart',
|
'title': 'EverCam + Camdemy QuickStart',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
|
'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
|
||||||
'creator': 'evercam',
|
'creator': 'evercam',
|
||||||
'upload_date': '20140620',
|
'duration': 318,
|
||||||
'timestamp': 1403271569,
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# External source
|
# External source (YouTube)
|
||||||
'url': 'http://www.camdemy.com/media/14842',
|
'url': 'http://www.camdemy.com/media/14842',
|
||||||
'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2vsYQzNIsJo',
|
'id': '2vsYQzNIsJo',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||||
|
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||||
'upload_date': '20130211',
|
'upload_date': '20130211',
|
||||||
'uploader': 'Hun Kim',
|
'uploader': 'Hun Kim',
|
||||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
|
||||||
'uploader_id': 'hunkimtutorials',
|
'uploader_id': 'hunkimtutorials',
|
||||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
},
|
||||||
}
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
page = self._download_webpage(url, video_id)
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
src_from = self._html_search_regex(
|
src_from = self._html_search_regex(
|
||||||
r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
|
r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
|
||||||
'external source', default=None)
|
webpage, 'external source', default=None, group='url')
|
||||||
if src_from:
|
if src_from:
|
||||||
return self.url_result(src_from)
|
return self.url_result(src_from)
|
||||||
|
|
||||||
oembed_obj = self._download_json(
|
oembed_obj = self._download_json(
|
||||||
'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
|
'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
|
||||||
|
|
||||||
|
title = oembed_obj['title']
|
||||||
thumb_url = oembed_obj['thumbnail_url']
|
thumb_url = oembed_obj['thumbnail_url']
|
||||||
video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
|
video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
|
||||||
file_list_doc = self._download_xml(
|
file_list_doc = self._download_xml(
|
||||||
compat_urlparse.urljoin(video_folder, 'fileList.xml'),
|
compat_urlparse.urljoin(video_folder, 'fileList.xml'),
|
||||||
video_id, 'Filelist XML')
|
video_id, 'Downloading filelist XML')
|
||||||
file_name = file_list_doc.find('./video/item/fileName').text
|
file_name = file_list_doc.find('./video/item/fileName').text
|
||||||
video_url = compat_urlparse.urljoin(video_folder, file_name)
|
video_url = compat_urlparse.urljoin(video_folder, file_name)
|
||||||
|
|
||||||
timestamp = parse_iso8601(self._html_search_regex(
|
# Some URLs return "No permission or not login" in a webpage despite being
|
||||||
r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
|
# freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
|
||||||
page, 'creation time', fatal=False),
|
upload_date = unified_strdate(self._search_regex(
|
||||||
delimiter=' ', timezone=datetime.timedelta(hours=8))
|
r'>published on ([^<]+)<', webpage,
|
||||||
view_count = str_to_int(self._html_search_regex(
|
'upload date', default=None))
|
||||||
r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
|
view_count = str_to_int(self._search_regex(
|
||||||
page, 'view count', fatal=False))
|
r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
|
||||||
|
webpage, 'view count', default=None))
|
||||||
|
description = self._html_search_meta(
|
||||||
|
'description', webpage, default=None) or clean_html(
|
||||||
|
oembed_obj.get('description'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': oembed_obj['title'],
|
'title': title,
|
||||||
'thumbnail': thumb_url,
|
'thumbnail': thumb_url,
|
||||||
'description': self._html_search_meta('description', page),
|
'description': description,
|
||||||
'creator': oembed_obj['author_name'],
|
'creator': oembed_obj.get('author_name'),
|
||||||
'duration': oembed_obj['duration'],
|
'duration': parse_duration(oembed_obj.get('duration')),
|
||||||
'timestamp': timestamp,
|
'upload_date': upload_date,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class CamdemyFolderIE(InfoExtractor):
|
class CamdemyFolderIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# links with trailing slash
|
# links with trailing slash
|
||||||
'url': 'http://www.camdemy.com/folder/450',
|
'url': 'http://www.camdemy.com/folder/450',
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor):
|
|||||||
(?:(?:www|m)\.)?canalplus\.fr|
|
(?:(?:www|m)\.)?canalplus\.fr|
|
||||||
(?:www\.)?piwiplus\.fr|
|
(?:www\.)?piwiplus\.fr|
|
||||||
(?:www\.)?d8\.tv|
|
(?:www\.)?d8\.tv|
|
||||||
|
(?:www\.)?c8\.fr|
|
||||||
(?:www\.)?d17\.tv|
|
(?:www\.)?d17\.tv|
|
||||||
(?:www\.)?itele\.fr
|
(?:www\.)?itele\.fr
|
||||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||||
@@ -35,6 +36,7 @@ class CanalplusIE(InfoExtractor):
|
|||||||
'canalplus': 'cplus',
|
'canalplus': 'cplus',
|
||||||
'piwiplus': 'teletoon',
|
'piwiplus': 'teletoon',
|
||||||
'd8': 'd8',
|
'd8': 'd8',
|
||||||
|
'c8': 'd8',
|
||||||
'd17': 'd17',
|
'd17': 'd17',
|
||||||
'itele': 'itele',
|
'itele': 'itele',
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import float_or_none
|
from ..utils import float_or_none
|
||||||
|
|
||||||
|
|
||||||
class CanvasIE(InfoExtractor):
|
class CanvasIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||||
'md5': 'ea838375a547ac787d4064d8c7860a6c',
|
'md5': 'ea838375a547ac787d4064d8c7860a6c',
|
||||||
@@ -38,22 +40,42 @@ class CanvasIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f',
|
||||||
|
'display_id': 'herbekijk-sorry-voor-alles',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Herbekijk Sorry voor alles',
|
||||||
|
'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 3788.06,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
site_id, display_id = mobj.group('site_id'), mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
title = self._search_regex(
|
title = (self._search_regex(
|
||||||
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
|
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
|
||||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
webpage, 'title', default=None) or self._og_search_title(
|
||||||
|
webpage)).strip()
|
||||||
|
|
||||||
video_id = self._html_search_regex(
|
video_id = self._html_search_regex(
|
||||||
r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id')
|
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
|
||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id)
|
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||||
|
% (site_id, video_id), display_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for target in data['targetUrls']:
|
for target in data['targetUrls']:
|
||||||
|
|||||||
88
youtube_dl/extractor/carambatv.py
Normal file
88
youtube_dl/extractor/carambatv.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CarambaTVIE(InfoExtractor):
    # Handles direct video1.carambatv.ru /v/<id> URLs as well as the internal
    # "carambatv:<id>" scheme.
    _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://video1.carambatv.ru/v/191910501',
        'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a',
        'info_dict': {
            'id': '191910501',
            'ext': 'mp4',
            'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
            'thumbnail': 're:^https?://.*\.jpg',
            'duration': 2678.31,
        },
    }, {
        'url': 'carambatv:191910501',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Per-video JSON descriptor: title, qualities, splash, annotations.
        info = self._download_json(
            'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id,
            video_id)

        title = info['title']

        # Prefer the base URL advertised by the descriptor; otherwise fall
        # back to the canonical per-video directory.
        base_url = info.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id

        formats = []
        for quality in info['qualities']:
            # Entries without a file name cannot be turned into a URL.
            if not quality.get('fn'):
                continue
            height = quality.get('height')
            formats.append({
                'url': base_url + quality['fn'],
                'height': int_or_none(height),
                'format_id': '%sp' % height if height else None,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': info.get('splash'),
            # Duration is read from the end time of the first annotation,
            # and only when that value is a string.
            'duration': float_or_none(try_get(
                info, lambda x: x['annotations'][0]['end_time'], compat_str)),
            'formats': formats,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class CarambaTVPageIE(InfoExtractor):
    # Resolves carambatv.ru article/movie pages to the underlying player
    # video and hands the result over to CarambaTVIE.
    _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
        # NOTE(review): empty checksum looks like a placeholder -- confirm
        # the expected md5 before relying on this test.
        'md5': '',
        'info_dict': {
            'id': '191910501',
            'ext': 'mp4',
            'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 2678.31,
        },
    }

    def _real_extract(self, url):
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)

        # Preferred source: the og:video:iframe property of the page.
        iframe_url = self._og_search_property('video:iframe', html, default=None)
        if iframe_url:
            return self.url_result(iframe_url, CarambaTVIE.ie_key())

        # Otherwise look for a numeric id assigned to video_id / crmb_vuid
        # in the page source and build the internal carambatv: URL from it.
        numeric_id = self._search_regex(
            r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)',
            html, 'video id')
        return self.url_result('carambatv:%s' % numeric_id, CarambaTVIE.ie_key())
|
||||||
42
youtube_dl/extractor/cartoonnetwork.py
Normal file
42
youtube_dl/extractor/cartoonnetwork.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .turner import TurnerBaseIE
|
||||||
|
|
||||||
|
|
||||||
|
class CartoonNetworkIE(TurnerBaseIE):
    # Extractor for cartoonnetwork.com clip and episode pages.
    _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
    _TEST = {
        'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
        'info_dict': {
            'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
            'ext': 'mp4',
            'title': 'Starfire the Cat Lady',
            'description': 'Robin decides to become a cat so that Starfire will finally love him.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page assigns either _cnglobal.cvpVideoId or _cnglobal.cvpTitleId.
        # Go through _search_regex rather than re.search(...).groups() so that
        # a page without the marker is reported as a failed extraction instead
        # of an AttributeError on None.
        # NOTE(review): relies on _search_regex raising when neither
        # default= nor fatal=False is given -- confirm.
        id_re = r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';"
        id_type = self._search_regex(id_re, webpage, 'id type', group=1)
        video_id = self._search_regex(id_re, webpage, 'video id', group=2)

        # The playlist service takes "id" for video ids and "titleId" for
        # title ids.
        query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id

        return self._extract_cvp_info(
            'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
                'secure': {
                    'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
                    'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
                },
            }, {
                'url': url,
                'site_name': 'CartoonNetwork',
                # Treated as not requiring auth when the flag is absent.
                'auth_required': self._search_regex(
                    r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
                    webpage, 'auth required', default='false') == 'true',
            })
|
||||||
@@ -4,13 +4,24 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
js_to_json,
|
js_to_json,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
try_get,
|
||||||
|
xpath_text,
|
||||||
|
xpath_element,
|
||||||
|
xpath_with_ns,
|
||||||
|
find_xpath_attr,
|
||||||
|
parse_iso8601,
|
||||||
|
parse_age_limit,
|
||||||
|
int_or_none,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CBCIE(InfoExtractor):
|
class CBCIE(InfoExtractor):
|
||||||
|
IE_NAME = 'cbc.ca'
|
||||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# with mediaId
|
# with mediaId
|
||||||
@@ -25,8 +36,22 @@ class CBCIE(InfoExtractor):
|
|||||||
'upload_date': '20160203',
|
'upload_date': '20160203',
|
||||||
'uploader': 'CBCC-NEW',
|
'uploader': 'CBCC-NEW',
|
||||||
},
|
},
|
||||||
|
'skip': 'Geo-restricted to Canada',
|
||||||
}, {
|
}, {
|
||||||
# with clipId
|
# with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
|
||||||
|
'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
|
||||||
|
'md5': '162adfa070274b144f4fdc3c3b8207db',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2414435309',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '22 Minutes Update: What Not To Wear Quebec',
|
||||||
|
'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
|
||||||
|
'upload_date': '20131025',
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
|
'timestamp': 1382717907,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# with clipId, feed only available via tpfeed.cbc.ca
|
||||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||||
'md5': '0274a90b51a9b4971fe005c63f592f12',
|
'md5': '0274a90b51a9b4971fe005c63f592f12',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -64,6 +89,7 @@ class CBCIE(InfoExtractor):
|
|||||||
'uploader': 'CBCC-NEW',
|
'uploader': 'CBCC-NEW',
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
|
'skip': 'Geo-restricted to Canada',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -81,9 +107,15 @@ class CBCIE(InfoExtractor):
|
|||||||
media_id = player_info.get('mediaId')
|
media_id = player_info.get('mediaId')
|
||||||
if not media_id:
|
if not media_id:
|
||||||
clip_id = player_info['clipId']
|
clip_id = player_info['clipId']
|
||||||
media_id = self._download_json(
|
feed = self._download_json(
|
||||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
|
||||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
clip_id, fatal=False)
|
||||||
|
if feed:
|
||||||
|
media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
|
||||||
|
if not media_id:
|
||||||
|
media_id = self._download_json(
|
||||||
|
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||||
|
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||||
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||||
else:
|
else:
|
||||||
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
||||||
@@ -91,6 +123,7 @@ class CBCIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class CBCPlayerIE(InfoExtractor):
|
class CBCPlayerIE(InfoExtractor):
|
||||||
|
IE_NAME = 'cbc.ca:player'
|
||||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||||
@@ -104,6 +137,7 @@ class CBCPlayerIE(InfoExtractor):
|
|||||||
'upload_date': '20160210',
|
'upload_date': '20160210',
|
||||||
'uploader': 'CBCC-NEW',
|
'uploader': 'CBCC-NEW',
|
||||||
},
|
},
|
||||||
|
'skip': 'Geo-restricted to Canada',
|
||||||
}, {
|
}, {
|
||||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||||
@@ -143,3 +177,165 @@ class CBCPlayerIE(InfoExtractor):
|
|||||||
}),
|
}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CBCWatchBaseIE(InfoExtractor):
    # Device credentials sent with every API call; filled in by
    # _real_initialize from the cache or by registering a new device.
    _device_id = None
    _device_token = None
    _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/'
    # Prefix -> namespace URI map used to expand namespaced XPath expressions.
    _NS_MAP = {
        'media': 'http://search.yahoo.com/mrss/',
        'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
    }

    def _call_api(self, path, video_id):
        # Absolute URLs are requested verbatim; anything else is resolved
        # against the API base URL.
        if path.startswith('http'):
            api_url = path
        else:
            api_url = self._API_BASE_URL + path
        credentials = {
            'X-Clearleap-DeviceId': self._device_id,
            'X-Clearleap-DeviceToken': self._device_token,
        }
        result = self._download_xml(api_url, video_id, headers=credentials)

        # A userMessage or systemMessage element in the response is treated
        # as an error report, with userMessage taking precedence.
        error_message = xpath_text(result, 'userMessage')
        if not error_message:
            error_message = xpath_text(result, 'systemMessage')
        if error_message:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
        return result

    def _real_initialize(self):
        # Nothing to do when both credentials are already known.
        if self._device_id and self._device_token:
            return

        # Try the credentials cached by a previous run first.
        device = self._downloader.cache.load('cbcwatch', 'device') or {}
        self._device_id, self._device_token = device.get('id'), device.get('token')
        if self._device_id and self._device_token:
            return

        # No usable cache entry: register a new "web" device and cache the
        # returned id/token pair.
        registration = self._download_xml(
            self._API_BASE_URL + 'device/register',
            None, data=b'<device><type>web</type></device>')
        self._device_id = xpath_text(registration, 'deviceId', fatal=True)
        self._device_token = xpath_text(registration, 'deviceToken', fatal=True)
        self._downloader.cache.store(
            'cbcwatch', 'device', {
                'id': self._device_id,
                'token': self._device_token,
            })

    def _parse_rss_feed(self, rss):
        # Turns every <item> of the feed's <channel> into a url_transparent
        # entry for CBCWatchVideo and wraps them in a playlist result.
        channel = xpath_element(rss, 'channel', fatal=True)

        def ns(path):
            return xpath_with_ns(path, self._NS_MAP)

        entries = []
        for item in channel.findall('item'):
            guid = xpath_text(item, 'guid', fatal=True)
            title = xpath_text(item, 'title', fatal=True)

            media_group = xpath_element(item, ns('media:group'), fatal=True)
            content = xpath_element(media_group, ns('media:content'), fatal=True)
            content_url = content.attrib['url']

            # Thumbnails without a url attribute are dropped.
            thumbnails = [{
                'id': thumb.get('profile'),
                'url': thumb.get('url'),
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            } for thumb in media_group.findall(ns('media:thumbnail'))
                if thumb.get('url')]

            # The release date is carried by a media:credit element whose
            # role attribute is "releaseDate".
            release_date = find_xpath_attr(
                item, ns('media:credit'), 'role', 'releaseDate')
            timestamp = (
                None if release_date is None
                else parse_iso8601(release_date.text))

            entries.append({
                '_type': 'url_transparent',
                'url': content_url,
                'id': guid,
                'title': title,
                'description': xpath_text(item, 'description'),
                'timestamp': timestamp,
                'duration': int_or_none(content.get('duration')),
                'age_limit': parse_age_limit(xpath_text(item, ns('media:rating'))),
                'episode': xpath_text(item, ns('clearleap:episode')),
                'episode_number': int_or_none(xpath_text(item, ns('clearleap:episodeInSeason'))),
                'series': xpath_text(item, ns('clearleap:series')),
                'season_number': int_or_none(xpath_text(item, ns('clearleap:season'))),
                'thumbnails': thumbnails,
                'ie_key': 'CBCWatchVideo',
            })

        playlist_id = xpath_text(channel, 'guid')
        playlist_title = xpath_text(channel, 'title')
        playlist_description = xpath_text(channel, 'description')
        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
|
|
||||||
|
class CBCWatchVideoIE(CBCWatchBaseIE):
    # Extractor for the clearleap "web/play" API URLs that _parse_rss_feed
    # emits as entry URLs.
    IE_NAME = 'cbc.ca:watch:video'
    _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        result = self._call_api(url, video_id)

        m3u8_url = xpath_text(result, 'url', fatal=True)
        # First try the manifest named after its parent directory
        # (/<dir>/<name>.m3u8 -> /<dir>/<dir>.m3u8); presumably it lists more
        # variants. Fall back to the advertised URL when that yields fewer
        # than two formats.
        formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
        if len(formats) < 2:
            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
        # Despite metadata in m3u8 all video+audio formats are
        # actually video-only (no audio)
        for f in formats:
            if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
                f['acodec'] = 'none'
        self._sort_formats(formats)

        # Minimal result; the id doubles as title until feed metadata
        # (if any) overrides it below.
        info = {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }

        rss = xpath_element(result, 'rss')
        # Check for None and children explicitly: truth-testing an
        # ElementTree Element is deprecated and only reflects whether it has
        # child elements.
        if rss is not None and len(rss):
            entries = self._parse_rss_feed(rss)['entries']
            # A feed without items carries no metadata to merge; keep the
            # formats extracted above instead of failing with IndexError.
            if entries:
                info.update(entries[0])
                # Drop the url_transparent routing keys so the result is a
                # plain video info dict.
                for key in ('url', '_type', 'ie_key'):
                    del info[key]
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class CBCWatchIE(CBCWatchBaseIE):
    # Extractor for watch.cbc.ca pages; the trailing id of the URL is looked
    # up through the "web/browse" API endpoint.
    IE_NAME = 'cbc.ca:watch'
    _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
    _TESTS = [{
        'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
        'info_dict': {
            'id': '38e815a-009e3ab12e4',
            'ext': 'mp4',
            'title': 'Customer (Dis)Service',
            'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
            'upload_date': '20160219',
            'timestamp': 1455840000,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
            'format': 'bestvideo',
        },
        'skip': 'Geo-restricted to Canada',
    }, {
        'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
        'info_dict': {
            'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
            'title': 'Arthur',
            'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
        },
        'playlist_mincount': 30,
        'skip': 'Geo-restricted to Canada',
    }]

    def _real_extract(self, url):
        content_id = self._match_id(url)
        # The browse endpoint answers with an RSS document that the shared
        # feed parser turns into a playlist result.
        browse_path = 'web/browse/' + content_id
        feed = self._call_api(browse_path, content_id)
        return self._parse_rss_feed(feed)
|
||||||
|
|||||||
@@ -1,17 +1,16 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
from .theplatform import ThePlatformFeedIE
|
||||||
|
|
||||||
from .theplatform import ThePlatformIE
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
xpath_text,
|
|
||||||
xpath_element,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
|
xpath_element,
|
||||||
|
xpath_text,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CBSBaseIE(ThePlatformIE):
|
class CBSBaseIE(ThePlatformFeedIE):
|
||||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||||
closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
|
closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
|
||||||
return {
|
return {
|
||||||
@@ -23,13 +22,12 @@ class CBSBaseIE(ThePlatformIE):
|
|||||||
|
|
||||||
|
|
||||||
class CBSIE(CBSBaseIE):
|
class CBSIE(CBSBaseIE):
|
||||||
_VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
|
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
|
'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
|
||||||
'display_id': 'connect-chat-feat-garth-brooks',
|
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Connect Chat feat. Garth Brooks',
|
'title': 'Connect Chat feat. Garth Brooks',
|
||||||
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
||||||
@@ -39,22 +37,7 @@ class CBSIE(CBSBaseIE):
|
|||||||
'uploader': 'CBSI-NEW',
|
'uploader': 'CBSI-NEW',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# m3u8 download
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'_skip': 'Blocked outside the US',
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'WWF_5KqY3PK1',
|
|
||||||
'display_id': 'st-vincent',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Live on Letterman - St. Vincent',
|
|
||||||
'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
|
|
||||||
'duration': 3221,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'_skip': 'Blocked outside the US',
|
'_skip': 'Blocked outside the US',
|
||||||
@@ -65,40 +48,42 @@ class CBSIE(CBSBaseIE):
|
|||||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _extract_video_info(self, content_id):
|
||||||
content_id, display_id = re.match(self._VALID_URL, url).groups()
|
|
||||||
if not content_id:
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
content_id = self._search_regex(
|
|
||||||
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
|
||||||
webpage, 'content id')
|
|
||||||
items_data = self._download_xml(
|
items_data = self._download_xml(
|
||||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
||||||
video_data = xpath_element(items_data, './/item')
|
video_data = xpath_element(items_data, './/item')
|
||||||
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
||||||
|
tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id
|
||||||
|
tp_release_url = 'http://link.theplatform.com/s/' + tp_path
|
||||||
|
|
||||||
|
asset_types = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
formats = []
|
formats = []
|
||||||
for item in items_data.findall('.//item'):
|
for item in items_data.findall('.//item'):
|
||||||
pid = xpath_text(item, 'pid')
|
asset_type = xpath_text(item, 'assetType')
|
||||||
if not pid:
|
if not asset_type or asset_type in asset_types:
|
||||||
continue
|
continue
|
||||||
tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid
|
asset_types.append(asset_type)
|
||||||
if '.m3u8' in xpath_text(item, 'contentUrl', default=''):
|
query = {
|
||||||
tp_release_url += '&manifest=m3u'
|
'mbr': 'true',
|
||||||
|
'assetTypes': asset_type,
|
||||||
|
}
|
||||||
|
if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
|
||||||
|
query['formats'] = 'MPEG4,M3U'
|
||||||
|
elif asset_type in ('RTMP', 'WIFI', '3G'):
|
||||||
|
query['formats'] = 'MPEG4,FLV'
|
||||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||||
tp_release_url, content_id, 'Downloading %s SMIL data' % pid)
|
update_url_query(tp_release_url, query), content_id,
|
||||||
|
'Downloading %s SMIL data' % asset_type)
|
||||||
formats.extend(tp_formats)
|
formats.extend(tp_formats)
|
||||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id)
|
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||||
info.update({
|
info.update({
|
||||||
'id': content_id,
|
'id': content_id,
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'series': xpath_text(video_data, 'seriesTitle'),
|
'series': xpath_text(video_data, 'seriesTitle'),
|
||||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||||
@@ -109,3 +94,7 @@ class CBSIE(CBSBaseIE):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
})
|
})
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
content_id = self._match_id(url)
|
||||||
|
return self._extract_video_info(content_id)
|
||||||
|
|||||||
@@ -80,9 +80,6 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
|
|
||||||
media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
|
media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
if site == 'cnet':
|
|
||||||
formats, subtitles = self._extract_theplatform_smil(
|
|
||||||
self.TP_RELEASE_URL_TEMPLATE % media_guid_path, video_id)
|
|
||||||
for (fkey, vid) in vdata['files'].items():
|
for (fkey, vid) in vdata['files'].items():
|
||||||
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
||||||
continue
|
continue
|
||||||
@@ -94,7 +91,7 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info = self.get_metadata('kYEXFC/%s' % media_guid_path, video_id)
|
info = self._extract_theplatform_metadata('kYEXFC/%s' % media_guid_path, video_id)
|
||||||
info.update({
|
info.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
|
|||||||
@@ -1,12 +1,10 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import calendar
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
from .anvato import AnvatoIE
|
from .anvato import AnvatoIE
|
||||||
from .sendtonews import SendtoNewsIE
|
from .sendtonews import SendtoNewsIE
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
|
from ..utils import unified_timestamp
|
||||||
|
|
||||||
|
|
||||||
class CBSLocalIE(AnvatoIE):
|
class CBSLocalIE(AnvatoIE):
|
||||||
@@ -43,13 +41,8 @@ class CBSLocalIE(AnvatoIE):
|
|||||||
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
|
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'GxfCe0Zo7D-175909-5588',
|
'id': 'GxfCe0Zo7D-175909-5588',
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Recap: CLE 15, CIN 6',
|
|
||||||
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
|
|
||||||
'upload_date': '20160516',
|
|
||||||
'timestamp': 1463433840,
|
|
||||||
'duration': 49,
|
|
||||||
},
|
},
|
||||||
|
'playlist_count': 9,
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@@ -62,19 +55,15 @@ class CBSLocalIE(AnvatoIE):
|
|||||||
|
|
||||||
sendtonews_url = SendtoNewsIE._extract_url(webpage)
|
sendtonews_url = SendtoNewsIE._extract_url(webpage)
|
||||||
if sendtonews_url:
|
if sendtonews_url:
|
||||||
info_dict = {
|
return self.url_result(
|
||||||
'_type': 'url_transparent',
|
compat_urlparse.urljoin(url, sendtonews_url),
|
||||||
'url': compat_urlparse.urljoin(url, sendtonews_url),
|
ie=SendtoNewsIE.ie_key())
|
||||||
}
|
|
||||||
else:
|
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
|
||||||
|
|
||||||
time_str = self._html_search_regex(
|
time_str = self._html_search_regex(
|
||||||
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
||||||
timestamp = None
|
timestamp = unified_timestamp(time_str)
|
||||||
if time_str:
|
|
||||||
timestamp = calendar.timegm(datetime.datetime.strptime(
|
|
||||||
time_str, '%b %d, %Y %I:%M %p').timetuple())
|
|
||||||
|
|
||||||
info_dict.update({
|
info_dict.update({
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
|
|||||||
@@ -2,13 +2,13 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .cbs import CBSBaseIE
|
from .cbs import CBSIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CBSNewsIE(CBSBaseIE):
|
class CBSNewsIE(CBSIE):
|
||||||
IE_DESC = 'CBS News'
|
IE_DESC = 'CBS News'
|
||||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||||
|
|
||||||
@@ -26,13 +26,18 @@ class CBSNewsIE(CBSBaseIE):
|
|||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Subscribers only',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
|
'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
|
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
|
||||||
|
'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
|
||||||
|
'upload_date': '20140404',
|
||||||
|
'timestamp': 1396650660,
|
||||||
|
'uploader': 'CBSI-NEW',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'duration': 205,
|
'duration': 205,
|
||||||
'subtitles': {
|
'subtitles': {
|
||||||
@@ -58,36 +63,15 @@ class CBSNewsIE(CBSBaseIE):
|
|||||||
webpage, 'video JSON info'), video_id)
|
webpage, 'video JSON info'), video_id)
|
||||||
|
|
||||||
item = video_info['item'] if 'item' in video_info else video_info
|
item = video_info['item'] if 'item' in video_info else video_info
|
||||||
title = item.get('articleTitle') or item.get('hed')
|
guid = item['mpxRefId']
|
||||||
duration = item.get('duration')
|
return self._extract_video_info(guid)
|
||||||
thumbnail = item.get('mediaImage') or item.get('thumbnail')
|
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
formats = []
|
|
||||||
for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
|
|
||||||
pid = item.get('media' + format_id)
|
|
||||||
if not pid:
|
|
||||||
continue
|
|
||||||
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid
|
|
||||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
|
|
||||||
formats.extend(tp_formats)
|
|
||||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||||
IE_DESC = 'CBS News Live Videos'
|
IE_DESC = 'CBS News Live Videos'
|
||||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||||
|
|
||||||
|
# Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -96,6 +80,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
|||||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||||
'duration': 334,
|
'duration': 334,
|
||||||
},
|
},
|
||||||
|
'skip': 'Video gone',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -1,30 +1,31 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
from .cbs import CBSBaseIE
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class CBSSportsIE(InfoExtractor):
|
class CBSSportsIE(CBSBaseIE):
|
||||||
_VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
|
'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '_d5_GbO8p1sT',
|
'id': '708337219968',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'US Open flashbacks: 1990s',
|
'title': 'Ben Simmons the next LeBron? Not so fast',
|
||||||
'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
|
'description': 'md5:854294f627921baba1f4b9a990d87197',
|
||||||
|
'timestamp': 1466293740,
|
||||||
|
'upload_date': '20160618',
|
||||||
|
'uploader': 'CBSI-NEW',
|
||||||
},
|
},
|
||||||
}
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_video_info(self, filter_query, video_id):
|
||||||
|
return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
section = mobj.group('section')
|
return self._extract_video_info('byId=%s' % video_id, video_id)
|
||||||
video_id = mobj.group('id')
|
|
||||||
all_videos = self._download_json(
|
|
||||||
'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
|
|
||||||
video_id)
|
|
||||||
# The json file contains the info of all the videos in the section
|
|
||||||
video_info = next(v for v in all_videos if v['pcid'] == video_id)
|
|
||||||
return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
|
|
||||||
|
|||||||
53
youtube_dl/extractor/cctv.py
Normal file
53
youtube_dl/extractor/cctv.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import float_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class CCTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)https?://(?:.+?\.)?
|
||||||
|
(?:
|
||||||
|
cctv\.(?:com|cn)|
|
||||||
|
cntv\.cn
|
||||||
|
)/
|
||||||
|
(?:
|
||||||
|
video/[^/]+/(?P<id>[0-9a-f]{32})|
|
||||||
|
\d{4}/\d{2}/\d{2}/(?P<display_id>VID[0-9A-Za-z]+)
|
||||||
|
)'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
|
||||||
|
'md5': '819c7b49fc3927d529fb4cd555621823',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '454368eb19ad44a1925bf1eb96140a61',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44',
|
||||||
|
'only_matching': True
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
if not video_id:
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})',
|
||||||
|
webpage, 'video_id')
|
||||||
|
api_data = self._download_json(
|
||||||
|
'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id)
|
||||||
|
m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url'])
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': api_data['title'],
|
||||||
|
'formats': self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False),
|
||||||
|
'duration': float_or_none(api_data.get('video', {}).get('totalLength')),
|
||||||
|
}
|
||||||
@@ -58,7 +58,8 @@ class CDAIE(InfoExtractor):
|
|||||||
def extract_format(page, version):
|
def extract_format(page, version):
|
||||||
unpacked = decode_packed_codes(page)
|
unpacked = decode_packed_codes(page)
|
||||||
format_url = self._search_regex(
|
format_url = self._search_regex(
|
||||||
r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False)
|
r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
|
||||||
|
'%s url' % version, fatal=False, group='url')
|
||||||
if not format_url:
|
if not format_url:
|
||||||
return
|
return
|
||||||
f = {
|
f = {
|
||||||
@@ -75,7 +76,8 @@ class CDAIE(InfoExtractor):
|
|||||||
info_dict['formats'].append(f)
|
info_dict['formats'].append(f)
|
||||||
if not info_dict['duration']:
|
if not info_dict['duration']:
|
||||||
info_dict['duration'] = parse_duration(self._search_regex(
|
info_dict['duration'] = parse_duration(self._search_regex(
|
||||||
r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False))
|
r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
|
||||||
|
unpacked, 'duration', fatal=False, group='duration'))
|
||||||
|
|
||||||
extract_format(webpage, 'default')
|
extract_format(webpage, 'default')
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CeskaTelevizeIE(InfoExtractor):
|
class CeskaTelevizeIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|||||||
51
youtube_dl/extractor/charlierose.py
Normal file
51
youtube_dl/extractor/charlierose.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import remove_end
|
||||||
|
|
||||||
|
|
||||||
|
class CharlieRoseIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://charlierose.com/videos/27996',
|
||||||
|
'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '27996',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Remembering Zaha Hadid',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg\?\d+',
|
||||||
|
'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.',
|
||||||
|
'subtitles': {
|
||||||
|
'en': [{
|
||||||
|
'ext': 'vtt',
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://charlierose.com/videos/27996',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_PLAYER_BASE = 'https://charlierose.com/video/player/%s'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id)
|
||||||
|
|
||||||
|
title = remove_end(self._og_search_title(webpage), ' - Charlie Rose')
|
||||||
|
|
||||||
|
info_dict = self._parse_html5_media_entries(
|
||||||
|
self._PLAYER_BASE % video_id, webpage, video_id,
|
||||||
|
m3u8_entry_protocol='m3u8_native')[0]
|
||||||
|
|
||||||
|
self._sort_formats(info_dict['formats'])
|
||||||
|
self._remove_duplicate_formats(info_dict['formats'])
|
||||||
|
|
||||||
|
info_dict.update({
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
})
|
||||||
|
|
||||||
|
return info_dict
|
||||||
@@ -17,7 +17,8 @@ class ChaturbateIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
'skip': 'Room is offline',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://en.chaturbate.com/siswet19/',
|
'url': 'https://en.chaturbate.com/siswet19/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|||||||
@@ -1,30 +1,33 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import parse_duration
|
||||||
parse_duration,
|
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ChirbitIE(InfoExtractor):
|
class ChirbitIE(InfoExtractor):
|
||||||
IE_NAME = 'chirbit'
|
IE_NAME = 'chirbit'
|
||||||
_VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
|
_VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://chirb.it/PrIPv5',
|
'url': 'http://chirb.it/be2abG',
|
||||||
'md5': '9847b0dad6ac3e074568bf2cfb197de8',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'PrIPv5',
|
'id': 'be2abG',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Фасадстрой',
|
'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
|
||||||
'duration': 52,
|
'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
|
||||||
'view_count': int,
|
'duration': 306,
|
||||||
'comment_count': int,
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
|
'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://chirb.it/wp/MN58c2',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -33,33 +36,36 @@ class ChirbitIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://chirb.it/%s' % audio_id, audio_id)
|
'http://chirb.it/%s' % audio_id, audio_id)
|
||||||
|
|
||||||
audio_url = self._search_regex(
|
data_fd = self._search_regex(
|
||||||
r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
|
r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'data fd', group='url')
|
||||||
|
|
||||||
|
# Reverse engineered from https://chirb.it/js/chirbit.player.js (look
|
||||||
|
# for soundURL)
|
||||||
|
audio_url = base64.b64decode(
|
||||||
|
data_fd[::-1].encode('ascii')).decode('utf-8')
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._search_regex(
|
||||||
r'itemprop="name">([^<]+)', webpage, 'title')
|
r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')
|
||||||
duration = parse_duration(self._html_search_meta(
|
description = self._search_regex(
|
||||||
'duration', webpage, 'duration', fatal=False))
|
r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>',
|
||||||
view_count = int_or_none(self._search_regex(
|
webpage, 'description', default=None)
|
||||||
r'itemprop="playCount"\s*>(\d+)', webpage,
|
duration = parse_duration(self._search_regex(
|
||||||
'listen count', fatal=False))
|
r'class=["\']c-length["\'][^>]*>([^<]+)',
|
||||||
comment_count = int_or_none(self._search_regex(
|
webpage, 'duration', fatal=False))
|
||||||
r'>(\d+) Comments?:', webpage,
|
|
||||||
'comment count', fatal=False))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': audio_id,
|
'id': audio_id,
|
||||||
'url': audio_url,
|
'url': audio_url,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'description': description,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
|
||||||
'comment_count': comment_count,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ChirbitProfileIE(InfoExtractor):
|
class ChirbitProfileIE(InfoExtractor):
|
||||||
IE_NAME = 'chirbit:profile'
|
IE_NAME = 'chirbit:profile'
|
||||||
_VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://chirbit.com/ScarletBeauty',
|
'url': 'http://chirbit.com/ScarletBeauty',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class CliphunterIE(InfoExtractor):
|
|||||||
(?P<id>[0-9]+)/
|
(?P<id>[0-9]+)/
|
||||||
(?P<seo>.+?)(?:$|[#\?])
|
(?P<seo>.+?)(?:$|[#\?])
|
||||||
'''
|
'''
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
||||||
'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
|
'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -32,8 +32,19 @@ class CliphunterIE(InfoExtractor):
|
|||||||
'title': 'Fun Jynx Maze solo',
|
'title': 'Fun Jynx Maze solo',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
},
|
||||||
}
|
'skip': 'Video gone',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
|
||||||
|
'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2019449',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'age_limit': 18,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|||||||
@@ -1,16 +1,10 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .onet import OnetBaseIE
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
|
||||||
parse_iso8601,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ClipRsIE(InfoExtractor):
|
class ClipRsIE(OnetBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
|
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
|
'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
|
||||||
@@ -27,64 +21,13 @@ class ClipRsIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
video_id = self._search_regex(
|
mvp_id = self._search_mvp_id(webpage)
|
||||||
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
|
|
||||||
|
|
||||||
response = self._download_json(
|
info_dict = self._extract_from_id(mvp_id, webpage)
|
||||||
'http://qi.ckm.onetapi.pl/', video_id,
|
info_dict['display_id'] = display_id
|
||||||
query={
|
|
||||||
'body[id]': video_id,
|
|
||||||
'body[jsonrpc]': '2.0',
|
|
||||||
'body[method]': 'get_asset_detail',
|
|
||||||
'body[params][ID_Publikacji]': video_id,
|
|
||||||
'body[params][Service]': 'www.onet.pl',
|
|
||||||
'content-type': 'application/jsonp',
|
|
||||||
'x-onet-app': 'player.front.onetapi.pl',
|
|
||||||
})
|
|
||||||
|
|
||||||
error = response.get('error')
|
return info_dict
|
||||||
if error:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s said: %s' % (self.IE_NAME, error['message']), expected=True)
|
|
||||||
|
|
||||||
video = response['result'].get('0')
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for _, formats_dict in video['formats'].items():
|
|
||||||
if not isinstance(formats_dict, dict):
|
|
||||||
continue
|
|
||||||
for format_id, format_list in formats_dict.items():
|
|
||||||
if not isinstance(format_list, list):
|
|
||||||
continue
|
|
||||||
for f in format_list:
|
|
||||||
if not f.get('url'):
|
|
||||||
continue
|
|
||||||
formats.append({
|
|
||||||
'url': f['url'],
|
|
||||||
'format_id': format_id,
|
|
||||||
'height': int_or_none(f.get('vertical_resolution')),
|
|
||||||
'width': int_or_none(f.get('horizontal_resolution')),
|
|
||||||
'abr': float_or_none(f.get('audio_bitrate')),
|
|
||||||
'vbr': float_or_none(f.get('video_bitrate')),
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
meta = video.get('meta', {})
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None) or meta['title']
|
|
||||||
description = self._og_search_description(webpage, default=None) or meta.get('description')
|
|
||||||
duration = meta.get('length') or meta.get('lenght')
|
|
||||||
timestamp = parse_iso8601(meta.get('addDate'), ' ')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'duration': duration,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|||||||
92
youtube_dl/extractor/closertotruth.py
Normal file
92
youtube_dl/extractor/closertotruth.py
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class CloserToTruthIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0_zof1ktre',
|
||||||
|
'display_id': 'solutions-the-mind-body-problem',
|
||||||
|
'ext': 'mov',
|
||||||
|
'title': 'Solutions to the Mind-Body Problem?',
|
||||||
|
'upload_date': '20140221',
|
||||||
|
'timestamp': 1392956007,
|
||||||
|
'uploader_id': 'CTTXML'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://closertotruth.com/episodes/how-do-brains-work',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0_iuxai6g6',
|
||||||
|
'display_id': 'how-do-brains-work',
|
||||||
|
'ext': 'mov',
|
||||||
|
'title': 'How do Brains Work?',
|
||||||
|
'upload_date': '20140221',
|
||||||
|
'timestamp': 1392956024,
|
||||||
|
'uploader_id': 'CTTXML'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://closertotruth.com/interviews/1725',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1725',
|
||||||
|
'title': 'AyaFr-002',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 2,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
partner_id = self._search_regex(
|
||||||
|
r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
|
||||||
|
webpage, 'kaltura partner_id')
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
|
||||||
|
|
||||||
|
select = self._search_regex(
|
||||||
|
r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
|
||||||
|
webpage, 'select version', default=None)
|
||||||
|
if select:
|
||||||
|
entry_ids = set()
|
||||||
|
entries = []
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
|
||||||
|
webpage):
|
||||||
|
entry_id = mobj.group('id')
|
||||||
|
if entry_id in entry_ids:
|
||||||
|
continue
|
||||||
|
entry_ids.add(entry_id)
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'kaltura:%s:%s' % (partner_id, entry_id),
|
||||||
|
'ie_key': 'Kaltura',
|
||||||
|
'title': mobj.group('title'),
|
||||||
|
})
|
||||||
|
if entries:
|
||||||
|
return self.playlist_result(entries, display_id, title)
|
||||||
|
|
||||||
|
entry_id = self._search_regex(
|
||||||
|
r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
|
||||||
|
webpage, 'kaltura entry_id', group='id')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'display_id': display_id,
|
||||||
|
'url': 'kaltura:%s:%s' % (partner_id, entry_id),
|
||||||
|
'ie_key': 'Kaltura',
|
||||||
|
'title': title
|
||||||
|
}
|
||||||
@@ -6,7 +6,6 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -17,37 +16,26 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CloudyIE(InfoExtractor):
|
class CloudyIE(InfoExtractor):
|
||||||
_IE_DESC = 'cloudy.ec and videoraj.ch'
|
_IE_DESC = 'cloudy.ec'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/
|
https?://(?:www\.)?cloudy\.ec/
|
||||||
(?:v/|embed\.php\?id=)
|
(?:v/|embed\.php\?id=)
|
||||||
(?P<id>[A-Za-z0-9]+)
|
(?P<id>[A-Za-z0-9]+)
|
||||||
'''
|
'''
|
||||||
_EMBED_URL = 'http://www.%s/embed.php?id=%s'
|
_EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
|
||||||
_API_URL = 'http://www.%s/api/player.api.php?%s'
|
_API_URL = 'http://www.cloudy.ec/api/player.api.php'
|
||||||
_MAX_TRIES = 2
|
_MAX_TRIES = 2
|
||||||
_TESTS = [
|
_TEST = {
|
||||||
{
|
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
'md5': '5cb253ace826a42f35b4740539bedf07',
|
||||||
'md5': '5cb253ace826a42f35b4740539bedf07',
|
'info_dict': {
|
||||||
'info_dict': {
|
'id': 'af511e2527aac',
|
||||||
'id': 'af511e2527aac',
|
'ext': 'flv',
|
||||||
'ext': 'flv',
|
'title': 'Funny Cats and Animals Compilation june 2013',
|
||||||
'title': 'Funny Cats and Animals Compilation june 2013',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www.videoraj.to/v/47f399fd8bb60',
|
|
||||||
'md5': '7d0f8799d91efd4eda26587421c3c3b0',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '47f399fd8bb60',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?',
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
]
|
}
|
||||||
|
|
||||||
def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
|
def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
|
||||||
|
|
||||||
if try_num > self._MAX_TRIES - 1:
|
if try_num > self._MAX_TRIES - 1:
|
||||||
raise ExtractorError('Unable to extract video URL', expected=True)
|
raise ExtractorError('Unable to extract video URL', expected=True)
|
||||||
@@ -64,9 +52,8 @@ class CloudyIE(InfoExtractor):
|
|||||||
'errorUrl': error_url,
|
'errorUrl': error_url,
|
||||||
})
|
})
|
||||||
|
|
||||||
data_url = self._API_URL % (video_host, compat_urllib_parse_urlencode(form))
|
|
||||||
player_data = self._download_webpage(
|
player_data = self._download_webpage(
|
||||||
data_url, video_id, 'Downloading player data')
|
self._API_URL, video_id, 'Downloading player data', query=form)
|
||||||
data = compat_parse_qs(player_data)
|
data = compat_parse_qs(player_data)
|
||||||
|
|
||||||
try_num += 1
|
try_num += 1
|
||||||
@@ -88,7 +75,7 @@ class CloudyIE(InfoExtractor):
|
|||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
|
||||||
self.report_warning('Invalid video URL, requesting another', video_id)
|
self.report_warning('Invalid video URL, requesting another', video_id)
|
||||||
return self._extract_video(video_host, video_id, file_key, video_url, try_num)
|
return self._extract_video(video_id, file_key, video_url, try_num)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -98,14 +85,13 @@ class CloudyIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_host = mobj.group('host')
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
url = self._EMBED_URL % (video_host, video_id)
|
url = self._EMBED_URL % video_id
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
file_key = self._search_regex(
|
file_key = self._search_regex(
|
||||||
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
|
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
|
||||||
webpage, 'file_key')
|
webpage, 'file_key')
|
||||||
|
|
||||||
return self._extract_video(video_host, video_id, file_key)
|
return self._extract_video(video_id, file_key)
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mtv import MTVIE
|
from .mtv import MTVIE
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class CMTIE(MTVIE):
|
class CMTIE(MTVIE):
|
||||||
IE_NAME = 'cmt.com'
|
IE_NAME = 'cmt.com'
|
||||||
_VALID_URL = r'https?://www\.cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
|
||||||
_FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
|
_FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -16,7 +18,27 @@ class CMTIE(MTVIE):
|
|||||||
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||||
'description': 'Blame It All On My Roots',
|
'description': 'Blame It All On My Roots',
|
||||||
},
|
},
|
||||||
|
'skip': 'Video not available',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908',
|
||||||
|
'md5': 'e61a801ca4a183a466c08bd98dccbb1c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1504699',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Still The King Ep. 109 in 3 Minutes',
|
||||||
|
'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
|
||||||
|
'timestamp': 1469421000.0,
|
||||||
|
'upload_date': '20160725',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||||
|
if 'error_not_available.swf' in rtmp_video_url:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: video is not available' % cls.IE_NAME, expected=True)
|
||||||
|
|
||||||
|
return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||||
|
|||||||
@@ -3,15 +3,12 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from .turner import TurnerBaseIE
|
||||||
int_or_none,
|
from ..utils import url_basename
|
||||||
parse_duration,
|
|
||||||
url_basename,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class CNNIE(InfoExtractor):
|
class CNNIE(TurnerBaseIE):
|
||||||
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -25,6 +22,7 @@ class CNNIE(InfoExtractor):
|
|||||||
'duration': 135,
|
'duration': 135,
|
||||||
'upload_date': '20130609',
|
'upload_date': '20130609',
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||||
@@ -34,7 +32,8 @@ class CNNIE(InfoExtractor):
|
|||||||
'title': "Student's epic speech stuns new freshmen",
|
'title': "Student's epic speech stuns new freshmen",
|
||||||
'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
||||||
'upload_date': '20130821',
|
'upload_date': '20130821',
|
||||||
}
|
},
|
||||||
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||||
@@ -44,80 +43,61 @@ class CNNIE(InfoExtractor):
|
|||||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||||
'upload_date': '20141222',
|
'upload_date': '20141222',
|
||||||
}
|
},
|
||||||
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
|
}, {
|
||||||
|
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||||
|
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '5 stunning stats about Netflix',
|
||||||
|
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||||
|
'upload_date': '20160819',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_CONFIG = {
|
||||||
|
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
|
||||||
|
'edition': {
|
||||||
|
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
|
||||||
|
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
|
||||||
|
},
|
||||||
|
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
|
||||||
|
'money': {
|
||||||
|
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
|
||||||
|
'media_src': 'http://ht3.cdn.turner.com/money/big',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_timestamp(self, video_data):
|
||||||
|
# TODO: fix timestamp extraction
|
||||||
|
return None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
sub_domain, path, page_title = re.match(self._VALID_URL, url).groups()
|
||||||
path = mobj.group('path')
|
if sub_domain not in ('money', 'edition'):
|
||||||
page_title = mobj.group('title')
|
sub_domain = 'edition'
|
||||||
info_url = 'http://edition.cnn.com/video/data/3.0/%s/index.xml' % path
|
config = self._CONFIG[sub_domain]
|
||||||
info = self._download_xml(info_url, page_title)
|
return self._extract_cvp_info(
|
||||||
|
config['data_src'] % path, page_title, {
|
||||||
formats = []
|
'default': {
|
||||||
rex = re.compile(r'''(?x)
|
'media_src': config['media_src'],
|
||||||
(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
}
|
||||||
(?:_(?P<bitrate>[0-9]+)k)?
|
})
|
||||||
''')
|
|
||||||
for f in info.findall('files/file'):
|
|
||||||
video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip())
|
|
||||||
fdct = {
|
|
||||||
'format_id': f.attrib['bitrate'],
|
|
||||||
'url': video_url,
|
|
||||||
}
|
|
||||||
|
|
||||||
mf = rex.match(f.attrib['bitrate'])
|
|
||||||
if mf:
|
|
||||||
fdct['width'] = int(mf.group('width'))
|
|
||||||
fdct['height'] = int(mf.group('height'))
|
|
||||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
|
||||||
else:
|
|
||||||
mf = rex.search(f.text)
|
|
||||||
if mf:
|
|
||||||
fdct['width'] = int(mf.group('width'))
|
|
||||||
fdct['height'] = int(mf.group('height'))
|
|
||||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
|
||||||
else:
|
|
||||||
mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate'])
|
|
||||||
if mi:
|
|
||||||
if mi.group(1) == 'audio':
|
|
||||||
fdct['vcodec'] = 'none'
|
|
||||||
fdct['ext'] = 'm4a'
|
|
||||||
else:
|
|
||||||
fdct['tbr'] = int(mi.group(1))
|
|
||||||
|
|
||||||
formats.append(fdct)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnails = [{
|
|
||||||
'height': int(t.attrib['height']),
|
|
||||||
'width': int(t.attrib['width']),
|
|
||||||
'url': t.text,
|
|
||||||
} for t in info.findall('images/image')]
|
|
||||||
|
|
||||||
metas_el = info.find('metas')
|
|
||||||
upload_date = (
|
|
||||||
metas_el.attrib.get('version') if metas_el is not None else None)
|
|
||||||
|
|
||||||
duration_el = info.find('length')
|
|
||||||
duration = parse_duration(duration_el.text)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': info.attrib['id'],
|
|
||||||
'title': info.find('headline').text,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'description': info.find('description').text,
|
|
||||||
'duration': duration,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class CNNBlogsIE(InfoExtractor):
|
class CNNBlogsIE(InfoExtractor):
|
||||||
@@ -132,6 +112,7 @@ class CNNBlogsIE(InfoExtractor):
|
|||||||
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
||||||
'upload_date': '20140209',
|
'upload_date': '20140209',
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
'add_ie': ['CNN'],
|
'add_ie': ['CNN'],
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -146,7 +127,7 @@ class CNNBlogsIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class CNNArticleIE(InfoExtractor):
|
class CNNArticleIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
|
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
||||||
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
|
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
|
||||||
@@ -154,9 +135,10 @@ class CNNArticleIE(InfoExtractor):
|
|||||||
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Obama: Cyberattack not an act of war',
|
'title': 'Obama: Cyberattack not an act of war',
|
||||||
'description': 'md5:51ce6750450603795cad0cdfbd7d05c5',
|
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
|
||||||
'upload_date': '20141221',
|
'upload_date': '20141221',
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
'add_ie': ['CNN'],
|
'add_ie': ['CNN'],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,17 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
from ..compat import (
|
from .common import InfoExtractor
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
float_or_none,
|
|
||||||
unified_strdate,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||||
@@ -26,8 +16,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
|
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
|
||||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
||||||
|
'timestamp': 1376798400,
|
||||||
|
'upload_date': '20130818',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
||||||
@@ -35,241 +27,92 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
class ToshIE(MTVServicesInfoExtractor):
|
||||||
IE_DESC = 'The Daily Show / The Colbert Report'
|
IE_DESC = 'Tosh.0'
|
||||||
# urls can be abbreviations like :thedailyshow
|
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
||||||
# urls for episodes like:
|
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
|
||||||
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
|
|
||||||
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
|
|
||||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
|
||||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|
|
||||||
|https?://(:www\.)?
|
|
||||||
(?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
|
|
||||||
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
|
||||||
(?P<clip>
|
|
||||||
(?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
|
||||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
|
||||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
|
||||||
)|
|
|
||||||
(?P<interview>
|
|
||||||
extended-interviews/(?P<interID>[0-9a-z]+)/
|
|
||||||
(?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
|
|
||||||
(?:/[^/?#]?|[?#]|$))))
|
|
||||||
'''
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
|
||||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20121213',
|
|
||||||
'description': 'Kristen Stewart learns to let loose in "On the Road."',
|
|
||||||
'uploader': 'thedailyshow',
|
|
||||||
'title': 'thedailyshow kristen-stewart part 1',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'sarah-chayes-extended-interview',
|
|
||||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
|
||||||
'title': 'thedailyshow Sarah Chayes Extended Interview',
|
|
||||||
},
|
|
||||||
'playlist': [
|
|
||||||
{
|
|
||||||
'info_dict': {
|
|
||||||
'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20150129',
|
|
||||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
|
||||||
'uploader': 'thedailyshow',
|
|
||||||
'title': 'thedailyshow sarah-chayes-extended-interview part 1',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20150129',
|
|
||||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
|
||||||
'uploader': 'thedailyshow',
|
|
||||||
'title': 'thedailyshow sarah-chayes-extended-interview part 2',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
],
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||||
|
'info_dict': {
|
||||||
|
'description': 'Tosh asked fans to share their summer plans.',
|
||||||
|
'title': 'Twitter Users Share Summer Plans',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'md5': 'f269e88114c1805bb6d7653fecea9e06',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
|
||||||
|
'description': 'Tosh asked fans to share their summer plans.',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
# It's really reported to be published on year 2077
|
||||||
|
'upload_date': '20770610',
|
||||||
|
'timestamp': 3390510600,
|
||||||
|
'subtitles': {
|
||||||
|
'en': 'mincount:3',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
}, {
|
||||||
|
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
@classmethod
|
||||||
|
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||||
|
new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||||
|
new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm')
|
||||||
|
return new_urls
|
||||||
|
|
||||||
_video_extensions = {
|
|
||||||
'3500': 'mp4',
|
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||||
'2200': 'mp4',
|
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
||||||
'1700': 'mp4',
|
_TESTS = [{
|
||||||
'1200': 'mp4',
|
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
|
||||||
'750': 'mp4',
|
'info_dict': {
|
||||||
'400': 'mp4',
|
'id': 'local_playlist-f99b626bdfe13568579a',
|
||||||
}
|
'ext': 'flv',
|
||||||
_video_dimensions = {
|
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
|
||||||
'3500': (1280, 720),
|
},
|
||||||
'2200': (960, 540),
|
'params': {
|
||||||
'1700': (768, 432),
|
# rtmp download
|
||||||
'1200': (640, 360),
|
'skip_download': True,
|
||||||
'750': (512, 288),
|
},
|
||||||
'400': (384, 216),
|
}, {
|
||||||
}
|
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
if mobj.group('shortname'):
|
webpage = self._download_webpage(url, video_id)
|
||||||
return self.url_result('http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes')
|
|
||||||
|
|
||||||
if mobj.group('clip'):
|
mrss_url = self._search_regex(
|
||||||
if mobj.group('videotitle'):
|
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
epTitle = mobj.group('videotitle')
|
webpage, 'mrss url', group='url')
|
||||||
elif mobj.group('showname') == 'thedailyshow':
|
|
||||||
epTitle = mobj.group('tdstitle')
|
|
||||||
else:
|
|
||||||
epTitle = mobj.group('cntitle')
|
|
||||||
dlNewest = False
|
|
||||||
elif mobj.group('interview'):
|
|
||||||
epTitle = mobj.group('interview_title')
|
|
||||||
dlNewest = False
|
|
||||||
else:
|
|
||||||
dlNewest = not mobj.group('episode')
|
|
||||||
if dlNewest:
|
|
||||||
epTitle = mobj.group('showname')
|
|
||||||
else:
|
|
||||||
epTitle = mobj.group('episode')
|
|
||||||
show_name = mobj.group('showname')
|
|
||||||
|
|
||||||
webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
|
return self._get_videos_info_from_url(mrss_url, video_id)
|
||||||
if dlNewest:
|
|
||||||
url = htmlHandle.geturl()
|
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError('Invalid redirected URL: ' + url)
|
|
||||||
if mobj.group('episode') == '':
|
|
||||||
raise ExtractorError('Redirected URL is still not specific: ' + url)
|
|
||||||
epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
|
|
||||||
|
|
||||||
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
|
||||||
if len(mMovieParams) == 0:
|
|
||||||
# The Colbert Report embeds the information in a without
|
|
||||||
# a URL prefix; so extract the alternate reference
|
|
||||||
# and then add the URL prefix manually.
|
|
||||||
|
|
||||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
|
class ComedyCentralShortnameIE(InfoExtractor):
|
||||||
if len(altMovieParams) == 0:
|
_VALID_URL = r'^:(?P<id>tds|thedailyshow)$'
|
||||||
raise ExtractorError('unable to find Flash URL in webpage ' + url)
|
_TESTS = [{
|
||||||
else:
|
'url': ':tds',
|
||||||
mMovieParams = [('http://media.mtvnservices.com/' + altMovieParams[0], altMovieParams[0])]
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': ':thedailyshow',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
uri = mMovieParams[0][1]
|
def _real_extract(self, url):
|
||||||
# Correct cc.com in uri
|
video_id = self._match_id(url)
|
||||||
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
|
shortcut_map = {
|
||||||
|
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||||
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri}))
|
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||||
idoc = self._download_xml(
|
|
||||||
index_url, epTitle,
|
|
||||||
'Downloading show index', 'Unable to download episode index')
|
|
||||||
|
|
||||||
title = idoc.find('./channel/title').text
|
|
||||||
description = idoc.find('./channel/description').text
|
|
||||||
|
|
||||||
entries = []
|
|
||||||
item_els = idoc.findall('.//item')
|
|
||||||
for part_num, itemEl in enumerate(item_els):
|
|
||||||
upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
|
||||||
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
|
|
||||||
|
|
||||||
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
|
|
||||||
duration = float_or_none(content.attrib.get('duration'))
|
|
||||||
mediagen_url = content.attrib['url']
|
|
||||||
guid = itemEl.find('./guid').text.rpartition(':')[-1]
|
|
||||||
|
|
||||||
cdoc = self._download_xml(
|
|
||||||
mediagen_url, epTitle,
|
|
||||||
'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
|
|
||||||
|
|
||||||
turls = []
|
|
||||||
for rendition in cdoc.findall('.//rendition'):
|
|
||||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
|
||||||
turls.append(finfo)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for format, rtmp_video_url in turls:
|
|
||||||
w, h = self._video_dimensions.get(format, (None, None))
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'vhttp-%s' % format,
|
|
||||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
|
||||||
'ext': self._video_extensions.get(format, 'mp4'),
|
|
||||||
'height': h,
|
|
||||||
'width': w,
|
|
||||||
})
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'rtmp-%s' % format,
|
|
||||||
'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
|
|
||||||
'ext': self._video_extensions.get(format, 'mp4'),
|
|
||||||
'height': h,
|
|
||||||
'width': w,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
subtitles = self._extract_subtitles(cdoc, guid)
|
|
||||||
|
|
||||||
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
|
||||||
entries.append({
|
|
||||||
'id': guid,
|
|
||||||
'title': virtual_id,
|
|
||||||
'formats': formats,
|
|
||||||
'uploader': show_name,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'duration': duration,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'description': description,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'id': epTitle,
|
|
||||||
'entries': entries,
|
|
||||||
'title': show_name + ' ' + title,
|
|
||||||
'description': description,
|
|
||||||
}
|
}
|
||||||
|
return self.url_result(shortcut_map[video_id])
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ from ..utils import (
|
|||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
url_basename,
|
url_basename,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
@@ -53,6 +54,9 @@ from ..utils import (
|
|||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
update_Request,
|
update_Request,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
parse_m3u8_attributes,
|
||||||
|
extract_attributes,
|
||||||
|
parse_codecs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -83,6 +87,9 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
Potential fields:
|
Potential fields:
|
||||||
* url Mandatory. The URL of the video file
|
* url Mandatory. The URL of the video file
|
||||||
|
* manifest_url
|
||||||
|
The URL of the manifest file in case of
|
||||||
|
fragmented media (DASH, hls, hds)
|
||||||
* ext Will be calculated from URL if missing
|
* ext Will be calculated from URL if missing
|
||||||
* format A human-readable description of the format
|
* format A human-readable description of the format
|
||||||
("mp4 container with h264/opus").
|
("mp4 container with h264/opus").
|
||||||
@@ -111,6 +118,11 @@ class InfoExtractor(object):
|
|||||||
download, lower-case.
|
download, lower-case.
|
||||||
"http", "https", "rtsp", "rtmp", "rtmpe",
|
"http", "https", "rtsp", "rtmp", "rtmpe",
|
||||||
"m3u8", "m3u8_native" or "http_dash_segments".
|
"m3u8", "m3u8_native" or "http_dash_segments".
|
||||||
|
* fragments A list of fragments of the fragmented media,
|
||||||
|
with the following entries:
|
||||||
|
* "url" (mandatory) - fragment's URL
|
||||||
|
* "duration" (optional, int or float)
|
||||||
|
* "filesize" (optional, int)
|
||||||
* preference Order number of this format. If this field is
|
* preference Order number of this format. If this field is
|
||||||
present and not None, the formats get sorted
|
present and not None, the formats get sorted
|
||||||
by this field, regardless of all other values.
|
by this field, regardless of all other values.
|
||||||
@@ -160,6 +172,7 @@ class InfoExtractor(object):
|
|||||||
* "height" (optional, int)
|
* "height" (optional, int)
|
||||||
* "resolution" (optional, string "{width}x{height"},
|
* "resolution" (optional, string "{width}x{height"},
|
||||||
deprecated)
|
deprecated)
|
||||||
|
* "filesize" (optional, int)
|
||||||
thumbnail: Full URL to a video thumbnail image.
|
thumbnail: Full URL to a video thumbnail image.
|
||||||
description: Full video description.
|
description: Full video description.
|
||||||
uploader: Full name of the video uploader.
|
uploader: Full name of the video uploader.
|
||||||
@@ -657,35 +670,48 @@ class InfoExtractor(object):
|
|||||||
else:
|
else:
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def _get_login_info(self):
|
def _get_netrc_login_info(self, netrc_machine=None):
|
||||||
|
username = None
|
||||||
|
password = None
|
||||||
|
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
||||||
|
|
||||||
|
if self._downloader.params.get('usenetrc', False):
|
||||||
|
try:
|
||||||
|
info = netrc.netrc().authenticators(netrc_machine)
|
||||||
|
if info is not None:
|
||||||
|
username = info[0]
|
||||||
|
password = info[2]
|
||||||
|
else:
|
||||||
|
raise netrc.NetrcParseError(
|
||||||
|
'No authenticators for %s' % netrc_machine)
|
||||||
|
except (IOError, netrc.NetrcParseError) as err:
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'parsing .netrc: %s' % error_to_compat_str(err))
|
||||||
|
|
||||||
|
return username, password
|
||||||
|
|
||||||
|
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||||
"""
|
"""
|
||||||
Get the login info as (username, password)
|
Get the login info as (username, password)
|
||||||
It will look in the netrc file using the _NETRC_MACHINE value
|
First look for the manually specified credentials using username_option
|
||||||
|
and password_option as keys in params dictionary. If no such credentials
|
||||||
|
available look in the netrc file using the netrc_machine or _NETRC_MACHINE
|
||||||
|
value.
|
||||||
If there's no info available, return (None, None)
|
If there's no info available, return (None, None)
|
||||||
"""
|
"""
|
||||||
if self._downloader is None:
|
if self._downloader is None:
|
||||||
return (None, None)
|
return (None, None)
|
||||||
|
|
||||||
username = None
|
|
||||||
password = None
|
|
||||||
downloader_params = self._downloader.params
|
downloader_params = self._downloader.params
|
||||||
|
|
||||||
# Attempt to use provided username and password or .netrc data
|
# Attempt to use provided username and password or .netrc data
|
||||||
if downloader_params.get('username') is not None:
|
if downloader_params.get(username_option) is not None:
|
||||||
username = downloader_params['username']
|
username = downloader_params[username_option]
|
||||||
password = downloader_params['password']
|
password = downloader_params[password_option]
|
||||||
elif downloader_params.get('usenetrc', False):
|
else:
|
||||||
try:
|
username, password = self._get_netrc_login_info(netrc_machine)
|
||||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
|
||||||
if info is not None:
|
|
||||||
username = info[0]
|
|
||||||
password = info[2]
|
|
||||||
else:
|
|
||||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
|
||||||
except (IOError, netrc.NetrcParseError) as err:
|
|
||||||
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
|
|
||||||
|
|
||||||
return (username, password)
|
return username, password
|
||||||
|
|
||||||
def _get_tfa_info(self, note='two-factor verification code'):
|
def _get_tfa_info(self, note='two-factor verification code'):
|
||||||
"""
|
"""
|
||||||
@@ -722,9 +748,14 @@ class InfoExtractor(object):
|
|||||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
||||||
|
|
||||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||||
|
if not isinstance(prop, (list, tuple)):
|
||||||
|
prop = [prop]
|
||||||
if name is None:
|
if name is None:
|
||||||
name = 'OpenGraph %s' % prop
|
name = 'OpenGraph %s' % prop[0]
|
||||||
escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
|
og_regexes = []
|
||||||
|
for p in prop:
|
||||||
|
og_regexes.extend(self._og_regexes(p))
|
||||||
|
escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
|
||||||
if escaped is None:
|
if escaped is None:
|
||||||
return None
|
return None
|
||||||
return unescapeHTML(escaped)
|
return unescapeHTML(escaped)
|
||||||
@@ -748,10 +779,12 @@ class InfoExtractor(object):
|
|||||||
return self._og_search_property('url', html, **kargs)
|
return self._og_search_property('url', html, **kargs)
|
||||||
|
|
||||||
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
|
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
|
||||||
|
if not isinstance(name, (list, tuple)):
|
||||||
|
name = [name]
|
||||||
if display_name is None:
|
if display_name is None:
|
||||||
display_name = name
|
display_name = name[0]
|
||||||
return self._html_search_regex(
|
return self._html_search_regex(
|
||||||
self._meta_regex(name),
|
[self._meta_regex(n) for n in name],
|
||||||
html, display_name, fatal=fatal, group='content', **kwargs)
|
html, display_name, fatal=fatal, group='content', **kwargs)
|
||||||
|
|
||||||
def _dc_search_uploader(self, html):
|
def _dc_search_uploader(self, html):
|
||||||
@@ -800,56 +833,82 @@ class InfoExtractor(object):
|
|||||||
return self._html_search_meta('twitter:player', html,
|
return self._html_search_meta('twitter:player', html,
|
||||||
'twitter card player')
|
'twitter card player')
|
||||||
|
|
||||||
def _search_json_ld(self, html, video_id, **kwargs):
|
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||||
json_ld = self._search_regex(
|
json_ld = self._search_regex(
|
||||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||||
|
default = kwargs.get('default', NO_DEFAULT)
|
||||||
if not json_ld:
|
if not json_ld:
|
||||||
return {}
|
return default if default is not NO_DEFAULT else {}
|
||||||
return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
|
# JSON-LD may be malformed and thus `fatal` should be respected.
|
||||||
|
# At the same time `default` may be passed that assumes `fatal=False`
|
||||||
|
# for _search_regex. Let's simulate the same behavior here as well.
|
||||||
|
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
|
||||||
|
return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
|
||||||
|
|
||||||
def _json_ld(self, json_ld, video_id, fatal=True):
|
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||||
if isinstance(json_ld, compat_str):
|
if isinstance(json_ld, compat_str):
|
||||||
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
|
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
|
||||||
if not json_ld:
|
if not json_ld:
|
||||||
return {}
|
return {}
|
||||||
info = {}
|
info = {}
|
||||||
if json_ld.get('@context') == 'http://schema.org':
|
if not isinstance(json_ld, (list, tuple, dict)):
|
||||||
item_type = json_ld.get('@type')
|
return info
|
||||||
if item_type == 'TVEpisode':
|
if isinstance(json_ld, dict):
|
||||||
info.update({
|
json_ld = [json_ld]
|
||||||
'episode': unescapeHTML(json_ld.get('name')),
|
for e in json_ld:
|
||||||
'episode_number': int_or_none(json_ld.get('episodeNumber')),
|
if e.get('@context') == 'http://schema.org':
|
||||||
'description': unescapeHTML(json_ld.get('description')),
|
item_type = e.get('@type')
|
||||||
})
|
if expected_type is not None and expected_type != item_type:
|
||||||
part_of_season = json_ld.get('partOfSeason')
|
return info
|
||||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
if item_type == 'TVEpisode':
|
||||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
info.update({
|
||||||
part_of_series = json_ld.get('partOfSeries')
|
'episode': unescapeHTML(e.get('name')),
|
||||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
'description': unescapeHTML(e.get('description')),
|
||||||
elif item_type == 'Article':
|
})
|
||||||
info.update({
|
part_of_season = e.get('partOfSeason')
|
||||||
'timestamp': parse_iso8601(json_ld.get('datePublished')),
|
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
||||||
'title': unescapeHTML(json_ld.get('headline')),
|
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||||
'description': unescapeHTML(json_ld.get('articleBody')),
|
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||||
})
|
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
||||||
|
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||||
|
elif item_type == 'Article':
|
||||||
|
info.update({
|
||||||
|
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||||
|
'title': unescapeHTML(e.get('headline')),
|
||||||
|
'description': unescapeHTML(e.get('articleBody')),
|
||||||
|
})
|
||||||
|
elif item_type == 'VideoObject':
|
||||||
|
info.update({
|
||||||
|
'url': e.get('contentUrl'),
|
||||||
|
'title': unescapeHTML(e.get('name')),
|
||||||
|
'description': unescapeHTML(e.get('description')),
|
||||||
|
'thumbnail': e.get('thumbnailUrl'),
|
||||||
|
'duration': parse_duration(e.get('duration')),
|
||||||
|
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||||
|
'filesize': float_or_none(e.get('contentSize')),
|
||||||
|
'tbr': int_or_none(e.get('bitrate')),
|
||||||
|
'width': int_or_none(e.get('width')),
|
||||||
|
'height': int_or_none(e.get('height')),
|
||||||
|
})
|
||||||
|
break
|
||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _hidden_inputs(html):
|
def _hidden_inputs(html):
|
||||||
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||||
hidden_inputs = {}
|
hidden_inputs = {}
|
||||||
for input in re.findall(r'(?i)<input([^>]+)>', html):
|
for input in re.findall(r'(?i)(<input[^>]+>)', html):
|
||||||
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
|
attrs = extract_attributes(input)
|
||||||
|
if not input:
|
||||||
continue
|
continue
|
||||||
name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input)
|
if attrs.get('type') not in ('hidden', 'submit'):
|
||||||
if not name:
|
|
||||||
continue
|
continue
|
||||||
value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
|
name = attrs.get('name') or attrs.get('id')
|
||||||
if not value:
|
value = attrs.get('value')
|
||||||
continue
|
if name and value is not None:
|
||||||
hidden_inputs[name.group('value')] = value.group('value')
|
hidden_inputs[name] = value
|
||||||
return hidden_inputs
|
return hidden_inputs
|
||||||
|
|
||||||
def _form_hidden_inputs(self, form_id, html):
|
def _form_hidden_inputs(self, form_id, html):
|
||||||
@@ -875,7 +934,11 @@ class InfoExtractor(object):
|
|||||||
f['ext'] = determine_ext(f['url'])
|
f['ext'] = determine_ext(f['url'])
|
||||||
|
|
||||||
if isinstance(field_preference, (list, tuple)):
|
if isinstance(field_preference, (list, tuple)):
|
||||||
return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference)
|
return tuple(
|
||||||
|
f.get(field)
|
||||||
|
if f.get(field) is not None
|
||||||
|
else ('' if field == 'format_id' else -1)
|
||||||
|
for field in field_preference)
|
||||||
|
|
||||||
preference = f.get('preference')
|
preference = f.get('preference')
|
||||||
if preference is None:
|
if preference is None:
|
||||||
@@ -883,7 +946,8 @@ class InfoExtractor(object):
|
|||||||
if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
|
if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
|
||||||
preference -= 0.5
|
preference -= 0.5
|
||||||
|
|
||||||
proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1
|
protocol = f.get('protocol') or determine_protocol(f)
|
||||||
|
proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)
|
||||||
|
|
||||||
if f.get('vcodec') == 'none': # audio only
|
if f.get('vcodec') == 'none': # audio only
|
||||||
preference -= 50
|
preference -= 50
|
||||||
@@ -1086,6 +1150,7 @@ class InfoExtractor(object):
|
|||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': manifest_url,
|
'url': manifest_url,
|
||||||
|
'manifest_url': manifest_url,
|
||||||
'ext': 'flv' if bootstrap_info is not None else None,
|
'ext': 'flv' if bootstrap_info is not None else None,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'width': width,
|
'width': width,
|
||||||
@@ -1100,7 +1165,7 @@ class InfoExtractor(object):
|
|||||||
'url': m3u8_url,
|
'url': m3u8_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'protocol': 'm3u8',
|
'protocol': 'm3u8',
|
||||||
'preference': preference - 1 if preference else -1,
|
'preference': preference - 100 if preference else -100,
|
||||||
'resolution': 'multiple',
|
'resolution': 'multiple',
|
||||||
'format_note': 'Quality selection URL',
|
'format_note': 'Quality selection URL',
|
||||||
}
|
}
|
||||||
@@ -1110,13 +1175,6 @@ class InfoExtractor(object):
|
|||||||
m3u8_id=None, note=None, errnote=None,
|
m3u8_id=None, note=None, errnote=None,
|
||||||
fatal=True, live=False):
|
fatal=True, live=False):
|
||||||
|
|
||||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
|
||||||
|
|
||||||
format_url = lambda u: (
|
|
||||||
u
|
|
||||||
if re.match(r'^https?://', u)
|
|
||||||
else compat_urlparse.urljoin(m3u8_url, u))
|
|
||||||
|
|
||||||
res = self._download_webpage_handle(
|
res = self._download_webpage_handle(
|
||||||
m3u8_url, video_id,
|
m3u8_url, video_id,
|
||||||
note=note or 'Downloading m3u8 information',
|
note=note or 'Downloading m3u8 information',
|
||||||
@@ -1127,6 +1185,13 @@ class InfoExtractor(object):
|
|||||||
m3u8_doc, urlh = res
|
m3u8_doc, urlh = res
|
||||||
m3u8_url = urlh.geturl()
|
m3u8_url = urlh.geturl()
|
||||||
|
|
||||||
|
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||||
|
|
||||||
|
format_url = lambda u: (
|
||||||
|
u
|
||||||
|
if re.match(r'^https?://', u)
|
||||||
|
else compat_urlparse.urljoin(m3u8_url, u))
|
||||||
|
|
||||||
# We should try extracting formats only from master playlists [1], i.e.
|
# We should try extracting formats only from master playlists [1], i.e.
|
||||||
# playlists that describe available qualities. On the other hand media
|
# playlists that describe available qualities. On the other hand media
|
||||||
# playlists [2] should be returned as is since they contain just the media
|
# playlists [2] should be returned as is since they contain just the media
|
||||||
@@ -1148,49 +1213,57 @@ class InfoExtractor(object):
|
|||||||
'protocol': entry_protocol,
|
'protocol': entry_protocol,
|
||||||
'preference': preference,
|
'preference': preference,
|
||||||
}]
|
}]
|
||||||
last_info = None
|
last_info = {}
|
||||||
last_media = None
|
last_media = {}
|
||||||
kv_rex = re.compile(
|
|
||||||
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
|
|
||||||
for line in m3u8_doc.splitlines():
|
for line in m3u8_doc.splitlines():
|
||||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||||
last_info = {}
|
last_info = parse_m3u8_attributes(line)
|
||||||
for m in kv_rex.finditer(line):
|
|
||||||
v = m.group('val')
|
|
||||||
if v.startswith('"'):
|
|
||||||
v = v[1:-1]
|
|
||||||
last_info[m.group('key')] = v
|
|
||||||
elif line.startswith('#EXT-X-MEDIA:'):
|
elif line.startswith('#EXT-X-MEDIA:'):
|
||||||
last_media = {}
|
media = parse_m3u8_attributes(line)
|
||||||
for m in kv_rex.finditer(line):
|
media_type = media.get('TYPE')
|
||||||
v = m.group('val')
|
if media_type in ('VIDEO', 'AUDIO'):
|
||||||
if v.startswith('"'):
|
media_url = media.get('URI')
|
||||||
v = v[1:-1]
|
if media_url:
|
||||||
last_media[m.group('key')] = v
|
format_id = []
|
||||||
|
for v in (media.get('GROUP-ID'), media.get('NAME')):
|
||||||
|
if v:
|
||||||
|
format_id.append(v)
|
||||||
|
formats.append({
|
||||||
|
'format_id': '-'.join(format_id),
|
||||||
|
'url': format_url(media_url),
|
||||||
|
'language': media.get('LANGUAGE'),
|
||||||
|
'vcodec': 'none' if media_type == 'AUDIO' else None,
|
||||||
|
'ext': ext,
|
||||||
|
'protocol': entry_protocol,
|
||||||
|
'preference': preference,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
# When there is no URI in EXT-X-MEDIA let this tag's
|
||||||
|
# data be used by regular URI lines below
|
||||||
|
last_media = media
|
||||||
elif line.startswith('#') or not line.strip():
|
elif line.startswith('#') or not line.strip():
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
if last_info is None:
|
tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)
|
||||||
formats.append({'url': format_url(line)})
|
|
||||||
continue
|
|
||||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
|
||||||
format_id = []
|
format_id = []
|
||||||
if m3u8_id:
|
if m3u8_id:
|
||||||
format_id.append(m3u8_id)
|
format_id.append(m3u8_id)
|
||||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None
|
|
||||||
# Despite specification does not mention NAME attribute for
|
# Despite specification does not mention NAME attribute for
|
||||||
# EXT-X-STREAM-INF it still sometimes may be present
|
# EXT-X-STREAM-INF it still sometimes may be present
|
||||||
stream_name = last_info.get('NAME') or last_media_name
|
stream_name = last_info.get('NAME') or last_media.get('NAME')
|
||||||
# Bandwidth of live streams may differ over time thus making
|
# Bandwidth of live streams may differ over time thus making
|
||||||
# format_id unpredictable. So it's better to keep provided
|
# format_id unpredictable. So it's better to keep provided
|
||||||
# format_id intact.
|
# format_id intact.
|
||||||
if not live:
|
if not live:
|
||||||
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
||||||
|
manifest_url = format_url(line.strip())
|
||||||
f = {
|
f = {
|
||||||
'format_id': '-'.join(format_id),
|
'format_id': '-'.join(format_id),
|
||||||
'url': format_url(line.strip()),
|
'url': manifest_url,
|
||||||
|
'manifest_url': manifest_url,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
|
'fps': float_or_none(last_info.get('FRAME-RATE')),
|
||||||
'protocol': entry_protocol,
|
'protocol': entry_protocol,
|
||||||
'preference': preference,
|
'preference': preference,
|
||||||
}
|
}
|
||||||
@@ -1199,29 +1272,20 @@ class InfoExtractor(object):
|
|||||||
width_str, height_str = resolution.split('x')
|
width_str, height_str = resolution.split('x')
|
||||||
f['width'] = int(width_str)
|
f['width'] = int(width_str)
|
||||||
f['height'] = int(height_str)
|
f['height'] = int(height_str)
|
||||||
codecs = last_info.get('CODECS')
|
# Unified Streaming Platform
|
||||||
if codecs:
|
mobj = re.search(
|
||||||
vcodec, acodec = [None] * 2
|
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
|
||||||
va_codecs = codecs.split(',')
|
if mobj:
|
||||||
if len(va_codecs) == 1:
|
abr, vbr = mobj.groups()
|
||||||
# Audio only entries usually come with single codec and
|
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
|
||||||
# no resolution. For more robustness we also check it to
|
|
||||||
# be mp4 audio.
|
|
||||||
if not resolution and va_codecs[0].startswith('mp4a'):
|
|
||||||
vcodec, acodec = 'none', va_codecs[0]
|
|
||||||
else:
|
|
||||||
vcodec = va_codecs[0]
|
|
||||||
else:
|
|
||||||
vcodec, acodec = va_codecs[:2]
|
|
||||||
f.update({
|
f.update({
|
||||||
'acodec': acodec,
|
'vbr': vbr,
|
||||||
'vcodec': vcodec,
|
'abr': abr,
|
||||||
})
|
})
|
||||||
if last_media is not None:
|
f.update(parse_codecs(last_info.get('CODECS')))
|
||||||
f['m3u8_media'] = last_media
|
|
||||||
last_media = None
|
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
last_info = {}
|
last_info = {}
|
||||||
|
last_media = {}
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -1468,9 +1532,17 @@ class InfoExtractor(object):
|
|||||||
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
|
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
|
||||||
|
|
||||||
return self._parse_mpd_formats(
|
return self._parse_mpd_formats(
|
||||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
||||||
|
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||||
|
|
||||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||||
|
"""
|
||||||
|
Parse formats from MPD manifest.
|
||||||
|
References:
|
||||||
|
1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
|
||||||
|
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
|
||||||
|
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
|
||||||
|
"""
|
||||||
if mpd_doc.get('type') == 'dynamic':
|
if mpd_doc.get('type') == 'dynamic':
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@@ -1484,34 +1556,52 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def extract_multisegment_info(element, ms_parent_info):
|
def extract_multisegment_info(element, ms_parent_info):
|
||||||
ms_info = ms_parent_info.copy()
|
ms_info = ms_parent_info.copy()
|
||||||
|
|
||||||
|
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
|
||||||
|
# common attributes and elements. We will only extract relevant
|
||||||
|
# for us.
|
||||||
|
def extract_common(source):
|
||||||
|
segment_timeline = source.find(_add_ns('SegmentTimeline'))
|
||||||
|
if segment_timeline is not None:
|
||||||
|
s_e = segment_timeline.findall(_add_ns('S'))
|
||||||
|
if s_e:
|
||||||
|
ms_info['total_number'] = 0
|
||||||
|
ms_info['s'] = []
|
||||||
|
for s in s_e:
|
||||||
|
r = int(s.get('r', 0))
|
||||||
|
ms_info['total_number'] += 1 + r
|
||||||
|
ms_info['s'].append({
|
||||||
|
't': int(s.get('t', 0)),
|
||||||
|
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||||
|
'd': int(s.attrib['d']),
|
||||||
|
'r': r,
|
||||||
|
})
|
||||||
|
start_number = source.get('startNumber')
|
||||||
|
if start_number:
|
||||||
|
ms_info['start_number'] = int(start_number)
|
||||||
|
timescale = source.get('timescale')
|
||||||
|
if timescale:
|
||||||
|
ms_info['timescale'] = int(timescale)
|
||||||
|
segment_duration = source.get('duration')
|
||||||
|
if segment_duration:
|
||||||
|
ms_info['segment_duration'] = int(segment_duration)
|
||||||
|
|
||||||
|
def extract_Initialization(source):
|
||||||
|
initialization = source.find(_add_ns('Initialization'))
|
||||||
|
if initialization is not None:
|
||||||
|
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||||
|
|
||||||
segment_list = element.find(_add_ns('SegmentList'))
|
segment_list = element.find(_add_ns('SegmentList'))
|
||||||
if segment_list is not None:
|
if segment_list is not None:
|
||||||
|
extract_common(segment_list)
|
||||||
|
extract_Initialization(segment_list)
|
||||||
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
||||||
if segment_urls_e:
|
if segment_urls_e:
|
||||||
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
||||||
initialization = segment_list.find(_add_ns('Initialization'))
|
|
||||||
if initialization is not None:
|
|
||||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
|
||||||
else:
|
else:
|
||||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||||
if segment_template is not None:
|
if segment_template is not None:
|
||||||
start_number = segment_template.get('startNumber')
|
extract_common(segment_template)
|
||||||
if start_number:
|
|
||||||
ms_info['start_number'] = int(start_number)
|
|
||||||
segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
|
|
||||||
if segment_timeline is not None:
|
|
||||||
s_e = segment_timeline.findall(_add_ns('S'))
|
|
||||||
if s_e:
|
|
||||||
ms_info['total_number'] = 0
|
|
||||||
for s in s_e:
|
|
||||||
ms_info['total_number'] += 1 + int(s.get('r', '0'))
|
|
||||||
else:
|
|
||||||
timescale = segment_template.get('timescale')
|
|
||||||
if timescale:
|
|
||||||
ms_info['timescale'] = int(timescale)
|
|
||||||
segment_duration = segment_template.get('duration')
|
|
||||||
if segment_duration:
|
|
||||||
ms_info['segment_duration'] = int(segment_duration)
|
|
||||||
media_template = segment_template.get('media')
|
media_template = segment_template.get('media')
|
||||||
if media_template:
|
if media_template:
|
||||||
ms_info['media_template'] = media_template
|
ms_info['media_template'] = media_template
|
||||||
@@ -1519,11 +1609,14 @@ class InfoExtractor(object):
|
|||||||
if initialization:
|
if initialization:
|
||||||
ms_info['initialization_url'] = initialization
|
ms_info['initialization_url'] = initialization
|
||||||
else:
|
else:
|
||||||
initialization = segment_template.find(_add_ns('Initialization'))
|
extract_Initialization(segment_template)
|
||||||
if initialization is not None:
|
|
||||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
|
||||||
return ms_info
|
return ms_info
|
||||||
|
|
||||||
|
def combine_url(base_url, target_url):
|
||||||
|
if re.match(r'^https?://', target_url):
|
||||||
|
return target_url
|
||||||
|
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||||
|
|
||||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||||
formats = []
|
formats = []
|
||||||
for period in mpd_doc.findall(_add_ns('Period')):
|
for period in mpd_doc.findall(_add_ns('Period')):
|
||||||
@@ -1541,7 +1634,7 @@ class InfoExtractor(object):
|
|||||||
continue
|
continue
|
||||||
representation_attrib = adaptation_set.attrib.copy()
|
representation_attrib = adaptation_set.attrib.copy()
|
||||||
representation_attrib.update(representation.attrib)
|
representation_attrib.update(representation.attrib)
|
||||||
# According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory
|
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
|
||||||
mime_type = representation_attrib['mimeType']
|
mime_type = representation_attrib['mimeType']
|
||||||
content_type = mime_type.split('/')[0]
|
content_type = mime_type.split('/')[0]
|
||||||
if content_type == 'text':
|
if content_type == 'text':
|
||||||
@@ -1566,6 +1659,7 @@ class InfoExtractor(object):
|
|||||||
f = {
|
f = {
|
||||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||||
'url': base_url,
|
'url': base_url,
|
||||||
|
'manifest_url': mpd_url,
|
||||||
'ext': mimetype2ext(mime_type),
|
'ext': mimetype2ext(mime_type),
|
||||||
'width': int_or_none(representation_attrib.get('width')),
|
'width': int_or_none(representation_attrib.get('width')),
|
||||||
'height': int_or_none(representation_attrib.get('height')),
|
'height': int_or_none(representation_attrib.get('height')),
|
||||||
@@ -1580,33 +1674,88 @@ class InfoExtractor(object):
|
|||||||
}
|
}
|
||||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
|
||||||
segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
|
|
||||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
|
||||||
media_template = representation_ms_info['media_template']
|
media_template = representation_ms_info['media_template']
|
||||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||||
media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
|
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||||
media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template)
|
media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||||
media_template.replace('$$', '$')
|
media_template.replace('$$', '$')
|
||||||
representation_ms_info['segment_urls'] = [
|
|
||||||
media_template % {
|
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||||
'Number': segment_number,
|
# can't be used at the same time
|
||||||
'Bandwidth': representation_attrib.get('bandwidth')}
|
if '%(Number' in media_template and 's' not in representation_ms_info:
|
||||||
for segment_number in range(
|
segment_duration = None
|
||||||
|
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||||
|
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||||
|
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||||
|
representation_ms_info['fragments'] = [{
|
||||||
|
'url': media_template % {
|
||||||
|
'Number': segment_number,
|
||||||
|
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||||
|
},
|
||||||
|
'duration': segment_duration,
|
||||||
|
} for segment_number in range(
|
||||||
representation_ms_info['start_number'],
|
representation_ms_info['start_number'],
|
||||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||||
if 'segment_urls' in representation_ms_info:
|
else:
|
||||||
|
# $Number*$ or $Time$ in media template with S list available
|
||||||
|
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
||||||
|
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
||||||
|
representation_ms_info['fragments'] = []
|
||||||
|
segment_time = 0
|
||||||
|
segment_d = None
|
||||||
|
segment_number = representation_ms_info['start_number']
|
||||||
|
|
||||||
|
def add_segment_url():
|
||||||
|
segment_url = media_template % {
|
||||||
|
'Time': segment_time,
|
||||||
|
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||||
|
'Number': segment_number,
|
||||||
|
}
|
||||||
|
representation_ms_info['fragments'].append({
|
||||||
|
'url': segment_url,
|
||||||
|
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||||
|
})
|
||||||
|
|
||||||
|
for num, s in enumerate(representation_ms_info['s']):
|
||||||
|
segment_time = s.get('t') or segment_time
|
||||||
|
segment_d = s['d']
|
||||||
|
add_segment_url()
|
||||||
|
segment_number += 1
|
||||||
|
for r in range(s.get('r', 0)):
|
||||||
|
segment_time += segment_d
|
||||||
|
add_segment_url()
|
||||||
|
segment_number += 1
|
||||||
|
segment_time += segment_d
|
||||||
|
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
||||||
|
# No media template
|
||||||
|
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||||
|
# or any YouTube dashsegments video
|
||||||
|
fragments = []
|
||||||
|
s_num = 0
|
||||||
|
for segment_url in representation_ms_info['segment_urls']:
|
||||||
|
s = representation_ms_info['s'][s_num]
|
||||||
|
for r in range(s.get('r', 0) + 1):
|
||||||
|
fragments.append({
|
||||||
|
'url': segment_url,
|
||||||
|
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
|
||||||
|
})
|
||||||
|
representation_ms_info['fragments'] = fragments
|
||||||
|
# NB: MPD manifest may contain direct URLs to unfragmented media.
|
||||||
|
# No fragments key is present in this case.
|
||||||
|
if 'fragments' in representation_ms_info:
|
||||||
f.update({
|
f.update({
|
||||||
'segment_urls': representation_ms_info['segment_urls'],
|
'fragments': [],
|
||||||
'protocol': 'http_dash_segments',
|
'protocol': 'http_dash_segments',
|
||||||
})
|
})
|
||||||
if 'initialization_url' in representation_ms_info:
|
if 'initialization_url' in representation_ms_info:
|
||||||
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
||||||
f.update({
|
|
||||||
'initialization_url': initialization_url,
|
|
||||||
})
|
|
||||||
if not f.get('url'):
|
if not f.get('url'):
|
||||||
f['url'] = initialization_url
|
f['url'] = initialization_url
|
||||||
|
f['fragments'].append({'url': initialization_url})
|
||||||
|
f['fragments'].extend(representation_ms_info['fragments'])
|
||||||
|
for fragment in f['fragments']:
|
||||||
|
fragment['url'] = combine_url(base_url, fragment['url'])
|
||||||
try:
|
try:
|
||||||
existing_format = next(
|
existing_format = next(
|
||||||
fo for fo in formats
|
fo for fo in formats
|
||||||
@@ -1621,6 +1770,131 @@ class InfoExtractor(object):
|
|||||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
|
||||||
|
def absolute_url(video_url):
|
||||||
|
return compat_urlparse.urljoin(base_url, video_url)
|
||||||
|
|
||||||
|
def parse_content_type(content_type):
|
||||||
|
if not content_type:
|
||||||
|
return {}
|
||||||
|
ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
|
||||||
|
if ctr:
|
||||||
|
mimetype, codecs = ctr.groups()
|
||||||
|
f = parse_codecs(codecs)
|
||||||
|
f['ext'] = mimetype2ext(mimetype)
|
||||||
|
return f
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _media_formats(src, cur_media_type):
|
||||||
|
full_url = absolute_url(src)
|
||||||
|
if determine_ext(full_url) == 'm3u8':
|
||||||
|
is_plain_url = False
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
full_url, video_id, ext='mp4',
|
||||||
|
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
|
||||||
|
else:
|
||||||
|
is_plain_url = True
|
||||||
|
formats = [{
|
||||||
|
'url': full_url,
|
||||||
|
'vcodec': 'none' if cur_media_type == 'audio' else None,
|
||||||
|
}]
|
||||||
|
return is_plain_url, formats
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
|
||||||
|
media_info = {
|
||||||
|
'formats': [],
|
||||||
|
'subtitles': {},
|
||||||
|
}
|
||||||
|
media_attributes = extract_attributes(media_tag)
|
||||||
|
src = media_attributes.get('src')
|
||||||
|
if src:
|
||||||
|
_, formats = _media_formats(src, media_type)
|
||||||
|
media_info['formats'].extend(formats)
|
||||||
|
media_info['thumbnail'] = media_attributes.get('poster')
|
||||||
|
if media_content:
|
||||||
|
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||||
|
source_attributes = extract_attributes(source_tag)
|
||||||
|
src = source_attributes.get('src')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
is_plain_url, formats = _media_formats(src, media_type)
|
||||||
|
if is_plain_url:
|
||||||
|
f = parse_content_type(source_attributes.get('type'))
|
||||||
|
f.update(formats[0])
|
||||||
|
media_info['formats'].append(f)
|
||||||
|
else:
|
||||||
|
media_info['formats'].extend(formats)
|
||||||
|
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
||||||
|
track_attributes = extract_attributes(track_tag)
|
||||||
|
kind = track_attributes.get('kind')
|
||||||
|
if not kind or kind == 'subtitles':
|
||||||
|
src = track_attributes.get('src')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
|
||||||
|
media_info['subtitles'].setdefault(lang, []).append({
|
||||||
|
'url': absolute_url(src),
|
||||||
|
})
|
||||||
|
if media_info['formats']:
|
||||||
|
entries.append(media_info)
|
||||||
|
return entries
|
||||||
|
|
||||||
|
def _extract_akamai_formats(self, manifest_url, video_id):
|
||||||
|
formats = []
|
||||||
|
f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
update_url_query(f4m_url, {'hdcore': '3.7.0'}),
|
||||||
|
video_id, f4m_id='hds', fatal=False))
|
||||||
|
m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||||
|
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
|
||||||
|
url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
|
||||||
|
http_base_url = 'http' + url_base
|
||||||
|
formats = []
|
||||||
|
if 'm3u8' not in skip_protocols:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
http_base_url + '/playlist.m3u8', video_id, 'mp4',
|
||||||
|
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||||
|
if 'f4m' not in skip_protocols:
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
http_base_url + '/manifest.f4m',
|
||||||
|
video_id, f4m_id='hds', fatal=False))
|
||||||
|
if re.search(r'(?:/smil:|\.smil)', url_base):
|
||||||
|
if 'dash' not in skip_protocols:
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
http_base_url + '/manifest.mpd',
|
||||||
|
video_id, mpd_id='dash', fatal=False))
|
||||||
|
if 'smil' not in skip_protocols:
|
||||||
|
rtmp_formats = self._extract_smil_formats(
|
||||||
|
http_base_url + '/jwplayer.smil',
|
||||||
|
video_id, fatal=False)
|
||||||
|
for rtmp_format in rtmp_formats:
|
||||||
|
rtsp_format = rtmp_format.copy()
|
||||||
|
rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
|
||||||
|
del rtsp_format['play_path']
|
||||||
|
del rtsp_format['ext']
|
||||||
|
rtsp_format.update({
|
||||||
|
'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
|
||||||
|
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
|
||||||
|
'protocol': 'rtsp',
|
||||||
|
})
|
||||||
|
formats.extend([rtmp_format, rtsp_format])
|
||||||
|
else:
|
||||||
|
for protocol in ('rtmp', 'rtsp'):
|
||||||
|
if protocol not in skip_protocols:
|
||||||
|
formats.append({
|
||||||
|
'url': protocol + url_base,
|
||||||
|
'format_id': protocol,
|
||||||
|
'protocol': protocol,
|
||||||
|
})
|
||||||
|
return formats
|
||||||
|
|
||||||
def _live_title(self, name):
|
def _live_title(self, name):
|
||||||
""" Generate the title for a live video """
|
""" Generate the title for a live video """
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
@@ -1681,7 +1955,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
any_restricted = False
|
any_restricted = False
|
||||||
for tc in self.get_testcases(include_onlymatching=False):
|
for tc in self.get_testcases(include_onlymatching=False):
|
||||||
if 'playlist' in tc:
|
if tc.get('playlist', []):
|
||||||
tc = tc['playlist'][0]
|
tc = tc['playlist'][0]
|
||||||
is_restricted = age_restricted(
|
is_restricted = age_restricted(
|
||||||
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
||||||
@@ -1734,6 +2008,13 @@ class InfoExtractor(object):
|
|||||||
def _mark_watched(self, *args, **kwargs):
|
def _mark_watched(self, *args, **kwargs):
|
||||||
raise NotImplementedError('This method must be implemented by subclasses')
|
raise NotImplementedError('This method must be implemented by subclasses')
|
||||||
|
|
||||||
|
def geo_verification_headers(self):
|
||||||
|
headers = {}
|
||||||
|
geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
|
||||||
|
if geo_verification_proxy:
|
||||||
|
headers['Ytdl-request-proxy'] = geo_verification_proxy
|
||||||
|
return headers
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -5,13 +5,17 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
orderedSet,
|
orderedSet,
|
||||||
remove_end,
|
remove_end,
|
||||||
|
extract_attributes,
|
||||||
|
mimetype2ext,
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -58,6 +62,9 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '3D Printed Speakers Lit With LED',
|
'title': '3D Printed Speakers Lit With LED',
|
||||||
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
|
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
|
||||||
|
'uploader': 'wired',
|
||||||
|
'upload_date': '20130314',
|
||||||
|
'timestamp': 1363219200,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# JS embed
|
# JS embed
|
||||||
@@ -67,70 +74,93 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'id': '55f9cf8b61646d1acf00000c',
|
'id': '55f9cf8b61646d1acf00000c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||||
|
'uploader': 'arstechnica',
|
||||||
|
'upload_date': '20150916',
|
||||||
|
'timestamp': 1442434955,
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_series(self, url, webpage):
|
def _extract_series(self, url, webpage):
|
||||||
title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
|
title = self._html_search_regex(
|
||||||
webpage, 'series title', flags=re.DOTALL)
|
r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>',
|
||||||
|
webpage, 'series title')
|
||||||
url_object = compat_urllib_parse_urlparse(url)
|
url_object = compat_urllib_parse_urlparse(url)
|
||||||
base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
|
base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
|
||||||
m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
|
m_paths = re.finditer(
|
||||||
webpage, flags=re.DOTALL)
|
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
|
||||||
paths = orderedSet(m.group(1) for m in m_paths)
|
paths = orderedSet(m.group(1) for m in m_paths)
|
||||||
build_url = lambda path: compat_urlparse.urljoin(base_url, path)
|
build_url = lambda path: compat_urlparse.urljoin(base_url, path)
|
||||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||||
return self.playlist_result(entries, playlist_title=title)
|
return self.playlist_result(entries, playlist_title=title)
|
||||||
|
|
||||||
def _extract_video(self, webpage, url_type):
|
def _extract_video(self, webpage, url_type):
|
||||||
if url_type != 'embed':
|
query = {}
|
||||||
description = self._html_search_regex(
|
params = self._search_regex(
|
||||||
[
|
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
|
||||||
r'<div class="cne-video-description">(.+?)</div>',
|
if params:
|
||||||
r'<div class="video-post-content">(.+?)</div>',
|
query.update({
|
||||||
],
|
'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'),
|
||||||
webpage, 'description', fatal=False, flags=re.DOTALL)
|
'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'),
|
||||||
|
'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'),
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
description = None
|
params = extract_attributes(self._search_regex(
|
||||||
params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
|
r'(<[^>]+data-js="video-player"[^>]+>)',
|
||||||
'player params', flags=re.DOTALL)
|
webpage, 'player params element'))
|
||||||
video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
|
query.update({
|
||||||
player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id')
|
'videoId': params['data-video'],
|
||||||
target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target')
|
'playerId': params['data-player'],
|
||||||
data = compat_urllib_parse_urlencode({'videoId': video_id,
|
'target': params['id'],
|
||||||
'playerId': player_id,
|
})
|
||||||
'target': target,
|
video_id = query['videoId']
|
||||||
})
|
video_info = None
|
||||||
base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
|
info_page = self._download_webpage(
|
||||||
webpage, 'base info url',
|
'http://player.cnevids.com/player/video.js',
|
||||||
default='http://player.cnevids.com/player/loader.js?')
|
video_id, 'Downloading video info', query=query, fatal=False)
|
||||||
info_url = base_info_url + data
|
if info_page:
|
||||||
info_page = self._download_webpage(info_url, video_id,
|
video_info = self._parse_json(self._search_regex(
|
||||||
'Downloading video info')
|
r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video']
|
||||||
video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info')
|
else:
|
||||||
video_info = self._parse_json(video_info, video_id)
|
info_page = self._download_webpage(
|
||||||
|
'http://player.cnevids.com/player/loader.js',
|
||||||
|
video_id, 'Downloading loader info', query=query)
|
||||||
|
video_info = self._parse_json(self._search_regex(
|
||||||
|
r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id)
|
||||||
|
title = video_info['title']
|
||||||
|
|
||||||
formats = [{
|
formats = []
|
||||||
'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
|
for fdata in video_info.get('sources', [{}])[0]:
|
||||||
'url': fdata['src'],
|
src = fdata.get('src')
|
||||||
'ext': fdata['type'].split('/')[-1],
|
if not src:
|
||||||
'quality': 1 if fdata['quality'] == 'high' else 0,
|
continue
|
||||||
} for fdata in video_info['sources'][0]]
|
ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
|
||||||
|
quality = fdata.get('quality')
|
||||||
|
formats.append({
|
||||||
|
'format_id': ext + ('-%s' % quality if quality else ''),
|
||||||
|
'url': src,
|
||||||
|
'ext': ext,
|
||||||
|
'quality': 1 if quality == 'high' else 0,
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
info = self._search_json_ld(
|
||||||
|
webpage, video_id, fatal=False) if url_type != 'embed' else {}
|
||||||
|
info.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': video_info['title'],
|
'title': title,
|
||||||
'thumbnail': video_info['poster_frame'],
|
'thumbnail': video_info.get('poster_frame'),
|
||||||
'description': description,
|
'uploader': video_info.get('brand'),
|
||||||
}
|
'duration': int_or_none(video_info.get('duration')),
|
||||||
|
'tags': video_info.get('tags'),
|
||||||
|
'series': video_info.get('series_title'),
|
||||||
|
'season': video_info.get('season_title'),
|
||||||
|
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
site, url_type, item_id = re.match(self._VALID_URL, url).groups()
|
||||||
site = mobj.group('site')
|
|
||||||
url_type = mobj.group('type')
|
|
||||||
item_id = mobj.group('id')
|
|
||||||
|
|
||||||
# Convert JS embed to regular embed
|
# Convert JS embed to regular embed
|
||||||
if url_type == 'embedjs':
|
if url_type == 'embedjs':
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals, division
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import int_or_none
|
||||||
@@ -8,12 +8,22 @@ from ..utils import int_or_none
|
|||||||
class CrackleIE(InfoExtractor):
|
class CrackleIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
_VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.crackle.com/the-art-of-more/2496419',
|
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2496419',
|
'id': '2498934',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Heavy Lies the Head',
|
'title': 'Everybody Respects A Bloody Nose',
|
||||||
'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
|
'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'duration': 906,
|
||||||
|
'series': 'Comedians In Cars Getting Coffee',
|
||||||
|
'season_number': 8,
|
||||||
|
'episode_number': 4,
|
||||||
|
'subtitles': {
|
||||||
|
'en-US': [{
|
||||||
|
'ext': 'ttml',
|
||||||
|
}]
|
||||||
|
},
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
@@ -21,12 +31,8 @@ class CrackleIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
|
|
||||||
_SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
|
|
||||||
_UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
|
|
||||||
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
|
||||||
|
|
||||||
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
|
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
|
||||||
|
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
||||||
_MEDIA_FILE_SLOTS = {
|
_MEDIA_FILE_SLOTS = {
|
||||||
'c544.flv': {
|
'c544.flv': {
|
||||||
'width': 544,
|
'width': 544,
|
||||||
@@ -48,16 +54,21 @@ class CrackleIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
config_doc = self._download_xml(
|
||||||
|
'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16',
|
||||||
|
video_id, 'Downloading config')
|
||||||
|
|
||||||
item = self._download_xml(
|
item = self._download_xml(
|
||||||
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
|
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
|
||||||
video_id).find('i')
|
video_id).find('i')
|
||||||
title = item.attrib['t']
|
title = item.attrib['t']
|
||||||
|
|
||||||
thumbnail = None
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
|
'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
|
||||||
video_id, 'mp4', m3u8_id='hls', fatal=None)
|
video_id, 'mp4', m3u8_id='hls', fatal=None)
|
||||||
|
thumbnail = None
|
||||||
path = item.attrib.get('p')
|
path = item.attrib.get('p')
|
||||||
if path:
|
if path:
|
||||||
thumbnail = self._THUMBNAIL_TEMPLATE % path
|
thumbnail = self._THUMBNAIL_TEMPLATE % path
|
||||||
@@ -76,7 +87,7 @@ class CrackleIE(InfoExtractor):
|
|||||||
if locale not in subtitles:
|
if locale not in subtitles:
|
||||||
subtitles[locale] = []
|
subtitles[locale] = []
|
||||||
subtitles[locale] = [{
|
subtitles[locale] = [{
|
||||||
'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
|
'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v),
|
||||||
'ext': 'ttml',
|
'ext': 'ttml',
|
||||||
}]
|
}]
|
||||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||||
@@ -85,7 +96,7 @@ class CrackleIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': item.attrib.get('d'),
|
'description': item.attrib.get('d'),
|
||||||
'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
|
'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None,
|
||||||
'series': item.attrib.get('sn'),
|
'series': item.attrib.get('sn'),
|
||||||
'season_number': int_or_none(item.attrib.get('se')),
|
'season_number': int_or_none(item.attrib.get('se')),
|
||||||
'episode_number': int_or_none(item.attrib.get('ep')),
|
'episode_number': int_or_none(item.attrib.get('ep')),
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class CriterionIE(InfoExtractor):
|
class CriterionIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.criterion\.com/films/(?P<id>[0-9]+)-.+'
|
_VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.criterion.com/films/184-le-samourai',
|
'url': 'http://www.criterion.com/films/184-le-samourai',
|
||||||
'md5': 'bc51beba55685509883a9a7830919ec3',
|
'md5': 'bc51beba55685509883a9a7830919ec3',
|
||||||
|
|||||||
@@ -34,22 +34,58 @@ from ..aes import (
|
|||||||
|
|
||||||
|
|
||||||
class CrunchyrollBaseIE(InfoExtractor):
|
class CrunchyrollBaseIE(InfoExtractor):
|
||||||
|
_LOGIN_URL = 'https://www.crunchyroll.com/login'
|
||||||
|
_LOGIN_FORM = 'login_form'
|
||||||
_NETRC_MACHINE = 'crunchyroll'
|
_NETRC_MACHINE = 'crunchyroll'
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
self.report_login()
|
|
||||||
login_url = 'https://www.crunchyroll.com/?a=formhandler'
|
login_page = self._download_webpage(
|
||||||
data = urlencode_postdata({
|
self._LOGIN_URL, None, 'Downloading login page')
|
||||||
'formname': 'RpcApiUser_Login',
|
|
||||||
'name': username,
|
def is_logged(webpage):
|
||||||
'password': password,
|
return '<title>Redirecting' in webpage
|
||||||
|
|
||||||
|
# Already logged in
|
||||||
|
if is_logged(login_page):
|
||||||
|
return
|
||||||
|
|
||||||
|
login_form_str = self._search_regex(
|
||||||
|
r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
|
||||||
|
login_page, 'login form', group='form')
|
||||||
|
|
||||||
|
post_url = extract_attributes(login_form_str).get('action')
|
||||||
|
if not post_url:
|
||||||
|
post_url = self._LOGIN_URL
|
||||||
|
elif not post_url.startswith('http'):
|
||||||
|
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||||
|
|
||||||
|
login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
|
||||||
|
|
||||||
|
login_form.update({
|
||||||
|
'login_form[name]': username,
|
||||||
|
'login_form[password]': password,
|
||||||
})
|
})
|
||||||
login_request = sanitized_Request(login_url, data)
|
|
||||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
response = self._download_webpage(
|
||||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
post_url, None, 'Logging in', 'Wrong login info',
|
||||||
|
data=urlencode_postdata(login_form),
|
||||||
|
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||||
|
|
||||||
|
# Successful login
|
||||||
|
if is_logged(response):
|
||||||
|
return
|
||||||
|
|
||||||
|
error = self._html_search_regex(
|
||||||
|
'(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
|
||||||
|
response, 'error message', default=None)
|
||||||
|
if error:
|
||||||
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
|
||||||
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
@@ -114,6 +150,21 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
|||||||
# rtmp
|
# rtmp
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '702409',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
|
||||||
|
'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'TV TOKYO',
|
||||||
|
'upload_date': '20160508',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -336,9 +387,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
if video_encode_id in video_encode_ids:
|
if video_encode_id in video_encode_ids:
|
||||||
continue
|
continue
|
||||||
video_encode_ids.append(video_encode_id)
|
video_encode_ids.append(video_encode_id)
|
||||||
|
|
||||||
|
video_file = xpath_text(stream_info, './file')
|
||||||
|
if not video_file:
|
||||||
|
continue
|
||||||
|
if video_file.startswith('http'):
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
continue
|
||||||
|
|
||||||
video_url = xpath_text(stream_info, './host')
|
video_url = xpath_text(stream_info, './host')
|
||||||
video_play_path = xpath_text(stream_info, './file')
|
if not video_url:
|
||||||
if not video_url or not video_play_path:
|
|
||||||
continue
|
continue
|
||||||
metadata = stream_info.find('./metadata')
|
metadata = stream_info.find('./metadata')
|
||||||
format_info = {
|
format_info = {
|
||||||
@@ -353,7 +413,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||||
netloc='v.lvlt.crcdn.net',
|
netloc='v.lvlt.crcdn.net',
|
||||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
|
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||||
format_info.update({
|
format_info.update({
|
||||||
'url': direct_video_url,
|
'url': direct_video_url,
|
||||||
@@ -363,7 +423,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
|
|
||||||
format_info.update({
|
format_info.update({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'play_path': video_play_path,
|
'play_path': video_file,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
})
|
})
|
||||||
formats.append(format_info)
|
formats.append(format_info)
|
||||||
|
|||||||
@@ -51,8 +51,11 @@ class CSpanIE(InfoExtractor):
|
|||||||
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
|
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'judiciary031715',
|
'id': 'judiciary031715',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
|
'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # m3u8 downloads
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,12 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import parse_iso8601, ExtractorError
|
from ..utils import unified_timestamp
|
||||||
|
|
||||||
|
|
||||||
class CtsNewsIE(InfoExtractor):
|
class CtsNewsIE(InfoExtractor):
|
||||||
IE_DESC = '華視新聞'
|
IE_DESC = '華視新聞'
|
||||||
# https connection failed (Connection reset)
|
|
||||||
_VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
_VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
|
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
|
||||||
@@ -16,7 +15,7 @@ class CtsNewsIE(InfoExtractor):
|
|||||||
'id': '201501291578109',
|
'id': '201501291578109',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '以色列.真主黨交火 3人死亡',
|
'title': '以色列.真主黨交火 3人死亡',
|
||||||
'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
|
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...',
|
||||||
'timestamp': 1422528540,
|
'timestamp': 1422528540,
|
||||||
'upload_date': '20150129',
|
'upload_date': '20150129',
|
||||||
}
|
}
|
||||||
@@ -28,7 +27,7 @@ class CtsNewsIE(InfoExtractor):
|
|||||||
'id': '201309031304098',
|
'id': '201309031304098',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
||||||
'description': 'md5:f183feeba3752b683827aab71adad584',
|
'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'timestamp': 1378205880,
|
'timestamp': 1378205880,
|
||||||
'upload_date': '20130903',
|
'upload_date': '20130903',
|
||||||
@@ -36,8 +35,7 @@ class CtsNewsIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
# With Youtube embedded video
|
# With Youtube embedded video
|
||||||
'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
|
'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
|
||||||
'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
|
'md5': 'e4726b2ccd70ba2c319865e28f0a91d1',
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'OVbfO7d0_hQ',
|
'id': 'OVbfO7d0_hQ',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -47,42 +45,37 @@ class CtsNewsIE(InfoExtractor):
|
|||||||
'upload_date': '20150128',
|
'upload_date': '20150128',
|
||||||
'uploader_id': 'TBSCTS',
|
'uploader_id': 'TBSCTS',
|
||||||
'uploader': '中華電視公司',
|
'uploader': '中華電視公司',
|
||||||
}
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
news_id = self._match_id(url)
|
news_id = self._match_id(url)
|
||||||
page = self._download_webpage(url, news_id)
|
page = self._download_webpage(url, news_id)
|
||||||
|
|
||||||
if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None):
|
news_id = self._hidden_inputs(page).get('get_id')
|
||||||
feed_url = self._html_search_regex(
|
|
||||||
r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
|
if news_id:
|
||||||
page, 'feed url')
|
mp4_feed = self._download_json(
|
||||||
video_url = self._download_webpage(
|
'http://news.cts.com.tw/action/test_mp4feed.php',
|
||||||
feed_url, news_id, note='Fetching feed')
|
news_id, note='Fetching feed', query={'news_id': news_id})
|
||||||
|
video_url = mp4_feed['source_url']
|
||||||
else:
|
else:
|
||||||
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
||||||
youtube_url = self._search_regex(
|
youtube_url = self._search_regex(
|
||||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url',
|
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url')
|
||||||
default=None)
|
|
||||||
if not youtube_url:
|
|
||||||
raise ExtractorError('The news includes no videos!', expected=True)
|
|
||||||
|
|
||||||
return {
|
return self.url_result(youtube_url, ie='Youtube')
|
||||||
'_type': 'url',
|
|
||||||
'url': youtube_url,
|
|
||||||
'ie_key': 'Youtube',
|
|
||||||
}
|
|
||||||
|
|
||||||
description = self._html_search_meta('description', page)
|
description = self._html_search_meta('description', page)
|
||||||
title = self._html_search_meta('title', page)
|
title = self._html_search_meta('title', page, fatal=True)
|
||||||
thumbnail = self._html_search_meta('image', page)
|
thumbnail = self._html_search_meta('image', page)
|
||||||
|
|
||||||
datetime_str = self._html_search_regex(
|
datetime_str = self._html_search_regex(
|
||||||
r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
|
r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time', fatal=False)
|
||||||
# Transform into ISO 8601 format with timezone info
|
timestamp = None
|
||||||
datetime_str = datetime_str.replace('/', '-') + ':00+0800'
|
if datetime_str:
|
||||||
timestamp = parse_iso8601(datetime_str, delimiter=' ')
|
timestamp = unified_timestamp(datetime_str) - 8 * 3600
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': news_id,
|
'id': news_id,
|
||||||
|
|||||||
65
youtube_dl/extractor/ctvnews.py
Normal file
65
youtube_dl/extractor/ctvnews.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import orderedSet
|
||||||
|
|
||||||
|
|
||||||
|
class CTVNewsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
||||||
|
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '901995',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Extended: \'That person cannot be me\' Johnson says',
|
||||||
|
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
|
||||||
|
'timestamp': 1467286284,
|
||||||
|
'upload_date': '20160630',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
|
||||||
|
'info_dict':
|
||||||
|
{
|
||||||
|
'id': '1.2966224',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 19,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ctvnews.ca/video?binId=1.2876780',
|
||||||
|
'info_dict':
|
||||||
|
{
|
||||||
|
'id': '1.2876780',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 100,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ctvnews.ca/1.810401',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
page_id = self._match_id(url)
|
||||||
|
|
||||||
|
def ninecninemedia_url_result(clip_id):
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'id': clip_id,
|
||||||
|
'url': '9c9media:ctvnews_web:%s' % clip_id,
|
||||||
|
'ie_key': 'NineCNineMedia',
|
||||||
|
}
|
||||||
|
|
||||||
|
if page_id.isdigit():
|
||||||
|
return ninecninemedia_url_result(page_id)
|
||||||
|
else:
|
||||||
|
webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={
|
||||||
|
'ot': 'example.AjaxPageLayout.ot',
|
||||||
|
'maxItemsPerPage': 1000000,
|
||||||
|
})
|
||||||
|
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
|
||||||
|
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
|
||||||
|
return self.playlist_result(entries, page_id)
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user