Compare commits

...

11 Commits

Author SHA1 Message Date
Sergey M․
1dc24093f8 release 2017.02.28 2017-02-28 23:59:22 +07:00
Sergey M․
11bae9cdde [ChangeLog] Actualize 2017-02-28 23:49:24 +07:00
Sergey M․
43b38424a9 [azmedien:showplaylist] Improve (closes #12160) 2017-02-28 23:37:54 +07:00
Alex Seiler
948519b35d [azmedien:showplaylist] Add support for all episodes playlists 2017-02-28 23:36:05 +07:00
Sergey M․
87dadd456a [youtube:playlist] Recognize another playlist pattern (closes #11928, closes #12286) 2017-02-28 23:06:47 +07:00
Yen Chi Hsuan
7c4aa6fd6f [daisuki] Add subtitles (#4738) 2017-02-28 22:29:01 +08:00
Yen Chi Hsuan
9bd05b5a18 [daisuki] Add new extractor (closes #4738) 2017-02-28 22:19:26 +08:00
Yen Chi Hsuan
0a5445ddbe [utils] Add bytes_to_long() and long_to_bytes()
Used in daisuki.net (#4738)

Both are adapted from public domain PyCrypto:
https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py
2017-02-28 22:10:31 +08:00
Yen Chi Hsuan
f48409c7ac [utils] Add pkcs1pad
Used in daisuki.net (#4738)
2017-02-28 22:10:31 +08:00
Yen Chi Hsuan
c9619f0a17 [aes] Add aes_cbc_encrypt
Used in daisuki.net (#4738)
2017-02-28 22:10:31 +08:00
Yen Chi Hsuan
f4c68ba372 [douyu] Fix extraction and update _TESTS
They've switched from flv to hls

Closes #12301
2017-02-28 21:41:03 +08:00
13 changed files with 356 additions and 51 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.27**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.28**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.02.27
[debug] youtube-dl version 2017.02.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -1,3 +1,18 @@
version 2017.02.28
Core
+ [utils] Add bytes_to_long and long_to_bytes
+ [utils] Add pkcs1pad
+ [aes] Add aes_cbc_encrypt
Extractors
+ [azmedien:showplaylist] Add support for show playlists (#12160)
+ [youtube:playlist] Recognize another playlist pattern (#11928, #12286)
+ [daisuki] Add support for daisuki.net (#2486, #3186, #4738, #6175, #7776,
#10060)
* [douyu] Fix extraction (#12301)
version 2017.02.27
Core

View File

@@ -78,6 +78,7 @@
- **awaan:video**
- **AZMedien**: AZ Medien videos
- **AZMedienPlaylist**: AZ Medien playlists
- **AZMedienShowPlaylist**: AZ Medien show playlists
- **Azubu**
- **AzubuLive**
- **BaiduVideo**: 百度视频
@@ -191,6 +192,8 @@
- **dailymotion:playlist**
- **dailymotion:user**
- **DailymotionCloud**
- **Daisuki**
- **DaisukiPlaylist**
- **daum.net**
- **daum.net:clip**
- **daum.net:playlist**

View File

@@ -8,7 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
import base64
@@ -34,6 +34,13 @@ class TestAES(unittest.TestCase):
decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_cbc_encrypt(self):
data = bytes_to_intlist(self.secret_msg)
encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv))
self.assertEqual(
encrypted,
b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd")
def test_decrypt_text(self):
password = intlist_to_bytes(self.key).decode('utf-8')
encrypted = base64.b64encode(

View File

@@ -52,6 +52,7 @@ from youtube_dl.utils import (
parse_filesize,
parse_count,
parse_iso8601,
pkcs1pad,
read_batch_urls,
sanitize_filename,
sanitize_path,
@@ -1104,6 +1105,14 @@ The first line
ohdave_rsa_encrypt(b'aa111222', e, N),
'726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
def test_pkcs1pad(self):
data = [1, 2, 3]
padded_data = pkcs1pad(data, 32)
self.assertEqual(padded_data[:2], [0, 2])
self.assertEqual(padded_data[28:], [0, 1, 2, 3])
self.assertRaises(ValueError, pkcs1pad, data, 8)
def test_encode_base_n(self):
self.assertEqual(encode_base_n(0, 30), '0')
self.assertEqual(encode_base_n(80, 30), '2k')

View File

@@ -60,6 +60,34 @@ def aes_cbc_decrypt(data, key, iv):
return decrypted_data
def aes_cbc_encrypt(data, key, iv):
"""
Encrypt with aes in CBC mode. Using PKCS#7 padding
@param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv 16-Byte IV
@returns {int[]} encrypted data
"""
expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
encrypted_data = []
previous_cipher_block = iv
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
remaining_length = BLOCK_SIZE_BYTES - len(block)
block += [remaining_length] * remaining_length
mixed_block = xor(block, previous_cipher_block)
encrypted_block = aes_encrypt(mixed_block, expanded_key)
encrypted_data += encrypted_block
previous_cipher_block = encrypted_block
return encrypted_data
def key_expansion(data):
"""
Generate key schedule

View File

@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import re
@@ -5,6 +6,7 @@ import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
get_element_by_class,
get_element_by_id,
strip_or_none,
urljoin,
@@ -170,3 +172,42 @@ class AZMedienPlaylistIE(AZMedienBaseIE):
'video-title', webpage)), group='title')
return self.playlist_result(entries, show_id, title)
class AZMedienShowPlaylistIE(AZMedienBaseIE):
IE_DESC = 'AZ Medien show playlists'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
(?:
all-episodes|
alle-episoden
)/
(?P<id>[^/?#&]+)
'''
_TEST = {
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
'info_dict': {
'id': 'astrotalk',
'title': 'TeleZüri: AstroTalk - alle episoden',
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
},
'playlist_mincount': 13,
}
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
episodes = get_element_by_class('search-mobile-box', webpage)
entries = [self.url_result(
urljoin(url, m.group('url'))) for m in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
title = self._og_search_title(webpage, fatal=False)
description = self._og_search_description(webpage)
return self.playlist_result(entries, playlist_id, title, description)

View File

@@ -0,0 +1,159 @@
from __future__ import unicode_literals
import base64
import json
import random
import re
from .common import InfoExtractor
from ..aes import (
aes_cbc_decrypt,
aes_cbc_encrypt,
)
from ..utils import (
bytes_to_intlist,
bytes_to_long,
clean_html,
ExtractorError,
intlist_to_bytes,
get_element_by_id,
js_to_json,
int_or_none,
long_to_bytes,
pkcs1pad,
remove_end,
)
class DaisukiIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P<id>\d+)\.html'
_TEST = {
'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html',
'info_dict': {
'id': '11213',
'ext': 'mp4',
'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS',
'subtitles': {
'mul': [{
'ext': 'ttml',
}],
},
'creator': 'BANDAI NAMCO Entertainment',
},
'params': {
'skip_download': True, # AES-encrypted HLS stream
},
}
# The public key in PEM format can be found in clientlibs_anime_watch.min.js
_RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
flashvars = self._parse_json(self._search_regex(
r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
video_id, transform_source=js_to_json)
iv = [0] * 16
data = {}
for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'):
data[key] = flashvars.get(key, '')
encrypted_rtn = None
# Some AES keys are rejected. Try it with different AES keys
for idx in range(5):
aes_key = [random.randint(0, 254) for _ in range(32)]
padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128))
n, e = self._RSA_KEY
encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n))
init_data = self._download_json('http://www.daisuki.net/bin/bgn/init', video_id, query={
's': flashvars.get('s', ''),
'c': flashvars.get('ss3_prm', ''),
'e': url,
'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
bytes_to_intlist(json.dumps(data)),
aes_key, iv))).decode('ascii'),
'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
}, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))
if 'rtn' in init_data:
encrypted_rtn = init_data['rtn']
break
self._sleep(5, video_id)
if encrypted_rtn is None:
raise ExtractorError('Failed to fetch init data')
rtn = self._parse_json(
intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
base64.b64decode(encrypted_rtn)),
aes_key, iv)).decode('utf-8').rstrip('\0'),
video_id)
formats = self._extract_m3u8_formats(
rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native')
title = remove_end(self._og_search_title(webpage), ' - DAISUKI')
creator = self._html_search_regex(
r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False)
subtitles = {}
caption_url = rtn.get('caption_url')
if caption_url:
# mul: multiple languages
subtitles['mul'] = [{
'url': caption_url,
'ext': 'ttml',
}]
return {
'id': video_id,
'title': title,
'formats': formats,
'subtitles': subtitles,
'creator': creator,
}
class DaisukiPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P<id>[a-zA-Z0-9]+)\.html'
_TEST = {
'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html',
'info_dict': {
'id': 'TheIdolMasterCG',
'title': 'THE IDOLM@STER CINDERELLA GIRLS',
'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8',
},
'playlist_count': 26,
}
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
episode_pattern = r'''(?sx)
<img[^>]+delay="[^"]+/(\d+)/movie\.jpg".+?
<p[^>]+class=".*?\bepisodeNumber\b.*?">(?:<a[^>]+>)?([^<]+)'''
entries = [{
'_type': 'url_transparent',
'url': url.replace('detail', 'watch').replace('.html', '.' + movie_id + '.html'),
'episode_id': episode_id,
'episode_number': int_or_none(episode_id),
} for movie_id, episode_id in re.findall(episode_pattern, webpage)]
playlist_title = remove_end(
self._og_search_title(webpage, fatal=False), ' - Anime - DAISUKI')
playlist_description = clean_html(get_element_by_id('synopsisTxt', webpage))
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

View File

@@ -1,15 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import hashlib
import time
import uuid
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_urlencode,
)
from ..utils import (
ExtractorError,
unescapeHTML,
@@ -24,8 +16,8 @@ class DouyuTVIE(InfoExtractor):
'info_dict': {
'id': '17732',
'display_id': 'iseven',
'ext': 'flv',
'title': 're:^清晨醒脑T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'ext': 'mp4',
'title': 're:^清晨醒脑T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': r're:.*m7show@163\.com.*',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '7师傅',
@@ -39,7 +31,7 @@ class DouyuTVIE(InfoExtractor):
'info_dict': {
'id': '85982',
'display_id': '85982',
'ext': 'flv',
'ext': 'mp4',
'title': 're:^小漠从零单排记——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
'thumbnail': r're:^https?://.*\.jpg$',
@@ -55,8 +47,8 @@ class DouyuTVIE(InfoExtractor):
'info_dict': {
'id': '17732',
'display_id': '17732',
'ext': 'flv',
'title': 're:^清晨醒脑T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'ext': 'mp4',
'title': 're:^清晨醒脑T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': r're:.*m7show@163\.com.*',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '7师傅',
@@ -96,45 +88,18 @@ class DouyuTVIE(InfoExtractor):
if room.get('show_status') == '2':
raise ExtractorError('Live stream is offline', expected=True)
tt = compat_str(int(time.time() / 60))
did = uuid.uuid4().hex.upper()
sign_content = ''.join((room_id, did, self._API_KEY, tt))
sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest()
flv_data = compat_urllib_parse_urlencode({
'cdn': 'ws',
'rate': '0',
'tt': tt,
'did': did,
'sign': sign,
})
video_info = self._download_json(
'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id,
data=flv_data, note='Downloading video info',
headers={'Content-Type': 'application/x-www-form-urlencoded'})
error_code = video_info.get('error', 0)
if error_code is not 0:
raise ExtractorError(
'%s reported error %i' % (self.IE_NAME, error_code),
expected=True)
base_url = video_info['data']['rtmp_url']
live_path = video_info['data']['rtmp_live']
video_url = '%s/%s' % (base_url, live_path)
formats = self._extract_m3u8_formats(
room['hls_url'], video_id, ext='mp4')
title = self._live_title(unescapeHTML(room['room_name']))
description = room.get('notice')
description = room.get('show_details')
thumbnail = room.get('room_src')
uploader = room.get('nickname')
return {
'id': room_id,
'display_id': video_id,
'url': video_url,
'formats': formats,
'title': title,
'description': description,
'thumbnail': thumbnail,

View File

@@ -83,6 +83,7 @@ from .awaan import (
from .azmedien import (
AZMedienIE,
AZMedienPlaylistIE,
AZMedienShowPlaylistIE,
)
from .azubu import AzubuIE, AzubuLiveIE
from .baidu import BaiduVideoIE
@@ -227,6 +228,10 @@ from .dailymotion import (
DailymotionUserIE,
DailymotionCloudIE,
)
from .daisuki import (
DaisukiIE,
DaisukiPlaylistIE,
)
from .daum import (
DaumIE,
DaumClipIE,

View File

@@ -1851,7 +1851,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
(?:
youtube\.com/
(?:
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
\? (?:.*?[&;])*? (?:p|a|list)=
| p/
)|
@@ -1924,6 +1924,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'title': 'JODA15',
'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
}
}, {
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'playlist_mincount': 485,
'info_dict': {
'title': '2017 華語最新單曲 (2/24更新)',
'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
}
}, {
'note': 'Embedded SWF player',
'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
@@ -2072,7 +2079,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
# Check if it's a video-specific URL
query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
video_id = query_dict.get('v', [None])[0] or self._search_regex(
r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
'video id', default=None)
if video_id:
if self._downloader.params.get('noplaylist'):

View File

@@ -3319,6 +3319,57 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
self, req, proxy, type)
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
"""long_to_bytes(n:long, blocksize:int) : string
Convert a long integer to a byte string.
If optional blocksize is given and greater than zero, pad the front of the
byte string with binary zeros so that the length is a multiple of
blocksize.
"""
# after much testing, this algorithm was deemed to be the fastest
s = b''
n = int(n)
while n > 0:
s = compat_struct_pack('>I', n & 0xffffffff) + s
n = n >> 32
# strip off leading zeros
for i in range(len(s)):
if s[i] != b'\000'[0]:
break
else:
# only happens when n == 0
s = b'\000'
i = 0
s = s[i:]
# add back some pad bytes. this could be done more efficiently w.r.t. the
# de-padding being done above, but sigh...
if blocksize > 0 and len(s) % blocksize:
s = (blocksize - len(s) % blocksize) * b'\000' + s
return s
def bytes_to_long(s):
"""bytes_to_long(string) : long
Convert a byte string to a long integer.
This is (essentially) the inverse of long_to_bytes().
"""
acc = 0
length = len(s)
if length % 4:
extra = (4 - length % 4)
s = b'\000' * extra + s
length = length + extra
for i in range(0, length, 4):
acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
return acc
def ohdave_rsa_encrypt(data, exponent, modulus):
'''
Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
@@ -3336,6 +3387,21 @@ def ohdave_rsa_encrypt(data, exponent, modulus):
return '%x' % encrypted
def pkcs1pad(data, length):
"""
Padding input data with PKCS#1 scheme
@param {int[]} data input data
@param {int} length target length
@returns {int[]} padded data
"""
if len(data) > length - 11:
raise ValueError('Input data too long for PKCS#1 padding')
pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
if not table:

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2017.02.27'
__version__ = '2017.02.28'