Compare commits

..

15 Commits

Author SHA1 Message Date
Sergey M․
ab896fa894 release 2018.12.03 2018-12-03 00:10:20 +07:00
Sergey M․
1fa59a928e [ChangeLog] Actualize
[ci skip]
2018-12-03 00:06:54 +07:00
Sergey M․
ce18a19be9 [tiktok] Improve extraction and add support for user pages (closes #18135) 2018-12-02 02:42:56 +07:00
Ken Swenson
1ead840d2c [tiktok] Add extractor (closes #18108) 2018-12-02 02:42:56 +07:00
Alexander Seiler
aa374bc78e [utils] Fix random_birthday to generate existing dates only 2018-12-02 00:05:15 +07:00
Sergey M․
3430ff9b07 [pornhub] Use actual URL host for requests (closes #18359) 2018-12-01 16:45:51 +07:00
Hakim Boyles
f012823082 [lynda] Fix authentication (closes #18158) 2018-11-30 01:20:27 +07:00
Jimm Stout
16597c2f94 [gfycat] Update API endpoint (closes #18333) 2018-11-30 01:07:07 +07:00
Sergey M․
adbbdefc81 [hotstar] Add support for alternative app state layout (closes #18320) 2018-11-30 00:48:15 +07:00
Alexander Seiler
053e5b12b2 [azmedien] Fix extraction (closes #18334) 2018-11-30 00:12:18 +07:00
Remita Amine
d9df8f120b [vimeo] extract VHX subtitles 2018-11-28 20:13:36 +01:00
Remita Amine
ca01d17884 [vimeo] Add support for VHX(Vimeo OTT)(#14835) 2018-11-28 19:53:45 +01:00
Alexander Seiler
d19600df07 [joj] Fix extraction (closes #18280) 2018-11-24 22:14:27 +07:00
Sergey M․
641e86e3cf [wistia] Add support for fast.wistia.com (closes #18287) 2018-11-24 21:47:41 +07:00
Sergey M․
6864855eb1 [tests] Fix invalid escape sequences 2018-11-23 00:43:42 +07:00
17 changed files with 291 additions and 36 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.23**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.03**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.11.23
[debug] youtube-dl version 2018.12.03
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -1,3 +1,20 @@
version 2018.12.03
Core
* [utils] Fix random_birthday to generate existing dates only (#18284)
Extractors
+ [tiktok] Add support for tiktok.com (#18108, #18135)
* [pornhub] Use actual URL host for requests (#18359)
* [lynda] Fix authentication (#18158, #18217)
* [gfycat] Update API endpoint (#18333, #18343)
+ [hotstar] Add support for alternative app state layout (#18320)
* [azmedien] Fix extraction (#18334, #18336)
+ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
* [joj] Fix extraction (#18280, #18281)
+ [wistia] Add support for fast.wistia.com (#18287)
version 2018.11.23
Core

View File

@@ -885,6 +885,8 @@
- **ThisAmericanLife**
- **ThisAV**
- **ThisOldHouse**
- **TikTok**
- **TikTokUser**
- **tinypic**: tinypic.com videos
- **TMZ**
- **TMZArticle**
@@ -979,6 +981,7 @@
- **VevoPlaylist**
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- **vh1.com**
- **vhx:embed**
- **Viafree**
- **vice**
- **vice:article**

View File

@@ -39,7 +39,7 @@ class TestCompat(unittest.TestCase):
def test_compat_expanduser(self):
old_home = os.environ.get('HOME')
test_str = 'C:\Documents and Settings\тест\Application Data'
test_str = r'C:\Documents and Settings\тест\Application Data'
compat_setenv('HOME', test_str)
self.assertEqual(compat_expanduser('~'), test_str)
compat_setenv('HOME', old_home or '')

View File

@@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP
class TestMetadataFromTitle(unittest.TestCase):
def test_format_to_regex(self):
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')

View File

@@ -36,7 +36,6 @@ class AZMedienIE(InfoExtractor):
'id': '1_anruz3wy',
'ext': 'mp4',
'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
'description': 'md5:dd9f96751ec9c35e409a698a328402f3',
'uploader_id': 'TVOnline',
'upload_date': '20180930',
'timestamp': 1538328802,
@@ -53,15 +52,12 @@ class AZMedienIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host')
video_id = mobj.group('id')
entry_id = mobj.group('kaltura_id')
if not entry_id:
webpage = self._download_webpage(url, video_id)
api_path = self._search_regex(
r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']',
webpage, 'api path')
api_url = 'https://www.%s%s' % (mobj.group('host'), api_path)
api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0])
payload = {
'query': '''query VideoContext($articleId: ID!) {
article: node(id: $articleId) {

View File

@@ -1124,6 +1124,10 @@ from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
from .threeqsdn import ThreeQSDNIE
from .tiktok import (
TikTokIE,
TikTokUserIE,
)
from .tinypic import TinyPicIE
from .tmz import (
TMZIE,
@@ -1303,6 +1307,7 @@ from .vimeo import (
VimeoReviewIE,
VimeoUserIE,
VimeoWatchLaterIE,
VHXEmbedIE,
)
from .vimple import VimpleIE
from .vine import (

View File

@@ -53,7 +53,7 @@ class GfycatIE(InfoExtractor):
video_id = self._match_id(url)
gfy = self._download_json(
'http://gfycat.com/cajax/get/%s' % video_id,
'https://api.gfycat.com/v1/gfycats/%s' % video_id,
video_id, 'Downloading video info')
if 'error' in gfy:
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)

View File

@@ -43,6 +43,7 @@ class HotStarIE(HotStarBaseIE):
IE_NAME = 'hotstar'
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
_TESTS = [{
# contentData
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
'info_dict': {
'id': '1000076273',
@@ -57,6 +58,10 @@ class HotStarIE(HotStarBaseIE):
# m3u8 download
'skip_download': True,
}
}, {
# contentDetail
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
'only_matching': True,
}, {
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
'only_matching': True,
@@ -74,8 +79,12 @@ class HotStarIE(HotStarBaseIE):
r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
webpage, 'app state'), video_id)
video_data = {}
getters = (
lambda x, k=k: x['initialState']['content%s' % k]['content']
for k in ('Data', 'Detail')
)
for v in app_state.values():
content = try_get(v, lambda x: x['initialState']['contentData']['content'], dict)
content = try_get(v, getters, dict)
if content and content.get('contentId') == video_id:
video_data = content

View File

@@ -61,7 +61,7 @@ class JojIE(InfoExtractor):
bitrates = self._parse_json(
self._search_regex(
r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
default='{}'),
video_id, transform_source=js_to_json, fatal=False)

View File

@@ -15,7 +15,7 @@ from ..utils import (
class LyndaBaseIE(InfoExtractor):
_SIGNIN_URL = 'https://www.lynda.com/signin'
_SIGNIN_URL = 'https://www.lynda.com/signin/lynda'
_PASSWORD_URL = 'https://www.lynda.com/signin/password'
_USER_URL = 'https://www.lynda.com/signin/user'
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'

View File

@@ -27,7 +27,7 @@ class PornHubIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
@@ -129,7 +129,7 @@ class PornHubIE(InfoExtractor):
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
webpage)
def _extract_count(self, pattern, webpage, name):
@@ -137,14 +137,16 @@ class PornHubIE(InfoExtractor):
pattern, webpage, '%s count' % name, fatal=False))
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host') or 'pornhub.com'
video_id = mobj.group('id')
self._set_cookie('pornhub.com', 'age_verified', '1')
self._set_cookie(host, 'age_verified', '1')
def dl_webpage(platform):
self._set_cookie('pornhub.com', 'platform', platform)
self._set_cookie(host, 'platform', platform)
return self._download_webpage(
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
'http://www.%s/view_video.php?viewkey=%s' % (host, video_id),
video_id, 'Downloading %s webpage' % platform)
webpage = dl_webpage('pc')
@@ -306,7 +308,7 @@ class PornHubIE(InfoExtractor):
class PornHubPlaylistBaseIE(InfoExtractor):
def _extract_entries(self, webpage):
def _extract_entries(self, webpage, host):
# Only process container div with main playlist content skipping
# drop-down menu that uses similar pattern for videos (see
# https://github.com/rg3/youtube-dl/issues/11594).
@@ -316,7 +318,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
return [
self.url_result(
'http://www.pornhub.com/%s' % video_url,
'http://www.%s/%s' % (host, video_url),
PornHubIE.ie_key(), video_title=title)
for video_url, title in orderedSet(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
@@ -324,11 +326,13 @@ class PornHubPlaylistBaseIE(InfoExtractor):
]
def _real_extract(self, url):
playlist_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host')
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
entries = self._extract_entries(webpage)
entries = self._extract_entries(webpage, host)
playlist = self._parse_json(
self._search_regex(
@@ -343,7 +347,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.(?:com|net)/playlist/(?P<id>\d+)'
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/playlist/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.pornhub.com/playlist/4667351',
'info_dict': {
@@ -358,7 +362,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
_TESTS = [{
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
'info_dict': {
@@ -399,7 +403,9 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
}]
def _real_extract(self, url):
user_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host')
user_id = mobj.group('id')
entries = []
for page_num in itertools.count(1):
@@ -411,7 +417,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
break
raise
page_entries = self._extract_entries(webpage)
page_entries = self._extract_entries(webpage, host)
if not page_entries:
break
entries.extend(page_entries)

View File

@@ -0,0 +1,117 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
compat_str,
ExtractorError,
int_or_none,
str_or_none,
try_get,
url_or_none,
)
class TikTokBaseIE(InfoExtractor):
def _extract_aweme(self, data):
video = data['video']
description = str_or_none(try_get(data, lambda x: x['desc']))
width = int_or_none(try_get(data, lambda x: video['width']))
height = int_or_none(try_get(data, lambda x: video['height']))
format_urls = set()
formats = []
for format_id in (
'play_addr_lowbr', 'play_addr', 'play_addr_h264',
'download_addr'):
for format in try_get(
video, lambda x: x[format_id]['url_list'], list) or []:
format_url = url_or_none(format)
if not format_url:
continue
if format_url in format_urls:
continue
format_urls.add(format_url)
formats.append({
'url': format_url,
'ext': 'mp4',
'height': height,
'width': width,
})
self._sort_formats(formats)
thumbnail = url_or_none(try_get(
video, lambda x: x['cover']['url_list'][0], compat_str))
uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
timestamp = int_or_none(data.get('create_time'))
comment_count = int_or_none(data.get('comment_count')) or int_or_none(
try_get(data, lambda x: x['statistics']['comment_count']))
repost_count = int_or_none(try_get(
data, lambda x: x['statistics']['share_count']))
aweme_id = data['aweme_id']
return {
'id': aweme_id,
'title': uploader or aweme_id,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'timestamp': timestamp,
'comment_count': comment_count,
'repost_count': repost_count,
'formats': formats,
}
class TikTokIE(TikTokBaseIE):
_VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)'
_TEST = {
'url': 'https://m.tiktok.com/v/6606727368545406213.html',
'md5': 'd584b572e92fcd48888051f238022420',
'info_dict': {
'id': '6606727368545406213',
'ext': 'mp4',
'title': 'Zureeal',
'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
'thumbnail': r're:^https?://.*~noop.image',
'uploader': 'Zureeal',
'timestamp': 1538248586,
'upload_date': '20180929',
'comment_count': int,
'repost_count': int,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data = self._parse_json(self._search_regex(
r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
return self._extract_aweme(data)
class TikTokUserIE(TikTokBaseIE):
_VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)'
_TEST = {
'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
'info_dict': {
'id': '188294915489964032',
},
'playlist_mincount': 24,
}
def _real_extract(self, url):
user_id = self._match_id(url)
data = self._download_json(
'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
query={'_signature': '_'})
entries = []
for aweme in data['aweme_list']:
try:
entry = self._extract_aweme(aweme)
except ExtractorError:
continue
entry['extractor_key'] = TikTokIE.ie_key()
entries.append(entry)
return self.playlist_result(entries, user_id)

View File

@@ -14,10 +14,13 @@ from ..compat import (
from ..utils import (
determine_ext,
ExtractorError,
js_to_json,
InAdvancePagedList,
int_or_none,
merge_dicts,
NO_DEFAULT,
parse_filesize,
qualities,
RegexNotFoundError,
sanitized_Request,
smuggle_url,
@@ -27,7 +30,6 @@ from ..utils import (
unsmuggle_url,
urlencode_postdata,
unescapeHTML,
parse_filesize,
)
@@ -1063,3 +1065,96 @@ class VimeoLikesIE(InfoExtractor):
'description': description,
'entries': pl,
}
class VHXEmbedIE(InfoExtractor):
IE_NAME = 'vhx:embed'
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
def _call_api(self, video_id, access_token, path='', query=None):
return self._download_json(
'https://api.vhx.tv/videos/' + video_id + path, video_id, headers={
'Authorization': 'Bearer ' + access_token,
}, query=query)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
credentials = self._parse_json(self._search_regex(
r'(?s)credentials\s*:\s*({.+?}),', webpage,
'config'), video_id, js_to_json)
access_token = credentials['access_token']
query = {}
for k, v in credentials.items():
if k in ('authorization', 'authUserToken', 'ticket') and v and v != 'undefined':
if k == 'authUserToken':
query['auth_user_token'] = v
else:
query[k] = v
files = self._call_api(video_id, access_token, '/files', query)
formats = []
for f in files:
href = try_get(f, lambda x: x['_links']['source']['href'])
if not href:
continue
method = f.get('method')
if method == 'hls':
formats.extend(self._extract_m3u8_formats(
href, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif method == 'dash':
formats.extend(self._extract_mpd_formats(
href, video_id, mpd_id='dash', fatal=False))
else:
fmt = {
'filesize': int_or_none(try_get(f, lambda x: x['size']['bytes'])),
'format_id': 'http',
'preference': 1,
'url': href,
'vcodec': f.get('codec'),
}
quality = f.get('quality')
if quality:
fmt.update({
'format_id': 'http-' + quality,
'height': int_or_none(self._search_regex(r'(\d+)p', quality, 'height', default=None)),
})
formats.append(fmt)
self._sort_formats(formats)
video_data = self._call_api(video_id, access_token)
title = video_data.get('title') or video_data['name']
subtitles = {}
for subtitle in try_get(video_data, lambda x: x['tracks']['subtitles'], list) or []:
lang = subtitle.get('srclang') or subtitle.get('label')
for _link in subtitle.get('_links', {}).values():
href = _link.get('href')
if not href:
continue
subtitles.setdefault(lang, []).append({
'url': href,
})
q = qualities(['small', 'medium', 'large', 'source'])
thumbnails = []
for thumbnail_id, thumbnail_url in video_data.get('thumbnail', {}).items():
thumbnails.append({
'id': thumbnail_id,
'url': thumbnail_url,
'preference': q(thumbnail_id),
})
return {
'id': video_id,
'title': title,
'description': video_data.get('description'),
'duration': int_or_none(try_get(video_data, lambda x: x['duration']['seconds'])),
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
'timestamp': unified_timestamp(video_data.get('created_at')),
'view_count': int_or_none(video_data.get('plays_count')),
}

View File

@@ -12,7 +12,7 @@ from ..utils import (
class WistiaIE(InfoExtractor):
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)'
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/iframe/)(?P<id>[a-z0-9]+)'
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
@@ -35,12 +35,15 @@ class WistiaIE(InfoExtractor):
# with hls video
'url': 'wistia:807fafadvk',
'only_matching': True,
}, {
'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
match = re.search(
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage)
if match:
return unescapeHTML(match.group('url'))

View File

@@ -3948,8 +3948,12 @@ def write_xattr(path, key, value):
def random_birthday(year_field, month_field, day_field):
start_date = datetime.date(1950, 1, 1)
end_date = datetime.date(1995, 12, 31)
offset = random.randint(0, (end_date - start_date).days)
random_date = start_date + datetime.timedelta(offset)
return {
year_field: str(random.randint(1950, 1995)),
month_field: str(random.randint(1, 12)),
day_field: str(random.randint(1, 31)),
year_field: str(random_date.year),
month_field: str(random_date.month),
day_field: str(random_date.day),
}

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2018.11.23'
__version__ = '2018.12.03'