Compare commits
23 Commits
2016.10.25
...
2016.10.31
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2c6da7df4a | ||
|
|
7e7a028aa4 | ||
|
|
e70a5e6566 | ||
|
|
3bf55be466 | ||
|
|
a901fc5fc2 | ||
|
|
cae6bc0118 | ||
|
|
d9ee2e5cf6 | ||
|
|
e1a0b3b81c | ||
|
|
2a048f9878 | ||
|
|
ea331f40e6 | ||
|
|
f02700a1fa | ||
|
|
f3517569f6 | ||
|
|
c725333d41 | ||
|
|
a5a8877f9c | ||
|
|
43c53a1700 | ||
|
|
ec8705117a | ||
|
|
3d8d44c7b1 | ||
|
|
88839f4380 | ||
|
|
83e9374464 | ||
|
|
773017c648 | ||
|
|
777d90dc28 | ||
|
|
3791d84acc | ||
|
|
9305a0dc60 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.25**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.31**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.10.25
|
||||
[debug] youtube-dl version 2016.10.31
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -245,7 +245,7 @@ Say `meta` from the previous example has a `title` and you are about to extract
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
|
||||
22
ChangeLog
22
ChangeLog
@@ -1,3 +1,25 @@
|
||||
version 2016.10.30
|
||||
|
||||
Extractors
|
||||
* [facebook] Improve 1080P video detection (#11073)
|
||||
* [imgur] Recognize /r/ URLs (#11071)
|
||||
* [beeg] Fix extraction (#11069)
|
||||
* [openload] Fix extraction (#10408)
|
||||
* [gvsearch] Modernize and fix search request (#11051)
|
||||
* [adultswim] Fix extraction (#10979)
|
||||
+ [nobelprize] Add support for nobelprize.org (#9999)
|
||||
* [hornbunny] Fix extraction (#10981)
|
||||
* [tvp] Improve video id extraction (#10585)
|
||||
|
||||
|
||||
version 2016.10.26
|
||||
|
||||
Extractors
|
||||
+ [rentv] Add support for ren.tv (#10620)
|
||||
+ [ard] Detect unavailable videos (#11018)
|
||||
* [vk] Fix extraction (#11022)
|
||||
|
||||
|
||||
version 2016.10.25
|
||||
|
||||
Core
|
||||
|
||||
@@ -1083,7 +1083,7 @@ Say `meta` from the previous example has a `title` and you are about to extract
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
|
||||
@@ -488,6 +488,7 @@
|
||||
- **Nintendo**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **NobelPrize**
|
||||
- **Noco**
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
@@ -589,6 +590,8 @@
|
||||
- **RDS**: RDS.ca
|
||||
- **RedTube**
|
||||
- **RegioTV**
|
||||
- **RENTV**
|
||||
- **RENTVArticle**
|
||||
- **Restudy**
|
||||
- **Reuters**
|
||||
- **ReverbNation**
|
||||
|
||||
@@ -96,6 +96,27 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/',
|
||||
'info_dict': {
|
||||
'id': 'eYiLsKVgQ6qTC6agD67Sig',
|
||||
'title': 'Toonami - Friday, October 14th, 2016',
|
||||
'description': 'md5:99892c96ffc85e159a428de85c30acde',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': 'eYiLsKVgQ6qTC6agD67Sig',
|
||||
'ext': 'mp4',
|
||||
'title': 'Toonami - Friday, October 14th, 2016',
|
||||
'description': 'md5:99892c96ffc85e159a428de85c30acde',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -163,6 +184,8 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||
elif video_info.get('videoPlaybackID'):
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
elif video_info.get('id'):
|
||||
segment_ids = [video_info['id']]
|
||||
else:
|
||||
if video_info.get('auth') is True:
|
||||
raise ExtractorError(
|
||||
|
||||
@@ -174,11 +174,17 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
|
||||
ERRORS = (
|
||||
('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
|
||||
('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
|
||||
'Video %s is no longer available'),
|
||||
('Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.',
|
||||
'This program is only suitable for those aged 12 and older. Video %s is therefore only available between 8 pm and 6 am.'),
|
||||
)
|
||||
|
||||
if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
|
||||
raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
|
||||
for pattern, message in ERRORS:
|
||||
if pattern in webpage:
|
||||
raise ExtractorError(message % video_id, expected=True)
|
||||
|
||||
if re.search(r'[\?&]rss($|[=&])', url):
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
|
||||
@@ -46,19 +46,19 @@ class BeegIE(InfoExtractor):
|
||||
self._proto_relative_url(cpl_url), video_id,
|
||||
'Downloading cpl JS', fatal=False)
|
||||
if cpl:
|
||||
beeg_version = self._search_regex(
|
||||
r'beeg_version\s*=\s*(\d+)', cpl,
|
||||
'beeg version', default=None) or self._search_regex(
|
||||
beeg_version = int_or_none(self._search_regex(
|
||||
r'beeg_version\s*=\s*([^\b]+)', cpl,
|
||||
'beeg version', default=None)) or self._search_regex(
|
||||
r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
|
||||
beeg_salt = self._search_regex(
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg beeg_salt',
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
|
||||
default=None, group='beeg_salt')
|
||||
|
||||
beeg_version = beeg_version or '1750'
|
||||
beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr'
|
||||
beeg_version = beeg_version or '2000'
|
||||
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
|
||||
|
||||
video = self._download_json(
|
||||
'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
|
||||
'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
|
||||
video_id)
|
||||
|
||||
def split(o, e):
|
||||
|
||||
@@ -605,6 +605,7 @@ from .ninecninemedia import (
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .nobelprize import NobelPrizeIE
|
||||
from .noco import NocoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
@@ -745,6 +746,10 @@ from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redtube import RedTubeIE
|
||||
from .regiotv import RegioTVIE
|
||||
from .rentv import (
|
||||
RENTVIE,
|
||||
RENTVArticleIE,
|
||||
)
|
||||
from .restudy import RestudyIE
|
||||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
@@ -100,7 +99,8 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '"What are you doing running in the snow?"',
|
||||
'uploader': 'FailArmy',
|
||||
}
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
|
||||
'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
|
||||
@@ -110,6 +110,7 @@ class FacebookIE(InfoExtractor):
|
||||
'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
|
||||
'uploader': 'S. Saint',
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'note': 'swf params escaped',
|
||||
'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
|
||||
@@ -119,6 +120,18 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #10153664894881749',
|
||||
},
|
||||
}, {
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': '0d9813160b146b3bc8744e006027fcc6',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
'title': 'Holocaust survivor becomes US citizen',
|
||||
'timestamp': 1477818095,
|
||||
'upload_date': '20161030',
|
||||
'uploader': 'CNN',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@@ -227,43 +240,13 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
video_data = None
|
||||
|
||||
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||
PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
|
||||
|
||||
for m in re.findall(PATTERN, webpage):
|
||||
swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
|
||||
data = dict(json.loads(swf_params))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
video_data_candidate = json.loads(params_raw)['video_data']
|
||||
for _, f in video_data_candidate.items():
|
||||
if not f:
|
||||
continue
|
||||
if isinstance(f, dict):
|
||||
f = [f]
|
||||
if not isinstance(f, list):
|
||||
continue
|
||||
if f[0].get('video_id') == video_id:
|
||||
video_data = video_data_candidate
|
||||
break
|
||||
if video_data:
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_data = item[2][0]['videoData']
|
||||
break
|
||||
|
||||
def video_data_list2dict(video_data):
|
||||
ret = {}
|
||||
for item in video_data:
|
||||
format_id = item['stream_type']
|
||||
ret.setdefault(format_id, []).append(item)
|
||||
return ret
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_data = video_data_list2dict(item[2][0]['videoData'])
|
||||
break
|
||||
|
||||
if not video_data:
|
||||
if not fatal_if_no_video:
|
||||
return webpage, False
|
||||
@@ -276,7 +259,8 @@ class FacebookIE(InfoExtractor):
|
||||
raise ExtractorError('Cannot parse data')
|
||||
|
||||
formats = []
|
||||
for format_id, f in video_data.items():
|
||||
for f in video_data:
|
||||
format_id = f['stream_type']
|
||||
if f and isinstance(f, dict):
|
||||
f = [f]
|
||||
if not f or not isinstance(f, list):
|
||||
|
||||
@@ -1208,20 +1208,6 @@ class GenericIE(InfoExtractor):
|
||||
'duration': 51690,
|
||||
},
|
||||
},
|
||||
# JWPlayer with M3U8
|
||||
{
|
||||
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
|
||||
'info_dict': {
|
||||
'id': 'playlist',
|
||||
'ext': 'mp4',
|
||||
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
|
||||
'uploader': 'ren.tv',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
|
||||
# This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
|
||||
{
|
||||
|
||||
@@ -4,9 +4,6 @@ import itertools
|
||||
import re
|
||||
|
||||
from .common import SearchInfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class GoogleSearchIE(SearchInfoExtractor):
|
||||
@@ -34,13 +31,16 @@ class GoogleSearchIE(SearchInfoExtractor):
|
||||
}
|
||||
|
||||
for pagenum in itertools.count():
|
||||
result_url = (
|
||||
'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
|
||||
% (compat_urllib_parse.quote_plus(query), pagenum * 10))
|
||||
|
||||
webpage = self._download_webpage(
|
||||
result_url, 'gvsearch:' + query,
|
||||
note='Downloading result page ' + str(pagenum + 1))
|
||||
'http://www.google.com/search',
|
||||
'gvsearch:' + query,
|
||||
note='Downloading result page %s' % (pagenum + 1),
|
||||
query={
|
||||
'tbm': 'vid',
|
||||
'q': query,
|
||||
'start': pagenum * 10,
|
||||
'hl': 'en',
|
||||
})
|
||||
|
||||
for hit_idx, mobj in enumerate(re.finditer(
|
||||
r'<h3 class="r"><a href="([^"]+)"', webpage)):
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -14,29 +12,24 @@ class HornBunnyIE(InfoExtractor):
|
||||
_VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
|
||||
'md5': '95e40865aedd08eff60272b704852ad7',
|
||||
'md5': 'e20fd862d1894b67564c96f180f43924',
|
||||
'info_dict': {
|
||||
'id': '5227',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'panty slut jerk off instruction',
|
||||
'duration': 550,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, note='Downloading initial webpage')
|
||||
title = self._html_search_regex(
|
||||
r'class="title">(.*?)</h2>', webpage, 'title')
|
||||
redirect_url = self._html_search_regex(
|
||||
r'pg&settings=(.*?)\|0"\);', webpage, 'title')
|
||||
webpage2 = self._download_webpage(redirect_url, video_id)
|
||||
video_url = self._html_search_regex(
|
||||
r'flvMask:(.*?);', webpage2, 'video_url')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage)
|
||||
info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
|
||||
@@ -45,12 +38,12 @@ class HornBunnyIE(InfoExtractor):
|
||||
r'<strong>Views:</strong>\s*(\d+)</div>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'ext': 'flv',
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ImgurIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
@@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
62
youtube_dl/extractor/nobelprize.py
Normal file
62
youtube_dl/extractor/nobelprize.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
update_url_query,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NobelPrizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
|
||||
'md5': '04c81e5714bb36cc4e2232fee1d8157f',
|
||||
'info_dict': {
|
||||
'id': '2636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Announcement of the 2016 Nobel Prize in Physics',
|
||||
'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
media = self._parse_json(self._search_regex(
|
||||
r'(?s)var\s*config\s*=\s*({.+?});', webpage,
|
||||
'config'), video_id, js_to_json)['media']
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
for source in media.get('source', []):
|
||||
source_src = source.get('src')
|
||||
if not source_src:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_src, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(source_src, {'hdcore': '3.7.0'}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_src,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': get_element_by_attribute('itemprop', 'description', webpage),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -82,7 +82,7 @@ class OpenloadIE(InfoExtractor):
|
||||
if j >= 33 and j <= 126:
|
||||
j = ((j + 14) % 94) + 33
|
||||
if idx == len(enc_data) - 1:
|
||||
j += 2
|
||||
j += 3
|
||||
video_url_chars += compat_chr(j)
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
|
||||
|
||||
76
youtube_dl/extractor/rentv.py
Normal file
76
youtube_dl/extractor/rentv.py
Normal file
@@ -0,0 +1,76 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class RENTVIE(JWPlatformBaseIE):
|
||||
_VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://ren.tv/video/epizod/118577',
|
||||
'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
|
||||
'info_dict': {
|
||||
'id': '118577',
|
||||
'ext': 'mp4',
|
||||
'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://ren.tv/player/118577',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'rentv:118577',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id)
|
||||
jw_config = self._parse_json(self._search_regex(
|
||||
r'config\s*=\s*({.+});', webpage, 'jw config'), video_id)
|
||||
return self._parse_jwplayer_data(jw_config, video_id, m3u8_id='hls')
|
||||
|
||||
|
||||
class RENTVArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v',
|
||||
'md5': 'ebd63c4680b167693745ab91343df1d6',
|
||||
'info_dict': {
|
||||
'id': '136472',
|
||||
'ext': 'mp4',
|
||||
'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла',
|
||||
'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.',
|
||||
}
|
||||
}, {
|
||||
# TODO: invalid m3u8
|
||||
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
|
||||
'info_dict': {
|
||||
'id': 'playlist',
|
||||
'ext': 'mp4',
|
||||
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
|
||||
'uploader': 'ren.tv',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
|
||||
entries = []
|
||||
for config_profile in drupal_settings.get('ren_jwplayer', {}).values():
|
||||
media_id = config_profile.get('mediaid')
|
||||
if not media_id:
|
||||
continue
|
||||
media_id = compat_str(media_id)
|
||||
entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id))
|
||||
return self.playlist_result(entries, display_id)
|
||||
@@ -69,7 +69,8 @@ class TVPIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
video_id = self._search_regex([
|
||||
r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
|
||||
"object_id\s*:\s*'(\d+)'"], webpage, 'video id')
|
||||
r"object_id\s*:\s*'(\d+)'",
|
||||
r'data-video-id="(\d+)"'], webpage, 'video id', default=page_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'tvp:' + video_id,
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class VesselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)'
|
||||
_API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
|
||||
_LOGIN_URL = 'https://www.vessel.com/api/account/login'
|
||||
_NETRC_MACHINE = 'vessel'
|
||||
@@ -32,12 +32,18 @@ class VesselIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.vessel.com/videos/F01_dsLj1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.vessel.com/videos/RRX-sir-J',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1',
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1',
|
||||
webpage)]
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
|
||||
import collections
|
||||
import re
|
||||
import json
|
||||
import sys
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -369,8 +368,18 @@ class VKIE(VKBaseIE):
|
||||
opts_url = 'http:' + opts_url
|
||||
return self.url_result(opts_url)
|
||||
|
||||
data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
|
||||
data = json.loads(data_json)
|
||||
# vars does not look to be served anymore since 24.10.2016
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+vars\s*=\s*({.+?});', info_page, 'vars', default='{}'),
|
||||
video_id, fatal=False)
|
||||
|
||||
# <!json> is served instead
|
||||
if not data:
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'<!json>\s*({.+?})\s*<!>', info_page, 'json'),
|
||||
video_id)['player']['params'][0]
|
||||
|
||||
title = unescapeHTML(data['md_title'])
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.10.25'
|
||||
__version__ = '2016.10.31'
|
||||
|
||||
Reference in New Issue
Block a user