Compare commits
33 Commits
2014.09.04
...
2014.09.10
Author | SHA1 | Date | |
---|---|---|---|
|
bc7ff0a8dd | ||
|
c63b30901b | ||
|
f009f19ece | ||
|
68477e8839 | ||
|
0dc5365564 | ||
|
9face18d08 | ||
|
ff0ba8ce0f | ||
|
759c6293bd | ||
|
3fbeb95e14 | ||
|
6e25f51cdf | ||
|
321c1e44f9 | ||
|
cc7fec5818 | ||
|
5fb9077e8c | ||
|
8e20f81c5b | ||
|
e154762c74 | ||
|
ba92ab3d05 | ||
|
a2f0cdc074 | ||
|
70a1ecd2c1 | ||
|
88a23aef5a | ||
|
140d8d77b3 | ||
|
665cd96929 | ||
|
4d067a58ca | ||
|
1c1cff6a52 | ||
|
f063a04f07 | ||
|
af8812bb9b | ||
|
78149a962b | ||
|
f2d9e3a370 | ||
|
16e6f396b4 | ||
|
c6ec6b2e8b | ||
|
7bbc6428b6 | ||
|
c1a3c9ddb2 | ||
|
feec0f56f5 | ||
|
8029857d27 |
@@ -74,6 +74,7 @@ __authors__ = (
|
||||
'Keith Beckman',
|
||||
'Ole Ernst',
|
||||
'Aaron McDaniel (mcd1992)',
|
||||
'Magnus Kolstad',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
@@ -193,7 +193,8 @@ class HttpFD(FileDownloader):
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'Did not get any data blocks')
|
||||
return False
|
||||
stream.close()
|
||||
if tmpfilename != u'-':
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
|
@@ -67,6 +67,7 @@ from .dailymotion import (
|
||||
DailymotionUserIE,
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
from .dfb import DFBIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
@@ -199,6 +200,7 @@ from .mitele import MiTeleIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mlb import MLBIE
|
||||
from .mpora import MporaIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .mooshare import MooshareIE
|
||||
@@ -295,6 +297,7 @@ from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .shared import SharedIE
|
||||
from .sharesix import ShareSixIE
|
||||
from .sina import SinaIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slutload import SlutloadIE
|
||||
@@ -374,6 +377,7 @@ from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vgtv import VGTVIE
|
||||
from .vh1 import VH1IE
|
||||
from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
|
@@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
json_url = self._html_search_regex(
|
||||
r'arte_vp_url="(.*?)"', webpage, 'json vp url')
|
||||
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
|
||||
webpage, 'json vp url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
|
74
youtube_dl/extractor/dbtv.py
Normal file
74
youtube_dl/extractor/dbtv.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class DBTVIE(InfoExtractor):
    """Extractor for dbtv.no video pages.

    All metadata and format URLs are read from the JSON document served at
    ``http://api.dbtv.no/discovery/<id>``; the page itself is never fetched.
    """

    _VALID_URL = r'http://dbtv\.no/(?P<id>[0-9]+)#(?P<display_id>.+)'
    _TEST = {
        'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
        'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
        'info_dict': {
            'id': '33100',
            'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
            'ext': 'mp4',
            'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
            'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
            # Raw string: '\.' is a regex escape, not a string escape.
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1404039863.438,
            'upload_date': '20140629',
            'duration': 69.544,
            'view_count': int,
            'categories': list,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video addressed by *url*.

        The numeric id from the URL selects the API document; the URL
        fragment is used as ``display_id``.  The returned ``id`` is the one
        reported by the API, not the one in the URL.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        data = self._download_json(
            'http://api.dbtv.no/discovery/%s' % video_id, display_id)

        video = data['playlist'][0]

        # A missing or empty 'renditions' entry must not abort extraction:
        # the top-level URL/HLSURL fallback below exists for exactly that case.
        formats = [{
            'url': f['URL'],
            'vcodec': f.get('container'),
            'width': int_or_none(f.get('width')),
            'height': int_or_none(f.get('height')),
            'vbr': float_or_none(f.get('rate'), 1000),
            'filesize': int_or_none(f.get('size')),
        } for f in (video.get('renditions') or []) if 'URL' in f]

        if not formats:
            for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]:
                if url_key in video:
                    formats.append({
                        'url': video[url_key],
                        'format_id': format_id,
                    })

        self._sort_formats(formats)

        # The description is treated as optional, like the other metadata.
        description = video.get('desc')

        return {
            'id': video['id'],
            'display_id': display_id,
            'title': video['title'],
            'description': (
                clean_html(description) if description is not None else None),
            'thumbnail': video.get('splash') or video.get('thumb'),
            # publishedAt and length are divided by 1000 before being returned.
            'timestamp': float_or_none(video.get('publishedAt'), 1000),
            'duration': float_or_none(video.get('length'), 1000),
            'view_count': int_or_none(video.get('views')),
            'categories': video.get('tags'),
            'formats': formats,
        }
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class KhanAcademyIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
|
||||
_VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
|
||||
IE_NAME = 'KhanAcademy'
|
||||
|
||||
_TESTS = [{
|
||||
|
112
youtube_dl/extractor/moevideo.py
Normal file
112
youtube_dl/extractor/moevideo.py
Normal file
@@ -0,0 +1,112 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MoeVideoIE(InfoExtractor):
    """Extractor for the LetitBit video sites matched by ``_VALID_URL``.

    Title, thumbnail and description are taken from the video page through
    the ``_og_search_*`` helpers; the media link and technical metadata come
    from a ``preview/flv_link`` request POSTed to the LetitBit API.
    """

    IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
    _VALID_URL = r'''(?x)
        https?://(?P<host>(?:www\.)?
        (?:(?:moevideo|playreplay|videochart)\.net))/
        (?:video|framevideo)/(?P<id>[0-9]+\.[0-9A-Za-z]+)'''
    _API_URL = 'http://api.letitbit.net/'
    _API_KEY = 'tVL0gjqo5'
    _TESTS = [
        {
            'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
            'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
            'info_dict': {
                'id': '00297.0036103fe3d513ef27915216fd29',
                'ext': 'flv',
                'title': 'Sink cut out machine',
                'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
                # Raw string: '\.' is a regex escape, not a string escape.
                'thumbnail': r're:^https?://.*\.jpg$',
                'width': 540,
                'height': 360,
                'duration': 179,
                'filesize': 17822500,
            }
        },
        {
            'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
            'md5': '74f0a014d5b661f0f0e2361300d1620e',
            'info_dict': {
                'id': '77107.7f325710a627383d40540d8e991a',
                'ext': 'flv',
                'title': 'Operacion Condor.',
                'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
                'thumbnail': r're:^https?://.*\.jpg$',
                'width': 480,
                'height': 296,
                'duration': 6027,
                'filesize': 588257923,
            }
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the video addressed by *url*.

        Raises ExtractorError (expected) when the API answers with a status
        other than ``'OK'``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Always fetch the /video/ page, even for /framevideo/ URLs.
        webpage = self._download_webpage(
            'http://%s/video/%s' % (mobj.group('host'), video_id),
            video_id, 'Downloading webpage')

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        # API payload: [api_key, [method, arguments]]
        r = [
            self._API_KEY,
            [
                'preview/flv_link',
                {
                    'uid': video_id,
                },
            ],
        ]
        r_json = json.dumps(r)
        # urllib.request expects POST data as bytes; url-encoded output is
        # pure ASCII, so encoding is lossless on Python 2 and 3 alike.
        post = compat_urllib_parse.urlencode({'r': r_json}).encode('ascii')
        req = compat_urllib_request.Request(self._API_URL, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        response = self._download_json(req, video_id)
        if response['status'] != 'OK':
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, response['data']),
                expected=True
            )
        item = response['data'][0]
        video_url = item['link']
        duration = int_or_none(item['length'])
        width = int_or_none(item['width'])
        height = int_or_none(item['height'])
        filesize = int_or_none(item['convert_size'])

        formats = [{
            'format_id': 'sd',
            'http_headers': {'Range': 'bytes=0-'},  # Required to download
            'url': video_url,
            'width': width,
            'height': height,
            'filesize': filesize,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
            'formats': formats,
        }
|
@@ -3,18 +3,23 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
|
||||
_TEST = {
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
|
||||
'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
|
||||
'info_dict': {
|
||||
'id': '0021200253-okc-bkn-recap.nba',
|
||||
'ext': 'mp4',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'title': 'Thunder vs. Nets',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'duration': 181,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -27,13 +32,18 @@ class NBAIE(InfoExtractor):
|
||||
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
|
||||
|
||||
shortened_video_id = video_id.rpartition('/')[2]
|
||||
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
|
||||
title = remove_end(
|
||||
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
duration = parse_duration(
|
||||
self._html_search_meta('duration', webpage, 'duration', fatal=False))
|
||||
|
||||
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'id': shortened_video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
}
|
||||
|
@@ -5,7 +5,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
unified_strdate,
|
||||
compat_str,
|
||||
)
|
||||
@@ -13,6 +16,8 @@ from ..utils import (
|
||||
|
||||
class NocoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'http://noco.tv/do.php'
|
||||
_NETRC_MACHINE = 'noco'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
|
||||
@@ -30,6 +35,28 @@ class NocoIE(InfoExtractor):
|
||||
'skip': 'Requires noco account',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'a': 'login',
|
||||
'cookie': '1',
|
||||
'username': username,
|
||||
'password': password,
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
|
||||
|
||||
login = self._download_json(request, None, 'Logging in as %s' % username)
|
||||
|
||||
if 'erreur' in login:
|
||||
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
@@ -100,7 +100,7 @@ class RUTVIE(InfoExtractor):
|
||||
return mobj.group('url')
|
||||
|
||||
mobj = re.search(
|
||||
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
|
||||
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
91
youtube_dl/extractor/sharesix.py
Normal file
91
youtube_dl/extractor/sharesix.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class ShareSixIE(InfoExtractor):
    """Extractor for sharesix.com file pages.

    The video page is requested with a POST carrying ``method_free=Free``;
    the media URL, file name, length and dimensions are then scraped from
    the returned HTML.
    """

    _VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [
        {
            'url': 'http://sharesix.com/f/OXjQ7Y6',
            'md5': '9e8e95d8823942815a7d7c773110cc93',
            'info_dict': {
                'id': 'OXjQ7Y6',
                'ext': 'mp4',
                'title': 'big_buck_bunny_480p_surround-fix.avi',
                'duration': 596,
                'width': 854,
                'height': 480,
            },
        },
        {
            'url': 'http://sharesix.com/lfrwoxp35zdd',
            'md5': 'dd19f1435b7cec2d7912c64beeee8185',
            'info_dict': {
                'id': 'lfrwoxp35zdd',
                'ext': 'flv',
                'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv',
                'duration': 65,
                'width': 1280,
                'height': 720,
            },
        }
    ]

    def _real_extract(self, url):
        """Return the info dict for the file page addressed by *url*.

        Width and height are left as None when the page does not contain a
        "Width x Height" entry; the duration lookup is non-fatal.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        fields = {
            'method_free': 'Free'
        }
        # urllib.request expects POST data as bytes; url-encoded output is
        # pure ASCII, so encoding is lossless on Python 2 and 3 alike.
        post = compat_urllib_parse.urlencode(fields).encode('ascii')
        req = compat_urllib_request.Request(url, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        webpage = self._download_webpage(req, video_id,
                                         'Downloading video page')

        video_url = self._search_regex(
            r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL')
        title = self._html_search_regex(
            r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title')
        duration = parse_duration(
            self._search_regex(
                r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>',
                webpage,
                'duration',
                fatal=False
            )
        )

        m = re.search(
            r'''(?xs)<dt>Width\sx\sHeight</dt>.+?
                     <dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''',
            webpage
        )
        width = height = None
        if m:
            width, height = int(m.group('width')), int(m.group('height'))

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            'width': width,
            'height': height,
        }]

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats,
        }
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
|
||||
u'playlist': [
|
||||
'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
|
||||
'info_dict': {
|
||||
'id': '57758',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
u'file': u'57758.flv',
|
||||
u'info_dict': {
|
||||
u'title': u'Learning Topic Models --- Going beyond SVD',
|
||||
'info_dict': {
|
||||
'id': '57758',
|
||||
'ext': 'flv',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
},
|
||||
{
|
||||
u'file': u'57758-slides.flv',
|
||||
u'info_dict': {
|
||||
u'title': u'Learning Topic Models --- Going beyond SVD',
|
||||
'info_dict': {
|
||||
'id': '57758-slides',
|
||||
'ext': 'flv',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
},
|
||||
],
|
||||
u'params': {
|
||||
'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
talk_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, talk_id)
|
||||
rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
|
||||
u'rtmp url')
|
||||
play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
|
||||
webpage, u'presenter play path')
|
||||
rtmp_url = self._search_regex(
|
||||
r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
|
||||
play_path = self._search_regex(
|
||||
r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
|
||||
webpage, 'presenter play path')
|
||||
title = clean_html(get_element_by_attribute('class', 'title', webpage))
|
||||
video_info = {
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'ext': 'flv',
|
||||
}
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'ext': 'flv',
|
||||
}
|
||||
m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
|
||||
if m_slides is None:
|
||||
return video_info
|
||||
else:
|
||||
return [
|
||||
video_info,
|
||||
# The slides video
|
||||
{
|
||||
'id': talk_id + '-slides',
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': m_slides.group(1),
|
||||
'ext': 'flv',
|
||||
},
|
||||
]
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'entries': [
|
||||
video_info,
|
||||
# The slides video
|
||||
{
|
||||
'id': talk_id + '-slides',
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': m_slides.group(1),
|
||||
'ext': 'flv',
|
||||
},
|
||||
],
|
||||
}
|
||||
|
@@ -47,7 +47,7 @@ class ToypicsUserIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'Mikey',
|
||||
},
|
||||
'playlist_mincount': 9917,
|
||||
'playlist_mincount': 19,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,32 +1,66 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import qualities
|
||||
|
||||
|
||||
class UnistraIE(InfoExtractor):
|
||||
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
|
||||
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
|
||||
u'file': u'154.mp4',
|
||||
u'md5': u'736f605cfdc96724d55bb543ab3ced24',
|
||||
u'info_dict': {
|
||||
u'title': u'M!ss Yella',
|
||||
u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://utv.unistra.fr/video.php?id_video=154',
|
||||
'md5': '736f605cfdc96724d55bb543ab3ced24',
|
||||
'info_dict': {
|
||||
'id': '154',
|
||||
'ext': 'mp4',
|
||||
'title': 'M!ss Yella',
|
||||
'description': 'md5:104892c71bd48e55d70b902736b81bbf',
|
||||
},
|
||||
},
|
||||
}
|
||||
{
|
||||
'url': 'http://utv.unistra.fr/index.php?id_video=437',
|
||||
'md5': '1ddddd6cccaae76f622ce29b8779636d',
|
||||
'info_dict': {
|
||||
'id': '437',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prix Louise Weiss 2014',
|
||||
'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = re.match(self._VALID_URL, url).group(1)
|
||||
webpage = self._download_webpage(url, id)
|
||||
file = re.search(r'file: "(.*?)",', webpage).group(1)
|
||||
title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
return {'id': id,
|
||||
'title': title,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
|
||||
'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
|
||||
}
|
||||
files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage))
|
||||
|
||||
quality = qualities(['SD', 'HD'])
|
||||
formats = []
|
||||
for file_path in files:
|
||||
format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD'
|
||||
formats.append({
|
||||
'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id)
|
||||
})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>UTV - (.*?)</', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<meta name="Description" content="(.*?)"', webpage, 'description', flags=re.DOTALL)
|
||||
thumbnail = self._search_regex(
|
||||
r'image: "(.*?)"', webpage, 'thumbnail')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats
|
||||
}
|
||||
|
119
youtube_dl/extractor/vgtv.py
Normal file
119
youtube_dl/extractor/vgtv.py
Normal file
@@ -0,0 +1,119 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class VGTVIE(InfoExtractor):
    """Extractor for vgtv.no videos and live streams.

    Metadata and stream URLs are read from the asset JSON served by
    ``svp.vg.no``.  HLS and HDS manifests are expanded into formats, and
    when an MP4 URL is also present, one progressive format is derived per
    ``<width>_<height>_<vbr>`` token found in the manifest URL.
    """

    _VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/(?:.*)/(?P<id>[0-9]+)'
    _TESTS = [
        {
            # streamType: vod
            'url': 'http://www.vgtv.no/#!/video/84196/hevnen-er-soet-episode-10-abu',
            'md5': 'b8be7a234cebb840c0d512c78013e02f',
            'info_dict': {
                'id': '84196',
                'ext': 'mp4',
                'title': 'Hevnen er søt episode 10: Abu',
                'description': 'md5:e25e4badb5f544b04341e14abdc72234',
                # Raw string: '\.' is a regex escape, not a string escape.
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 648.000,
                'timestamp': 1404626400,
                'upload_date': '20140706',
                'view_count': int,
            },
        },
        {
            # streamType: wasLive
            'url': 'http://www.vgtv.no/#!/live/100764/opptak-vgtv-foelger-em-kvalifiseringen',
            'info_dict': {
                'id': '100764',
                'ext': 'mp4',
                'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
                'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 9056.000,
                'timestamp': 1410113864,
                'upload_date': '20140907',
                'view_count': int,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            # streamType: live
            'url': 'http://www.vgtv.no/#!/live/100015/direkte-her-kan-du-se-laksen-live-fra-suldalslaagen',
            'info_dict': {
                'id': '100015',
                'ext': 'mp4',
                'title': 'DIREKTE: Her kan du se laksen live fra Suldalslågen!',
                'description': 'md5:9a60cc23fa349f761628924e56eeec2d',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 0,
                'timestamp': 1407423348,
                'upload_date': '20140807',
                'view_count': int,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the asset addressed by *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        data = self._download_json(
            'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,
            video_id, 'Downloading media JSON')

        streams = data['streamUrls']

        formats = []

        hls_url = streams.get('hls')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4'))

        hds_url = streams.get('hds')
        if hds_url:
            formats.extend(self._extract_f4m_formats(hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id))

        mp4_url = streams.get('mp4')
        # The per-bitrate MP4 variants are derived from the comma-separated
        # tokens of a manifest URL, so one of hls/hds must be present.
        manifest_url = hls_url or hds_url
        if mp4_url and manifest_url:
            # Same directory and extension as mp4_url, variable file name.
            MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1])
            for mp4_format in manifest_url.split(','):
                m = re.search(r'(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format)
                if not m:
                    continue
                width = int(m.group('width'))
                height = int(m.group('height'))
                vbr = int(m.group('vbr'))
                formats.append({
                    'url': MP4_URL_TEMPLATE % mp4_format,
                    'format_id': 'mp4-%s' % vbr,
                    'width': width,
                    'height': height,
                    'vbr': vbr,
                    'preference': 1,
                })
        elif mp4_url:
            # No manifest to derive variants from (previously this path
            # crashed on None.split); offer the URL the API reported.
            # NOTE(review): assumes mp4_url is directly downloadable - confirm.
            formats.append({
                'url': mp4_url,
                'format_id': 'mp4',
                'preference': 1,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': data['title'],
            'description': data['description'],
            'thumbnail': data['images']['main'] + '?t[]=900x506q80',
            'timestamp': data['published'],
            # data['duration'] is divided by 1000 before being returned.
            'duration': float_or_none(data['duration'], 1000),
            'view_count': data['displays'],
            'formats': formats,
        }
|
@@ -13,6 +13,9 @@ class WashingtonPostIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
|
||||
'info_dict': {
|
||||
'title': 'Sinkhole of bureaucracy',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'c3f4b4922ffa259243f68e928db2db8c',
|
||||
'info_dict': {
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.09.04.2'
|
||||
__version__ = '2014.09.10'
|
||||
|
Reference in New Issue
Block a user