.github
bin
devscripts
docs
test
youtube_dl
downloader
extractor
__init__.py
abc.py
abcnews.py
abcotvs.py
academicearth.py
acast.py
addanime.py
adn.py
adobeconnect.py
adobepass.py
adobetv.py
adultswim.py
aenetworks.py
afreecatv.py
airmozilla.py
aliexpress.py
aljazeera.py
allocine.py
alphaporno.py
amcnetworks.py
americastestkitchen.py
amp.py
animeondemand.py
anvato.py
aol.py
apa.py
aparat.py
appleconnect.py
appletrailers.py
archiveorg.py
ard.py
arkena.py
arte.py
asiancrush.py
atresplayer.py
atttechchannel.py
atvat.py
audimedia.py
audioboom.py
audiomack.py
awaan.py
aws.py
azmedien.py
baidu.py
bambuser.py
bandcamp.py
bbc.py
beampro.py
beatport.py
beeg.py
behindkink.py
bellmedia.py
bet.py
bfi.py
bigflix.py
bild.py
bilibili.py
biobiochiletv.py
biqle.py
bitchute.py
bleacherreport.py
blinkx.py
bloomberg.py
bokecc.py
bostonglobe.py
bpb.py
br.py
bravotv.py
breakcom.py
brightcove.py
businessinsider.py
buzzfeed.py
byutv.py
c56.py
camdemy.py
cammodels.py
camtube.py
camwithher.py
canalc2.py
canalplus.py
canvas.py
carambatv.py
cartoonnetwork.py
cbc.py
cbs.py
cbsinteractive.py
cbslocal.py
cbsnews.py
cbssports.py
ccc.py
ccma.py
cctv.py
cda.py
ceskatelevize.py
channel9.py
charlierose.py
chaturbate.py
chilloutzone.py
chirbit.py
cinchcast.py
cinemax.py
ciscolive.py
cjsw.py
cliphunter.py
clippit.py
cliprs.py
clipsyndicate.py
closertotruth.py
cloudflarestream.py
cloudy.py
clubic.py
clyp.py
cmt.py
cnbc.py
cnn.py
comcarcoff.py
comedycentral.py
common.py
commonmistakes.py
commonprotocols.py
condenast.py
corus.py
coub.py
cracked.py
crackle.py
crooksandliars.py
crunchyroll.py
cspan.py
ctsnews.py
ctvnews.py
cultureunplugged.py
curiositystream.py
cwtv.py
dailymail.py
dailymotion.py
daisuki.py
daum.py
dbtv.py
dctp.py
deezer.py
defense.py
democracynow.py
dfb.py
dhm.py
digg.py
digiteka.py
discovery.py
discoverygo.py
discoverynetworks.py
discoveryvr.py
disney.py
dispeak.py
dlive.py
dotsub.py
douyutv.py
dplay.py
drbonanza.py
dreisat.py
dropbox.py
drtuber.py
drtv.py
dtube.py
dumpert.py
dvtv.py
dw.py
eagleplatform.py
ebaumsworld.py
echomsk.py
egghead.py
ehow.py
eighttracks.py
einthusan.py
eitb.py
ellentube.py
elpais.py
embedly.py
engadget.py
eporner.py
eroprofile.py
escapist.py
espn.py
esri.py
europa.py
everyonesmixtape.py
expotv.py
expressen.py
extractors.py
extremetube.py
eyedotv.py
facebook.py
faz.py
fc2.py
fczenit.py
filmon.py
filmweb.py
firsttv.py
fivemin.py
fivetv.py
flickr.py
flipagram.py
folketinget.py
footyroom.py
formula1.py
fourtube.py
fox.py
fox9.py
foxgay.py
foxnews.py
foxsports.py
franceculture.py
franceinter.py
francetv.py
freesound.py
freespeech.py
freshlive.py
frontendmasters.py
funimation.py
funk.py
fusion.py
fxnetworks.py
gaia.py
gameinformer.py
gameone.py
gamespot.py
gamestar.py
gaskrank.py
gazeta.py
gdcvault.py
generic.py
gfycat.py
giantbomb.py
giga.py
gigya.py
glide.py
globo.py
go.py
go90.py
godtube.py
golem.py
googledrive.py
googleplus.py
googlesearch.py
goshgay.py
gputechconf.py
groupon.py
hark.py
hbo.py
hearthisat.py
heise.py
hellporno.py
helsinki.py
hentaistigma.py
hgtv.py
hidive.py
historicfilms.py
hitbox.py
hitrecord.py
hketv.py
hornbunny.py
hotnewhiphop.py
hotstar.py
howcast.py
howstuffworks.py
hrti.py
huajiao.py
huffpost.py
hungama.py
hypem.py
iconosquare.py
ign.py
imdb.py
imgur.py
ina.py
inc.py
indavideo.py
infoq.py
instagram.py
internazionale.py
internetvideoarchive.py
iprima.py
iqiyi.py
ir90tv.py
itv.py
ivi.py
ivideon.py
iwara.py
izlesene.py
jamendo.py
jeuxvideo.py
joj.py
jove.py
jpopsukitv.py
jwplatform.py
kakao.py
kaltura.py
kanalplay.py
kankan.py
karaoketv.py
karrierevideos.py
keek.py
keezmovies.py
ketnet.py
khanacademy.py
kickstarter.py
kinopoisk.py
konserthusetplay.py
kontrtube.py
krasview.py
ku6.py
kusi.py
kuwo.py
la7.py
laola1tv.py
lci.py
lcp.py
learnr.py
lecture2go.py
lecturio.py
leeco.py
lego.py
lemonde.py
lenta.py
libraryofcongress.py
libsyn.py
lifenews.py
limelight.py
line.py
linkedin.py
linuxacademy.py
litv.py
livejournal.py
liveleak.py
livestream.py
lnkgo.py
localnews8.py
lovehomeporn.py
lrt.py
lynda.py
m6.py
macgamestore.py
mailru.py
makertv.py
malltv.py
mangomolo.py
manyvids.py
markiza.py
massengeschmacktv.py
matchtv.py
mdr.py
medialaan.py
mediaset.py
mediasite.py
medici.py
megaphone.py
meipai.py
melonvod.py
meta.py
metacafe.py
metacritic.py
mgoon.py
mgtv.py
miaopai.py
microsoftvirtualacademy.py
minhateca.py
ministrygrid.py
minoto.py
miomio.py
mit.py
mitele.py
mixcloud.py
mlb.py
mnet.py
moevideo.py
mofosex.py
mojvideo.py
morningstar.py
motherless.py
motorsport.py
movieclips.py
moviezine.py
movingimage.py
msn.py
mtv.py
muenchentv.py
musicplayon.py
mwave.py
mychannels.py
myspace.py
myspass.py
myvi.py
myvidster.py
nationalgeographic.py
naver.py
nba.py
nbc.py
ndr.py
ndtv.py
nerdcubed.py
neteasemusic.py
netzkino.py
newgrounds.py
newstube.py
nextmedia.py
nexx.py
nfl.py
nhk.py
nhl.py
nick.py
niconico.py
ninecninemedia.py
ninegag.py
ninenow.py
nintendo.py
njpwworld.py
nobelprize.py
noco.py
nonktube.py
noovo.py
normalboots.py
nosvideo.py
nova.py
nowness.py
noz.py
npo.py
npr.py
nrk.py
nrl.py
ntvcojp.py
ntvde.py
ntvru.py
nuevo.py
nuvid.py
nytimes.py
nzz.py
odatv.py
odnoklassniki.py
oktoberfesttv.py
once.py
ondemandkorea.py
onet.py
onionstudios.py
ooyala.py
openload.py
ora.py
orf.py
outsidetv.py
packtpub.py
pandatv.py
pandoratv.py
parliamentliveuk.py
patreon.py
pbs.py
pearvideo.py
peertube.py
people.py
performgroup.py
periscope.py
philharmoniedeparis.py
phoenix.py
photobucket.py
picarto.py
piksel.py
pinkbike.py
pladform.py
platzi.py
playfm.py
playplustv.py
plays.py
playtvak.py
playvid.py
playwire.py
pluralsight.py
podomatic.py
pokemon.py
polskieradio.py
popcorntv.py
porn91.py
porncom.py
pornhd.py
pornhub.py
pornotube.py
pornovoisines.py
pornoxo.py
presstv.py
promptfile.py
prosiebensat1.py
puhutv.py
puls4.py
pyvideo.py
qqmusic.py
r7.py
radiobremen.py
radiocanada.py
radiode.py
radiofrance.py
radiojavan.py
rai.py
raywenderlich.py
rbmaradio.py
rds.py
redbulltv.py
reddit.py
redtube.py
regiotv.py
rentv.py
restudy.py
reuters.py
reverbnation.py
revision3.py
rice.py
rmcdecouverte.py
ro220.py
rockstargames.py
roosterteeth.py
rottentomatoes.py
roxwel.py
rozhlas.py
rtbf.py
rte.py
rtl2.py
rtlnl.py
rtp.py
rts.py
rtve.py
rtvnh.py
rtvs.py
ruhd.py
rutube.py
rutv.py
ruutu.py
ruv.py
safari.py
sapo.py
savefrom.py
sbs.py
screencast.py
screencastomatic.py
scrippsnetworks.py
seeker.py
senateisvp.py
sendtonews.py
servingsys.py
servus.py
sevenplus.py
sexu.py
seznamzpravy.py
shahid.py
shared.py
showroomlive.py
sina.py
sixplay.py
sky.py
skylinewebcams.py
skynewsarabia.py
slideshare.py
slideslive.py
slutload.py
smotri.py
snotr.py
sohu.py
sonyliv.py
soundcloud.py
soundgasm.py
southpark.py
spankbang.py
spankwire.py
spiegel.py
spiegeltv.py
spike.py
sport5.py
sportbox.py
sportdeutschland.py
springboardplatform.py
sprout.py
srgssr.py
srmediathek.py
stanfordoc.py
steam.py
stitcher.py
streamable.py
streamango.py
streamcloud.py
streamcz.py
streetvoice.py
stretchinternet.py
stv.py
sunporno.py
sverigesradio.py
svt.py
swrmediathek.py
syfy.py
sztvhu.py
tagesschau.py
tass.py
tastytrade.py
tbs.py
tdslifeway.py
teachable.py
teachertube.py
teachingchannel.py
teamcoco.py
teamtreehouse.py
techtalks.py
ted.py
tele13.py
tele5.py
telebruxelles.py
telecinco.py
telegraaf.py
telemb.py
telequebec.py
teletask.py
telewebion.py
tennistv.py
testurl.py
tf1.py
tfo.py
theintercept.py
theplatform.py
thescene.py
thestar.py
thesun.py
theweatherchannel.py
thisamericanlife.py
thisav.py
thisoldhouse.py
threeqsdn.py
tiktok.py
tinypic.py
tmz.py
tnaflix.py
toggle.py
tonline.py
toongoggles.py
toutv.py
toypics.py
traileraddict.py
trilulilu.py
trunews.py
trutv.py
tube8.py
tubitv.py
tudou.py
tumblr.py
tunein.py
tunepk.py
turbo.py
turner.py
tutv.py
tv2.py
tv2hu.py
tv4.py
tv5mondeplus.py
tva.py
tvanouvelles.py
tvc.py
tvigle.py
tvland.py
tvn24.py
tvnet.py
tvnoe.py
tvnow.py
tvp.py
tvplay.py
tvplayer.py
tweakers.py
twentyfourvideo.py
twentymin.py
twentythreevideo.py
twitcasting.py
twitch.py
twitter.py
udemy.py
udn.py
ufctv.py
uktvplay.py
umg.py
unistra.py
unity.py
uol.py
uplynk.py
urort.py
urplay.py
usanetwork.py
usatoday.py
ustream.py
ustudio.py
varzesh3.py
vbox7.py
veehd.py
veoh.py
vessel.py
vesti.py
vevo.py
vgtv.py
vh1.py
vice.py
vidbit.py
viddler.py
videa.py
videodetective.py
videofyme.py
videomore.py
videopremium.py
videopress.py
vidio.py
vidlii.py
vidme.py
vidzi.py
vier.py
viewlift.py
viewster.py
viidea.py
viki.py
vimeo.py
vimple.py
vine.py
viqeo.py
viu.py
vk.py
vlive.py
vodlocker.py
vodpl.py
vodplatform.py
voicerepublic.py
voot.py
voxmedia.py
vrak.py
vrt.py
vrv.py
vshare.py
vube.py
vuclip.py
vvvvid.py
vyborymos.py
vzaar.py
wakanim.py
walla.py
washingtonpost.py
wat.py
watchbox.py
watchindianporn.py
wdr.py
webcaster.py
webofstories.py
weibo.py
weiqitv.py
wistia.py
worldstarhiphop.py
wsj.py
wwe.py
xbef.py
xboxclips.py
xfileshare.py
xhamster.py
xiami.py
ximalaya.py
xminus.py
xnxx.py
xstream.py
xtube.py
xuite.py
xvideos.py
xxxymovies.py
yahoo.py
yandexdisk.py
yandexmusic.py
yandexvideo.py
yapfiles.py
yesjapan.py
yinyuetai.py
ynet.py
youjizz.py
youku.py
younow.py
youporn.py
yourporn.py
yourupload.py
youtube.py
zapiks.py
zaq1.py
zattoo.py
zdf.py
zingmp3.py
zype.py
postprocessor
YoutubeDL.py
__init__.py
__main__.py
aes.py
cache.py
compat.py
jsinterp.py
options.py
socks.py
swfinterp.py
update.py
utils.py
version.py
.gitignore
.travis.yml
AUTHORS
CONTRIBUTING.md
ChangeLog
LICENSE
MANIFEST.in
Makefile
README.md
setup.cfg
setup.py
tox.ini
youtube-dl.plugin.zsh
124 lines
4.6 KiB
Python
124 lines
4.6 KiB
Python
# coding: utf-8
|
|
from __future__ import unicode_literals
|
|
|
|
from .common import InfoExtractor
|
|
from ..utils import (
|
|
int_or_none,
|
|
float_or_none,
|
|
unified_strdate,
|
|
)
|
|
|
|
|
|
class WSJIE(InfoExtractor):
    """Extractor for Wall Street Journal / Barron's videos addressed by GUID.

    Matches the video-api.wsj.com player iframe URL, wsj.com / barrons.com
    video pages and the internal ``wsj:<guid>`` scheme. Metadata is fetched
    from the ``find_all_videos.asp`` endpoint on video-api.wsj.com.
    """
    _VALID_URL = r'''(?x)
                        (?:
                            https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
                            https?://(?:www\.)?(?:wsj|barrons)\.com/video/(?:[^/]+/)+|
                            wsj:
                        )
                        (?P<id>[a-fA-F0-9-]{36})
                    '''
    IE_DESC = 'Wall Street Journal'
    _TESTS = [{
        'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
        'md5': 'e230a5bb249075e40793b655a54a02e4',
        'info_dict': {
            'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
            'ext': 'mp4',
            'upload_date': '20150202',
            'uploader_id': 'jdesai',
            'creator': 'jdesai',
            'categories': list,  # a long list
            'duration': 90,
            'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
        },
    }, {
        'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
        'only_matching': True,
    }, {
        'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
        'only_matching': True,
    }, {
        'url': 'https://www.wsj.com/video/series/a-brief-history-of/the-modern-cell-carrier-how-we-got-here/980E2187-401D-48A1-B82B-1486CEE06CB9',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video whose GUID is embedded in *url*.

        Queries the video API for the first item matching the GUID, then
        collects formats from the HDS manifest (``videoURL``), the HLS
        manifest (``hls``) and the direct MP4 list (``videoMP4List``).
        """
        video_id = self._match_id(url)

        info = self._download_json(
            'http://video-api.wsj.com/api-video/find_all_videos.asp', video_id,
            query={
                'type': 'guid',
                'count': 1,
                'query': video_id,
                'fields': ','.join((
                    'type', 'hls', 'videoMP4List', 'thumbnailList', 'author',
                    'description', 'name', 'duration', 'videoURL', 'titletag',
                    'formattedCreationDate', 'keywords', 'editor')),
            })['items'][0]
        # Use `or` rather than a .get() default so that a 'name' key that is
        # present but null/empty still falls back to 'titletag'.
        title = info.get('name') or info.get('titletag')

        formats = []

        f4m_url = info.get('videoURL')
        if f4m_url:
            formats.extend(self._extract_f4m_formats(
                f4m_url, video_id, f4m_id='hds', fatal=False))

        m3u8_url = info.get('hls')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, ext='mp4',
                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))

        # `or []` also covers the key being present with a null value, which
        # a .get() default would not.
        for v in info.get('videoMP4List') or []:
            mp4_url = v.get('url')
            if not mp4_url:
                continue
            tbr = int_or_none(v.get('bitrate'))
            formats.append({
                'url': mp4_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
                'width': int_or_none(v.get('width')),
                'height': int_or_none(v.get('height')),
                'fps': float_or_none(v.get('fps')),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            # Thumbnails are conveniently in the correct format already
            'thumbnails': info.get('thumbnailList'),
            'creator': info.get('author'),
            'uploader_id': info.get('editor'),
            'duration': int_or_none(info.get('duration')),
            'upload_date': unified_strdate(info.get(
                'formattedCreationDate'), day_first=False),
            'title': title,
            'categories': info.get('keywords'),
        }
|
|
|
|
|
|
class WSJArticleIE(InfoExtractor):
    """Extractor for wsj.com article pages that embed a WSJ video."""
    _VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
        'info_dict': {
            'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
            'ext': 'mp4',
            'upload_date': '20170221',
            'uploader_id': 'ralcaraz',
            'title': 'Bao Bao the Panda Leaves for China',
        }
    }

    def _real_extract(self, url):
        """Find the video GUID in the article page and delegate to WSJIE.

        The article is downloaded, the first 36-character GUID found in a
        ``data-src`` attribute is taken as the video id, and the result is
        handed over as a ``wsj:<guid>`` URL.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        guid = self._search_regex(
            r'data-src=["\']([a-fA-F0-9-]{36})', page, 'video id')
        internal_url = 'wsj:%s' % guid
        return self.url_result(internal_url, WSJIE.ie_key(), guid)
|