Compare commits

..

12 Commits

Author SHA1 Message Date
Philipp Hagemeister
26886e6140 release 2015.01.04 2015-01-04 03:15:48 +01:00
Philipp Hagemeister
7a1818c99b [vk] Add support for rutube embeds (Fixes #4514) 2015-01-04 03:15:27 +01:00
Philipp Hagemeister
2ccd1b10e5 [soulanime] Fix under Python 3 2015-01-04 02:20:45 +01:00
Philipp Hagemeister
788fa208c8 Merge branch 'master' of github.com:rg3/youtube-dl 2015-01-04 02:08:38 +01:00
Philipp Hagemeister
8848314c08 [Makefile] Make offline tests actually work offline 2015-01-04 02:08:18 +01:00
Philipp Hagemeister
c11125f9ed [tests] Remove format 138 from tests (#4559) 2015-01-04 02:06:53 +01:00
Philipp Hagemeister
95ceeec722 Remove unused import 2015-01-04 02:05:35 +01:00
Philipp Hagemeister
b68ff25917 Add various anime sites (Closes #4554) 2015-01-04 02:05:26 +01:00
Sergey M.
3e3327ea17 Merge pull request #4629 from t0mm0/tf1-tfou
[tf1] add support for TFOU
2015-01-04 06:51:28 +06:00
t0mm0
b158bb8693 [tf1] simplify regex 2015-01-04 00:45:23 +00:00
t0mm0
2bf098eda4 [tf1] fix test 2015-01-04 00:43:55 +00:00
t0mm0
382e05fa56 [tf1] add support for TFOU 2015-01-04 00:05:31 +00:00
14 changed files with 433 additions and 9 deletions

View File

@@ -98,3 +98,4 @@ Will Glynn
Max Reimann
Cédric Luthi
Thijs Vermeir
Joel Leclerc

View File

@@ -46,7 +46,7 @@ test:
ot: offlinetest
offlinetest: codetest
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
tar: youtube-dl.tar.gz

View File

@@ -218,7 +218,7 @@ class TestFormatSelection(unittest.TestCase):
# 3D
'85', '84', '102', '83', '101', '82', '100',
# Dash video
'138', '137', '248', '136', '247', '135', '246',
'137', '248', '136', '247', '135', '246',
'245', '244', '134', '243', '133', '242', '160',
# Dash audio
'141', '172', '140', '171', '139',

View File

@@ -4,7 +4,6 @@ import os
import subprocess
from .common import FileDownloader
from ..compat import compat_subprocess_get_DEVNULL
from ..utils import (
check_executable,
encodeFilename,

View File

@@ -164,6 +164,10 @@ from .globo import GloboIE
from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
from .gogoanime import (
GoGoAnimeIE,
GoGoAnimeSearchIE
)
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE
@@ -313,6 +317,16 @@ from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .planetaplay import PlanetaPlayIE
from .played import PlayedIE
from .play44 import (
Play44IE,
ByZooIE,
Video44IE,
VideoWingIE,
PlayPandaIE,
VideoZooIE,
PlayBBIE,
EasyVideoIE
)
from .playfm import PlayFMIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
@@ -345,6 +359,7 @@ from .ruhd import RUHDIE
from .rutube import (
RutubeIE,
RutubeChannelIE,
RutubeEmbedIE,
RutubeMovieIE,
RutubePersonIE,
)
@@ -373,6 +388,10 @@ from .smotri import (
from .snotr import SnotrIE
from .sockshare import SockshareIE
from .sohu import SohuIE
from .soulanime import (
SoulAnimeWatchingIE,
SoulAnimeSeriesIE
)
from .soundcloud import (
SoundcloudIE,
SoundcloudSetIE,
@@ -467,6 +486,7 @@ from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
from .videolecturesnet import VideoLecturesNetIE
from .videofun import VideoFunIE
from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE
from .videopremium import VideoPremiumIE

View File

@@ -0,0 +1,76 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_parse,
get_element_by_attribute,
unescapeHTML
)
class GoGoAnimeIE(InfoExtractor):
    """Extractor for individual GoGoAnime post pages.

    The embedded player iframes found inside the ``postcontent`` element are
    handed over to other extractors. When the post content opens with an
    iframe followed by a line break, all embeds are returned as a playlist;
    otherwise the first embed is returned as a single URL result carrying
    the page title.
    """
    IE_NAME = 'gogoanime'
    IE_DESC = 'GoGoAnime'
    # Dots are escaped so that only the literal host name matches.
    _VALID_URL = r'http://www\.gogoanime\.com/(?P<id>[A-Za-z0-9-]+)'
    _TEST = {
        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-movie-1'
        },
        'playlist_count': 3
    }

    def _real_extract(self, url):
        """Return a playlist or a single URL result for the post at *url*.

        Raises ExtractorError when the page reports a missing video, when
        the post content cannot be located, or when no usable embed is left
        for a single-video page.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        if 'Oops! Page Not Found</font>' in page:
            raise ExtractorError('Video does not exist', expected=True)

        content = get_element_by_attribute("class", "postcontent", page)
        # presumably the helper yields None when the element is absent;
        # re.findall would then fail with an unhelpful TypeError.
        if content is None:
            raise ExtractorError('Unable to find post content')

        embeds = re.findall(
            r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content)
        # videofun embeds are deliberately left out of the candidate list.
        vids = [
            unescapeHTML(compat_urllib_parse.unquote(embed))
            for embed in embeds if 'videofun' not in embed]

        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
            return self.playlist_result(
                [self.url_result(vid) for vid in vids], video_id)

        title = self._html_search_regex(
            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')

        if not vids:
            raise ExtractorError('Unable to find any embedded video')

        return {
            '_type': 'url',
            'id': video_id,
            'url': vids[0],
            'title': title,
        }
class GoGoAnimeSearchIE(InfoExtractor):
    """Extractor turning a GoGoAnime search results page into a playlist."""
    IE_NAME = 'gogoanime:search'
    IE_DESC = 'GoGoAnime Search'
    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)'
    _TEST = {
        'url': 'http://www.gogoanime.com/?s=bokusatsu',
        'info_dict': {
            'id': 'bokusatsu'
        },
        'playlist_count': 6
    }

    def _real_extract(self, url):
        """Collect the post links of the result page; the query is the playlist id."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # Each hit is the first link inside a "postlist" container.
        entries = []
        for post_url in re.findall(
                r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"',
                webpage):
            entries.append(self.url_result(post_url))

        return self.playlist_result(entries, playlist_id)

View File

@@ -0,0 +1,149 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse
)
class Play44IE(InfoExtractor):
    """Extractor for play44.net embed pages.

    Also serves as the base class for sibling hosts that only override
    ``_VALID_URL`` and ``_TESTS``.
    """
    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)'
    _TESTS = [{
        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv',
        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-07',
            'ext': 'flv',
            'title': 'mahou-shoujo-madoka-magica-07',
        }
    }]

    def _real_extract(self, url):
        """Read the media URL from the player page and name the video after it."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The page stores the media location percent-encoded in a ``_url`` assignment.
        encoded_url = self._html_search_regex(
            r'_url = "(https?://[^"]+?)";', page, 'url')
        video_url = compat_urllib_parse.unquote(encoded_url)

        # The text after the last slash, up to the first dot, is used as both id and title.
        title = self._search_regex(
            r'.*/(?P<title>[^.]*).', video_url, 'title')

        return {
            'id': title,
            'url': video_url,
            'title': title,
        }
class ByZooIE(Play44IE):
    """Extractor for byzoo.org embed pages; extraction is inherited from Play44IE."""
    _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)'
    _TESTS = [
        {
            'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4',
            'md5': '455c83dabe2cd9fd74a87612b01fe017',
            'info_dict': {
                'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
                'ext': 'mp4',
                'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
            },
        },
    ]
class Video44IE(Play44IE):
    """Extractor for video44.net player pages; extraction is inherited from Play44IE."""
    # The id group captures the ``file=`` value up to its extension or the
    # next parameter. It used to be ``[^&].``, which matches exactly two
    # characters (e.g. "ch" for the test URL below).
    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&.]+).*'
    _TESTS = [{
        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1',
        'md5': '43eaec6d0beb10e8d42459b9f108aff3',
        'info_dict': {
            'id': 'chaoshead-12',
            'ext': 'mp4',
            'title': 'chaoshead-12',
        }
    }]
class VideoWingIE(Play44IE):
    """Extractor for videowing embed pages; extraction is inherited from Play44IE.

    Two URL shapes are accepted: a ``video=`` query parameter and an
    ``embed/<hash>`` path.
    """
    # Verbose-mode pattern: the whitespace used for layout is not significant.
    _VALID_URL = r'''(?x)
        http://[w.]*videowing\.[^/]*/
        (?:
            .*video=/*
            |embed/
        )
        (?P<id>[^&?.]+)
    '''
    _TESTS = [
        {
            'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
            'md5': '4ed320e353ed26c742c4f12a9c210b60',
            'info_dict': {
                'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
                'ext': 'mp4',
                'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
            },
        },
        {
            'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438',
            'md5': '33fdd71581357018c226f95c5cedcfd7',
            'info_dict': {
                'id': 'mahoushoujomadokamagicamovie1part1',
                'ext': 'flv',
                'title': 'mahoushoujomadokamagicamovie1part1',
            },
        },
    ]
class PlayPandaIE(Play44IE):
    """Extractor for playpanda embed pages; extraction is inherited from Play44IE."""
    # The id group captures the ``vid=`` value up to its extension or the
    # next parameter. It used to be ``[^&].``, which matches exactly two
    # characters (e.g. "at" for the test URL below).
    _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&.]+).*'
    _TESTS = [{
        'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
            # NOTE(review): Play44IE._real_extract returns only id, url and
            # title - confirm that this expectation can be satisfied.
            'description': 'boku_wa_tomodachi_ga_sukunai_-_05'
        }
    }]
class VideoZooIE(Play44IE):
    """Extractor for videozoo embed pages; extraction is inherited from Play44IE."""
    # The id group captures the ``vid=`` value up to its extension or the
    # next parameter. It used to be ``[^&].``, which matches exactly two
    # characters (e.g. "at" for the test URL below).
    _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&.]+).*'
    _TESTS = [{
        'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
class PlayBBIE(Play44IE):
    """Extractor for playbb embed pages; extraction is inherited from Play44IE."""
    # The id group captures the ``vid=`` value up to its extension or the
    # next parameter. It used to be ``[^&].``, which matches exactly two
    # characters (e.g. "at" for the test URL below).
    _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&.]+).*'
    _TESTS = [{
        'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
class EasyVideoIE(Play44IE):
    """Extractor for easyvideo player pages; extraction is inherited from Play44IE."""
    _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)'
    _TESTS = [
        {
            'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1',
            'md5': '26178b57629b7650106d72b191137176',
            'info_dict': {
                'id': 'bokuwatomodachigasukunai-04',
                'ext': 'mp4',
                'title': 'bokuwatomodachigasukunai-04',
            },
            'skip': 'Blocked in Germany',
        },
    ]

View File

@@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor):
}
class RutubeEmbedIE(InfoExtractor):
    """Extractor for Rutube embed pages.

    The embed page is only used to look up the canonical video URL, which
    is then delegated to the 'Rutube' extractor.
    """
    IE_NAME = 'rutube:embed'
    IE_DESC = 'Rutube embedded videos'
    # Raw string: "\." is not a valid escape sequence in a regular literal.
    _VALID_URL = r'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
        'info_dict': {
            'id': 'a10e53b86e8f349080f718582ce4c661',
            'ext': 'mp4',
            'upload_date': '20131223',
            'uploader_id': '297833',
            'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
            'uploader': 'subziro89 ILya',
            'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
        },
        'params': {
            'skip_download': 'Requires ffmpeg',
        },
    }

    def _real_extract(self, url):
        """Resolve the embed at *url* to its canonical page and delegate to 'Rutube'."""
        embed_id = self._match_id(url)
        webpage = self._download_webpage(url, embed_id)

        canonical_url = self._html_search_regex(
            r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
            'Canonical URL')
        return self.url_result(canonical_url, 'Rutube')
class RutubeChannelIE(InfoExtractor):
IE_NAME = 'rutube:channel'
IE_DESC = 'Rutube channels'

View File

@@ -0,0 +1,80 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
HEADRequest,
urlhandle_detect_ext,
)
class SoulAnimeWatchingIE(InfoExtractor):
    """Extractor for single soul-anime episode ("watching") pages."""
    IE_NAME = "soulanime:watching"
    IE_DESC = "SoulAnime video"
    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
        'md5': '05fae04abf72298098b528e98abf4298',
        'info_dict': {
            'id': 'seirei-tsukai-no-blade-dance-episode-9',
            'ext': 'mp4',
            'title': 'seirei-tsukai-no-blade-dance-episode-9',
            'description': 'seirei-tsukai-no-blade-dance-episode-9'
        }
    }

    def _real_extract(self, url):
        """Build the download URL from the page and probe it for the extension."""
        match = re.match(self._VALID_URL, url)
        video_id, domain = match.group('id', 'domain')

        page = self._download_webpage(url, video_id)

        # The download link is site-relative; prefix it with the host of the
        # matched top-level domain.
        download_path = self._html_search_regex(
            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
        video_url = ''.join(("http://www.soul-anime.", domain, download_path))

        # A HEAD request is used so that the extension can be derived from
        # the response headers.
        head_response = self._request_webpage(
            HEADRequest(video_url), video_id, note='Determining extension')
        ext = urlhandle_detect_ext(head_response)

        # The URL slug doubles as title and description.
        return {
            'id': video_id,
            'url': video_url,
            'ext': ext,
            'title': video_id,
            'description': video_id
        }
class SoulAnimeSeriesIE(InfoExtractor):
    """Extractor turning a soul-anime series page into a playlist of episodes."""
    IE_NAME = "soulanime:series"
    IE_DESC = "SoulAnime Series"

    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'

    # Episodes are listed as <option> elements whose value is a site-relative path.
    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'

    _TEST = {
        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
        'info_dict': {
            'id': 'black-rock-shooter-tv'
        },
        'playlist_count': 8
    }

    def _real_extract(self, url):
        """Return one URL result per episode path found on the series page."""
        match = re.match(self._VALID_URL, url)
        series_id, domain = match.group('id', 'domain')

        page = self._download_webpage(url, series_id, "Downloading series page")

        site_root = "http://www.soul-anime." + domain
        entries = []
        for episode_path in re.findall(self._EPISODE_REGEX, page):
            entries.append(self.url_result(site_root + episode_path))

        return self.playlist_result(entries, series_id)

View File

@@ -8,8 +8,8 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player."""
_VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
_TEST = {
_VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
_TESTS = {
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
'info_dict': {
'id': '10635995',
@@ -21,14 +21,26 @@ class TF1IE(InfoExtractor):
# Sometimes wat serves the whole file with the --test option
'skip_download': True,
},
}, {
'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
'info_dict': {
'id': '12043945',
'ext': 'mp4',
'title': 'Le grand Mystérioso - Chuggington',
'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
'upload_date': '20150103',
},
'params': {
# Sometimes wat serves the whole file with the --test option
'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._html_search_regex(
r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url')
embed_page = self._download_webpage(embed_url, video_id,
'Downloading embed player page')
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')

View File

@@ -0,0 +1,36 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse
)
class VideoFunIE(InfoExtractor):
    """Extractor for videofun.me embed pages."""
    _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)'
    _TEST = {
        'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438',
        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
        'info_dict': {
            'id': 'Mahou-Shoujo-Madoka-Magica-07',
            'ext': 'flv',
            'title': 'Mahou-Shoujo-Madoka-Magica-07',
        }
    }

    def _real_extract(self, url):
        """Read the gateway URL from the embed page and name the video after it."""
        embed_id = self._match_id(url)
        webpage = self._download_webpage(
            url, embed_id, 'Downloading video page')

        # The gateway URL is stored percent-encoded in the page.
        encoded_url = self._html_search_regex(
            r'url: "(http://gateway\.videofun\.me[^"]+)"', webpage, 'video url')
        video_url = compat_urllib_parse.unquote(encoded_url)

        # The text between the last slash and the following dot serves as id and title.
        name = self._html_search_regex(r'.*/([^.]*)\.', video_url, 'title')

        return {
            'id': name,
            'url': video_url,
            'title': name,
        }

View File

@@ -164,6 +164,15 @@ class VKIE(InfoExtractor):
self.to_screen('Youtube video detected')
return self.url_result(m_yt.group(1), 'Youtube')
m_rutube = re.search(
r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
assert m_rutube
if m_rutube is not None:
self.to_screen('rutube video detected')
rutube_url = self._proto_relative_url(
m_rutube.group(1).replace('\\', ''))
return self.url_result(rutube_url)
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
if m_opts:
m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))

View File

@@ -1550,3 +1550,14 @@ def ytdl_is_updateable():
def args_to_str(args):
    """Return a short, shell-quoted string representation of a subprocess command."""
    quoted_args = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted_args)
def urlhandle_detect_ext(url_handle):
    """Derive a file extension from the Content-Type header of *url_handle*.

    The header is read from ``url_handle.headers`` when that attribute
    exists, otherwise through ``url_handle.info().getheader``.

    Returns the media subtype (the part after the slash, e.g. ``'mp4'`` for
    ``video/mp4``) with any ``;``-separated parameters removed, or None
    when the header is missing or carries no subtype.
    """
    try:
        headers = url_handle.headers
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader
    else:
        def getheader(name):
            return headers[name]

    content_type = getheader('Content-Type')
    if not content_type:
        return None

    # Drop parameters such as "; charset=utf-8" before splitting the type.
    media_type = content_type.split(';', 1)[0].strip()
    parts = media_type.split('/')
    if len(parts) < 2 or not parts[1]:
        return None
    return parts[1]

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.01.03'
__version__ = '2015.01.04'