release 2015.01.04

[vk] Add support for rutube embeds (Fixes #4514 )
[soulanime] Fix under Python 3
2015-01-04 03:15:48 +01:00 · 2015-01-04 03:15:27 +01:00 · 2015-01-04 02:20:45 +01:00 · 2015-01-04 02:08:38 +01:00 · 2015-01-04 02:08:18 +01:00 · 2015-01-04 02:06:53 +01:00
14 changed files with 433 additions and 9 deletions
--- a/1
+++ b/1
@@ -98,3 +98,4 @@ Will Glynn
 Max Reimann
 Cédric Luthi
 Thijs Vermeir
+Joel Leclerc
--- a/2
+++ b/2
@@ -46,7 +46,7 @@ test:
 ot: offlinetest

 offlinetest: codetest
-	nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations
+	nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists

 tar: youtube-dl.tar.gz

--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -218,7 +218,7 @@ class TestFormatSelection(unittest.TestCase):
            # 3D
            '85', '84', '102', '83', '101', '82', '100',
            # Dash video
-            '138', '137', '248', '136', '247', '135', '246',
+            '137', '248', '136', '247', '135', '246',
            '245', '244', '134', '243', '133', '242', '160',
            # Dash audio
            '141', '172', '140', '171', '139',
--- a/youtube_dl/downloader/mplayer.py
+++ b/youtube_dl/downloader/mplayer.py
@@ -4,7 +4,6 @@ import os
 import subprocess

 from .common import FileDownloader
-from ..compat import compat_subprocess_get_DEVNULL
 from ..utils import (
    check_executable,
    encodeFilename,
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -164,6 +164,10 @@ from .globo import GloboIE
 from .godtube import GodTubeIE
 from .goldenmoustache import GoldenMoustacheIE
 from .golem import GolemIE
+from .gogoanime import (
+    GoGoAnimeIE,
+    GoGoAnimeSearchIE
+)
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
@@ -313,6 +317,16 @@ from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .planetaplay import PlanetaPlayIE
 from .played import PlayedIE
+from .play44 import (
+    Play44IE,
+    ByZooIE,
+    Video44IE,
+    VideoWingIE,
+    PlayPandaIE,
+    VideoZooIE,
+    PlayBBIE,
+    EasyVideoIE
+)
 from .playfm import PlayFMIE
 from .playvid import PlayvidIE
 from .podomatic import PodomaticIE
@@ -345,6 +359,7 @@ from .ruhd import RUHDIE
 from .rutube import (
    RutubeIE,
    RutubeChannelIE,
+    RutubeEmbedIE,
    RutubeMovieIE,
    RutubePersonIE,
 )
@@ -373,6 +388,10 @@ from .smotri import (
 from .snotr import SnotrIE
 from .sockshare import SockshareIE
 from .sohu import SohuIE
+from .soulanime import (
+    SoulAnimeWatchingIE,
+    SoulAnimeSeriesIE
+)
 from .soundcloud import (
    SoundcloudIE,
    SoundcloudSetIE,
@@ -467,6 +486,7 @@ from .viddler import ViddlerIE
 from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
 from .videolecturesnet import VideoLecturesNetIE
+from .videofun import VideoFunIE
 from .videofyme import VideofyMeIE
 from .videomega import VideoMegaIE
 from .videopremium import VideoPremiumIE
--- a/youtube_dl/extractor/gogoanime.py
+++ b/youtube_dl/extractor/gogoanime.py
@@ -0,0 +1,76 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_parse,
+    get_element_by_attribute,
+    unescapeHTML
+)
+
+
+class GoGoAnimeIE(InfoExtractor):
+    IE_NAME = 'gogoanime'
+    IE_DESC = 'GoGoAnime'
+    _VALID_URL = r'http://www\.gogoanime\.com/(?P<id>[A-Za-z0-9-]+)'
+
+    _TEST = {
+        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
+        'info_dict': {
+            'id': 'mahou-shoujo-madoka-magica-movie-1'
+        },
+        'playlist_count': 3
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist of the embedded players, or one url-type result."""
+        video_id = self._match_id(url)
+        page = self._download_webpage(url, video_id)
+        if 'Oops! Page Not Found</font>' in page:
+            raise ExtractorError('Video does not exist', expected=True)
+
+        content = get_element_by_attribute("class", "postcontent", page)
+        vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content)
+        # Drop iframes whose URL mentions videofun, then decode the remaining URLs
+        vids = [
+            unescapeHTML(compat_urllib_parse.unquote(x))
+            for x in vids if 'videofun' not in x]
+        # A post body that opens with an iframe followed by <br /> is a playlist
+        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
+            return self.playlist_result([self.url_result(vid) for vid in vids], video_id)
+
+        title = self._html_search_regex(
+            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')
+        return {
+            '_type': 'url',
+            'id': video_id,
+            'url': vids[0],
+            'title': title,
+        }
+
+
+class GoGoAnimeSearchIE(InfoExtractor):
+    """Expose a GoGoAnime search results page as a playlist of post URLs."""
+    IE_NAME = 'gogoanime:search'
+    IE_DESC = 'GoGoAnime Search'
+    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)'
+    _TEST = {
+        'url': 'http://www.gogoanime.com/?s=bokusatsu',
+        'info_dict': {
+            'id': 'bokusatsu'
+        },
+        'playlist_count': 6
+    }
+
+    def _real_extract(self, url):
+        query = self._match_id(url)
+        results_page = self._download_webpage(url, query)
+
+        # Capture the href of the link that follows each postlist div's <p>
+        post_re = r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"'
+        post_urls = re.findall(post_re, results_page)
+        entries = [self.url_result(post_url) for post_url in post_urls]
+
+        return self.playlist_result(entries, query)
--- a/youtube_dl/extractor/play44.py
+++ b/youtube_dl/extractor/play44.py
@@ -0,0 +1,149 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse
+)
+
+
+class Play44IE(InfoExtractor):
+    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)'
+
+    _TESTS = [{
+        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv',
+        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
+        'info_dict': {
+            'id': 'mahou-shoujo-madoka-magica-07',
+            'ext': 'flv',
+            'title': 'mahou-shoujo-madoka-magica-07',
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Pull the media URL out of the embed page (inherited by subclasses)."""
+        video_id = self._match_id(url)
+        page = self._download_webpage(url, video_id)
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(
+            r'_url = "(https?://[^"]+?)";', page, 'url'))
+        # The name before the literal '.' in the URL serves as both id and title
+        title = self._search_regex(r'.*/(?P<title>[^.]*)\.', video_url, 'title')
+        return {
+            'id': title,
+            'url': video_url,
+            'title': title,
+        }
+
+
+class ByZooIE(Play44IE):
+    _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)'
+    # Only the URL pattern and test differ; _real_extract comes from Play44IE
+    _TESTS = [{
+        'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4',
+        'md5': '455c83dabe2cd9fd74a87612b01fe017',
+        'info_dict': {
+            'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
+            'ext': 'mp4',
+            'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
+        }
+    }]
+
+
+class Video44IE(Play44IE):
+    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&.]+)'
+    # id: the file= value up to its first '.' or '&'; extraction is in Play44IE
+    _TESTS = [{
+        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1',
+        'md5': '43eaec6d0beb10e8d42459b9f108aff3',
+        'info_dict': {
+            'id': 'chaoshead-12',
+            'ext': 'mp4',
+            'title': 'chaoshead-12',
+        }
+    }]
+
+
+class VideoWingIE(Play44IE):
+    _VALID_URL = r'''(?x)
+        http://[w.]*videowing\.[^/]*/
+        (?:
+            .*video=/*
+            |embed/
+        )
+        (?P<id>[^&?.]+)
+    '''
+    # Tests cover both the video= and embed/ URL forms; extraction is in Play44IE
+    _TESTS = [{
+        'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }, {
+        'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438',
+        'md5': '33fdd71581357018c226f95c5cedcfd7',
+        'info_dict': {
+            'id': 'mahoushoujomadokamagicamovie1part1',
+            'ext': 'flv',
+            'title': 'mahoushoujomadokamagicamovie1part1',
+        }
+    }]
+
+
+class PlayPandaIE(Play44IE):
+    _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
+
+    _TESTS = [{
+        'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            # No 'description': Play44IE._real_extract returns only id, url, title
+        }
+    }]
+
+
+class VideoZooIE(Play44IE):
+    _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
+    # id: the vid= value up to its first '.' or '&'; extraction is in Play44IE
+    _TESTS = [{
+        'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }]
+
+
+class PlayBBIE(Play44IE):
+    _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
+    # id: the vid= value up to its first '.' or '&'; extraction is in Play44IE
+    _TESTS = [{
+        'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }]
+
+
+class EasyVideoIE(Play44IE):
+    _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)'
+    # Extraction is inherited from Play44IE; the test below is marked 'skip'
+    _TESTS = [{
+        'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1',
+        'md5': '26178b57629b7650106d72b191137176',
+        'info_dict': {
+            'id': 'bokuwatomodachigasukunai-04',
+            'ext': 'mp4',
+            'title': 'bokuwatomodachigasukunai-04',
+        },
+        'skip': 'Blocked in Germany',
+    }]
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor):
        }


+class RutubeEmbedIE(InfoExtractor):
+    IE_NAME = 'rutube:embed'
+    IE_DESC = 'Rutube embedded videos'
+    _VALID_URL = r'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
+        'info_dict': {
+            'id': 'a10e53b86e8f349080f718582ce4c661',
+            'ext': 'mp4',
+            'upload_date': '20131223',
+            'uploader_id': '297833',
+            'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
+            'uploader': 'subziro89 ILya',
+            'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
+        },
+        'params': {
+            'skip_download': 'Requires ffmpeg',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Follow the embed page's canonical link to the regular video page."""
+        embed_id = self._match_id(url)
+        webpage = self._download_webpage(url, embed_id)
+        # The embed id differs from the video id (see _TEST): resolve the real URL
+        canonical_url = self._html_search_regex(
+            r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, 'Canonical URL')
+        return self.url_result(canonical_url, 'Rutube')
+
+
 class RutubeChannelIE(InfoExtractor):
    IE_NAME = 'rutube:channel'
    IE_DESC = 'Rutube channels'
--- a/youtube_dl/extractor/soulanime.py
+++ b/youtube_dl/extractor/soulanime.py
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    HEADRequest,
+    urlhandle_detect_ext,
+)
+
+
+class SoulAnimeWatchingIE(InfoExtractor):
+    """Extract a single episode from a soul-anime "watching" page."""
+    IE_NAME = "soulanime:watching"
+    IE_DESC = "SoulAnime video"
+    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
+        'md5': '05fae04abf72298098b528e98abf4298',
+        'info_dict': {
+            'id': 'seirei-tsukai-no-blade-dance-episode-9',
+            'ext': 'mp4',
+            'title': 'seirei-tsukai-no-blade-dance-episode-9',
+            'description': 'seirei-tsukai-no-blade-dance-episode-9'
+        }
+    }
+
+    def _real_extract(self, url):
+        match = re.match(self._VALID_URL, url)
+        video_id, domain = match.group('id', 'domain')
+        page = self._download_webpage(url, video_id)
+
+        # The download href is appended to the site root of the matched domain
+        download_path = self._html_search_regex(
+            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
+        video_url = "http://www.soul-anime." + domain + download_path
+
+        # A HEAD request is enough for urlhandle_detect_ext to read the headers
+        head_handle = self._request_webpage(
+            HEADRequest(video_url), video_id, note='Determining extension')
+        ext = urlhandle_detect_ext(head_handle)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': ext,
+            'title': video_id,
+            'description': video_id
+        }
+
+
+class SoulAnimeSeriesIE(InfoExtractor):
+    """Expand a soul-anime series page into a playlist of its episode pages."""
+    IE_NAME = "soulanime:series"
+    IE_DESC = "SoulAnime Series"
+
+    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
+    # Episode links are read from the <option> values of the series page
+    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
+
+    _TEST = {
+        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
+        'info_dict': {
+            'id': 'black-rock-shooter-tv'
+        },
+        'playlist_count': 8
+    }
+
+    def _real_extract(self, url):
+        url_match = re.match(self._VALID_URL, url)
+        series_id, domain = url_match.group('id', 'domain')
+        site_root = "http://www.soul-anime." + domain
+
+        page = self._download_webpage(url, series_id, "Downloading series page")
+        episode_paths = re.findall(self._EPISODE_REGEX, page)
+
+        # The captured paths start with "/watch", so join them onto the site root
+        entries = [self.url_result(site_root + path) for path in episode_paths]
+
+        return self.playlist_result(entries, series_id)
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -8,8 +8,8 @@ from .common import InfoExtractor

 class TF1IE(InfoExtractor):
    """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
-    _TEST = {
+    _VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
+    _TESTS = {
        'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
        'info_dict': {
            'id': '10635995',
@@ -21,14 +21,26 @@ class TF1IE(InfoExtractor):
            # Sometimes wat serves the whole file with the --test option
            'skip_download': True,
        },
+    }, {
+        'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
+        'info_dict': {
+            'id': '12043945',
+            'ext': 'mp4',
+            'title': 'Le grand Mystérioso - Chuggington',
+            'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
+            'upload_date': '20150103',
+        },
+        'params': {
+            # Sometimes wat serves the whole file with the --test option
+            'skip_download': True,
+        },
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        embed_url = self._html_search_regex(
-            r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
+            r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url')
        embed_page = self._download_webpage(embed_url, video_id,
                                            'Downloading embed player page')
        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
--- a/youtube_dl/extractor/videofun.py
+++ b/youtube_dl/extractor/videofun.py
@@ -0,0 +1,36 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse
+)
+
+
+class VideoFunIE(InfoExtractor):
+    """Extract the gateway media URL from a videofun.me embed page."""
+    _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)'
+    _TEST = {
+        'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438',
+        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
+        'info_dict': {
+            'id': 'Mahou-Shoujo-Madoka-Magica-07',
+            'ext': 'flv',
+            'title': 'Mahou-Shoujo-Madoka-Magica-07',
+        }
+    }
+
+    def _real_extract(self, url):
+        embed_id = self._match_id(url)
+        embed_page = self._download_webpage(
+            url, embed_id, 'Downloading video page')
+        # The gateway URL found in the page is percent-decoded before use
+        media_url = compat_urllib_parse.unquote(self._html_search_regex(
+            r'url: "(http://gateway\.videofun\.me[^"]+)"', embed_page, 'video url'))
+        # id and title are both the name part captured from the media URL
+        name = self._html_search_regex(r'.*/([^.]*)\.', media_url, 'title')
+
+        return {
+            'id': name,
+            'url': media_url,
+            'title': name,
+        }
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -164,6 +164,15 @@ class VKIE(InfoExtractor):
            self.to_screen('Youtube video detected')
            return self.url_result(m_yt.group(1), 'Youtube')

+        m_rutube = re.search(
+            r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
+        # A missing rutube embed is normal: fall through to the extraction below
+        if m_rutube is not None:
+            self.to_screen('rutube video detected')
+            rutube_url = self._proto_relative_url(
+                m_rutube.group(1).replace('\\', ''))
+            return self.url_result(rutube_url)
+
        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
        if m_opts:
            m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1550,3 +1550,14 @@ def ytdl_is_updateable():
 def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(shlex_quote(a) for a in args)
+
+
+def urlhandle_detect_ext(url_handle):
+    """Return the MIME subtype of url_handle's Content-Type as an extension."""
+    try:
+        getheader = url_handle.headers.__getitem__
+    except AttributeError:  # Python < 3
+        getheader = url_handle.info().getheader
+    # Drop parameters such as "; charset=utf-8" before taking the subtype
+    return getheader('Content-Type').split(';')[0].strip().split('/')[1]
+
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.01.03'
+__version__ = '2015.01.04'
Author	SHA1	Message	Date
Philipp Hagemeister	26886e6140	release 2015.01.04	2015-01-04 03:15:48 +01:00
Philipp Hagemeister	7a1818c99b	[vk] Add support for rutube embeds (Fixes #4514 )	2015-01-04 03:15:27 +01:00
Philipp Hagemeister	2ccd1b10e5	[soulanime] Fix under Python 3	2015-01-04 02:20:45 +01:00
Philipp Hagemeister	788fa208c8	Merge branch 'master' of github.com:rg3/youtube-dl	2015-01-04 02:08:38 +01:00
Philipp Hagemeister	8848314c08	[Makefile] Make offline tests actually work offline	2015-01-04 02:08:18 +01:00
Philipp Hagemeister	c11125f9ed	[tests] Remove format 138 from tests (#4559 )	2015-01-04 02:06:53 +01:00
Philipp Hagemeister	95ceeec722	Remove unused import	2015-01-04 02:05:35 +01:00
Philipp Hagemeister	b68ff25917	Add various anime sites (Closes #4554 )	2015-01-04 02:05:26 +01:00
Sergey M.	3e3327ea17	Merge pull request #4629 from t0mm0/tf1-tfou [tf1] add support for TFOU	2015-01-04 06:51:28 +06:00
t0mm0	b158bb8693	[tf1] simplify regex	2015-01-04 00:45:23 +00:00
t0mm0	2bf098eda4	[tf1] fix test	2015-01-04 00:43:55 +00:00
t0mm0	382e05fa56	[tf1] add support for TFOU	2015-01-04 00:05:31 +00:00