[rutube:playlist] Add extractor (closes #13534)
This commit is contained in:
		@@ -7,10 +7,14 @@ import itertools
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_parse_qs,
 | 
			
		||||
    compat_urllib_parse_urlparse,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    try_get,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -42,8 +46,24 @@ class RutubeIE(InfoExtractor):
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    @classmethod
    def suitable(cls, url):
        """Tell whether this extractor should handle *url*.

        The URL has to pass the parent class's ``suitable`` check.  When it
        also carries query parameters, ``pl_id`` must be the one and only
        parameter among them.
        """
        query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)

        # verdict of the regular URL check done by the parent class
        matched = super(RutubeIE, cls).suitable(url)

        if not query:
            return matched

        # pl_id is the only query parameter tolerated in the URL
        return matched and 'pl_id' in query and len(query) == 1
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _extract_urls(webpage):
 | 
			
		||||
        return [mobj.group('url') for mobj in re.finditer(
 | 
			
		||||
@@ -193,3 +213,67 @@ class RutubePersonIE(RutubeChannelIE):
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RutubePlaylistIE(InfoExtractor):
    """Extractor for Rutube playlists referenced via ``pl_id``/``pl_type`` URL parameters."""

    IE_NAME = 'rutube:playlist'
    IE_DESC = 'Rutube playlists'
    _TESTS = [{
        'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
        'info_dict': {
            'id': '4252',
        },
        'playlist_count': 25,
    }]

    _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?(?:.+)?pl_id=(?P<id>\d+)'
    # NOTE(review): the playlist type is hard-coded to "source" here although
    # suitable() accepts any pl_type value -- confirm whether other types exist.
    _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/source/%s/?page=%s'

    @classmethod
    def suitable(cls, url):
        """Return True if *url* is a Rutube playlist URL.

        The URL must match ``_VALID_URL`` (checked by the parent class) and
        its query string must contain a ``pl_type`` value together with a
        non-zero numeric ``pl_id``.
        """
        # Without this check any URL with pl_id/pl_type parameters, even one
        # from an unrelated site, would be claimed by this extractor.
        if not super(RutubePlaylistIE, cls).suitable(url):
            return False
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        pl_type = params.get('pl_type', [None])[0]
        pl_id = int_or_none(params.get('pl_id', [None])[0])
        return bool(pl_type and pl_id)

    def _real_extract(self, url):
        """Extract the playlist whose id is captured by ``_VALID_URL``."""
        playlist_id = self._match_id(url)
        return self._extract_playlist(playlist_id)

    def _extract_playlist(self, playlist_id):
        """Download every page of playlist *playlist_id* and build the result.

        Pages are requested from ``_PAGE_TEMPLATE`` starting at page 1 until a
        page has no results or does not announce a following page.  Returns
        the value of ``playlist_result`` for the collected entries, titled
        with the ``name`` field of the last downloaded page (None if absent).
        """
        entries = []
        for pagenum in itertools.count(1):
            page_url = self._PAGE_TEMPLATE % (playlist_id, pagenum)

            # Ask for JSON explicitly; per the original author's note the
            # default Accept header would request XML (not verified here).
            page = self._download_json(
                page_url, playlist_id,
                'Downloading metadata for page %s' % pagenum,
                headers={'Accept': 'application/json'})

            results = page.get('results')
            if not results:
                break

            for result in results:
                video_url = result.get('video_url')
                if not video_url:
                    # an entry without a URL cannot be delegated to Rutube
                    continue
                entry = self.url_result(video_url, 'Rutube')
                category = try_get(result, lambda x: x['category']['name'])
                entry.update({
                    'id': result.get('id'),
                    'uploader': try_get(result, lambda x: x['author']['name']),
                    'uploader_id': try_get(result, lambda x: x['author']['id']),
                    'upload_date': unified_strdate(result.get('created_ts')),
                    'title': result.get('title'),
                    'description': result.get('description'),
                    'thumbnail': result.get('thumbnail_url'),
                    'duration': int_or_none(result.get('duration')),
                    # the info dict field holding a list of categories is
                    # named "categories"
                    'categories': [category] if category else None,
                    'age_limit': 18 if result.get('is_adult') else 0,
                    'view_count': int_or_none(result.get('hits')),
                    'is_live': result.get('is_livestream'),
                    'webpage_url': video_url,
                })
                entries.append(entry)

            # stop on a missing/None flag as well, not only on a literal False
            if not page.get('has_next'):
                break

        return self.playlist_result(entries, playlist_id, page.get('name'))
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user