[toypics] Separate user and video extraction (#2601)
This commit is contained in:
		@@ -239,7 +239,7 @@ from .theplatform import ThePlatformIE
 | 
			
		||||
from .thisav import ThisAVIE
 | 
			
		||||
from .tinypic import TinyPicIE
 | 
			
		||||
from .toutv import TouTvIE
 | 
			
		||||
from .toypics import ToypicsIE
 | 
			
		||||
from .toypics import ToypicsUserIE, ToypicsIE
 | 
			
		||||
from .traileraddict import TrailerAddictIE
 | 
			
		||||
from .trilulilu import TriluliluIE
 | 
			
		||||
from .trutube import TruTubeIE
 | 
			
		||||
 
 | 
			
		||||
@@ -2,43 +2,26 @@ from .common import InfoExtractor
 | 
			
		||||
from math import ceil
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ToypicsIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'(?:http://)?videos\.toypics\.net/.*'
 | 
			
		||||
    IE_DESC = 'Toypics user profile'
 | 
			
		||||
    _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
 | 
			
		||||
        #'md5': '8a8b546956bbd0e769dbe28f6e80abb3', == $head -c10K 12929646011616163504.mp4 |md5sum //no idea why it fails
 | 
			
		||||
        'md5': '16e806ad6d6f58079d210fe30985e08b',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '514',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Chance-Bulge\'d, 2',
 | 
			
		||||
            'age_limit': 18
 | 
			
		||||
            'age_limit': 18,
 | 
			
		||||
            'uploader': 'kidsune',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    PAGINATED=8
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(r'(http://)?videos\.toypics\.net/(?P<username>[^/?]+)$', url)
 | 
			
		||||
        if not mobj:
 | 
			
		||||
            return self.extract_one(url)
 | 
			
		||||
        return [self.extract_one(u) for u in self.process_paginated(url,
 | 
			
		||||
            r'public/">Public Videos \((?P<videos_count>[0-9]+)\)</a></li>',
 | 
			
		||||
            r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">'
 | 
			
		||||
        )]
 | 
			
		||||
 | 
			
		||||
    def process_paginated(self, profile_url, re_total, re_video_page):
 | 
			
		||||
        profile_page = self._download_webpage(profile_url, 'profile' , 'getting profile page: '+profile_url)
 | 
			
		||||
        videos_count = self._html_search_regex(re_total, profile_page, 'videos count')
 | 
			
		||||
        lst = []
 | 
			
		||||
        for n in xrange(1,int(ceil(float(videos_count)/self.PAGINATED)) +1):
 | 
			
		||||
            lpage_url = profile_url +'/public/%d'%n
 | 
			
		||||
            lpage = self._download_webpage(lpage_url, 'page %d'%n)
 | 
			
		||||
            lst.extend(re.findall(re_video_page, lpage))
 | 
			
		||||
        return lst
 | 
			
		||||
 | 
			
		||||
    def extract_one(self,url):
 | 
			
		||||
        mobj = re.match(r'(http://)?videos\.toypics\.net/view/(?P<videoid>[0-9]+)/.*', url)
 | 
			
		||||
        video_id = mobj.group('videoid')
 | 
			
		||||
        page = self._download_webpage(url, video_id, 'getting page: '+url)
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
        page = self._download_webpage(url, video_id)
 | 
			
		||||
        video_url = self._html_search_regex(
 | 
			
		||||
            r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
 | 
			
		||||
        title = self._html_search_regex(
 | 
			
		||||
@@ -48,8 +31,46 @@ class ToypicsIE(InfoExtractor):
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
            'ext': video_url[-3:],
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'uploader': username,
 | 
			
		||||
            'age_limit': 18
 | 
			
		||||
            'age_limit': 18,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ToypicsUserIE(InfoExtractor):
    """Turn a Toypics user profile URL into a playlist of its public videos.

    The profile page is read for the public video count, the paginated
    ``/public/<n>`` listings are then scanned for links to individual
    video pages, and each link is handed over to the ``Toypics`` extractor.
    """

    IE_DESC = 'Toypics user profile'
    # '#' is excluded from the username so that a trailing fragment is not
    # swallowed into the user name (and from there into the listing URLs).
    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?#]+)(?:$|[?#])'

    def _real_extract(self, url):
        """Collect the video page URLs listed on a user's public pages.

        :param url: profile URL matching ``_VALID_URL``.
        :returns: a ``playlist`` result dict whose ``id`` is the username
            and whose ``entries`` are ``url`` results delegated to the
            ``Toypics`` extractor.
        """
        mobj = re.match(self._VALID_URL, url)
        username = mobj.group('username')
        # _VALID_URL tolerates a trailing "?query" or "#fragment"; keep only
        # the part up to the username so '/public/<n>' is appended to a
        # clean profile URL.
        profile_url = url[:mobj.end('username')]

        profile_page = self._download_webpage(
            url, username, note='Retrieving profile page')

        video_count = int(self._search_regex(
            r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
            'video count'))

        # Number of videos assumed per listing page (value taken from the
        # original implementation).
        PAGE_SIZE = 8
        # Ceiling division: the previous "+ PAGE_SIZE + 1" requested one
        # listing page too many whenever video_count % PAGE_SIZE was 0 or 7.
        page_count = (video_count + PAGE_SIZE - 1) // PAGE_SIZE

        video_urls = []
        for n in range(1, page_count + 1):
            lpage_url = profile_url + '/public/%d' % n
            lpage = self._download_webpage(
                lpage_url, username,
                note='Downloading page %d/%d' % (n, page_count))
            video_urls.extend(
                re.findall(
                    r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">',
                    lpage))

        return {
            '_type': 'playlist',
            'id': username,
            'entries': [{
                '_type': 'url',
                'url': video_url,
                'ie_key': 'Toypics',
            } for video_url in video_urls]
        }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user