[toypics] Separate user and video extraction (#2601)
This commit is contained in:
		| @@ -37,6 +37,7 @@ from youtube_dl.extractor import ( | ||||
|     GoogleSearchIE, | ||||
|     GenericIE, | ||||
|     TEDIE, | ||||
|     ToypicsUserIE, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -269,5 +270,13 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertEqual(result['title'], 'Who are the hackers?') | ||||
|         self.assertTrue(len(result['entries']) >= 6) | ||||
|  | ||||
|     def test_toypics_user(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = ToypicsUserIE(dl) | ||||
|         result = ie.extract('http://videos.toypics.net/Mikey') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'Mikey') | ||||
|         self.assertTrue(len(result['entries']) >= 17) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -239,7 +239,7 @@ from .theplatform import ThePlatformIE | ||||
| from .thisav import ThisAVIE | ||||
| from .tinypic import TinyPicIE | ||||
| from .toutv import TouTvIE | ||||
| from .toypics import ToypicsIE | ||||
| from .toypics import ToypicsUserIE, ToypicsIE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| from .trilulilu import TriluliluIE | ||||
| from .trutube import TruTubeIE | ||||
|   | ||||
| @@ -2,43 +2,26 @@ from .common import InfoExtractor | ||||
| from math import ceil | ||||
| import re | ||||
|  | ||||
|  | ||||
| class ToypicsIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?videos\.toypics\.net/.*' | ||||
|     IE_DESC = 'Toypics user profile' | ||||
|     _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*' | ||||
|     _TEST = { | ||||
|         'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/', | ||||
|         #'md5': '8a8b546956bbd0e769dbe28f6e80abb3', == $head -c10K 12929646011616163504.mp4 |md5sum //no idea why it fails | ||||
|         'md5': '16e806ad6d6f58079d210fe30985e08b', | ||||
|         'info_dict': { | ||||
|             'id': '514', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Chance-Bulge\'d, 2', | ||||
|             'age_limit': 18 | ||||
|             'age_limit': 18, | ||||
|             'uploader': 'kidsune', | ||||
|         } | ||||
|     } | ||||
|     PAGINATED=8 | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(r'(http://)?videos\.toypics\.net/(?P<username>[^/?]+)$', url) | ||||
|         if not mobj: | ||||
|             return self.extract_one(url) | ||||
|         return [self.extract_one(u) for u in self.process_paginated(url, | ||||
|             r'public/">Public Videos \((?P<videos_count>[0-9]+)\)</a></li>', | ||||
|             r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">' | ||||
|         )] | ||||
|  | ||||
|     def process_paginated(self, profile_url, re_total, re_video_page): | ||||
|         profile_page = self._download_webpage(profile_url, 'profile' , 'getting profile page: '+profile_url) | ||||
|         videos_count = self._html_search_regex(re_total, profile_page, 'videos count') | ||||
|         lst = [] | ||||
|         for n in xrange(1,int(ceil(float(videos_count)/self.PAGINATED)) +1): | ||||
|             lpage_url = profile_url +'/public/%d'%n | ||||
|             lpage = self._download_webpage(lpage_url, 'page %d'%n) | ||||
|             lst.extend(re.findall(re_video_page, lpage)) | ||||
|         return lst | ||||
|  | ||||
|     def extract_one(self,url): | ||||
|         mobj = re.match(r'(http://)?videos\.toypics\.net/view/(?P<videoid>[0-9]+)/.*', url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         page = self._download_webpage(url, video_id, 'getting page: '+url) | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         page = self._download_webpage(url, video_id) | ||||
|         video_url = self._html_search_regex( | ||||
|             r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL') | ||||
|         title = self._html_search_regex( | ||||
| @@ -48,8 +31,46 @@ class ToypicsIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': video_url[-3:], | ||||
|             'title': title, | ||||
|             'uploader': username, | ||||
|             'age_limit': 18 | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|  | ||||
|  | ||||
class ToypicsUserIE(InfoExtractor):
    """Collect the public videos of a Toypics user profile into a playlist.

    Only the video page URLs are gathered here; every playlist entry is a
    ``url``-type reference tagged with ``ie_key`` ``'Toypics'`` so the
    per-video extraction is done elsewhere.
    """
    IE_DESC = 'Toypics user profile'
    # The user name stops at '/', '?' or '#', so a trailing query string or
    # fragment is accepted without becoming part of the user name.
    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?#]+)(?:$|[?#])'

    def _real_extract(self, url):
        """Return a playlist dict for the profile at ``url``.

        url -- profile URL matching ``_VALID_URL``.

        The result has ``_type`` ``'playlist'``, ``id`` set to the user name
        and ``entries`` holding one ``url``-type dict per video link found on
        the profile's ``/public/<n>`` listing pages.
        """
        mobj = re.match(self._VALID_URL, url)
        username = mobj.group('username')
        # Listing pages are addressed relative to the bare profile URL; the
        # raw input may carry a query string or fragment that would end up in
        # the middle of the path if it were used as the prefix.
        profile_url = 'http://videos.toypics.net/%s' % username

        profile_page = self._download_webpage(
            url, username, note='Retrieving profile page')

        video_count = int(self._search_regex(
            r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
            'video count'))

        PAGE_SIZE = 8
        # Ceiling division: a partially filled last page still counts, but an
        # exact multiple of PAGE_SIZE (or zero videos) must not add a page.
        page_count = (video_count + PAGE_SIZE - 1) // PAGE_SIZE

        video_urls = []
        for n in range(1, page_count + 1):
            lpage = self._download_webpage(
                '%s/public/%d' % (profile_url, n), username,
                note='Downloading page %d/%d' % (n, page_count))
            video_urls.extend(re.findall(
                r'<p class="video-entry-title">\n\s*<a href="(http://videos\.toypics\.net/view/[^"]+)">',
                lpage))

        return {
            '_type': 'playlist',
            'id': username,
            'entries': [{
                '_type': 'url',
                'url': video_url,
                'ie_key': 'Toypics',
            } for video_url in video_urls],
        }
|   | ||||
		Reference in New Issue
	
	Block a user