[dailymotion:playlist] fix extraction(closes #16894)
This commit is contained in:
		| @@ -2,6 +2,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import functools | ||||
| import hashlib | ||||
| import itertools | ||||
| import json | ||||
| @@ -16,11 +17,13 @@ from ..utils import ( | ||||
|     error_to_compat_str, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     mimetype2ext, | ||||
|     OnDemandPagedList, | ||||
|     parse_iso8601, | ||||
|     sanitized_Request, | ||||
|     str_to_int, | ||||
|     unescapeHTML, | ||||
|     mimetype2ext, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -343,17 +346,93 @@ class DailymotionIE(DailymotionBaseInfoExtractor): | ||||
|  | ||||
| class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|     IE_NAME = 'dailymotion:playlist' | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)' | ||||
|     _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"' | ||||
|     _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s' | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', | ||||
|         'info_dict': { | ||||
|             'title': 'SPORT', | ||||
|             'id': 'xv4bw_nqtv_sport', | ||||
|             'id': 'xv4bw', | ||||
|         }, | ||||
|         'playlist_mincount': 20, | ||||
|     }] | ||||
|     _PAGE_SIZE = 100 | ||||
|  | ||||
|     def _fetch_page(self, playlist_id, authorizaion, page): | ||||
|         page += 1 | ||||
|         videos = self._download_json( | ||||
|             'https://graphql.api.dailymotion.com', | ||||
|             playlist_id, 'Downloading page %d' % page, | ||||
|             data=json.dumps({ | ||||
|                 'query': '''{ | ||||
|   collection(xid: "%s") { | ||||
|     videos(first: %d, page: %d) { | ||||
|       pageInfo { | ||||
|         hasNextPage | ||||
|         nextPage | ||||
|       } | ||||
|       edges { | ||||
|         node { | ||||
|           xid | ||||
|           url | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| }''' % (playlist_id, self._PAGE_SIZE, page) | ||||
|             }).encode(), headers={ | ||||
|                 'Authorization': authorizaion, | ||||
|                 'Origin': 'https://www.dailymotion.com', | ||||
|             })['data']['collection']['videos'] | ||||
|         for edge in videos['edges']: | ||||
|             node = edge['node'] | ||||
|             yield self.url_result( | ||||
|                 node['url'], DailymotionIE.ie_key(), node['xid']) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|         api = self._parse_json(self._search_regex( | ||||
|             r'__PLAYER_CONFIG__\s*=\s*({.+?});', | ||||
|             webpage, 'player config'), playlist_id)['context']['api'] | ||||
|         auth = self._download_json( | ||||
|             api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'), | ||||
|             playlist_id, data=urlencode_postdata({ | ||||
|                 'client_id': api.get('client_id', 'f1a362d288c1b98099c7'), | ||||
|                 'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'), | ||||
|                 'grant_type': 'client_credentials', | ||||
|             })) | ||||
|         authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token']) | ||||
|         entries = OnDemandPagedList(functools.partial( | ||||
|             self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE) | ||||
|         return self.playlist_result( | ||||
|             entries, playlist_id, | ||||
|             self._og_search_title(webpage)) | ||||
|  | ||||
|  | ||||
| class DailymotionUserIE(DailymotionBaseInfoExtractor): | ||||
|     IE_NAME = 'dailymotion:user' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)' | ||||
|     _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"' | ||||
|     _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.dailymotion.com/user/nqtv', | ||||
|         'info_dict': { | ||||
|             'id': 'nqtv', | ||||
|             'title': 'Rémi Gaillard', | ||||
|         }, | ||||
|         'playlist_mincount': 100, | ||||
|     }, { | ||||
|         'url': 'http://www.dailymotion.com/user/UnderProject', | ||||
|         'info_dict': { | ||||
|             'id': 'UnderProject', | ||||
|             'title': 'UnderProject', | ||||
|         }, | ||||
|         'playlist_mincount': 1800, | ||||
|         'expected_warnings': [ | ||||
|             'Stopped at duplicated page', | ||||
|         ], | ||||
|         'skip': 'Takes too long time', | ||||
|     }] | ||||
|  | ||||
|     def _extract_entries(self, id): | ||||
|         video_ids = set() | ||||
| @@ -379,43 +458,6 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: | ||||
|                 break | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': playlist_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'entries': self._extract_entries(playlist_id), | ||||
|         } | ||||
|  | ||||
|  | ||||
| class DailymotionUserIE(DailymotionPlaylistIE): | ||||
|     IE_NAME = 'dailymotion:user' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)' | ||||
|     _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.dailymotion.com/user/nqtv', | ||||
|         'info_dict': { | ||||
|             'id': 'nqtv', | ||||
|             'title': 'Rémi Gaillard', | ||||
|         }, | ||||
|         'playlist_mincount': 100, | ||||
|     }, { | ||||
|         'url': 'http://www.dailymotion.com/user/UnderProject', | ||||
|         'info_dict': { | ||||
|             'id': 'UnderProject', | ||||
|             'title': 'UnderProject', | ||||
|         }, | ||||
|         'playlist_mincount': 1800, | ||||
|         'expected_warnings': [ | ||||
|             'Stopped at duplicated page', | ||||
|         ], | ||||
|         'skip': 'Takes too long time', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         user = mobj.group('user') | ||||
|   | ||||
		Reference in New Issue
	
	Block a user