[toutv] add support for authentication (closes #10669)
This commit is contained in:
		| @@ -13,6 +13,7 @@ from ..utils import ( | |||||||
|     xpath_element, |     xpath_element, | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     determine_protocol, |     determine_protocol, | ||||||
|  |     unsmuggle_url, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -35,28 +36,51 @@ class RadioCanadaIE(InfoExtractor): | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|  |         url, smuggled_data = unsmuggle_url(url, {}) | ||||||
|         app_code, video_id = re.match(self._VALID_URL, url).groups() |         app_code, video_id = re.match(self._VALID_URL, url).groups() | ||||||
|  |  | ||||||
|         device_types = ['ipad', 'android'] |         metadata = self._download_xml( | ||||||
|  |             'http://api.radio-canada.ca/metaMedia/v1/index.ashx', | ||||||
|  |             video_id, note='Downloading metadata XML', query={ | ||||||
|  |                 'appCode': app_code, | ||||||
|  |                 'idMedia': video_id, | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         def get_meta(name): | ||||||
|  |             el = find_xpath_attr(metadata, './/Meta', 'name', name) | ||||||
|  |             return el.text if el is not None else None | ||||||
|  |  | ||||||
|  |         if get_meta('protectionType'): | ||||||
|  |             raise ExtractorError('This video is DRM protected.', expected=True) | ||||||
|  |  | ||||||
|  |         device_types = ['ipad'] | ||||||
|         if app_code != 'toutv': |         if app_code != 'toutv': | ||||||
|             device_types.append('flash') |             device_types.append('flash') | ||||||
|  |         if not smuggled_data: | ||||||
|  |             device_types.append('android') | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|         # TODO: extract f4m formats |         # TODO: extract f4m formats | ||||||
|         # f4m formats can be extracted using flashhd device_type but they produce unplayable files |         # f4m formats can be extracted using flashhd device_type but they produce unplayable files | ||||||
|         for device_type in device_types: |         for device_type in device_types: | ||||||
|             v_data = self._download_xml( |             validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' | ||||||
|                 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx', |             query = { | ||||||
|                 video_id, note='Downloading %s XML' % device_type, query={ |                 'appCode': app_code, | ||||||
|                     'appCode': app_code, |                 'idMedia': video_id, | ||||||
|                     'idMedia': video_id, |                 'connectionType': 'broadband', | ||||||
|                     'connectionType': 'broadband', |                 'multibitrate': 'true', | ||||||
|                     'multibitrate': 'true', |                 'deviceType': device_type, | ||||||
|                     'deviceType': device_type, |             } | ||||||
|  |             if smuggled_data: | ||||||
|  |                 validation_url = 'https://services.radio-canada.ca/media/validation/v2/' | ||||||
|  |                 query.update(smuggled_data) | ||||||
|  |             else: | ||||||
|  |                 query.update({ | ||||||
|                     # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction |                     # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction | ||||||
|                     'paysJ391wsHjbOJwvCs26toz': 'CA', |                     'paysJ391wsHjbOJwvCs26toz': 'CA', | ||||||
|                     'bypasslock': 'NZt5K62gRqfc', |                     'bypasslock': 'NZt5K62gRqfc', | ||||||
|                 }, fatal=False) |                 }) | ||||||
|  |             v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False) | ||||||
|             v_url = xpath_text(v_data, 'url') |             v_url = xpath_text(v_data, 'url') | ||||||
|             if not v_url: |             if not v_url: | ||||||
|                 continue |                 continue | ||||||
| @@ -101,17 +125,6 @@ class RadioCanadaIE(InfoExtractor): | |||||||
|                                 f4m_id='hds', fatal=False)) |                                 f4m_id='hds', fatal=False)) | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|         metadata = self._download_xml( |  | ||||||
|             'http://api.radio-canada.ca/metaMedia/v1/index.ashx', |  | ||||||
|             video_id, note='Downloading metadata XML', query={ |  | ||||||
|                 'appCode': app_code, |  | ||||||
|                 'idMedia': video_id, |  | ||||||
|             }) |  | ||||||
|  |  | ||||||
|         def get_meta(name): |  | ||||||
|             el = find_xpath_attr(metadata, './/Meta', 'name', name) |  | ||||||
|             return el.text if el is not None else None |  | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': get_meta('Title'), |             'title': get_meta('Title'), | ||||||
|   | |||||||
| @@ -2,12 +2,22 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import int_or_none | from ..utils import ( | ||||||
|  |     int_or_none, | ||||||
|  |     js_to_json, | ||||||
|  |     ExtractorError, | ||||||
|  |     urlencode_postdata, | ||||||
|  |     extract_attributes, | ||||||
|  |     smuggle_url, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class TouTvIE(InfoExtractor): | class TouTvIE(InfoExtractor): | ||||||
|  |     _NETRC_MACHINE = 'toutv' | ||||||
|     IE_NAME = 'tou.tv' |     IE_NAME = 'tou.tv' | ||||||
|     _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)' |     _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)' | ||||||
|  |     _access_token = None | ||||||
|  |     _claims = None | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', |         'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', | ||||||
| @@ -22,18 +32,64 @@ class TouTvIE(InfoExtractor): | |||||||
|             # m3u8 download |             # m3u8 download | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         }, |         }, | ||||||
|  |         'skip': '404 Not Found', | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     def _real_initialize(self): | ||||||
|  |         email, password = self._get_login_info() | ||||||
|  |         if email is None: | ||||||
|  |             return | ||||||
|  |         state = 'http://ici.tou.tv//' | ||||||
|  |         webpage = self._download_webpage(state, None, 'Downloading homepage') | ||||||
|  |         toutvlogin = self._parse_json(self._search_regex( | ||||||
|  |             r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json) | ||||||
|  |         authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize' | ||||||
|  |         login_webpage = self._download_webpage( | ||||||
|  |             authorize_url, None, 'Downloading login page', query={ | ||||||
|  |                 'client_id': toutvlogin['clientId'], | ||||||
|  |                 'redirect_uri': 'https://ici.tou.tv/login/loginCallback', | ||||||
|  |                 'response_type': 'token', | ||||||
|  |                 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged', | ||||||
|  |                 'state': state, | ||||||
|  |             }) | ||||||
|  |         login_form = self._search_regex( | ||||||
|  |             r'(?s)(<form[^>]+id="Form-login".+?</form>)', login_webpage, 'login form') | ||||||
|  |         form_data = self._hidden_inputs(login_form) | ||||||
|  |         form_data.update({ | ||||||
|  |             'login-email': email, | ||||||
|  |             'login-password': password, | ||||||
|  |         }) | ||||||
|  |         post_url = extract_attributes(login_form).get('action') or authorize_url | ||||||
|  |         _, urlh = self._download_webpage_handle( | ||||||
|  |             post_url, None, 'Logging in', data=urlencode_postdata(form_data)) | ||||||
|  |         self._access_token = self._search_regex( | ||||||
|  |             r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', | ||||||
|  |             urlh.geturl(), 'access token') | ||||||
|  |         self._claims = self._download_json( | ||||||
|  |             'https://services.radio-canada.ca/media/validation/v2/getClaims', | ||||||
|  |             None, 'Extracting Claims', query={ | ||||||
|  |                 'token': self._access_token, | ||||||
|  |                 'access_token': self._access_token, | ||||||
|  |             })['claims'] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         path = self._match_id(url) |         path = self._match_id(url) | ||||||
|         metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path) |         metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path) | ||||||
|  |         if metadata.get('IsDrm'): | ||||||
|  |             raise ExtractorError('This video is DRM protected.', expected=True) | ||||||
|         video_id = metadata['IdMedia'] |         video_id = metadata['IdMedia'] | ||||||
|         details = metadata['Details'] |         details = metadata['Details'] | ||||||
|         title = details['OriginalTitle'] |         title = details['OriginalTitle'] | ||||||
|  |         video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id) | ||||||
|  |         if self._access_token and self._claims: | ||||||
|  |             video_url = smuggle_url(video_url, { | ||||||
|  |                 'access_token': self._access_token, | ||||||
|  |                 'claims': self._claims, | ||||||
|  |             }) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             '_type': 'url_transparent', |             '_type': 'url_transparent', | ||||||
|             'url': 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id), |             'url': video_url, | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': title, |             'title': title, | ||||||
|             'thumbnail': details.get('ImageUrl'), |             'thumbnail': details.get('ImageUrl'), | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user