[googledrive] Simplify and carry long lines (#13638)
This commit is contained in:
		| @@ -7,7 +7,6 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     lowercase_escape, | ||||
|     error_to_compat_str, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
| @@ -59,7 +58,7 @@ class GoogleDriveIE(InfoExtractor): | ||||
|         'automatic_captions': 'target', | ||||
|     } | ||||
|     _caption_formats_ext = [] | ||||
|     _captions_by_country_xml = None | ||||
|     _captions_xml = None | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
| @@ -69,96 +68,99 @@ class GoogleDriveIE(InfoExtractor): | ||||
|         if mobj: | ||||
|             return 'https://drive.google.com/file/d/%s' % mobj.group('id') | ||||
|  | ||||
|     def _set_captions_data(self, video_id, video_subtitles_id, hl): | ||||
|         try: | ||||
|             self._captions_by_country_xml = self._download_xml(self._BASE_URL_CAPTIONS, video_id, query={ | ||||
|     def _download_subtitles_xml(self, video_id, subtitles_id, hl): | ||||
|         if self._captions_xml: | ||||
|             return | ||||
|         self._captions_xml = self._download_xml( | ||||
|             self._BASE_URL_CAPTIONS, video_id, query={ | ||||
|                 'id': video_id, | ||||
|                 'vid': video_subtitles_id, | ||||
|                 'vid': subtitles_id, | ||||
|                 'hl': hl, | ||||
|                 'v': video_id, | ||||
|                 'type': 'list', | ||||
|                 'tlangs': '1', | ||||
|                 'fmts': '1', | ||||
|                 'vssids': '1', | ||||
|             }) | ||||
|         except ExtractorError as ee: | ||||
|             self.report_warning('unable to download video subtitles: %s' % error_to_compat_str(ee)) | ||||
|         if self._captions_by_country_xml is not None: | ||||
|             caption_available_extensions = self._captions_by_country_xml.findall('format') | ||||
|             for caption_extension in caption_available_extensions: | ||||
|                 if caption_extension.attrib.get('fmt_code') and not caption_extension.attrib.get('default'): | ||||
|                     self._caption_formats_ext.append(caption_extension.attrib['fmt_code']) | ||||
|             }, note='Downloading subtitles XML', | ||||
|             errnote='Unable to download subtitles XML', fatal=False) | ||||
|         if self._captions_xml: | ||||
|             for f in self._captions_xml.findall('format'): | ||||
|                 if f.attrib.get('fmt_code') and not f.attrib.get('default'): | ||||
|                     self._caption_formats_ext.append(f.attrib['fmt_code']) | ||||
|  | ||||
|     def _get_captions_by_type(self, video_id, video_subtitles_id, caption_type, caption_original_lang_code=None): | ||||
|         if not video_subtitles_id or not caption_type: | ||||
|             return None | ||||
|     def _get_captions_by_type(self, video_id, subtitles_id, caption_type, | ||||
|                               origin_lang_code=None): | ||||
|         if not subtitles_id or not caption_type: | ||||
|             return | ||||
|         captions = {} | ||||
|         for caption_entry in self._captions_by_country_xml.findall(self._CAPTIONS_ENTRY_TAG[caption_type]): | ||||
|         for caption_entry in self._captions_xml.findall( | ||||
|                 self._CAPTIONS_ENTRY_TAG[caption_type]): | ||||
|             caption_lang_code = caption_entry.attrib.get('lang_code') | ||||
|             if not caption_lang_code: | ||||
|                 continue | ||||
|             caption_format_data = [] | ||||
|             for caption_format in self._caption_formats_ext: | ||||
|                 query = { | ||||
|                     'vid': video_subtitles_id, | ||||
|                     'vid': subtitles_id, | ||||
|                     'v': video_id, | ||||
|                     'fmt': caption_format, | ||||
|                     'lang': caption_lang_code if caption_original_lang_code is None else caption_original_lang_code, | ||||
|                     'lang': (caption_lang_code if origin_lang_code is None | ||||
|                              else origin_lang_code), | ||||
|                     'type': 'track', | ||||
|                     'name': '', | ||||
|                     'kind': '', | ||||
|                 } | ||||
|                 if caption_original_lang_code is not None: | ||||
|                 if origin_lang_code is not None: | ||||
|                     query.update({'tlang': caption_lang_code}) | ||||
|                 caption_format_data.append({ | ||||
|                     'url': update_url_query(self._BASE_URL_CAPTIONS, query), | ||||
|                     'ext': caption_format, | ||||
|                 }) | ||||
|             captions[caption_lang_code] = caption_format_data | ||||
|         if not captions: | ||||
|             self.report_warning('video doesn\'t have %s' % caption_type.replace('_', ' ')) | ||||
|         return captions | ||||
|  | ||||
|     def _get_subtitles(self, video_id, video_subtitles_id, hl): | ||||
|         if not video_subtitles_id or not hl: | ||||
|             return None | ||||
|         if self._captions_by_country_xml is None: | ||||
|             self._set_captions_data(video_id, video_subtitles_id, hl) | ||||
|             if self._captions_by_country_xml is None: | ||||
|                 return None | ||||
|         return self._get_captions_by_type(video_id, video_subtitles_id, 'subtitles') | ||||
|     def _get_subtitles(self, video_id, subtitles_id, hl): | ||||
|         if not subtitles_id or not hl: | ||||
|             return | ||||
|         self._download_subtitles_xml(video_id, subtitles_id, hl) | ||||
|         if not self._captions_xml: | ||||
|             return | ||||
|         return self._get_captions_by_type(video_id, subtitles_id, 'subtitles') | ||||
|  | ||||
|     def _get_automatic_captions(self, video_id, video_subtitles_id, hl): | ||||
|         if not video_subtitles_id or not hl: | ||||
|             return None | ||||
|         if self._captions_by_country_xml is None: | ||||
|             self._set_captions_data(video_id, video_subtitles_id, hl) | ||||
|             if self._captions_by_country_xml is None: | ||||
|                 return None | ||||
|         self.to_screen('%s: Looking for automatic captions' % video_id) | ||||
|         subtitle_original_track = self._captions_by_country_xml.find('track') | ||||
|         if subtitle_original_track is None: | ||||
|             return None | ||||
|         subtitle_original_lang_code = subtitle_original_track.attrib.get('lang_code') | ||||
|         if not subtitle_original_lang_code: | ||||
|             return None | ||||
|         return self._get_captions_by_type(video_id, video_subtitles_id, 'automatic_captions', subtitle_original_lang_code) | ||||
|     def _get_automatic_captions(self, video_id, subtitles_id, hl): | ||||
|         if not subtitles_id or not hl: | ||||
|             return | ||||
|         self._download_subtitles_xml(video_id, subtitles_id, hl) | ||||
|         if not self._captions_xml: | ||||
|             return | ||||
|         track = self._captions_xml.find('track') | ||||
|         if track is None: | ||||
|             return | ||||
|         origin_lang_code = track.attrib.get('lang_code') | ||||
|         if not origin_lang_code: | ||||
|             return | ||||
|         return self._get_captions_by_type( | ||||
|             video_id, subtitles_id, 'automatic_captions', origin_lang_code) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage( | ||||
|             'http://docs.google.com/file/d/%s' % video_id, video_id) | ||||
|  | ||||
|         reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) | ||||
|         reason = self._search_regex( | ||||
|             r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) | ||||
|         if reason: | ||||
|             raise ExtractorError(reason) | ||||
|  | ||||
|         title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title') | ||||
|         duration = int_or_none(self._search_regex( | ||||
|             r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None)) | ||||
|             r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', | ||||
|             default=None)) | ||||
|         fmt_stream_map = self._search_regex( | ||||
|             r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',') | ||||
|         fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') | ||||
|             r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, | ||||
|             'fmt stream map').split(',') | ||||
|         fmt_list = self._search_regex( | ||||
|             r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') | ||||
|  | ||||
|         resolutions = {} | ||||
|         for fmt in fmt_list: | ||||
| @@ -190,12 +192,14 @@ class GoogleDriveIE(InfoExtractor): | ||||
|  | ||||
|         hl = self._search_regex( | ||||
|             r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None) | ||||
|         video_subtitles_id = None | ||||
|         subtitles_id = None | ||||
|         ttsurl = self._search_regex( | ||||
|             r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None) | ||||
|         if ttsurl: | ||||
|             # the video Id for subtitles will be the last value in the ttsurl query string | ||||
|             video_subtitles_id = ttsurl.encode('utf-8').decode('unicode_escape').split('=')[-1] | ||||
|             # the video Id for subtitles will be the last value in the ttsurl | ||||
|             # query string | ||||
|             subtitles_id = ttsurl.encode('utf-8').decode( | ||||
|                 'unicode_escape').split('=')[-1] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -203,6 +207,7 @@ class GoogleDriveIE(InfoExtractor): | ||||
|             'thumbnail': self._og_search_thumbnail(webpage, default=None), | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': self.extract_subtitles(video_id, video_subtitles_id, hl), | ||||
|             'automatic_captions': self.extract_automatic_captions(video_id, video_subtitles_id, hl), | ||||
|             'subtitles': self.extract_subtitles(video_id, subtitles_id, hl), | ||||
|             'automatic_captions': self.extract_automatic_captions( | ||||
|                 video_id, subtitles_id, hl), | ||||
|         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user