[chirbit] Fix extraction (Closes #10296)
This commit is contained in:
		| @@ -1,30 +1,33 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     int_or_none, | ||||
| ) | ||||
| from ..utils import parse_duration | ||||
|  | ||||
|  | ||||
| class ChirbitIE(InfoExtractor): | ||||
|     IE_NAME = 'chirbit' | ||||
|     _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://chirb.it/PrIPv5', | ||||
|         'md5': '9847b0dad6ac3e074568bf2cfb197de8', | ||||
|         'url': 'http://chirb.it/be2abG', | ||||
|         'info_dict': { | ||||
|             'id': 'PrIPv5', | ||||
|             'id': 'be2abG', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Фасадстрой', | ||||
|             'duration': 52, | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|             'title': 'md5:f542ea253f5255240be4da375c6a5d7e', | ||||
|             'description': 'md5:f24a4e22a71763e32da5fed59e47c770', | ||||
|             'duration': 306, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://chirb.it/wp/MN58c2', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -33,27 +36,30 @@ class ChirbitIE(InfoExtractor): | ||||
|         webpage = self._download_webpage( | ||||
|             'http://chirb.it/%s' % audio_id, audio_id) | ||||
|  | ||||
|         audio_url = self._search_regex( | ||||
|             r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url') | ||||
|         data_fd = self._search_regex( | ||||
|             r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1', | ||||
|             webpage, 'data fd', group='url') | ||||
|  | ||||
|         # Reverse engineered from https://chirb.it/js/chirbit.player.js (look | ||||
|         # for soundURL) | ||||
|         audio_url = base64.b64decode( | ||||
|             data_fd[::-1].encode('ascii')).decode('utf-8') | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'itemprop="name">([^<]+)', webpage, 'title') | ||||
|         duration = parse_duration(self._html_search_meta( | ||||
|             'duration', webpage, 'duration', fatal=False)) | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'itemprop="playCount"\s*>(\d+)', webpage, | ||||
|             'listen count', fatal=False)) | ||||
|         comment_count = int_or_none(self._search_regex( | ||||
|             r'>(\d+) Comments?:', webpage, | ||||
|             'comment count', fatal=False)) | ||||
|             r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title') | ||||
|         description = self._search_regex( | ||||
|             r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>', | ||||
|             webpage, 'description', default=None) | ||||
|         duration = parse_duration(self._search_regex( | ||||
|             r'class=["\']c-length["\'][^>]*>([^<]+)', | ||||
|             webpage, 'duration', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': audio_id, | ||||
|             'url': audio_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user