[pornhub] Decode obfuscated video URL (closes #12470)
This commit is contained in:
		| @@ -1,7 +1,9 @@ | |||||||
| # coding: utf-8 | # coding: utf-8 | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import functools | ||||||
| import itertools | import itertools | ||||||
|  | import operator | ||||||
| # import os | # import os | ||||||
| import re | import re | ||||||
|  |  | ||||||
| @@ -129,9 +131,38 @@ class PornHubIE(InfoExtractor): | |||||||
|  |  | ||||||
|         tv_webpage = dl_webpage('tv') |         tv_webpage = dl_webpage('tv') | ||||||
|  |  | ||||||
|         video_url = self._search_regex( |         encoded_url = self._search_regex(r'(var.*mediastring.*)</script>', | ||||||
|             r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage, |             tv_webpage, 'encoded url') | ||||||
|             'video url', group='url') |         assignments = encoded_url.split(";") | ||||||
|  |         js_vars = {} | ||||||
|  |  | ||||||
|  |         def parse_js_value(inp): | ||||||
|  |             inp = re.sub(r'/\*[^*]*\*/', "", inp) | ||||||
|  |  | ||||||
|  |             if "+" in inp: | ||||||
|  |                 inps = inp.split("+") | ||||||
|  |                 return functools.reduce(operator.concat, map(parse_js_value, inps)) | ||||||
|  |  | ||||||
|  |             inp = inp.strip() | ||||||
|  |             if inp in js_vars: | ||||||
|  |                 return js_vars[inp] | ||||||
|  |  | ||||||
|  |             # Hope it's a string! | ||||||
|  |             assert inp.startswith('"') and inp.endswith('"') | ||||||
|  |             return inp[1:-1] | ||||||
|  |  | ||||||
|  |         for assn in assignments: | ||||||
|  |             assn = assn.strip() | ||||||
|  |             if len(assn) == 0: | ||||||
|  |                 continue | ||||||
|  |  | ||||||
|  |             assert assn.startswith("var ") | ||||||
|  |             assn = assn[4:] | ||||||
|  |             vname, value = assn.split("=", 1) | ||||||
|  |  | ||||||
|  |             js_vars[vname] = parse_js_value(value) | ||||||
|  |  | ||||||
|  |         video_url = js_vars["mediastring"] | ||||||
|  |  | ||||||
|         title = self._search_regex( |         title = self._search_regex( | ||||||
|             r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None) |             r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user