[YoutubeDL] Escape non-ASCII characters in URLs
urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991). Work around this by replacing the request's original URL with an escaped one.
This commit is contained in:
		| @@ -28,6 +28,7 @@ from .utils import ( | |||||||
|     compat_str, |     compat_str, | ||||||
|     compat_urllib_error, |     compat_urllib_error, | ||||||
|     compat_urllib_request, |     compat_urllib_request, | ||||||
|  |     compat_urllib_parse_urlparse, | ||||||
|     ContentTooShortError, |     ContentTooShortError, | ||||||
|     date_from_str, |     date_from_str, | ||||||
|     DateRange, |     DateRange, | ||||||
| @@ -1241,6 +1242,31 @@ class YoutubeDL(object): | |||||||
|  |  | ||||||
|     def urlopen(self, req): |     def urlopen(self, req): | ||||||
|         """ Start an HTTP download """ |         """ Start an HTTP download """ | ||||||
|  |  | ||||||
|  |         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) | ||||||
|  |         # Working around by replacing request's original URL with escaped one | ||||||
|  |  | ||||||
|  |         url = req if isinstance(req, compat_str) else req.get_full_url() | ||||||
|  |  | ||||||
|  |         def escape(component): | ||||||
|  |             return compat_cookiejar.escape_path(component.encode('utf-8')) | ||||||
|  |  | ||||||
|  |         url_parsed = compat_urllib_parse_urlparse(url) | ||||||
|  |         url_escaped = url_parsed._replace( | ||||||
|  |             path=escape(url_parsed.path), | ||||||
|  |             query=escape(url_parsed.query), | ||||||
|  |             fragment=escape(url_parsed.fragment) | ||||||
|  |         ).geturl() | ||||||
|  |  | ||||||
|  |         # Substitute URL if any change after escaping | ||||||
|  |         if url != url_escaped: | ||||||
|  |             if isinstance(req, compat_str): | ||||||
|  |                 req = url_escaped | ||||||
|  |             else: | ||||||
|  |                 req = compat_urllib_request.Request( | ||||||
|  |                     url_escaped, data=req.data, headers=req.headers, | ||||||
|  |                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) | ||||||
|  |  | ||||||
|         return self._opener.open(req, timeout=self._socket_timeout) |         return self._opener.open(req, timeout=self._socket_timeout) | ||||||
|  |  | ||||||
|     def print_debug_header(self): |     def print_debug_header(self): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user