[utils] Encode hostnames before passing to urllib
With IDN (Internationalized Domain Name) and a proxy, non-ASCII URLs are passed down to urllib/urllib2, causing UnicodeEncodeError. Fixes #8890
This commit is contained in:
		| @@ -1,4 +1,5 @@ | ||||
| #!/usr/bin/env python | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| # Allow direct execution | ||||
| @@ -120,5 +121,14 @@ class TestProxy(unittest.TestCase): | ||||
|         response = ydl.urlopen(req).read().decode('utf-8') | ||||
|         self.assertEqual(response, 'cn: {0}'.format(url)) | ||||
|  | ||||
|     def test_proxy_with_idn(self): | ||||
|         ydl = YoutubeDL({ | ||||
|             'proxy': 'localhost:{0}'.format(self.port), | ||||
|         }) | ||||
|         url = 'http://中文.tw/' | ||||
|         response = ydl.urlopen(url).read().decode('utf-8') | ||||
|         # b'xn--fiq228c' is '中文'.encode('idna') | ||||
|         self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -1746,6 +1746,7 @@ def escape_url(url): | ||||
|     """Escape URL as suggested by RFC 3986""" | ||||
|     url_parsed = compat_urllib_parse_urlparse(url) | ||||
|     return url_parsed._replace( | ||||
|         netloc=url_parsed.netloc.encode('idna').decode('ascii'), | ||||
|         path=escape_rfc3986(url_parsed.path), | ||||
|         params=escape_rfc3986(url_parsed.params), | ||||
|         query=escape_rfc3986(url_parsed.query), | ||||
|   | ||||
		Reference in New Issue
	
	Block a user