[YoutubeDL:utils] Move percent encode non-ASCII URLs workaround to http_request and simplify (Closes #6457)
@@ -1860,27 +1860,6 @@ class YoutubeDL(object):
 
     def urlopen(self, req):
         """ Start an HTTP download """
-
-        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
-        # always respected by websites, some tend to give out URLs with non percent-encoded
-        # non-ASCII characters (see telemb.py, ard.py [#3412])
-        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
-        # To work around aforementioned issue we will replace request's original URL with
-        # percent-encoded one
-        req_is_string = isinstance(req, compat_basestring)
-        url = req if req_is_string else req.get_full_url()
-        url_escaped = escape_url(url)
-
-        # Substitute URL if any change after escaping
-        if url != url_escaped:
-            if req_is_string:
-                req = url_escaped
-            else:
-                req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
-                req = req_type(
-                    url_escaped, data=req.data, headers=req.headers,
-                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
-
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):
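The block removed above percent-encodes non-ASCII characters that some sites leak into URLs, because urllib rejects such URLs (http://bugs.python.org/issue3991). As a rough illustration of what that escaping does, here is a minimal sketch using Python 3's urllib.parse rather than youtube-dl's actual escape_url and compat layer; the helper name and the exact safe-character set are assumptions for illustration only.

# Sketch only: approximates the effect of escape_url(); not youtube-dl's exact code.
from urllib.parse import urlsplit, urlunsplit, quote

def escape_non_ascii(url):
    # Percent-encode characters RFC 3986 does not allow, while leaving
    # reserved characters and existing %XX escapes untouched.
    safe = "%/;:@&=+$,!~*'()?#[]"
    parts = urlsplit(url)
    return urlunsplit((
        parts.scheme,
        parts.netloc,
        quote(parts.path, safe=safe),
        quote(parts.query, safe=safe),
        quote(parts.fragment, safe=safe),
    ))

print(escape_non_ascii('http://example.com/média/télé.mp4?título=día'))
# http://example.com/m%C3%A9dia/t%C3%A9l%C3%A9.mp4?t%C3%ADtulo=d%C3%ADa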
@@ -651,6 +651,26 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
         return ret
 
     def http_request(self, req):
+        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
+        # always respected by websites, some tend to give out URLs with non percent-encoded
+        # non-ASCII characters (see telemb.py, ard.py [#3412])
+        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
+        # To work around aforementioned issue we will replace request's original URL with
+        # percent-encoded one
+        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
+        # the code of this workaround has been moved here from YoutubeDL.urlopen()
+        url = req.get_full_url()
+        url_escaped = escape_url(url)
+
+        # Substitute URL if any change after escaping
+        if url != url_escaped:
+            req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
+            new_req = req_type(
+                url_escaped, data=req.data, headers=req.headers,
+                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+            new_req.timeout = req.timeout
+            req = new_req
+
         for h, v in std_headers.items():
             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
             # The dict keys are capitalized because of this bug by urllib
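Moving the workaround into YoutubeDLHandler.http_request means every request the opener sends gets escaped, including the follow-up requests that urllib's HTTPRedirectHandler builds for Location headers, which is why redirects such as the southpark.de URL in the comment above are now covered. Below is a minimal sketch of that hook pattern, assuming Python 3's urllib.request and the hypothetical escape_non_ascii() helper from the sketch above; youtube-dl itself goes through its compat wrappers and uses its HEADRequest class for Python 2 support.

# Sketch only: shows the http_request() pre-processing hook on a custom
# HTTPHandler subclass; escape_non_ascii() is the hypothetical helper above.
import urllib.request

class EscapingHTTPHandler(urllib.request.HTTPHandler):
    def http_request(self, req):
        # Every request the opener sends, including follow-up requests that
        # HTTPRedirectHandler builds for Location headers, passes through here.
        url = req.full_url
        url_escaped = escape_non_ascii(url)
        if url != url_escaped:
            new_req = urllib.request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable,
                method=req.get_method())
            # OpenerDirector sets .timeout before pre-processing, so carry it over.
            new_req.timeout = req.timeout
            req = new_req
        # Let the base handler add Host/Content-Length headers as usual.
        return super().http_request(req)

opener = urllib.request.build_opener(EscapingHTTPHandler())
# opener.open('http://www.southpark.de/alle-episoden/s18e09')  # redirect targets get escaped too

An HTTPS opener would need the same logic in an https_request() method on an HTTPSHandler subclass, since urllib dispatches these pre-processors by URL scheme.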