Deal with implicitly UTF-16 encoded webpages
These webpages don't specify an encoding and rely on the BOM (byte order mark) at the start of the content to indicate it.
This commit is contained in:
		@@ -220,6 +220,8 @@ class InfoExtractor(object):
 | 
			
		||||
                          webpage_bytes[:1024])
 | 
			
		||||
            if m:
 | 
			
		||||
                encoding = m.group(1).decode('ascii')
 | 
			
		||||
            elif webpage_bytes.startswith(b'\xff\xfe'):
 | 
			
		||||
                encoding = 'utf-16'
 | 
			
		||||
            else:
 | 
			
		||||
                encoding = 'utf-8'
 | 
			
		||||
        if self._downloader.params.get('dump_intermediate_pages', False):
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user