Merge pull request #9367 from codesparkle/master
Feature: --restrict-filenames: replace accented characters with their unaccented counterparts instead of "_"
This commit is contained in:
		@@ -139,8 +139,8 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
 | 
			
		||||
        self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
 | 
			
		||||
 | 
			
		||||
        tests = 'a\xe4b\u4e2d\u56fd\u7684c'
 | 
			
		||||
        self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
 | 
			
		||||
        tests = 'aäb\u4e2d\u56fd\u7684c'
 | 
			
		||||
        self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')
 | 
			
		||||
        self.assertTrue(sanitize_filename('\xf6', restricted=True) != '')  # No empty filename
 | 
			
		||||
 | 
			
		||||
        forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
 | 
			
		||||
@@ -155,6 +155,10 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        self.assertTrue(sanitize_filename('-', restricted=True) != '')
 | 
			
		||||
        self.assertTrue(sanitize_filename(':', restricted=True) != '')
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(sanitize_filename(
 | 
			
		||||
            'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True),
 | 
			
		||||
            'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy')
 | 
			
		||||
 | 
			
		||||
    def test_sanitize_ids(self):
 | 
			
		||||
        self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
 | 
			
		||||
        self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
 | 
			
		||||
 
 | 
			
		||||
@@ -14,8 +14,8 @@ import email.utils
 | 
			
		||||
import errno
 | 
			
		||||
import functools
 | 
			
		||||
import gzip
 | 
			
		||||
import itertools
 | 
			
		||||
import io
 | 
			
		||||
import itertools
 | 
			
		||||
import json
 | 
			
		||||
import locale
 | 
			
		||||
import math
 | 
			
		||||
@@ -24,8 +24,8 @@ import os
 | 
			
		||||
import pipes
 | 
			
		||||
import platform
 | 
			
		||||
import re
 | 
			
		||||
import ssl
 | 
			
		||||
import socket
 | 
			
		||||
import ssl
 | 
			
		||||
import struct
 | 
			
		||||
import subprocess
 | 
			
		||||
import sys
 | 
			
		||||
@@ -89,6 +89,11 @@ KNOWN_EXTENSIONS = (
 | 
			
		||||
    'wav',
 | 
			
		||||
    'f4f', 'f4m', 'm3u8', 'smil')
 | 
			
		||||
 | 
			
		||||
# Maps accented characters to their ASCII replacements; needed for sanitizing filenames in restricted mode
 | 
			
		||||
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
 | 
			
		||||
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'],
 | 
			
		||||
                                        'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy')))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def preferredencoding():
 | 
			
		||||
    """Get preferred encoding.
 | 
			
		||||
@@ -365,6 +370,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
 | 
			
		||||
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 | 
			
		||||
    """
 | 
			
		||||
    def replace_insane(char):
 | 
			
		||||
        if restricted and char in ACCENT_CHARS:
 | 
			
		||||
            return ACCENT_CHARS[char]
 | 
			
		||||
        if char == '?' or ord(char) < 32 or ord(char) == 127:
 | 
			
		||||
            return ''
 | 
			
		||||
        elif char == '"':
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user