Switch back to underscore for invalid characters, and make restricted mode ASCII-only
This commit is contained in:
		| @@ -47,8 +47,8 @@ which means you can modify it, redistribute it or use it however you like. | |||||||
|                              %(extractor)s for the provider (youtube, metacafe, |                              %(extractor)s for the provider (youtube, metacafe, | ||||||
|                              etc), %(id)s for the video id and %% for a literal |                              etc), %(id)s for the video id and %% for a literal | ||||||
|                              percent. Use - to output to stdout. |                              percent. Use - to output to stdout. | ||||||
|     --restrict-filenames     Avoid some characters such as "&" and spaces in |     --restrict-filenames     Restrict filenames to only ASCII characters, and | ||||||
|                              filenames |                              avoid "&" and spaces in filenames | ||||||
|     -a, --batch-file FILE    file containing URLs to download ('-' for stdin) |     -a, --batch-file FILE    file containing URLs to download ('-' for stdin) | ||||||
|     -w, --no-overwrites      do not overwrite files |     -w, --no-overwrites      do not overwrite files | ||||||
|     -c, --continue           resume partially downloaded files |     -c, --continue           resume partially downloaded files | ||||||
|   | |||||||
| @@ -22,10 +22,10 @@ class TestUtil(unittest.TestCase): | |||||||
|  |  | ||||||
| 		self.assertEqual(sanitize_filename(u'123'), u'123') | 		self.assertEqual(sanitize_filename(u'123'), u'123') | ||||||
|  |  | ||||||
| 		self.assertEqual(u'abc-de', sanitize_filename(u'abc/de')) | 		self.assertEqual(u'abc_de', sanitize_filename(u'abc/de')) | ||||||
| 		self.assertFalse(u'/' in sanitize_filename(u'abc/de///')) | 		self.assertFalse(u'/' in sanitize_filename(u'abc/de///')) | ||||||
|  |  | ||||||
| 		self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de')) | 		self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de')) | ||||||
| 		self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|')) | 		self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|')) | ||||||
| 		self.assertEqual(u'yes no', sanitize_filename(u'yes? no')) | 		self.assertEqual(u'yes no', sanitize_filename(u'yes? no')) | ||||||
| 		self.assertEqual(u'this - that', sanitize_filename(u'this: that')) | 		self.assertEqual(u'this - that', sanitize_filename(u'this: that')) | ||||||
| @@ -45,14 +45,17 @@ class TestUtil(unittest.TestCase): | |||||||
|  |  | ||||||
| 		self.assertEqual(sanitize_filename(u'123', restricted=True), u'123') | 		self.assertEqual(sanitize_filename(u'123', restricted=True), u'123') | ||||||
|  |  | ||||||
| 		self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True)) | 		self.assertEqual(u'abc_de', sanitize_filename(u'abc/de', restricted=True)) | ||||||
| 		self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True)) | 		self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True)) | ||||||
|  |  | ||||||
| 		self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True)) | 		self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de', restricted=True)) | ||||||
| 		self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True)) | 		self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True)) | ||||||
| 		self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True)) | 		self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True)) | ||||||
| 		self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True)) | 		self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True)) | ||||||
|  |  | ||||||
|  | 		self.assertEqual(sanitize_filename(u'aäb', restricted=True), u'a_b') | ||||||
|  | 		self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename | ||||||
|  |  | ||||||
| 		forbidden = u'"\0\\/&: \'\t\n' | 		forbidden = u'"\0\\/&: \'\t\n' | ||||||
| 		for fc in forbidden: | 		for fc in forbidden: | ||||||
| 			print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True))) | 			print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True))) | ||||||
|   | |||||||
| @@ -59,8 +59,8 @@ redistribute it or use it however you like. | |||||||
| \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe, | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe, | ||||||
| \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal | ||||||
| \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. | ||||||
| --restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in | --restrict-filenames\ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and | ||||||
| \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames | ||||||
| -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin) | -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin) | ||||||
| -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files | -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files | ||||||
| -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files | -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files | ||||||
|   | |||||||
| @@ -274,7 +274,7 @@ def parseOpts(): | |||||||
| 			dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') | 			dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') | ||||||
| 	filesystem.add_option('--restrict-filenames', | 	filesystem.add_option('--restrict-filenames', | ||||||
| 			action='store_true', dest='restrictfilenames', | 			action='store_true', dest='restrictfilenames', | ||||||
| 			help='Avoid some characters such as "&" and spaces in filenames', default=False) | 			help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) | ||||||
| 	filesystem.add_option('-a', '--batch-file', | 	filesystem.add_option('-a', '--batch-file', | ||||||
| 			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') | 			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') | ||||||
| 	filesystem.add_option('-w', '--no-overwrites', | 	filesystem.add_option('-w', '--no-overwrites', | ||||||
|   | |||||||
| @@ -207,15 +207,20 @@ def sanitize_filename(s, restricted=False): | |||||||
| 		elif char == ':': | 		elif char == ':': | ||||||
| 			return '_-' if restricted else ' -' | 			return '_-' if restricted else ' -' | ||||||
| 		elif char in '\\/|*<>': | 		elif char in '\\/|*<>': | ||||||
| 			return '-' | 			return '_' | ||||||
| 		if restricted and (char in '&\'' or char.isspace()): | 		if restricted and (char in '&\'' or char.isspace()): | ||||||
| 			return '_' | 			return '_' | ||||||
|  | 		if restricted and ord(char) > 127: | ||||||
|  | 			return '_' | ||||||
| 		return char | 		return char | ||||||
|  |  | ||||||
| 	result = u''.join(map(replace_insane, s)) | 	result = u''.join(map(replace_insane, s)) | ||||||
| 	while '--' in result: | 	while '__' in result: | ||||||
| 		result = result.replace('--', '-') | 		result = result.replace('__', '_') | ||||||
| 	return result.strip('-') | 	result = result.strip('_') | ||||||
|  | 	if not result: | ||||||
|  | 		result = '_' | ||||||
|  | 	return result | ||||||
|  |  | ||||||
| def orderedSet(iterable): | def orderedSet(iterable): | ||||||
| 	""" Remove all duplicates from the input iterable """ | 	""" Remove all duplicates from the input iterable """ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user