2021-01-11 02:59:31 +01:00
|
|
|
|
#! /usr/bin/env python3
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
|
"""Rename files with bad characters.
|
|
|
|
|
|
|
|
|
|
Working in the shell is often complicated if files have weird names.
|
|
|
|
|
This script renames files in a directory tree so that they
|
|
|
|
|
do not include those bad characters. The worst are whitespace.
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
|
|
|
|
import logging
|
2021-02-12 19:41:21 +01:00
|
|
|
|
# import re as regex
|
2021-01-11 02:59:31 +01:00
|
|
|
|
import os
|
|
|
|
|
import os.path
|
|
|
|
|
|
|
|
|
|
# Maps the level names selectable on the command line to the numeric
# levels of the logging module (e.g. "debug" -> logging.DEBUG).
LOGGING_LEVELS = {
    level_name: getattr(logging, level_name.upper())
    for level_name in ("debug", "info", "warning", "error")
}
|
|
|
|
|
|
|
|
|
|
# Names that must never be the result of a rename.
INVALID_NAMES = ["", ".", ".."]
# Which names are skipped entirely is decided by if_ignore(), not by a regex.

# most common replacement string
REPLACER = "_"

# Symbols that are replaced by exactly one ASCII character: the symbol at
# index i of SINGLE_SYMBOLS becomes the character at index i of
# SINGLE_REPLACE_SYMBOLS, so both strings must have the same length.
SINGLE_SYMBOLS = r'ãāǎàąćčēéěèȩęêėīíǐìĩïıńōóǒòøũūúǔùşśļłŁĻķўźżĶḩģĢḨņŅŃŗŖĀĄǍÀĆĒÉĚĘÈĪÍǏÌŚŌÓǑÒŪÚǓÙŹŻý=–|'
SINGLE_REPLACE_SYMBOLS = r'aaaaacceeeeeeeeiiiiiiinooooouuuuussllLLkyzzKhgGHnNNrRAAAACEEEEEIIIISOOOOUUUUZZy---'
if len(SINGLE_SYMBOLS) != len(SINGLE_REPLACE_SYMBOLS):
    raise ValueError(
        "SINGLE_SYMBOLS and SINGLE_REPLACE_SYMBOLS must have the same length")

# Cyrillic to Latin transliteration table,
# from https://programminghistorian.org/en/lessons/transliterating:
cyrillic = {'\u0410': 'A', '\u0430': 'a',
            '\u0411': 'B', '\u0431': 'b',
            '\u0412': 'V', '\u0432': 'v',
            '\u0413': 'G', '\u0433': 'g',
            '\u0414': 'D', '\u0434': 'd',
            '\u0415': 'E', '\u0435': 'e',
            '\u0416': 'Zh', '\u0436': 'zh',
            '\u0417': 'Z', '\u0437': 'z',
            '\u0418': 'I', '\u0438': 'i',
            '\u0419': 'I', '\u0439': 'i',
            '\u041a': 'K', '\u043a': 'k',
            '\u041b': 'L', '\u043b': 'l',
            '\u041c': 'M', '\u043c': 'm',
            '\u041d': 'N', '\u043d': 'n',
            '\u041e': 'O', '\u043e': 'o',
            '\u041f': 'P', '\u043f': 'p',
            '\u0420': 'R', '\u0440': 'r',
            '\u0421': 'S', '\u0441': 's',
            '\u0422': 'T', '\u0442': 't',
            '\u0423': 'U', '\u0443': 'u',
            '\u0424': 'F', '\u0444': 'f',
            '\u0425': 'Kh', '\u0445': 'kh',
            '\u0426': 'Ts', '\u0446': 'ts',
            '\u0427': 'Ch', '\u0447': 'ch',
            '\u0428': 'Sh', '\u0448': 'sh',
            '\u0429': 'Shch', '\u0449': 'shch',
            '\u042a': '"', '\u044a': '"',
            '\u042b': 'Y', '\u044b': 'y',
            '\u042c': "'", '\u044c': "'",
            '\u042d': 'E', '\u044d': 'e',
            '\u042e': 'Iu', '\u044e': 'iu',
            '\u042f': 'Ia', '\u044f': 'ia'}

# Maps every bad symbol (or symbol sequence) to its replacement.
# The insertion order matters: transform_filename() applies the entries one
# after the other, so longer sequences (e.g. "C#") must come before the
# single symbols they contain (e.g. "#").
REPLACEMENTS = {
    '\n': REPLACER,
    "ǖ": "ue",
    "ǘ": "ue",
    "ǚ": "ue",
    "ǜ": "ue",
    "Ǖ": "Ue",
    "Ǘ": "Ue",
    "Ǚ": "Ue",
    "Ǜ": "Ue",
    "'": "",
    '"': "",
    "`": "",
    "´": "",
    "+": REPLACER + "und" + REPLACER,
    "&": REPLACER + "und" + REPLACER,
    "%": "Prozent" + REPLACER,
    "*": "x",
    "=": "-",
    "(with lyrics)": "",
    " ": REPLACER,
    "C#": "C_sharp",
    "c#": "c_sharp",
    "#": REPLACER,
    # NOTE: "|" is also part of SINGLE_SYMBOLS, so the update below
    # overrides this value with "-".
    "|": "l",
    ",": REPLACER,
    "{": "",
    "}": "",
    "(": "",
    ")": "",
    "[": "",
    "]": "",
    "~": "-",
    ":": REPLACER,
    "@": "-at-",
    "?": REPLACER,
    # NOTE(review): "?" and ":" were listed a second time and a quote key
    # was unterminated here. Presumably these were the fullwidth look-alikes
    # that some download tools put into file names - confirm.
    "\uff1f": REPLACER,  # fullwidth question mark
    "\uff02": REPLACER,  # fullwidth quotation mark
    "!": REPLACER,
    "⧸": REPLACER,
    "\uff1a": REPLACER,  # fullwidth colon
    ">": REPLACER,
    "<": REPLACER,
    "ä": "ae",
    "æ": "ae",
    "Ä": "Ae",
    "á": "au",  # icelandic
    "Á": "Au",
    "ö": "oe",
    "Ö": "Oe",
    "ü": "ue",
    "Ü": "Ue",
    "ẞ": "SS",
    "ß": "ss",
    "ð": "dh",
    "Ð": "Dh",
    "þ": "th",
    "Þ": "Th",
    "μFSR": "myFSR",
    "μfsr": "myfsr",
    "μ": "mu",
    "φFSR": "phyFSR",
    "φfsr": "phyfsr",
    "φ": "phi",
    "$": "USD",
    "€": "EUR",
    ".jpeg": ".jpg",
    ".JPG": ".jpg",
    ".JPEG": ".jpg",
    "\u202f": "_",  # some space
    "N°": "Nr",
    "°C": "degCelsius",
    "…": "_",
    "\\": REPLACER,
}
REPLACEMENTS.update(zip(SINGLE_SYMBOLS, SINGLE_REPLACE_SYMBOLS))
# The transliteration table renders the hard and soft sign as quote
# characters. Quotes are bad characters themselves (see above) and these
# entries are applied after the quote removal, so drop them here instead of
# introducing new quotes into the file names.
REPLACEMENTS.update(
    {symbol: latin.strip("\"'") for symbol, latin in cyrillic.items()})

# Whole ranges of code points (both ends inclusive) with a common replacement.
for begin, end, replacement in [
        (0x2000, 0x200F, REPLACER),  # various spaces and formatters
        (0x2010, 0x2017, '-'),  # various dashes
        (0x2018, 0x201F, ""),  # various apostrophes
        (0x2028, 0x202E, ""),  # separators, format characters
        (0x2032, 0x203A, ""),  # various apostrophes and quotation marks
        # one could add many more
]:
    REPLACEMENTS.update(
        {chr(code_point): replacement
         for code_point in range(begin, end + 1)})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: list of argument strings to parse. If None (the default),
            the arguments are taken from the command line (sys.argv).

    Returns:
        argparse.Namespace with the attributes ask, rename, log, loglevel,
        quiet and top. If quiet is set, loglevel is forced to "warning".
    """
    parser = ArgumentParser(
        description="""Remove bad characters by renaming files.

        Which files are renamed is logged in a log file.
        (See below in --log.)""",
        formatter_class=ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "-a", "--noask", "--no-interactive",
        dest="ask",
        help=("Specify if renaming should not be confirmed by the user"
              " but just done."),
        action="store_false"
    )
    parser.add_argument(
        "-l", "--onlylog",
        dest="rename",
        action="store_false",
        help="Dry run."
    )
    parser.add_argument(
        "--log", "--logfile",
        dest="log",
        metavar="L",
        # The default is appended by ArgumentDefaultsHelpFormatter, so it is
        # not repeated (and cannot get out of sync) in the help text.
        help=("The file where to write the logging output. "
              "This file is overwritten if existing."),
        action="store",
        default=os.path.join("..", "rename.log")
    )
    parser.add_argument(
        "--loglevel", "--level",
        dest="loglevel",
        help="""Specify the log level.
        'error' includes OS errors at renaming files.
        'warning' additionally includes no-renames because of weird resulting file name.
        'info' includes no-renames because of existing files and renames and working directories.
        'debug' includes good files.""",
        default="info",
        choices=list(LOGGING_LEVELS)
    )
    parser.add_argument(
        "-q", "--quiet",
        dest="quiet",
        help=("Set log level to warning, so usually do not create a log file. "
              "Overwrites loglevel option."),
        action="store_true"
    )
    parser.add_argument(
        "top",
        nargs="?",
        metavar="directory",
        help="""The directory in which to rename files.""",
        action="store",
        default=".")
    args = parser.parse_args(argv)
    if args.quiet:
        # --quiet wins over an explicitly given --loglevel
        args.loglevel = "warning"
    return args
|
2021-01-11 02:59:31 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def transform_filename(name: str) -> str:
    """Remove all bad symbols from name.

    First every key of REPLACEMENTS is replaced by its value, in the order
    of the dictionary. Then runs of separators ("-", "_", ".") are
    collapsed and finally leading dashes are removed.

    Args:
        name: the file name (without directory) to clean up.

    Returns:
        The cleaned-up name. It may be empty or otherwise unusable; the
        caller has to check that.
    """
    for symbol, replacement in REPLACEMENTS.items():
        name = name.replace(symbol, replacement)
    # Collapse neighbouring separators until nothing changes any more,
    # e.g. "a_-_b" becomes "a-b" and "a___b" becomes "a_b".
    while True:
        old_name = name
        for separator in ["-", "_", "."]:
            name = name.replace(REPLACER + separator, separator)
            name = name.replace(separator + REPLACER, separator)
            name = name.replace(separator + separator, separator)
        if old_name == name:
            break
    # str.lstrip returns a new string, so the result has to be used.
    # Done last because collapsing separators can expose a leading dash.
    return name.lstrip("-")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def rename_file(directory, file, args):
    """Rename file directory/file if renaming is OK.

    Nothing is renamed if the name is already fine, if the new name is
    invalid or already exists, if the user declines (only asked if args.ask
    is set) or if this is a dry run (args.rename is not set).

    Args:
        directory: path of the directory containing the file.
        file: name of the file (or directory) inside directory.
        args: the parsed command line arguments; ask and rename are used.

    Returns:
        filename after processing: the new name if the file was actually
        renamed, otherwise the unchanged name. Never None.
    """
    new_name = transform_filename(file)
    if new_name == file:
        logging.debug("'{}' is OK.".format(file))
        return file
    path = os.path.join(directory, file)
    # Check this before the existence test: joining the directory with an
    # empty or dot name yields an existing path, which would hide the real
    # reason.
    if new_name in INVALID_NAMES:
        logging.warning("'{}' is not renamed because it would be invalid: '{}'.".format(
            path, new_name))
        return file
    new_path = os.path.join(directory, new_name)
    if os.path.lexists(new_path):
        logging.info("'{}' is not renamed to '{}' because this already exists.".format(
            file, new_name
        ))
        return file
    if args.ask:
        answer = input("Rename '{}' to '{}'? (Enter for yes)".format(
            path, new_name
        ))
        # everything not starting with "n" counts as yes
        if answer.lower().startswith("n"):
            logging.info("Did not rename '{}' to '{}' due to user choice.".format(
                file, new_name
            ))
            return file
    logging.info("Rename '{}' to '{}'".format(file, new_name))
    if not args.rename:  # == onlylog
        # Dry run: the file keeps its name, so report the old one.
        return file
    try:
        os.rename(path, new_path)
    except OSError as error:
        # Covers FileNotFoundError, PermissionError, ...; one failing file
        # must not abort the whole run.
        logging.error("Could not move '{}' to '{}' due to {}: {}".format(
            path, new_name, type(error).__name__, error
        ))
        return file
    return new_name
|
2021-02-12 19:41:21 +01:00
|
|
|
|
|
2021-01-11 02:59:31 +01:00
|
|
|
|
|
|
|
|
|
def if_ignore(filename):
    """Tell whether the given file or directory name is to be skipped.

    For a directory, skipping it means that its content is skipped, too.
    """
    if filename.startswith('.'):
        return True
    if filename in ('__pycache__', "telegram-nachrichten"):
        return True
    if filename.endswith('.class'):  # java class file
        return True
    # special python file
    dunder_endings = ('__.py', '__.html', '__.txt')
    return filename.startswith('__') and filename.endswith(dunder_endings)
|
2021-01-11 02:59:31 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Walk the directory tree top-down and rename all bad names.

    Directories are renamed before they are entered. Ignored directories
    (see if_ignore) are not entered at all. An empty log file is removed at
    the end.
    """
    args = parse_args()
    logging.basicConfig(
        filename=args.log,
        # The help of --log promises that the file is overwritten; the
        # default mode of basicConfig would append.
        filemode="w",
        level=LOGGING_LEVELS[args.loglevel]
    )
    for dirpath, dirnames, filenames in os.walk(args.top, topdown=True):
        print('.', end='', flush=True)  # progress indicator
        logging.info("Go to directory '{}'".format(os.path.abspath(dirpath)))
        # Collect the directories to visit in a new list instead of removing
        # from and appending to dirnames while iterating over it, which
        # skips entries.
        remaining = []
        for name in dirnames:
            if if_ignore(name):
                logging.debug("Ignore dir '{}'".format(name))
                continue
            # Fall back to the current name if no new name is reported.
            current = rename_file(dirpath, name, args) or name
            if current not in remaining:
                remaining.append(current)
        # os.walk only honours in-place changes of dirnames.
        dirnames[:] = remaining
        for file in filenames:
            if if_ignore(file):
                logging.debug("Ignore file '{}'".format(file))
            else:
                rename_file(dirpath, file, args)
    print()  # finish the line of progress dots

    # remove empty log file:
    logging.shutdown()
    if os.path.exists(args.log) and os.stat(args.log).st_size == 0:
        os.remove(args.log)


if __name__ == "__main__":
    main()
|