329 lines
11 KiB
Python
Executable file
329 lines
11 KiB
Python
Executable file
#! /usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
|
||
"""Rename files with bad characters.
|
||
|
||
Working in the shell is often complicated when files have weird names.
This script renames the files in a directory tree so that their names
no longer include those bad characters. The worst are whitespace.
|
||
|
||
"""
|
||
|
||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||
import logging
|
||
# import re as regex
|
||
import os
|
||
import os.path
|
||
|
||
# Names accepted by the --loglevel option, mapped to the numeric levels
# of the logging module (looked up by their upper-case attribute name).
LOGGING_LEVELS = {
    level_name: getattr(logging, level_name.upper())
    for level_name in ("debug", "info", "warning", "error")
}

# Results of a transformation that must never be used as a file name.
INVALID_NAMES = ["", ".", ".."]
|
||
# IGNORED_NAME_REGEX = regex.compile(r"(__.*__\.(py|html|txt)|.*\.class)")
|
||
# is done with function
|
||
|
||
# Substitute used for most characters that are dropped from a name.
REPLACER = "_"

# One-to-one character table: the symbol at position i of SINGLE_SYMBOLS
# is replaced by the character at position i of SINGLE_REPLACE_SYMBOLS.
SINGLE_SYMBOLS = (
    "ãāǎàąćčēéěèȩęêėīíǐìĩïıńōóǒòøũūúǔùşśļłŁĻķўźżĶḩģĢḨņŅŃŗŖ"
    "ĀĄǍÀĆĒÉĚĘÈĪÍǏÌŚŌÓǑÒŪÚǓÙŹŻý=–|"
)
SINGLE_REPLACE_SYMBOLS = (
    "aaaaacceeeeeeeeiiiiiiinooooouuuuussllLLkyzzKhgGHnNNrR"
    "AAAACEEEEEIIIISOOOOUUUUZZy---"
)
|
||
|
||
# Transliteration table taken from
# https://programminghistorian.org/en/lessons/transliterating
#
# The tuple lists the Latin spelling of the 32 upper-case letters
# U+0410 .. U+042F in code point order.  The matching lower-case letter
# sits exactly 0x20 code points higher and gets the lower-cased spelling.
_CYRILLIC_UPPER_LATIN = (
    "A", "B", "V", "G", "D", "E", "Zh", "Z",
    "I", "I", "K", "L", "M", "N", "O", "P",
    "R", "S", "T", "U", "F", "Kh", "Ts", "Ch",
    "Sh", "Shch", '"', "Y", "'", "E", "Iu", "Ia",
)

# Entries are inserted pairwise (upper-case letter, then lower-case letter).
cyrillic = {
    chr(first_letter + offset): (
        latin if first_letter == 0x0410 else latin.lower()
    )
    for offset, latin in enumerate(_CYRILLIC_UPPER_LATIN)
    for first_letter in (0x0410, 0x0430)
}
|
||
|
||
# Substring -> replacement table used to clean file names.
#
# The entries are applied one after the other in insertion order, so a
# longer pattern has to be listed before any shorter pattern it contains
# (e.g. "C#" before "#", "(with lyrics)" before " " and "(").
REPLACEMENTS = {
    "\n": REPLACER,
    # u with diaeresis and tone mark
    "ǖ": "ue",
    "ǘ": "ue",
    "ǚ": "ue",
    "ǜ": "ue",
    "Ǖ": "Ue",
    "Ǘ": "Ue",
    "Ǚ": "Ue",
    "Ǜ": "Ue",
    # quotes and accents are dropped
    "'": "",
    '"': "",
    "`": "",
    "´": "",
    "+": REPLACER + "und" + REPLACER,
    "&": REPLACER + "und" + REPLACER,
    "%": "Prozent" + REPLACER,
    "*": "x",
    "=": "-",
    "(with lyrics)": "",
    " ": REPLACER,
    "C#": "C_sharp",
    "c#": "c_sharp",
    "#": REPLACER,
    # NOTE(review): "|" is also part of SINGLE_SYMBOLS, so the update
    # below appears to overwrite this value with "-" - confirm which of
    # the two replacements is intended.
    "|": "l",
    ",": REPLACER,
    "{": "",
    "}": "",
    "(": "",
    ")": "",
    "[": "",
    "]": "",
    "~": "-",
    ":": REPLACER,
    "@": "-at-",
    "?": REPLACER,
    # Full-width look-alikes of characters that are forbidden in file
    # names (the same family as the U+29F8 big solidus below).
    # NOTE(review): these three keys were unreadable / exact duplicates of
    # the ASCII entries in the reviewed copy and have been reconstructed -
    # confirm against the original file.
    "\uff02": REPLACER,  # full-width quotation mark
    "\uff1f": REPLACER,  # full-width question mark
    "!": REPLACER,
    "⧸": REPLACER,
    "\uff1a": REPLACER,  # full-width colon
    ">": REPLACER,
    "<": REPLACER,
    "ä": "ae",
    "æ": "ae",
    "Ä": "Ae",
    "á": "au",  # icelandic
    "Á": "Au",
    "ö": "oe",
    "Ö": "Oe",
    "ü": "ue",
    "Ü": "Ue",
    "ẞ": "SS",
    "ß": "ss",
    "ð": "dh",
    "Ð": "Dh",
    "þ": "th",
    "Þ": "Th",
    "μFSR": "myFSR",
    "μfsr": "myfsr",
    "μ": "mu",
    "φFSR": "phyFSR",
    "φfsr": "phyfsr",
    "φ": "phi",
    "$": "USD",
    "€": "EUR",
    ".jpeg": ".jpg",
    ".JPG": ".jpg",
    ".JPEG": ".jpg",
    "\u202f": "_",  # some space
    "N°": "Nr",
    "°C": "degCelsius",
    "…": "_",
    "\\": REPLACER,
}

# One-to-one character replacements (position i maps to position i).
REPLACEMENTS.update(
    {symbol: SINGLE_REPLACE_SYMBOLS[index]
     for index, symbol in enumerate(SINGLE_SYMBOLS)})

# The transliteration table renders the hard and the soft sign as '"' and
# "'".  These entries are applied after the quote-removing entries above,
# so the quotes would end up in the cleaned name; strip them here instead.
REPLACEMENTS.update(
    {letter: latin.replace('"', "").replace("'", "")
     for letter, latin in cyrillic.items()})

# Whole code point ranges (both ends inclusive) sharing one replacement.
for begin, end, replacement in [
        (0x2000, 0x200F, REPLACER),  # various spaces and formatters
        (0x2010, 0x2017, "-"),       # various dashes
        (0x2018, 0x201F, ""),        # various apostrophes
        (0x2028, 0x202E, ""),        # separators, format characters
        (0x2032, 0x203A, ""),        # various apostrophes and quotation marks
        # one could add many more
]:
    REPLACEMENTS.update(
        {chr(code_point): replacement
         for code_point in range(begin, end + 1)})
|
||
|
||
|
||
def parse_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse.  With the
            default ``None`` the arguments of the running process are
            parsed, exactly as before this parameter existed.

    Returns:
        The ``argparse.Namespace`` holding the attributes ``ask``,
        ``rename``, ``log``, ``loglevel``, ``quiet`` and ``top``.
        If ``quiet`` is set, ``loglevel`` is forced to ``"warning"``.

    """
    parser = ArgumentParser(
        description="""Remove bad characters by renaming files.

        Which files are renamed is logged in a log file.
        (See below in --log.)""",
        formatter_class=ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "-a", "--noask", "--no-interactive",
        dest="ask",
        help=("Specify if renaming should not be confirmed by the user"
              " but just done."),
        action="store_false"
    )
    parser.add_argument(
        "-l", "--onlylog",
        dest="rename",
        action="store_false",
        help="Dry run."
    )
    parser.add_argument(
        "--log", "--logfile",
        dest="log",
        metavar="L",
        # The default is appended by ArgumentDefaultsHelpFormatter, so it
        # is not repeated (and cannot get out of date) in the text.
        help="""the file where to write the logging output.
        This file is overwritten if existing.""",
        action="store",
        default=os.path.join("..", "rename.log")
    )
    parser.add_argument(
        "--loglevel", "--level",
        dest="loglevel",
        help="""Specify the log level.
        'error' includes OS errors at renaming files.
        'warning' additionally includes no-renames because of weird resulting file name.
        'info' includes no-renames because of existing files and renames and working directories.
        'debug' includes good files.""",
        default="info",
        choices=LOGGING_LEVELS.keys()
    )
    parser.add_argument(
        "-q", "--quiet",
        dest="quiet",
        help=("Set log level to warning, so usually do not create a log file. "
              "Overwrites loglevel option."),
        action="store_true"
    )
    # Optional positional argument: the root of the tree to process.
    parser.add_argument(
        nargs="?",
        dest="top",
        metavar="directory",
        help="""The directory in which to rename files.""",
        action="store",
        default=".")
    args = parser.parse_args(argv)
    if args.quiet:
        # --quiet wins over an explicitly given --loglevel.
        args.loglevel = "warning"
    return args
|
||
|
||
|
||
def transform_filename(name: str) -> str:
    """Remove all bad symbols from name.

    Every key of REPLACEMENTS found in ``name`` is replaced by its value
    (in the table's insertion order).  Afterwards runs of the separators
    "-", "_" and "." are collapsed and REPLACER is dropped next to a
    separator.  Leading dashes are removed.

    Args:
        name: A single file or directory name (no path).

    Returns:
        The cleaned name.  It may be empty; the caller has to check that.
    """
    for symbol, replacement in REPLACEMENTS.items():
        name = name.replace(symbol, replacement)
    # str.lstrip returns a new string; the result has to be kept.
    name = name.lstrip("-")
    # Repeat until a fixed point is reached: one pass can create new
    # adjacent separators that the next pass has to collapse.
    while True:
        old_name = name
        for separator in ["-", "_", "."]:
            name = name.replace(REPLACER + separator, separator)
            name = name.replace(separator + REPLACER, separator)
            name = name.replace(separator + separator, separator)
        if old_name == name:
            break
    # Collapsing e.g. "_-" to "-" can expose a new leading dash.
    return name.lstrip("-")
|
||
|
||
|
||
def rename_file(directory, file, args):
    """Rename file directory/file if renaming is OK.

    The file is left alone if its name is already clean, if the cleaned
    name is one of INVALID_NAMES, if the target already exists, if the
    user declines, or if this is a dry run.

    Args:
        directory: Path of the directory that contains the entry.
        file: Name of the file or directory inside ``directory``.
        args: Parsed command line arguments; ``args.ask`` enables the
            interactive confirmation and ``args.rename`` is false for a
            dry run.

    Returns:
        The name the entry has on disk after processing: the new name if
        it was renamed, otherwise the unchanged ``file``.
    """
    new_name = transform_filename(file)
    if new_name == file:
        logging.debug("'{}' is OK.".format(file))
        return file

    path = os.path.join(directory, file)
    # Must be checked before the existence test: joining the directory
    # with "", "." or ".." always yields an existing path, which would
    # hide this case behind the "already exists" message.
    if new_name in INVALID_NAMES:
        logging.warning("'{}' is not renamed because it would invalid: '{}'.".format(
            path, new_name))
        return file

    new_path = os.path.join(directory, new_name)
    if os.path.lexists(new_path):
        logging.info("'{}' is not renamed to '{}' because this already exists.".format(
            file, new_name
        ))
        return file

    if args.ask:
        answer = input("Rename '{}' to '{}'? (Enter for yes)".format(
            path, new_name
        ))
        # Everything that does not start with "n"/"N" counts as yes.
        if answer.lower().startswith("n"):
            logging.info("Did not rename '{}' to '{}' due to user choice.".format(
                file, new_name
            ))
            return file

    logging.info("Rename '{}' to '{}'".format(file, new_name))
    if not args.rename:
        # Dry run: nothing changes on disk, so the old name stays valid.
        return file
    try:
        os.rename(path, new_path)
    except OSError as error:
        # Covers FileNotFoundError as well as e.g. PermissionError, so a
        # single failing entry is logged instead of aborting the run.
        logging.error("Could not move '{}' to '{}' due to {}: {}".format(
            path, new_name, type(error).__name__, error
        ))
        return file
    return new_name
|
||
|
||
|
||
def if_ignore(filename):
    """Tell whether the entry with this name is skipped.

    Skipped are hidden entries (leading dot), '__pycache__',
    'telegram-nachrichten', java class files and special python files
    such as '__init__.py' ('__*__.py', '__*__.html', '__*__.txt').

    For a directory this means that its content is skipped as well.
    """
    if filename.startswith('.'):
        return True
    if filename in ('__pycache__', "telegram-nachrichten"):
        return True
    if filename.endswith('.class'):  # java class file
        return True
    # special python file
    dunder_suffixes = ('__.py', '__.html', '__.txt')
    return filename.startswith('__') and filename.endswith(dunder_suffixes)
|
||
|
||
|
||
if __name__ == "__main__":
    # Walk the tree top-down, renaming directories before descending into
    # them, then renaming the files of each directory.
    args = parse_args()
    logging.basicConfig(
        filename=args.log,
        # The --log help text promises that an existing log file is
        # overwritten; the default mode of basicConfig would append.
        filemode="w",
        level=LOGGING_LEVELS[args.loglevel]
    )
    for dirpath, dirnames, filenames in os.walk(args.top, topdown=True):
        print('.', end='')  # progress indicator, one dot per directory
        logging.info("Go to directory '{}'".format(os.path.abspath(dirpath)))

        # Collect the directories to descend into in a separate list:
        # removing from or appending to dirnames while looping over it
        # skips entries and leaves stale (already renamed) names behind.
        remaining_dirs = []
        for dirname in dirnames:
            if if_ignore(dirname):
                logging.debug("Ignore dir '{}'".format(dirname))
                continue
            new_name = rename_file(dirpath, dirname, args)
            # Keep the old name should no name be reported back.
            remaining_dirs.append(dirname if new_name is None else new_name)
        # With topdown=True os.walk only honours in-place modifications.
        dirnames[:] = remaining_dirs

        for file in filenames:
            if if_ignore(file):
                logging.debug("Ignore file '{}'".format(file))
            else:
                rename_file(dirpath, file, args)

    # remove empty log file:
    logging.shutdown()
    if os.stat(args.log).st_size == 0:
        os.remove(args.log)
|