#! /usr/bin/env python3
# -*- coding: utf-8 -*-
"""Rename files whose names contain bad characters.

Working in a shell gets complicated when file names contain unusual
characters.  This script walks a directory tree and renames files and
directories so that their names no longer include such characters.
Whitespace is the worst offender.
"""

from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import logging
import os
import os.path

# Maps the names accepted by --loglevel to the logging module's levels.
LOGGING_LEVELS = {
    "debug": logging.DEBUG,
    "info": logging.INFO,
    "warning": logging.WARNING,
    "error": logging.ERROR,
}

# Results of transform_filename() that must never be used as a file name.
INVALID_NAMES = ["", ".", ".."]

# Which names are skipped entirely is decided by if_ignore().

# most common replacement string
REPLACER = "_"

# Characters of SINGLE_SYMBOLS are replaced by the character at the same
# position in SINGLE_REPLACE_SYMBOLS.
SINGLE_SYMBOLS = r'ãāǎàąćčēéěèȩęêėīíǐìĩïıńōóǒòøũūúǔùşśļłŁĻķўźżĶḩģĢḨņŅŃŗŖĀĄǍÀĆĒÉĚĘÈĪÍǏÌŚŌÓǑÒŪÚǓÙŹŻý=–|'
SINGLE_REPLACE_SYMBOLS = r'aaaaacceeeeeeeeiiiiiiinooooouuuuussllLLkyzzKhgGHnNNrRAAAACEEEEEIIIISOOOOUUUUZZy---'

# from https://programminghistorian.org/en/lessons/transliterating:
cyrillic = {
    '\u0410': 'A', '\u0430': 'a',
    '\u0411': 'B', '\u0431': 'b',
    '\u0412': 'V', '\u0432': 'v',
    '\u0413': 'G', '\u0433': 'g',
    '\u0414': 'D', '\u0434': 'd',
    '\u0415': 'E', '\u0435': 'e',
    '\u0416': 'Zh', '\u0436': 'zh',
    '\u0417': 'Z', '\u0437': 'z',
    '\u0418': 'I', '\u0438': 'i',
    '\u0419': 'I', '\u0439': 'i',
    '\u041a': 'K', '\u043a': 'k',
    '\u041b': 'L', '\u043b': 'l',
    '\u041c': 'M', '\u043c': 'm',
    '\u041d': 'N', '\u043d': 'n',
    '\u041e': 'O', '\u043e': 'o',
    '\u041f': 'P', '\u043f': 'p',
    '\u0420': 'R', '\u0440': 'r',
    '\u0421': 'S', '\u0441': 's',
    '\u0422': 'T', '\u0442': 't',
    '\u0423': 'U', '\u0443': 'u',
    '\u0424': 'F', '\u0444': 'f',
    '\u0425': 'Kh', '\u0445': 'kh',
    '\u0426': 'Ts', '\u0446': 'ts',
    '\u0427': 'Ch', '\u0447': 'ch',
    '\u0428': 'Sh', '\u0448': 'sh',
    '\u0429': 'Shch', '\u0449': 'shch',
    '\u042a': '"', '\u044a': '"',
    '\u042b': 'Y', '\u044b': 'y',
    '\u042c': "'", '\u044c': "'",
    '\u042d': 'E', '\u044d': 'e',
    '\u042e': 'Iu', '\u044e': 'iu',
    '\u042f': 'Ia', '\u044f': 'ia',
}

# Substring -> replacement.  transform_filename() applies the entries in
# insertion order, so multi-character keys (e.g. "C#") must come before the
# single characters they contain (e.g. "#").
REPLACEMENTS = {
    "\n": REPLACER,
    "ǖ": "ue",
    "ǘ": "ue",
    "ǚ": "ue",
    "ǜ": "ue",
    "Ǖ": "Ue",
    "Ǘ": "Ue",
    "Ǚ": "Ue",
    "Ǜ": "Ue",
    "'": "",
    '"': "",
    "`": "",
    "´": "",
    "+": REPLACER + "und" + REPLACER,
    "&": REPLACER + "und" + REPLACER,
    "%": "Prozent" + REPLACER,
    "*": "x",
    "=": "-",
    "(with lyrics)": "",
    " ": REPLACER,
    "C#": "C_sharp",
    "c#": "c_sharp",
    "#": REPLACER,
    # NOTE(review): this value is overwritten with "-" below because "|" is
    # also listed in SINGLE_SYMBOLS; only the position in the dict survives.
    "|": "l",
    ",": REPLACER,
    "{": "",
    "}": "",
    "(": "",
    ")": "",
    "[": "",
    "]": "",
    "~": "-",
    ":": REPLACER,
    "@": "-at-",
    "?": REPLACER,
    # NOTE(review): the three "\uffXX" keys below were unreadable in the
    # source this file was recovered from (they appeared as a second '"',
    # '?' and ':' key, the first of which is not even valid syntax).
    # They are presumably the fullwidth look-alikes some download tools put
    # into file names, like the big solidus below -- please confirm.
    "\uff02": REPLACER,  # fullwidth quotation mark
    "\uff1f": REPLACER,  # fullwidth question mark
    "!": REPLACER,
    "⧸": REPLACER,
    "\uff1a": REPLACER,  # fullwidth colon
    ">": REPLACER,
    "<": REPLACER,
    "ä": "ae",
    "æ": "ae",
    "Ä": "Ae",
    "á": "au",  # icelandic
    "Á": "Au",
    "ö": "oe",
    "Ö": "Oe",
    "ü": "ue",
    "Ü": "Ue",
    "ẞ": "SS",
    "ß": "ss",
    "ð": "dh",
    "Ð": "Dh",
    "þ": "th",
    "Þ": "Th",
    "μFSR": "myFSR",
    "μfsr": "myfsr",
    "μ": "mu",
    "φFSR": "phyFSR",
    "φfsr": "phyfsr",
    "φ": "phi",
    "$": "USD",
    "€": "EUR",
    ".jpeg": ".jpg",
    ".JPG": ".jpg",
    ".JPEG": ".jpg",
    "\u202f": "_",  # some space
    "N°": "Nr",
    "°C": "degCelsius",
    "…": "_",
    "\\": REPLACER,
}

REPLACEMENTS.update(zip(SINGLE_SYMBOLS, SINGLE_REPLACE_SYMBOLS))

# The transliteration table renders the hard and the soft sign as quote
# characters.  These entries are applied after the quote removal above, so
# the quotes are stripped here to keep them out of the resulting names.
REPLACEMENTS.update(
    (letter, latin.strip("\"'")) for letter, latin in cyrillic.items())

for begin, end, replacement in [
        (0x2000, 0x200F, REPLACER),  # various spaces and formatters
        (0x2010, 0x2017, '-'),  # various dashes
        (0x2018, 0x201F, ""),  # various apostrophes
        (0x2028, 0x202E, ""),  # separators, format characters
        (0x2032, 0x203A, ""),  # various apostrophes and quotation marks
        # one could add many more
]:
    REPLACEMENTS.update(
        {chr(codepoint): replacement for codepoint in range(begin, end + 1)})


def parse_args():
    """Parse the command line arguments.

    Returns:
        argparse.Namespace with the attributes ``ask``, ``rename``, ``log``,
        ``loglevel``, ``quiet`` and ``top``.  If ``quiet`` is set,
        ``loglevel`` is forced to "warning".
    """
    parser = ArgumentParser(
        description=("Remove bad characters by renaming files. "
                     "Which files are renamed is logged in a log file. "
                     "(See below in --log.)"),
        formatter_class=ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "-a", "--noask", "--no-interactive",
        dest="ask",
        help=("Specify if renaming should not be confirmed by the user"
              + " but just done."),
        action="store_false"
    )
    parser.add_argument(
        "-l", "--onlylog",
        dest="rename",
        action="store_false",
        help="Dry run."
    )
    parser.add_argument(
        "--log", "--logfile",
        dest="log",
        metavar="L",
        help=("the file where to write the logging output. "
              "This file is overwritten if existing. "
              "If not specified assume 'rename.log'"),
        action="store",
        default=os.path.join("..", "rename.log")
    )
    parser.add_argument(
        "--loglevel", "--level",
        dest="loglevel",
        help=("Specify the log level. "
              "'error' includes OS errors at renaming files. "
              "'warning' additionally includes no-renames because of weird "
              "resulting file name. "
              "'info' includes no-renames because of existing files and "
              "renames and working directories. "
              "'debug' includes good files."),
        default="info",
        choices=LOGGING_LEVELS.keys()
    )
    parser.add_argument(
        "-q", "--quiet",
        dest="quiet",
        help=("Set log level to warning, so usually do not create a log file. "
              + "Overwrites loglevel option."),
        action="store_true"
    )
    parser.add_argument(
        nargs="?",
        dest="top",
        metavar="directory",
        help="The directory in which to rename files.",
        action="store",
        default="."
    )
    args = parser.parse_args()
    if args.quiet:
        args.loglevel = "warning"
    return args


def transform_filename(name: str) -> str:
    """Return name with all bad symbols replaced or removed.

    First every entry of REPLACEMENTS is applied in insertion order, then
    leading dashes are dropped, and finally runs of the separators "-", "_"
    and "." are collapsed until the name no longer changes.
    """
    for symbol, replacement in REPLACEMENTS.items():
        name = name.replace(symbol, replacement)
    # str.lstrip returns a new string; the result has to be kept.
    name = name.lstrip("-")
    while True:
        old_name = name
        for separator in ["-", "_", "."]:
            # A replacer next to a separator is redundant ...
            name = name.replace(REPLACER + separator, separator)
            name = name.replace(separator + REPLACER, separator)
            # ... and so is a doubled separator.
            name = name.replace(separator + separator, separator)
        if old_name == name:
            break
    return name


def rename_file(directory, file, args):
    """Rename directory/file if its name is bad and renaming is OK.

    Args:
        directory: path of the directory that contains the entry.
        file: name of the entry (file or directory) inside directory.
        args: object with the boolean attributes ``ask`` (confirm every
            rename interactively) and ``rename`` (False means dry run).

    Returns:
        The name the entry has on disk after processing: the new name if it
        was actually renamed, otherwise the unchanged ``file``.
    """
    new_name = transform_filename(file)
    if new_name == file:
        logging.debug("'%s' is OK.", file)
        return file

    path = os.path.join(directory, file)
    # This has to be checked before the existence test: joining the directory
    # with "", "." or ".." always yields an existing path.
    if new_name in INVALID_NAMES:
        logging.warning(
            "'%s' is not renamed because it would invalid: '%s'.",
            path, new_name)
        return file

    new_path = os.path.join(directory, new_name)
    if os.path.lexists(new_path):
        logging.info(
            "'%s' is not renamed to '%s' because this already exists.",
            file, new_name)
        return file

    if args.ask:
        answer = input("Rename '{}' to '{}'? (Enter for yes)".format(
            path, new_name))
        if answer.lower().startswith("n"):
            logging.info(
                "Did not rename '%s' to '%s' due to user choice.",
                file, new_name)
            return file

    logging.info("Rename '%s' to '%s'", file, new_name)
    if not args.rename:
        # Dry run: nothing changed on disk, so the old name is still valid.
        return file
    try:
        os.rename(path, new_path)
    except OSError as error:
        # OSError covers FileNotFoundError as well as e.g. PermissionError.
        logging.error(
            "Could not move '%s' to '%s' due to %s: %s",
            path, new_name, type(error).__name__, error)
        return file
    return new_name


def if_ignore(filename):
    """Return whether this file name should be ignored.

    If it is a directory, its content is ignored as well.
    """
    if filename.startswith('.') or filename == '__pycache__':
        return True
    # special python file
    if (filename.startswith('__')
            and filename.endswith(('__.py', '__.html', '__.txt'))):
        return True
    # java class file
    if filename.endswith('.class'):
        return True
    return filename == "telegram-nachrichten"


def main():
    """Walk the requested directory tree and rename all bad names."""
    args = parse_args()
    logging.basicConfig(
        filename=args.log,
        level=LOGGING_LEVELS[args.loglevel]
    )
    for dirpath, dirnames, filenames in os.walk(args.top, topdown=True):
        print('.', end='')
        logging.info("Go to directory '%s'", os.path.abspath(dirpath))
        # Collect the directories to descend into in a separate list;
        # changing dirnames while looping over it would skip entries.
        remaining = []
        for dirname in dirnames:
            if if_ignore(dirname):
                logging.debug("Ignore dir '%s'", dirname)
            else:
                remaining.append(rename_file(dirpath, dirname, args))
        # os.walk only honours in-place changes of dirnames.
        dirnames[:] = remaining
        for file in filenames:
            if if_ignore(file):
                logging.debug("Ignore file '%s'", file)
            else:
                rename_file(dirpath, file, args)
    # remove empty log file:
    logging.shutdown()
    if os.stat(args.log).st_size == 0:
        os.remove(args.log)


if __name__ == "__main__":
    main()