272 lines
8.8 KiB
Python
Executable file
272 lines
8.8 KiB
Python
Executable file
#! /usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
|
||
"""Rename files with bad characters.
|
||
|
||
The shell often is complicated if files have weird name.
|
||
This script renames files in a directory tree so that they
|
||
do not include those bad characters. The worst are whitespace.
|
||
|
||
"""
|
||
|
||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||
import logging
|
||
# import re as regex
|
||
import os
|
||
import os.path
|
||
|
||
# Log level names accepted on the command line, mapped onto the numeric
# levels of the logging module.
LOGGING_LEVELS = {
    level_name: getattr(logging, level_name.upper())
    for level_name in ("debug", "info", "warning", "error")
}

# Transformation results that must never be used as a file name.
INVALID_NAMES = ["", ".", ".."]
# Names to be ignored were once matched with the regular expression
#     IGNORED_NAME_REGEX = regex.compile(r"(__.*__\.(py|html|txt)|.*\.class)")
# This is now done with a function.
|
||
|
||
# most common replacement string
REPLACER = "_"

# Characters that are replaced by exactly one other character: the symbol at
# position i of SINGLE_SYMBOLS is replaced by the symbol at position i of
# SINGLE_REPLACE_SYMBOLS.
SINGLE_SYMBOLS = r'ãāǎàčēéěèȩêėīíǐìĩïıōóǒòøũūúǔùǖǘǚǜşļĻķĶḩģĢḨņŅŗŖĀǍÀĒÉĚÈĪÍǏÌŌÓǑÒŪÚǓÙǕǗǙǛý='
SINGLE_REPLACE_SYMBOLS = r'aaaaceeeeeeeiiiiiiiooooouuuuuüüüüslLkKhgGHnNrRAAAEEEEIIIIOOOOUUUUÜÜÜÜy-'

# Mapping "bad substring" -> replacement.  The replacements are applied one
# after the other in insertion order, so the order of the entries matters:
# a longer key has to precede any shorter key it contains ("C#" before "#").
#
# The single symbols are inserted first because some of them are mapped to
# "ü"/"Ü", which the umlaut rules below then have to turn into "ue"/"Ue".
# Indexing (instead of zip) fails loudly if the replacement string is too
# short.
REPLACEMENTS = {symbol: SINGLE_REPLACE_SYMBOLS[index]
                for index, symbol in enumerate(SINGLE_SYMBOLS)}
REPLACEMENTS.update({
    '\n': REPLACER,
    r"'": "",
    r'"': "",
    r"`": "",
    r"´": "",
    r"&": REPLACER + "und" + REPLACER,
    r"*": "x",
    r"(with lyrics)": "",
    r" ": REPLACER,
    r"C#": "C_sharp",
    r"c#": "c_sharp",
    r"#": REPLACER,
    r"|": "l",
    r",": REPLACER,
    r"{": "",
    r"}": "",
    r"(": "",
    r")": "",
    r"[": "",
    r"]": "",
    r"~": "-",
    r":": REPLACER,
    r"@": "-at-",
    r"?": "",
    r">": REPLACER,
    r"<": REPLACER,
    r"ä": "ae",
    r"æ": "ae",
    r"Ä": "Ae",
    r"á": "au",  # icelandic
    r"Á": "Au",
    r"ö": "oe",
    r"Ö": "Oe",
    r"ü": "ue",
    r"Ü": "Ue",
    r"ẞ": "SS",
    r"ß": "ss",
    r"ð": "dh",
    r"Ð": "Dh",
    r"þ": "th",
    r"Þ": "Th",
    r"μFSR": "myFSR",
    r"μ": "mu",
    r"$": "USD",
    r"€": "EUR",
    r".jpeg": ".jpg",
    r".JPG": ".jpg",
    r".JPEG": ".jpg",
    "\u202f": "_",  # some space
    "N°": "Nr",
    "°C": "degCelsius",
    "\\": REPLACER,
})

# Whole ranges of Unicode code points (inclusive, given as hex numbers).
for begin, end, replacement in [
        ('2000', '200F', REPLACER),  # various spaces and formatters
        ('2010', '2017', '-'),  # various dashes
        ('2018', '201F', ""),  # various apostrophes
        ('2028', '202E', ""),  # separators, format characters
        ('2032', '203A', ""),  # various apostrophes and quotation marks
        # one could add many more
]:
    # The key has to be the character itself (chr), not the text "u2000".
    REPLACEMENTS.update({chr(code_point): replacement
                         for code_point in range(int(begin, 16), int(end, 16) + 1)})
|
||
|
||
|
||
def parse_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: optional list of argument strings. If None (the default),
            argparse reads the arguments from sys.argv.

    Returns:
        The argparse.Namespace with the attributes ask, rename, log,
        loglevel, quiet and top.

    """
    parser = ArgumentParser(
        description="""Remove bad characters by renaming files.

        Which files are renamed is logged in a log file.
        (See below in --log.)""",
        formatter_class=ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "-a", "--noask", "--no-interactive",
        dest="ask",
        help=("Specify if renaming should not be confirmed by the user" +
              " but just done."),
        action="store_false"
    )
    parser.add_argument(
        "-l", "--onlylog",
        dest="rename",
        action="store_false",
        help="Dry run."
    )
    parser.add_argument(
        "--log", "--logfile",
        dest="log",
        metavar="L",
        # The default is relative to the current working directory, not to
        # the directory that is processed.
        help="""the file where to write the logging output.
        This file is overwritten if existing.
        If not specified, 'rename.log' in the parent of the current
        working directory is used.""",
        action="store",
        default=os.path.join("..", "rename.log")
    )
    parser.add_argument(
        "--loglevel", "--level",
        dest="loglevel",
        help="""Specify the log level.
        'error' includes OS errors at renaming files.
        'warning' additionally includes no-renames because of weird resulting file name.
        'info' includes no-renames because of existing files and renames and working directories.
        'debug' includes good files.""",
        default="info",
        choices=list(LOGGING_LEVELS)
    )
    parser.add_argument(
        "-q", "--quiet",
        dest="quiet",
        help=("Set log level to warning, so usually do not create a log file. "
              + "Overwrites loglevel option."),
        action="store_true"
    )
    # No option strings: this is the optional positional argument.
    parser.add_argument(
        nargs="?",
        dest="top",
        metavar="directory",
        help="""The directory in which to rename files.""",
        action="store",
        default=".")
    args = parser.parse_args(argv)
    if args.quiet:
        # --quiet wins over an explicitly given --loglevel
        args.loglevel = "warning"
    return args
|
||
|
||
|
||
def transform_filename(name: str, replacements=None, replacer=None) -> str:
    """Remove all bad symbols from name.

    Args:
        name: the file name (without directory) to clean up.
        replacements: mapping "bad substring" -> replacement; applied in
            iteration order. Defaults to the module level REPLACEMENTS.
        replacer: the filler string that is dropped next to a separator.
            Defaults to the module level REPLACER.

    Returns:
        The cleaned name. It may be empty or consist of dots only; the
        caller has to check this.

    """
    if replacements is None:
        replacements = REPLACEMENTS
    if replacer is None:
        replacer = REPLACER
    for symbol, replacement in replacements.items():
        name = name.replace(symbol, replacement)
    # Repeat until nothing changes: one pass can create a new pair of
    # separators or a new leading dash.
    while True:
        old_name = name
        # str.lstrip returns a new string, so the result has to be kept.
        name = name.lstrip("-")
        for separator in ("-", "_", "."):
            name = name.replace(replacer + separator, separator)
            name = name.replace(separator + replacer, separator)
            name = name.replace(separator + separator, separator)
        if old_name == name:
            break
    return name
|
||
|
||
|
||
def rename_file(directory, file, args):
    """Rename file directory/file if renaming is OK.

    Nothing is renamed if the name is already fine, if the new name would
    be invalid, if the new name already exists, if the user declines or if
    this is a dry run.

    Args:
        directory: the directory containing the file.
        file: the name of the file or directory inside directory.
        args: the parsed command line arguments; the attributes ask
            (confirm every rename) and rename (False means dry run) are
            read.

    Returns:
        filename after processing, i.e. the new name if the file has
        really been renamed and the unchanged name otherwise.

    """
    new_name = transform_filename(file)
    if new_name == file:
        logging.debug("'{}' is OK.".format(file))
        return file
    path = os.path.join(directory, file)
    # This check has to precede the existence check: "", "." and ".."
    # always exist inside a directory and would be reported as existing.
    if new_name in INVALID_NAMES:
        logging.warning("'{}' is not renamed because it would invalid: '{}'.".format(
            path, new_name))
        return file
    new_path = os.path.join(directory, new_name)
    if os.path.lexists(new_path):
        logging.info("'{}' is not renamed to '{}' because this already exists.".format(
            file, new_name
        ))
        return file
    if args.ask:
        answer = input("Rename '{}' to '{}'? (Enter for yes)".format(
            path, new_name
        ))
        # everything that does not start with "n" counts as yes
        if answer.lower().startswith("n"):
            logging.info("Did not rename '{}' to '{}' due to user choice.".format(
                file, new_name
            ))
            return file
    logging.info("Rename '{}' to '{}'".format(file, new_name))
    if not args.rename:  # == onlylog
        # dry run: the file keeps its name on disk
        return file
    try:
        os.rename(path, new_path)
    except OSError as error:
        # Any OS error (not found, permission denied, ...) is logged and
        # must not abort the processing of the remaining files.
        logging.error("Could not move '{}' to '{}' due to {}: {}".format(
            path, new_name, type(error).__name__, error
        ))
        return file
    return new_name
|
||
|
||
|
||
def if_ignore(filename):
    """Tell whether this filename is to be skipped.

    For a directory a true result means that its content is skipped, too.
    """
    if filename.startswith('.'):
        return True
    if filename in ('__pycache__', "telegram-nachrichten"):
        return True
    if filename.endswith('.class'):  # java class file
        return True
    # special python file
    special_endings = ('__.py', '__.html', '__.txt')
    return filename.startswith('__') and filename.endswith(special_endings)
|
||
|
||
|
||
def main():
    """Rename the files and directories below the directory given by the user.

    The tree is walked top-down, so a directory is renamed before its
    content is processed under the new name.
    """
    args = parse_args()
    logging.basicConfig(
        filename=args.log,
        # the help of --log promises that an existing file is overwritten
        filemode="w",
        level=LOGGING_LEVELS[args.loglevel]
    )
    for dirpath, dirnames, filenames in os.walk(args.top, topdown=True):
        print('.', end='')
        logging.info("Go to directory '{}'".format(os.path.abspath(dirpath)))
        # Collect the directories to descend into in a new list: changing
        # dirnames while iterating over it would skip entries.
        remaining_dirs = []
        for dirname in dirnames:
            if if_ignore(dirname):
                logging.debug("Ignore dir '{}'".format(dirname))
                continue
            new_name = rename_file(dirpath, dirname, args)
            if new_name not in remaining_dirs:
                remaining_dirs.append(new_name)
        # os.walk only respects changes done in place
        dirnames[:] = remaining_dirs
        for file in filenames:
            if if_ignore(file):
                logging.debug("Ignore file '{}'".format(file))
            else:
                rename_file(dirpath, file, args)

    # remove empty log file:
    logging.shutdown()
    if os.stat(args.log).st_size == 0:
        os.remove(args.log)


if __name__ == "__main__":
    main()
|