pdfformfill/fillform.py
2018-11-02 18:14:11 +01:00

453 lines
17 KiB
Python
Executable file

#! /usr/bin/python3
# -*- coding: UTF-8 -*-
"""Fill a pdf form.

Inputs are:
    - the pdf
    - the config file for this pdf
    - the data (= config files) of other pdfs that hold useful information
    - user input
Outputs are:
    - a filled pdf
    - the config file including the created data, for use in other forms.

TODOs are:
    - reise.py should live next to the programs (fillpdf)
    - put fillpdf on the PATH so that it can be run from anywhere
    - reise.py needs the path to the TU documents
      -> should be stored in ~/.fillpdfrc
    - fix in the date of "Beginn Dienstgeschäft" (start of official business)
    - there are cases without reimbursement
      -> then e.g. leave part 15 empty
    - for a private trip: default its start to the end of the trip
    - for choice: additionally accept numbers as input option
      -> yes is always 1, no is always 2
    - instead of showing "nicht möglich" (not possible), do not display
      these options at all
    - on Backspace: ask the previous question again
    - option to ask everything again
    - Reiseziel (destination) -> remove spaces in output file names
    - Antrag (application form):
        - 18: Abschlag (advance payment): enter nothing for now
        - shorten texts (e.g. for the mileage credits) -> if possible
          at most 50 characters (better 45) per line
        - first settlement in June
    - use readline
    - (in the application form) use future tense instead of past tense
      everywhere, since it is an application
    - more sensible order of the infos: keep everything that belongs to
      one command together
    - document the new "find data file" feature
    - in findDataFile: if there are several candidates, ask the user
      (or offer asking the user as an option via a flag)
    - at the end the user should be informed that:
        - the pdf is finished
        - (after aborting) they can continue with
          fillform ...pdf --config new-file.form
        - (after completion) they can continue with the next form
          (which form is the canonical next one could be stored in the
          config)
    - fdfgen is required. If one follows the help and runs
          wget https://github.com/ccnmtl/fdfgen/blob/master/fdfgen/__init__.py
      one gets an HTML file. Could the fdfgen repo perhaps be set up as a
      submodule and then be used?
      Or add more complete instructions along the lines of
          git clone https://github.com/ccnmtl/fdfgen.git
          cp fdfgen/fdfgen/__init__.py fdfgen.py
          rm --recursive --force fdfgen
      to the README
    - pdftk is required. Handle it sensibly when it is not available.
      Provide installation instructions, especially since it is not in
      the standard repositories of Ubuntu 18.10.
      Instructions for Ubuntu 18.10 at
      https://askubuntu.com/questions/1028522/how-can-i-install-pdftk-in-ubuntu-18-04-bionic
    - better error message when a data form file was not found, with a
      hint that it has to be completed first.
      (Would be a source of errors, see the doc on Specifier.)
    - .form is hard-coded in several places as the extension of the
      config files; it should live in a constant
"""
import os.path
import argparse
import itertools
from subprocess import CalledProcessError as ExternalError
import commands
import formfield
import readformdata
from constants import (NON_FORMFIELD, CONFIGFIELD, OUTPUTFIELD,
SPECIFIERFIELD,
FORMOUTPUTFIELD, SUBINFO_SEP, ConfigError,
GENERALADVICE, EPILOG)
import writetopdf
def parse():
    """Create the argument parser (including the help) and parse the arguments.

    Options/ Arguments:
        pdffile
        config
        other data ("data")
        outputpdf
        form (output form file)
        fdf tmp file ("fdf")

    Returns:
        dict: the parsed arguments (``vars`` of the argparse namespace).
        ``config`` is filled with ``<pdffile without extension>.form`` when
        not given; ``data`` is always a list (empty when not given).
        ``outputpdf`` and ``form`` may still be None.

    Raises:
        argparse.ArgumentTypeError: if the pdf, the config file or one of
            the data files does not exist, if the pdf does not have a
            ``.pdf`` extension, or if the user aborts the overwrite prompt
            with Ctrl+C. The help is printed before raising.
    """
    parser = argparse.ArgumentParser(
        description="Fill out a pdf.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=EPILOG)
    parser.add_argument("pdffile",
                        help="The pdf file with the form.")
    parser.add_argument("--outputpdf", default=None,
                        # None = file.pdf -> file-filled.pdf
                        # todo: more sophisticated with name of person/ date
                        help="The output file with the filled form. "
                             "(Default: pdffile[without .pdf]-filled.pdf or"
                             " as configured in the config file.)")
    parser.add_argument("--config", default=None,
                        # None = file.pdf -> file.form
                        # (closing parenthesis was missing in the help text)
                        help="The config file used to know how to fill the "
                             "pdf. (Default: pdffile[without .pdf].form)")
    parser.add_argument("--form", default=None,
                        # None = file.pdf -> file-filled.form
                        help="The file where all the inputed data is saved"
                             " to. (Default: pdffile[without .pdf]-filled.form"
                             " or as configured in the config file.)"
                             "\nThis can be used in other forms and to finish"
                             " the input later.")
    parser.add_argument("--data", default=None, nargs=2, action='append',
                        metavar=("REF", "DATAFILE"),
                        help="Data from other forms can be supplied via"
                             " this option. "
                             "REF is the reference used in the config"
                             " file, the DATAFILE is the .form-file.")
    parser.add_argument("--fdf", default="tmp.fdf",
                        help="To fill the pdf a temporary fdf is created. "
                             "Specify with FDF the file name for this. "
                             "(Default: tmp.fdf).")
    args = vars(parser.parse_args())

    def reject(message):
        """Print the help and signal an invalid argument to the caller."""
        parser.print_help()
        raise argparse.ArgumentTypeError(message)

    if not os.path.isfile(args["pdffile"]):
        reject(args["pdffile"] + " is not a valid file.")
    # Compare case-insensitively so that "form.PDF" is accepted as well.
    if not args["pdffile"].lower().endswith(".pdf"):
        reject(args["pdffile"] + " is probably not a valid pdf file.")

    if args["config"] is None:
        args["config"] = os.path.splitext(args["pdffile"])[0] + ".form"
    if not os.path.isfile(args["config"]):
        reject(args["config"] + " is not a valid file.")

    # The defaults for "form" and "outputpdf" are intentionally not filled
    # in here: None lets getOutput/getFormoutput fall back to the config
    # file or to a name derived from the pdf.
    for arg in [args["outputpdf"], args["form"]]:
        if arg is not None and os.path.isfile(arg):
            try:
                input(arg + " is an existing file. For aborting " +
                      "hit Ctrl+C. For overwriting hit Enter.")
            except KeyboardInterrupt:
                reject(arg + " is already an " + "existing file.")

    if args["data"] is None:
        args["data"] = []
    for datafile in args["data"]:
        # datafile is a [REF, DATAFILE] pair (nargs=2)
        if not os.path.isfile(datafile[1]):
            reject(datafile[1] + " is not a valid file.")
    return args
def readDataPaths(configs, maindir="."):
    """Read the data files requested by the main config field.

    Configs is a list of FormFields.
    In the configs there can be a non-form field with the field name
    CONFIGFIELD. Its info "Data" lists the data specifiers, and for every
    specifier the info ``<specifier> + SUBINFO_SEP + "Path"`` gives the
    path where the data is to be found.
    If this path is a file, that file is read for this data.
    If this path is a directory, it is searched (see findDataFile) for a
    form file whose SPECIFIERFIELD value equals the specifier.

    Paths are resolved relative to maindir first. If nothing exists there,
    the path is tried as given (i.e. relative to the current working
    directory) to stay compatible with the former behaviour.

    Attributes:
        configs ([FormField]):
            form fields that include the mainconfig field with the
            information about the necessary data
        maindir (str):
            directory of the form file. Used for searching for
            data files relative to maindir.

    Returns:
        a dict with {dataspecifier: formfieldlist},
        to be merged with parsing result.
        If the path is neither a file nor a directory, None is saved.
        An empty dict is returned if there is no main config field or it
        has no "Data" info.

    Raises:
        ConfigError: if a specifier has no path info, or (from
            findDataFile) if a directory contains no suitable file.
    """
    try:
        mainconfig = formfield.FormField.findByFieldName(
            configs,
            CONFIGFIELD,
            include=lambda f: NON_FORMFIELD in f)
        specs = mainconfig["Data"]  # specifier
    except KeyError:
        # no main config or no data requested, OK
        return {}

    datalists = {}
    for spec in specs:
        try:
            path = mainconfig[spec + SUBINFO_SEP + "Path"]
        except KeyError as e:
            raise ConfigError(
                "A data file was specified but a path is missing: " +
                str(e)) from e

        # Resolve against the directory of the form file; os.path.join
        # leaves absolute paths untouched.
        resolved = os.path.join(maindir, path)
        if not os.path.exists(resolved):
            # legacy behaviour: path relative to the working directory
            resolved = path

        if os.path.isfile(resolved):
            # no try/except on purpose: read errors should be shown
            # maybe show errors but include suggestion to
            # rename them .form-broken and specify
            # correct file via command-line
            datalists[spec] = readformdata.listFields([], resolved)
        elif os.path.isdir(resolved):
            datalists[spec] = findDataFile(resolved, spec)
        else:
            print("debug:", path, "is not a valid file or directory.",
                  "The path for ", spec, " must be specified in a",
                  "different way.")
            datalists[spec] = None
    return datalists
def findDataFile(directory, specifier):
    """Walk a directory tree and return the first matching form file.

    Only files with the extension .form are considered. A file matches
    if its non-form field SPECIFIERFIELD has the value specifier.
    Directories whose path contains ".git" are skipped. Files that cannot
    be read (UnicodeError) or understood (ConfigError) only produce a
    warning and are skipped.

    Attributes:
        directory (str): directory in which to be searched.
        specifier (str): value the SPECIFIERFIELD of the file must have.

    Returns:
        the list of fields read from the first matching file.

    Raises:
        ConfigError: if no suitable file is found.
    """
    for current_dir, _subdirs, filenames in os.walk(directory):
        print("debug: directory:", directory)
        if ".git" in current_dir:
            continue
        print("debug: dirpath:", current_dir)
        print("debug: files:", filenames)
        for filename in filenames:
            extension = os.path.splitext(filename)[1]
            if extension != ".form":
                continue
            # current_dir already starts with `directory`, so the joined
            # path is usable from the current working directory
            candidate = os.path.join(current_dir, filename)
            print("debug:", candidate)
            try:
                fieldlist = readformdata.listFields([], candidate)
            except UnicodeError as e:
                print("Warning: An error occured while",
                      "attempting to read",
                      candidate + ":",
                      str(e))
                # todo: proper error handling
                continue
            except ConfigError as e:
                print("Warning: An error occured while",
                      "attempting to understand",
                      candidate + ":",
                      str(e))
                continue
            found = getConfigvalue(SPECIFIERFIELD, fieldlist, None, None)
            if found == specifier:
                return fieldlist
            print("debug:", filename, "has no or wrong specifier.")
    # nothing matched in the whole tree
    print("debug: no file for", specifier, "found.")
    raise ConfigError("I looked for a data file for the specifier " +
                      specifier + " in the directory " + directory +
                      " but could not find one.")
def combineDataFiles(configs, configfile, dataarguments):
    """Combine all data.

    Combine the data files specified on the command line
    and the ones specified in the config file. A file given on the
    command line replaces the one found via the config file for the same
    specifier. Specifiers given on the command line that the config file
    does not ask for are ignored with a warning.

    Attributes:
        configs ([FormField]): config file content
        configfile (str): config file path
        dataarguments ([[str]]):
            data files specified on the command line, as
            [specifier, file] pairs

    Returns:
        a dict with {dataspecifier: formfieldlist}.

    Raises:
        Various Errors: if reading data files did not work
        ConfigError: if no data file was found for one of the specifiers
            or a file given on the command line does not exist
    """
    # Use the parameter, not a module-level variable: the function must
    # also work when it is not called from the script's main block.
    dataLists = readDataPaths(configs, os.path.dirname(configfile))
    print("Debug: datalists automagically:", dataLists)
    for specFilePair in dataarguments:
        spec, datafile = specFilePair[0], specFilePair[1]
        if spec not in dataLists:
            # data includes all necessary specs
            # if the specified one is not there, the information is useless
            print("Warning: you specified a file for the data",
                  spec, "but this specifier is not",
                  "necessary.", "Did you misspell it?")
            continue  # next pair
        try:
            # overwrite file found automatically
            dataLists[spec] = readformdata.listFields([], datafile)
        except FileNotFoundError as e:
            # Build ONE message string (the former code passed two
            # positional arguments to ConfigError by accident).
            message = ("A data file specified " +
                       "on the command line for specifier " +
                       spec +
                       " does not exist: " + str(e))
            if dataLists[spec] is not None:
                # not None = a file was found via the config file
                message += (" But you could use the one specified" +
                            " in the form file.")
            raise ConfigError(message) from e
    for spec in dataLists:
        if dataLists[spec] is None:
            # None = neither the config file nor the command line gave a file
            raise ConfigError("For the specifier " + spec + " no data file" +
                              " was found.")
    return dataLists
def getConfigvalue(fieldname, fields, argument, default):
    """Determine a configuration value from three sources.

    Precedence:
        1. argument, if it is not None (value from the command line);
        2. the "Value" of the non-form field named fieldname in fields;
        3. default, if there is no such field or it has no "Value".
    """
    if argument is not None:
        return argument

    def is_non_formfield(candidate):
        return NON_FORMFIELD in candidate

    # A KeyError from the lookup (no such field) and a KeyError from the
    # subscript (field without value) both mean: fall back to default.
    try:
        return formfield.FormField.findByFieldName(
            fields, fieldname, include=is_non_formfield)["Value"]
    except KeyError:
        return default
def getOutput(fields, args):
    """Determine the file name of the output pdf.

    Uses args["outputpdf"] if it is not None, otherwise the value of the
    non-form field OUTPUTFIELD in fields, otherwise the name of the input
    pdf with its extension replaced by "-filled.pdf".
    """
    stem, _extension = os.path.splitext(args["pdffile"])
    fallback = stem + "-filled.pdf"
    return getConfigvalue(fieldname=OUTPUTFIELD,
                          fields=fields,
                          argument=args["outputpdf"],
                          default=fallback)
def getFormoutput(fields, args):
    """Determine the file name of the output form file.

    Uses args["form"] if it is not None, otherwise the value of the
    non-form field FORMOUTPUTFIELD in fields, otherwise the name of the
    input pdf with its extension replaced by "-filled.form".
    """
    stem, _extension = os.path.splitext(args["pdffile"])
    fallback = stem + "-filled.form"
    return getConfigvalue(fieldname=FORMOUTPUTFIELD,
                          fields=fields,
                          argument=args["form"],
                          default=fallback)
if __name__ == "__main__":
    # Script entry point: parse the command line, ask the user for the
    # missing values, write the filled pdf and save the entered data.
    try:
        arguments = parse()
    except argparse.ArgumentTypeError as e:
        print("Error while parsing the command line arguments:\n", e)
    else:
        config = readformdata.listFields([], arguments["config"])
        # merge the [REF, DATAFILE] pairs from the command line with the
        # data files named in the config file
        data = combineDataFiles(config,
                                arguments["config"],
                                arguments["data"])
        print("Debug:", data)
        # Extract the commands of every field up front (list, not a
        # generator), then flatten them into one iterable.
        per_field = [commands.Command.extractCommands(field, config, data)
                     for field in config]
        comms = itertools.chain.from_iterable(per_field)
        print(GENERALADVICE)
        for command in sorted(comms):
            try:
                command()
            except KeyboardInterrupt:
                # the user aborted: stop asking, but still save below
                break
        # The pdf would only be needed once all commands are done;
        # for testing it is always created.
        try:
            writetopdf.fillpdf(config,
                               arguments["pdffile"],
                               getOutput(config, arguments),
                               arguments["fdf"])
        except ExternalError as e:
            print(e)
            if e.output is not None:
                print(e.output)
        readformdata.writeFields(config, getFormoutput(config, arguments))