177 lines
6.2 KiB
Python
Executable file
177 lines
6.2 KiB
Python
Executable file
#! /usr/bin/python3
# -*- coding: UTF-8 -*-
"""Functions for getting data from files.

If started as the main file, ask the user what the fields in
the pdf entered on the command line are.

"""
|
|
|
|
import subprocess as cmd
|
|
import argparse
|
|
from formfield import FormField
|
|
from constants import ConfigError, LISTINFOS, COMMENT_SYMBOL
|
|
|
|
|
|
def listFields(fields, newfile):  # NOQA
    """Add the form fields described by newfile to the list fields.

    The description is either the output of pdftk dump_data_fields_utf8
    (if newfile is a pdf) or the content of a form-file in the same
    format. Blank lines and lines whose first non-blank symbol is
    COMMENT_SYMBOL are ignored.

    Attributes:
        fields [FormField]: list of FormFields to add to
            -> will be changed and returned
        newfile (str): pdf file or form-file with the form (data)
            If newfile ends with .pdf, use pdftk dump_data_fields_utf8.
            Otherwise read it as if it was output of pdftk
            dump_data_fields.

    Results:
        list of all form fields. Each field holds "key: value"
        information. Typically those are:
            FieldType
            FieldName
            FieldNameAlt
            FieldFlags
            FieldJustification
            FieldStateOption (list of several options)
        A description without any (non-comment) line adds nothing.

    Raises:
        ValueError: if two --- separators follow each other directly
        ConfigError: if the description does not start with ---, a line
            contains no ': ', or a key that is not in LISTINFOS appears
            twice in one field
        KeyError: if a field that has to be compared has no 'FieldName'
        subprocess.CalledProcessError: if pdftk exits with an error
        OSError: if newfile cannot be opened or pdftk cannot be started

    """
    field_desc = _readFieldLines(newfile)
    if not field_desc:
        # e.g. a pdf without form fields or an empty form-file
        return fields
    if field_desc[0] != "---":
        raise ConfigError("Output of pdftk dump_data_fields should start " +
                          "with ---. (Config files as well.)")
    field = FormField()
    for line in field_desc[1:]:  # ignore first ---
        if line == "---":
            if len(field) == 0:
                raise ValueError("Several --- following each other.")
            _mergeOrAppend(fields, field)
            field = FormField()  # start new Field
            continue
        key, separator, value = line.partition(": ")
        if not separator:
            raise ConfigError("The line '" + line +
                              "' cannot be parsed. " +
                              "Apparently there is no ': '.")
        if key in LISTINFOS:
            # keys in LISTINFOS may appear several times: collect a list
            if key in field:
                field[key].append(value)
            else:
                field[key] = [value]
        elif key in field:
            raise ConfigError("The information " + key +
                              " appeared twice in one field.")
        else:
            field[key] = value
    if len(field) > 0:
        # the last field is not followed by a --- separator
        _mergeOrAppend(fields, field)
    return fields


def _readFieldLines(newfile):
    """Return the lines of the field description without blanks/comments."""
    if newfile.endswith(".pdf"):
        # check_output raises CalledProcessError if pdftk exits non-zero,
        # so a failing pdftk is not mistaken for a pdf without fields
        field_desc = cmd.check_output(
            ["pdftk", newfile, "dump_data_fields_utf8"],
            universal_newlines=True)
    else:
        with open(newfile) as inputfile:
            field_desc = inputfile.read()
    return [line for line in field_desc.splitlines()
            if line.strip() != "" and not
            line.strip().startswith(COMMENT_SYMBOL)]


def _mergeOrAppend(fields, field):
    """Merge field into the entry with the same FieldName or append it."""
    for known in fields:
        try:
            if known["FieldName"] == field["FieldName"]:
                known.merge(field)
                break
        except KeyError as ke:
            raise KeyError("Some field has no 'FieldName': "
                           + str(ke)) from ke
    else:
        # no field for merging found
        fields.append(field)
|
|
|
|
|
|
def writeFields(fields, formfile, onlyNonEmpty=True):
    """Store the given fields in formfile, each one preceded by a --- line.

    The file is opened for writing, so existing content is replaced.

    Attributes:
        fields: iterable of fields; each one is written as str(field)
        formfile (str): path of the file to write
        onlyNonEmpty (Bool): True=leave out every field whose
            essentiallyempty() is true (presumably fields that only
            carry the information coming directly from the pdf).

    """
    with open(formfile, mode="w") as sink:
        for entry in fields:
            skip = entry.essentiallyempty() and onlyNonEmpty
            if skip:
                continue
            sink.write("\n---\n")
            sink.write(str(entry))
|
|
|
|
|
|
def parse(argv=None):
    """Parse the command line arguments.

    Attributes:
        argv ([str]): the arguments to parse; None (default) means the
            arguments of the running program (sys.argv[1:])

    Results:
        dictionary with the entries
            "pdffile": input pdf file
            "output": output info file ("form.form" if not given)
            "update": True if -u/--update was given, else False

    """
    parser = argparse.ArgumentParser(
        description="Go through the list of fields of a pdf form" +
        " and ask for each one for a helpful description.")
    parser.add_argument("pdffile",
                        help="The pdf file with the form fields.")
    # nargs="?" makes the positional optional; without it argparse
    # requires the argument and the default is never used
    parser.add_argument("output", nargs="?", default="form.form",
                        help="The file that includes all the information " +
                        "collected in this script.\n" +
                        "If it exists already, add to the information saved.")
    parser.add_argument("-u", "--update", help="Also ask for information " +
                        "about fields that already have information stored.",
                        action="store_true")
    return vars(parser.parse_args(argv))
|
|
|
|
|
|
def overviewFields(fields):
    """Build a text overview with one "'FieldName' : Name" line per field.

    Fields without a "Name" entry are left out. The result can be
    written to a file to help someone keep the overview while writing
    the config for a form.

    """
    overview = []
    for entry in fields:
        if "Name" not in entry:
            continue
        overview.append("'" + entry["FieldName"] + "' : " + entry["Name"])
    return "\n".join(overview)
|
|
|
|
|
|
if __name__ == "__main__":
    import os.path

    # start
    args = parse()
    formfields = listFields([], args["pdffile"])
    try:
        # add the information already saved in an existing output file
        formfields = listFields(formfields, args["output"])
    except FileNotFoundError:
        # no output file yet: nothing to be added
        pass
    for fi in formfields:
        try:
            fi.askUser(args["pdffile"], update=args["update"])
        except (KeyboardInterrupt, EOFError):
            # the user aborted: stop asking, but still save what we have
            break
    writeFields(formfields, args["output"])
    # Put the "ov_" prefix in front of the file name only, so that an
    # output path with a directory part ("dir/x.form") still works.
    out_dir, out_name = os.path.split(args["output"])
    overview_path = os.path.join(out_dir, "ov_" + out_name)
    with open(overview_path, mode="w") as outputfile:
        outputfile.write(overviewFields(formfields))
|
|
|
|
# Next TODOs:
# • evince should display the right page somehow; maybe give the user
#   the possibility to say "this was on page 2, assume the next one is
#   on page 2 as well".
|