224 lines
7.2 KiB
Python
224 lines
7.2 KiB
Python
#! /usr/bin/python3
|
|
# -*- coding: UTF-8 -*-
"""Functions for getting data from files.

Constants:
    COMMENT_SYMBOL = "#":
        symbol to indicate a comment line
    SUBINFO_SEP = "-":
        symbol to separate subinformation, e.g. for commands

"""
|
|
|
|
import subprocess as cmd
|
|
import re
|
|
import argparse
|
|
import commands
|
|
from formfield import FormField
|
|
|
|
|
|
# Lines whose first non-blank character is this symbol are comments.
COMMENT_SYMBOL = "#"
# Separates subinformation inside an info name, e.g. for commands.
SUBINFO_SEP = "-"
# NOTE(review): the three VARIABLE_* constants are not used in this file;
# presumably they delimit variables of the form {part|part} - confirm
# against the modules that use them.
VARIABLE_BEGIN_SEP = "{"
VARIABLE_END_SEP = "}"
VARIABLE_PARTS_SEP = "|"
|
|
|
|
|
|
class ConfigError(Exception):
    """Signal a mistake in a config file.

    Indicates that some information is missing or that some line is
    malformed.

    Nothing is added to Exception; in particular the inherited __init__
    is used unchanged.

    """
|
|
|
|
|
|
class MobileRegex:
    """A little util class.

    To be used as a singleton (see LISTINFOS below).

    Check whether an info should be saved as a list, i.e. whether it may
    appear several times within one field.

    The subclasses of commands.Command are looked up on every query
    instead of being collected once into a list or a single regex; the
    original author notes that collecting them up front would amount to
    a circular import.

    """

    def __contains__(self, info):
        """Return whether this info can appear several times.

        Attributes:
            info (str): the info queried for.

        Results:
            True if info is "FieldStateOption", the class name of a direct
            subclass of commands.Command, or matches the If-clause pattern;
            False otherwise. Always a real bool, also when the method is
            called directly.

        """
        if info == "FieldStateOption":
            return True
        if any(info == scls.__name__
               for scls in commands.Command.__subclasses__()):
            return True
        # If clauses for commands.
        # The separator is escaped so that changing SUBINFO_SEP to a
        # character with a special meaning in regular expressions cannot
        # break or silently alter the pattern.
        # NOTE(review): the pattern accepts exactly ONE character before the
        # separator (e.g. "X-If..."). If longer names such as
        # "<Command>-If..." are meant, a "+" quantifier is probably missing
        # after the character class - confirm before changing.
        sep = re.escape(SUBINFO_SEP)
        return re.match(r"[^" + sep + r"]" + sep + r"If.*", info) is not None


LISTINFOS = MobileRegex()
|
|
|
|
|
|
def _readFieldLines(newfile):
    """Return the non-blank, non-comment lines describing the fields."""
    if newfile.endswith(".pdf"):
        dump = cmd.Popen(["pdftk", newfile, "dump_data_fields"],
                         stdout=cmd.PIPE,
                         universal_newlines=True)
        # second result would be stderr, which is not piped here
        raw, _ = dump.communicate()
    else:
        with open(newfile) as inputfile:
            raw = inputfile.read()
    # ignore empty lines and comment lines (for both inputs: pdftk and file)
    relevant = []
    for line in raw.splitlines():
        stripped = line.strip()
        if stripped and not stripped.startswith(COMMENT_SYMBOL):
            relevant.append(line)
    return relevant


def _addOrMergeField(fields, field):
    """Merge field into the entry of fields with equal FieldName or append."""
    for known in fields:
        # Only the lookups are guarded, so that a KeyError raised inside
        # merge() is not mislabelled as a missing 'FieldName'.
        try:
            same_name = known["FieldName"] == field["FieldName"]
        except KeyError as ke:
            raise KeyError("Some field has no 'FieldName': "
                           + str(ke)) from ke
        if same_name:
            known.merge(field)
            return
    # no field for merging found
    fields.append(field)


def listFields(fields, newfile):  # NOQA
    """Use pdftk dump_data_fields output or a file to add to a data fields list.

    Lines starting with # will be ignored (comments).
    (# must be the first non-blank symbol in the line!)

    Attribute:
        fields [FormField]: list of FormFields to add to
            -> will be changed and returned
        newfile (str): pdf file or form-file with the form (data)
            If newfile is .pdf, use pdftk dump_data_fields
            If newfile is other, read as if it was output of pdftk
            dump_data_fields

    Results:
        list of all form fields. Each field is a dictionary of information.
        Typically those are:
            FieldType
            FieldName
            FieldNameAlt
            FieldFlags
            FieldJustification
            FieldStateOption (list of several options)
        A new field whose FieldName equals the one of a field already in
        the list is merged into that field instead of being appended.

    Raises
    ------
        ValueError: if a line cannot be parsed, if the input is empty or
            does not start with ---, if two --- follow each other or if
            an information that is no list information appears twice in
            one field
        KeyError: if a field has no 'FieldName' while looking for a field
            to merge with
        some io error if file does not exist or pdftk has problems

    """
    field_desc = _readFieldLines(newfile)
    # An empty description is rejected here as well; indexing it would
    # otherwise fail with an unhelpful IndexError.
    if not field_desc or field_desc[0] != "---":
        raise ValueError("Output of pdftk dump_data_fields should start " +
                         "with ---. (Config files as well.)")
    field_desc.append("---")  # to include last FormField
    field = FormField()
    for line in field_desc[1:]:  # ignore first ---
        if line == "---":
            # A separator directly after another one leaves the field empty.
            if len(field) == 0:
                raise ValueError("Several --- following each other.")
            _addOrMergeField(fields, field)
            field = FormField()  # start new Field
            continue
        key, separator, value = line.partition(": ")
        if not separator:
            raise ValueError("The line '" + line +
                             "' cannot be parsed. " +
                             "Apparently there is no ': '.")
        value = FormField.convertUmlauts(value)
        if key in field:  # several entries
            if key not in LISTINFOS:
                raise ValueError("The information " + key +
                                 " appeared twice in one field.")
            field[key].append(value)
        elif key in LISTINFOS:
            # list infos are stored as lists from their first appearance on
            field[key] = [value]
        else:
            field[key] = value

    return fields
|
|
|
|
|
|
def writeFields(fields, formfile, onlyNonEmpty=True):
    """Write the data saved in fields to formfile.

    Every written field is preceded by a separator line (---) and is
    written as its str() representation. formfile is overwritten.

    Attributes:
        fields: the fields to write
        formfile (str): name of the file to write to
        onlyNonEmpty (Bool): True=ignore fields that only have the
            information that comes directly from the pdf. (i.e. all infos
            begin with "Field".)

    """
    with open(formfile, mode="w") as target:
        for entry in fields:
            if entry.essentiallyempty() and onlyNonEmpty:
                # skipped on request: nothing but pdf information in there
                continue
            target.write("\n---\n" + str(entry))
|
|
|
|
|
|
def parse():
    """Parse the command line arguments.

    Results:
        dictionary with the entries
            "pdffile": input pdf file
            "output": output info file ("form.form" if not given)
            "update": True if -u/--update was given, else False

    """
    parser = argparse.ArgumentParser(
        description="Go through the list of fields of a pdf form" +
        " and ask for each one for a helpful description.")
    parser.add_argument("pdffile",
                        help="The pdf file with the form fields.")
    # nargs="?" is what makes the default effective: without it argparse
    # still requires the positional argument and never uses the default.
    parser.add_argument("output", nargs="?", default="form.form",
                        help="The file that includes all the information " +
                        "collected in this script.")
    parser.add_argument("-u", "--update", help="Also ask for information " +
                        "about fields that already have information stored.",
                        action="store_true")
    return vars(parser.parse_args())
|
|
|
|
|
|
def overviewFields(fields):
    """Create a list of FieldName:goodName.

    Only fields that have a "Name" are listed, one per line, in the form
    'FieldName' : Name

    The result can be written to a file to help someone keeping the
    overview while writing the config for a form.

    """
    overview = []
    for entry in fields:
        if "Name" not in entry:
            continue
        overview.append("'" + entry["FieldName"] + "' : " + entry["Name"])
    return "\n".join(overview)
|
|
|
|
|
|
if __name__ == "__main__":
    import os

    # start
    args = parse()
    formfields = listFields([], args["pdffile"])
    try:
        # add the information stored by an earlier run
        formfields = listFields(formfields, args["output"])
    except FileNotFoundError:
        # First run for this form: the output file does not exist yet, so
        # there is no stored information to merge in.
        pass
    for fi in formfields:
        continu = fi.askUser(args["pdffile"], update=args["update"])
        # a false result of askUser stops the questioning
        if not continu:
            break
    writeFields(formfields, args["output"])
    # The overview goes next to the output file. The prefix is put in front
    # of the file name only, so an output path with a directory part works.
    out_dir, out_name = os.path.split(args["output"])
    with open(os.path.join(out_dir, "ov_" + out_name),
              mode="w") as outputfile:
        outputfile.write(overviewFields(formfields))
|
|
|
|
# next todos:
|
|
# • evince should display the right page somehow, maybe give the user
|
|
# the possibility to say "this was on page 2, assume next one is on page 2
|
|
# as well"
|