ce0fd38380
That is helpful to get meaningful diffs when doing automatic changes.
383 lines
12 KiB
Python
Executable file
383 lines
12 KiB
Python
Executable file
#! /usr/bin/python3
|
|
# -*- coding: UTF-8 -*-
|
|
|
|
"""Help decrypting the field names in pdf files.
|
|
|
|
FormFields model all information existing about fields in pdf forms.
|
|
|
|
Include a method to go through the list of pdf form fields and ask the
|
|
user for suitable descriptions.
|
|
|
|
The information stored for each field is:
|
|
information from the pdf file
|
|
Name
|
|
Description
|
|
commands (as in config files)
|
|
"""
|
|
|
|
# todo: disable focus on pdf - apparently not simple :(
|
|
|
|
import subprocess as cmd
|
|
import os.path
|
|
from constants import NON_FORMFIELD, OFF, SUBINFO_SEP, ConfigError, LISTINFOS
|
|
import writetopdf
|
|
|
|
|
|
class FormField:
    """Store all information for one field of a pdf form.

    The individual pieces of information are accessed like dictionary
    entries, i.e. via ``field[key]`` (``__getitem__``).

    Those are:
        information from dump_data_fields (keys starting with "Field")
        informative name (["Name"])
        long description (["Description"])

    A value is never None: assigning None to a key removes that key.
    """

    # Characters for which the numeric html entity ("&#<code point>;") is
    # translated back by convertUmlauts.  The entities are derived from the
    # characters so that they cannot be garbled by tools decoding html.
    _UMLAUTS = "ÄÖÜßäöü"

    def convertToBool(self, possBool):
        """Convert value to True or False if this field is a checkbox.

        Args:
            possBool: the value to interpret, usually a string.

        Returns:
            True or False if this field has the "FieldType" "Button" and
            possBool is (ignoring case) one of the known spellings of
            yes or no.
            possBool itself if this field is no button, if it has no
            "FieldType", if possBool is not a string or if it is unclear
            whether True or False is meant.

        """
        try:
            if self["FieldType"] != "Button":
                return possBool
            lowered = possBool.lower()
        except KeyError:
            # "FieldType" does not exist
            # maybe something more intelligent necessary
            return possBool
        except AttributeError:
            # possBool has no lower(), so it is no string
            return possBool
        if lowered in ("yes", "on", "x", "true"):
            return True
        if lowered in ("no", "_", "off", "false"):
            return False
        return possBool

    @staticmethod
    def convertUmlauts(value):
        """Convert all html-syntax umlauts to the correct symbols.

        Necessary to make fdfgen generate files that are read correctly
        by pdftk.

        Only the numeric entities of the German umlauts and of the
        sharp s are handled; everything else is left untouched.

        Args:
            value (str): text possibly containing "&#number;" entities

        Returns:
            value, &#number; replaced by the correct unicode character

        """
        for uni in FormField._UMLAUTS:
            # e.g. "&#228;" for "ä"
            entity = "&#" + str(ord(uni)) + ";"
            value = value.replace(entity, uni)
        return value

    def __init__(self):
        """Initialise a FormField without any information.

        All information is kept in one (initially empty) dictionary.

        """
        self.__pdfinfo = {}

    @property
    def name(self):
        """Get the name of this field.

        If "Name" exists, take this.
        Otherwise if "FieldName" exists, take this.
        Otherwise if "FieldNameAlt" exists, take this.
        Otherwise take "[Error: Unnamed]".

        Returns:
            A name of the FormField usable in error messages.

        """
        for key in ("Name", "FieldName", "FieldNameAlt"):
            try:
                return self[key]
            except KeyError:
                continue  # try next one
        return "[Error: Unnamed]"

    def __getitem__(self, key):
        """Get an information about this field.

        Returns:
            self[key]

        Raises:
            KeyError: as usual if key is not a valid key

        """
        value = self.__pdfinfo[key]  # KeyError for unknown keys
        # invariant: __setitem__ never stores None
        assert value is not None
        return value

    def __setitem__(self, key, value):
        """Set an information about this field.

        Do: self[key] = value.

        If value is None, the key is removed. Removing a key that does
        not exist is silently ignored.

        """
        if value is None:
            # todo: add warning in some log file if key did not exist
            self.__pdfinfo.pop(key, None)
        else:
            self.__pdfinfo[key] = value

    def __delitem__(self, key):
        """Delete an information from this field.

        Do: del self.__pdfinfo[key]

        Raises:
            KeyError: as usual for dicts

        """
        del self.__pdfinfo[key]

    def __contains__(self, item):
        """Return if item is a valid info."""
        return item in self.__pdfinfo

    def expanditer(self):
        """Return an iterator that iterates over all information.

        For each information the name and the value are yielded as a
        tuple. Information whose name is in LISTINFOS is expanded:
        one tuple is yielded for each element of its value.

        The information names are visited in sorted order.

        """
        for info in sorted(self.__pdfinfo):  # sorted: always the same order
            value = self[info]
            if info in LISTINFOS:
                for oneinfo in value:
                    yield (info, oneinfo)
            else:
                yield (info, value)

    def iterkeys(self):
        """Return what you would expect of an iteration over a dict.

        Returns:
            an iterator that iterates over all information keys.

        """
        return iter(self.__pdfinfo)

    def __iter__(self):
        """Return all info:value pairs.

        Note that, unlike for a dict, iterating yields (info, value)
        tuples and not only the keys.

        Returns:
            iter(self.__pdfinfo.items()).

        """
        return iter(self.__pdfinfo.items())

    def __len__(self):
        """Return number of information(s)."""
        return len(self.__pdfinfo)

    def essentiallyempty(self):
        """Return if no information from the user is stored.

        Returns:
            True if every info name starts with "Field" (this includes
            the case that there is no info at all), False otherwise.

        """
        return all(info.startswith("Field") for info in self.__pdfinfo)

    def askUserOptions(self, origpdf):
        """Ask the user what the different options mean.

        Assumes that field has one or more FieldStateOptions.
        Show the user the options one after another and ask
        what they mean. For the options contained in OFF no pdf is
        shown; they are asked last.

        The information for a choice command is created.
        It is not checked if there exists already a Choice command.

        Args:
            origpdf (str): path to the pdf file that all is about

        Raises:
            EOFError, KeyboardInterrupt:
                if the user interrupted the input
            ValueError:
                if there is no FieldStateOption

        """
        if "FieldStateOption" not in self:
            raise ValueError("Asking for explanations for FieldStateOptions" +
                             " only makes sense if there are those options.")

        choicename = "c"
        self["Choice"] = choicename
        optionprefix = choicename + SUBINFO_SEP + "Option" + SUBINFO_SEP
        options = list(self["FieldStateOption"])

        print("Enter values of options:")
        for option in options:
            if option in OFF:
                continue
            self.showPDFWithOneEntry(origpdf, option)
            userinput = input("Option " + option + ": ")
            if userinput:
                self[optionprefix + option] = userinput
        for option in options:
            if option not in OFF:
                continue
            userinput = input("None marked: ")
            if userinput:
                self[optionprefix + option] = userinput

        question = input("Type what the user is going to be asked" +
                         " when entering information for this field. " +
                         "(Empty input = Do not ask the user.) ")
        if question:
            self[choicename + SUBINFO_SEP + "Prompt"] = question
        else:
            # a comparison of two different constants instead of a prompt
            self[choicename + SUBINFO_SEP + "Ifequal"] = "no"
            self[choicename + SUBINFO_SEP + "no" + SUBINFO_SEP + "A"] = "A"
            self[choicename + SUBINFO_SEP + "no" + SUBINFO_SEP + "B"] = "B"

    def askUserName(self):
        """Ask the user for a name and a description.

        No pdf is shown displaying the field.
        An input consisting only of whitespace leaves the respective
        information untouched.

        Raises:
            EOFError, KeyboardInterrupt:
                if the user interrupted the input

        """
        for key in ("Name", "Description"):
            userinput = input("Enter for this field the " + key + ": ")
            if userinput.strip():
                self[key] = userinput

    def showPDFWithOneEntry(self, origpdf, value):
        """Show the origpdf with one value printed in this field (self).

        A FormField containing only the FieldName of self and value
        as "Value" is handed to writetopdf.fillpdfform together with the
        file names "field-tmp.fdf" and
        "<basename of origpdf>-tmpfilled.pdf" (both relative to the
        current directory). Then evince is started on the pdf without
        waiting for it to terminate.

        Args:
            origpdf (str): path to the pdf file that all is about
            value: what is to be entered into this field

        Raises:
            ConfigError: if this field has no "FieldName"

        """
        essentialField = FormField()
        try:
            essentialField["FieldName"] = self["FieldName"]
        except KeyError as err:
            raise ConfigError("This field " + self.name + " has no FieldName." +
                              " This should not be possible.") from err
        essentialField["Value"] = value
        print(essentialField["FieldName"], ":", essentialField["Value"])
        tmpfdf = "field-tmp.fdf"
        tmppdf = os.path.basename(origpdf) + "-tmpfilled.pdf"
        writetopdf.fillpdfform([essentialField], origpdf,
                               tmppdf, tmpfdf)
        # those pdfs are pseudo-encrypted. That makes pdftk issue
        # a warning that one should respect copyright.
        # then evince can work
        # The errors of evince are not needed. They are discarded instead
        # of piped, since a pipe nobody reads from can fill up and block
        # the viewer.
        cmd.Popen(["evince", tmppdf], stderr=cmd.DEVNULL)

    def askUser(self, origpdf, update=False):
        """Ask the user for information about this form field.

        Create a pdf where only this field is filled with some data
        such that the user can easily identify which field we are
        talking about.
        Doing this a file origpdf-tmpfilled.pdf and field-tmp.fdf are created.
        It is not checked if this is a problem.

        Nothing happens if this field already has information from the
        user (see essentiallyempty) and update is False.

        Args:
            origpdf (str): path to the pdf file that all is about
            update (bool): if fields that have an information should be queried

        Raises:
            EOFError or KeyboardInterrupt:
                if the user wants to abort this and considers her/himself done

        """
        if not (update or self.essentiallyempty()):
            return
        # no data there or should be updated
        if "FieldStateOption" in self:
            self.askUserOptions(origpdf)
        else:
            self.showPDFWithOneEntry(origpdf, "Text")
            print("Look at the field with the text 'Text'")
        self.askUserName()

    def __str__(self):
        """Return a several line long string with all info of this field.

        One line "info: value" per entry of expanditer().
        Information that are missing are not displayed.

        """
        return "\n".join(str(info) + ": " + str(value)
                         for info, value in self.expanditer())

    def merge(self, otherField):
        """Merge two fields.

        Add the information from otherField that are not in self to self.

        Args:
            otherField (FormField): the field whose information is added

        Returns:
            self after adding stuff

        Raises:
            ValueError: if otherField and self disagree on a value.
                All information is compared before anything is added,
                so self is unchanged in this case.

        """
        for info, value in otherField:
            # otherField.__iter__() is called
            try:
                ownvalue = self[info]
            except KeyError:
                continue  # ok, info does not exist in self
            if ownvalue != value:
                raise ValueError("The two fields disagree on " + info
                                 + ": '" + str(ownvalue) + "' != '"
                                 + str(value) + "'")
            # else: nothing to complain about, info exists already

        for info, value in otherField:
            # info does not exist in self or is equal
            self[info] = value
        return self

    @staticmethod
    def findByFieldName(fieldList, fieldName,
                        include=(lambda x: (NON_FORMFIELD
                                            not in x))):
        """Return a FormField out of a list given the searched fieldName.

        If there are several fields with this fieldName,
        the first one is returned. This should not be the case though.

        Args:
            fieldList ([FormField]): list of formfields to search in
            fieldName (str): the fieldName looked for
            include (fct: FormField -> Bool):
                fct. that returns True if the argument should be looked for
                Default: only use FormFields that do not have the info
                NON_FORMFIELD.

        Raises:
            KeyError: if no such FieldName exist.

        """
        for f in fieldList:
            try:
                if include(f) and f["FieldName"] == fieldName:
                    return f
            except KeyError:
                continue  # field without FieldName: should not happen
        raise KeyError("No field with FieldName '" + fieldName + "' exist.")
|