pdfformfill/formfield.py
Bela ce0fd38380 export to form file always with same order
That is helpful to get meaningful diffs when doing automatic changes.
2018-04-11 14:25:36 +02:00

383 lines
12 KiB
Python
Executable file

#! /usr/bin/python3
# -*- coding: UTF-8 -*-
"""Help deciphering the field names in pdf files.

FormFields model all information existing about fields in pdf forms.
The module includes a method to go through the list of pdf form fields
and ask the user for suitable descriptions.

The information stored for the fields is:
    * information from the pdf file
    * Name
    * Description
    * commands (as in config files)
"""
# todo: disable focus on pdf - apparently not simple :(
import subprocess as cmd
import os.path
from constants import NON_FORMFIELD, OFF, SUBINFO_SEP, ConfigError, LISTINFOS
import writetopdf
class FormField:
    """Store all information known about one pdf form field.

    The information is kept in a private dictionary and is accessed with
    the usual mapping syntax (``field[key]``, ``field[key] = value``,
    ``del field[key]``, ``key in field``).

    Typical keys are:
        * information from dump_data_fields (keys starting with "Field")
        * an informative name ("Name")
        * a long description ("Description")

    ``None`` is never stored as a value: assigning ``None`` removes the key.
    """

    # (html numeric character reference, character) pairs that
    # convertUmlauts translates.
    _UMLAUT_ENTITIES = (
        ("&#196;", "Ä"),
        ("&#214;", "Ö"),
        ("&#220;", "Ü"),
        ("&#223;", "ß"),
        ("&#228;", "ä"),
        ("&#246;", "ö"),
        ("&#252;", "ü"),
    )

    # Lower-case spellings convertToBool understands for checkboxes.
    _TRUE_WORDS = ("yes", "on", "x", "true")
    _FALSE_WORDS = ("no", "_", "off", "false")

    def convertToBool(self, possBool):
        """Convert value to True or False if this field is a checkbox.

        A field counts as a checkbox when its "FieldType" is "Button".

        Args:
            possBool: the value to interpret, usually a string.

        Returns:
            True or False if this field is a checkbox and possBool is one
            of the known spellings (case-insensitive). Otherwise possBool
            is returned unchanged: that is the case if the field is no
            checkbox, has no "FieldType", if possBool is no string, or if
            it is unclear whether True or False is meant.
        """
        try:
            is_checkbox = self["FieldType"] == "Button"
        except KeyError:
            # "FieldType" does not exist
            # maybe something more intelligent necessary
            return possBool
        if not is_checkbox:
            return possBool
        try:
            lowered = possBool.lower()
        except AttributeError:
            # possBool is no string (e.g. already a bool)
            return possBool
        if lowered in self._TRUE_WORDS:
            return True
        if lowered in self._FALSE_WORDS:
            return False
        return possBool

    @staticmethod
    def convertUmlauts(value):
        """Convert html numeric references of umlauts to the real symbols.

        Necessary to make fdfgen generate files that are read correctly
        by pdftk. Only the german umlauts and the sharp s are handled.

        Args:
            value (str): text possibly containing references like "&#228;"

        Returns:
            value with each known "&#number;" replaced by the matching
            unicode character.
        """
        for entity, uni in FormField._UMLAUT_ENTITIES:
            value = value.replace(entity, uni)
        return value

    def __init__(self):
        """Initialise a FormField with an empty dictionary of information."""
        self.__pdfinfo = {}

    @property
    def name(self):
        """Get the name of this field.

        If "Name" exists, take this.
        Otherwise if "FieldName" exists, take this.
        Otherwise if "FieldNameAlt" exists, take this.
        Otherwise take "[Error: Unnamed]".

        Returns:
            A name of the FormField usable in error messages.
        """
        for key in ("Name", "FieldName", "FieldNameAlt"):
            try:
                return self[key]
            except KeyError:
                continue  # try next one
        return "[Error: Unnamed]"

    def __getitem__(self, key):
        """Get an information about this field.

        Returns:
            the value stored for key

        Raises:
            KeyError: as usual if key is not a valid key
        """
        # Plain dict lookup so that an unknown key raises KeyError; several
        # methods of this class rely on that instead of getting None back.
        value = self.__pdfinfo[key]
        # __setitem__ never stores None, so None would be an internal error.
        assert value is not None
        return value

    def __setitem__(self, key, value):
        """Set an information about this field.

        Do: self[key] = value.
        If value is None, the key is removed (silently if it did not exist).
        """
        if value is None:
            # todo: add warning in some log file if key did not exist
            self.__pdfinfo.pop(key, None)
        else:
            self.__pdfinfo[key] = value

    def __delitem__(self, key):
        """Delete an information from this field.

        Raises:
            KeyError: as usual for dicts
        """
        del self.__pdfinfo[key]

    def __contains__(self, item):
        """Return if item is a valid info."""
        return item in self.__pdfinfo

    def expanditer(self):
        """Return an iterator that iterates over all information.

        Keys are visited in sorted order so the output is reproducible.
        For each information a tuple (name, value) is yielded. Information
        whose name is in LISTINFOS hold a list and are yielded once for
        each element of that list.
        """
        for info in sorted(self.__pdfinfo):  # sorted: always the same order
            if info in LISTINFOS:
                for oneinfo in self[info]:
                    yield (info, oneinfo)
            else:
                yield (info, self[info])

    def iterkeys(self):
        """Return what you would expect of an iteration over a dict.

        Returns:
            an iterator that iterates over all information keys.
        """
        return iter(self.__pdfinfo)

    def __iter__(self):
        """Return all info:value pairs.

        Note that, unlike a dict, iterating a FormField yields
        (info, value) tuples and not only the keys.

        Returns:
            iter(self.__pdfinfo.items()).
        """
        return iter(self.__pdfinfo.items())

    def __len__(self):
        """Return number of information(s)."""
        return len(self.__pdfinfo)

    def essentiallyempty(self):
        """Return if no information from the user is stored.

        Returns:
            True if every info name starts with "Field" (which includes
            the case that nothing is stored at all), False as soon as one
            info name does not start with "Field".
        """
        return all(info.startswith("Field") for info in self.__pdfinfo)

    def askUserOptions(self, origpdf):
        """Ask the user what the different options mean.

        Assumes that field has one or more FieldStateOptions.
        Show the user the options one after another and ask
        what they mean. Options listed in OFF are asked last and without
        showing a pdf.
        The information for a choice command is created.
        It is not checked if there exists already a Choice command.

        Args:
            origpdf (str): path to the pdf file containing this field

        Raises:
            EOFError, KeyboardInterrupt:
                if the user interrupted the input
            ValueError:
                if there is no FieldStateOption
        """
        if "FieldStateOption" not in self:
            raise ValueError("Asking for explanations for FieldStateOptions" +
                             " only makes sense if there are those options.")
        choicename = "c"
        self["Choice"] = choicename
        optionprefix = choicename + SUBINFO_SEP + "Option" + SUBINFO_SEP
        options = self["FieldStateOption"]
        print("Enter values of options:")
        # first the options that mark something: show them in the pdf
        for option in [opt for opt in options if opt not in OFF]:
            self.showPDFWithOneEntry(origpdf, option)
            userinput = input("Option " + option + ": ")
            if userinput:
                self[optionprefix + option] = userinput
        # then the "nothing marked" options: nothing to show
        for option in [opt for opt in options if opt in OFF]:
            userinput = input("None marked: ")
            if userinput:
                self[optionprefix + option] = userinput
        question = input("Type what the user is going to be asked" +
                         " when entering information for this field. " +
                         "(Empty input = Do not ask the user.) ")
        if question:
            self[choicename + SUBINFO_SEP + "Prompt"] = question
        else:
            self[choicename + SUBINFO_SEP + "Ifequal"] = "no"
            self[choicename + SUBINFO_SEP + "no" + SUBINFO_SEP + "A"] = "A"
            self[choicename + SUBINFO_SEP + "no" + SUBINFO_SEP + "B"] = "B"

    def askUserName(self):
        """Ask the user for a name and a description.

        Answers consisting only of whitespace are ignored.
        No pdf is shown displaying the field.
        """
        for key in ("Name", "Description"):
            userinput = input("Enter for this field the " + key + ": ")
            if userinput.strip():
                self[key] = userinput

    def showPDFWithOneEntry(self, origpdf, value):
        """Show the origpdf with one value printed in this field (self).

        Creates the files "field-tmp.fdf" and "<basename of origpdf>-
        tmpfilled.pdf" in the current directory and opens the latter with
        evince without waiting for the viewer to finish.

        Args:
            origpdf (str): path to the pdf file containing this field
            value: the value to print into this field

        Raises:
            ConfigError: if this field has no "FieldName"
        """
        essentialField = FormField()
        try:
            essentialField["FieldName"] = self["FieldName"]
        except KeyError as err:
            raise ConfigError("This field " + self.name + " has no FieldName." +
                              " This should not be possible.") from err
        essentialField["Value"] = value
        print(essentialField["FieldName"], ":", essentialField["Value"])
        tmpfdf = "field-tmp.fdf"
        tmppdf = os.path.basename(origpdf) + "-tmpfilled.pdf"
        writetopdf.fillpdfform([essentialField], origpdf,
                               tmppdf, tmpfdf)
        # those pdfs are pseudo-encrypted. That makes pdftk issue
        # a warning that one should respect copyright.
        # then evince can work
        # The viewer's errors are not needed; discard them instead of
        # piping them, since a pipe nobody reads could fill up and block.
        cmd.Popen(["evince", tmppdf], stderr=cmd.DEVNULL)

    def askUser(self, origpdf, update=False):
        """Ask the user for information about this form field.

        Create a pdf where only this field is filled with some data
        such that the user can easily identify which field we are
        talking about.
        Doing this a file origpdf-tmpfilled.pdf and field-tmp.fdf are created.
        It is not checked if this is a problem.

        Nothing is asked if the field already has user information and
        update is False.

        Args:
            origpdf (str): path to the pdf file that all is about
            update (bool): if fields that have an information should be queried

        Raises:
            EOFError or KeyboardInterrupt:
                if the user wants to abort this and considers her/himself done
        """
        if not (update or self.essentiallyempty()):
            return
        # no data there or should be updated
        if "FieldStateOption" in self:
            self.askUserOptions(origpdf)
        else:
            self.showPDFWithOneEntry(origpdf, "Text")
            print("Look at the field with the text 'Text'")
        self.askUserName()

    def __str__(self):
        """Return a several line long string with all info of this field.

        One "info: value" line per entry of expanditer().
        Information that are missing are not displayed.
        """
        return "\n".join(str(info) + ": " + str(value)
                         for info, value in self.expanditer())

    def merge(self, otherField):
        """Merge two fields.

        Add the information from otherField that are not in self to self.
        All information are compared first; self is only modified if no
        disagreement was found.

        Args:
            otherField (FormField): the field whose information is added

        Returns:
            self after adding stuff

        Raises:
            ValueError: if otherField and self disagree on a value.
        """
        # iterating a FormField yields (info, value) pairs
        for info, value in otherField:
            try:
                mine = self[info]
            except KeyError:
                continue  # ok, info does not exist in self
            if mine != value:
                raise ValueError("The two fields disagree on " + info
                                 + ": '" + str(mine) + "' != '"
                                 + str(value) + "'")
        for info, value in otherField:
            # info does not exist in self or is equal
            self[info] = value
        return self

    @staticmethod
    def findByFieldName(fieldList, fieldName,
                        include=(lambda x: (NON_FORMFIELD
                                            not in x))):
        """Return a FormField out of a list given the searched fieldName.

        If there are several fields with this fieldName,
        the first one is returned. This should not be the case though.
        Fields without a "FieldName" are skipped.

        Args:
            fieldList ([FormField]): list of formfields to search in
            fieldName (str): the fieldName looked for
            include (fct: FormField -> Bool):
                fct. that returns True if the argument should be looked for
                Default: only use FormFields that do not have the info
                NON_FORMFIELD.

        Raises:
            KeyError: if no such FieldName exist.
        """
        for candidate in fieldList:
            try:
                if include(candidate) and candidate["FieldName"] == fieldName:
                    return candidate
            except KeyError:
                continue  # should not happen but who knows?
        raise KeyError("No field with FieldName '" + fieldName + "' exist.")