334 lines
11 KiB
Python
Executable file
334 lines
11 KiB
Python
Executable file
#! /usr/bin/python3
# -*- coding: UTF-8 -*-

"""Help deciphering the field names in pdf files.

A FormField models all the information that exists about one field of a
pdf form.

It includes a method to go through the pdf form fields and ask the
user for suitable descriptions.

The information stored for each field is:
    information from the pdf file
    Name
    Description
    commands (as in config files)
"""
|
|
|
|
# TODO: the button variants are "Off" and "On"; this should be configurable
#       or be adjusted automatically
# TODO: disable focus on pdf - apparently not simple :(
|
|
|
|
import subprocess as cmd
|
|
import os.path
|
|
import fdfgen
|
|
|
|
|
|
class FormField():
|
|
"""
|
|
Store all information for one field.
|
|
|
|
They can be accessed via [...]
|
|
(via __getitem__).
|
|
|
|
Those are:
|
|
information from dump_data_fields ("Field...")
|
|
informative name (["Name"])
|
|
long description (["Description"])
|
|
standard value (["Stdvalue"]) todo: how to implement that?
|
|
something about a special treatment (["Special"])
|
|
|
|
"""
|
|
|
|
def convertToBool(self, possBool):
|
|
"""Convert value to True or or False if this field is a Checkbox.
|
|
|
|
Returns:
|
|
possBool if this is no Checkbox or it is unclear of True or False is
|
|
meant. Otherwise True or False
|
|
|
|
"""
|
|
try:
|
|
if self["FieldType"] == "Button":
|
|
if possBool.lower() in ["yes", "on", "x", "true"]:
|
|
return True
|
|
elif possBool.lower() in ["no", "_", "off", "false"]:
|
|
return False
|
|
else:
|
|
return possBool
|
|
else:
|
|
return possBool
|
|
except AttributeError:
|
|
# lower probably did not work:
|
|
return possBool
|
|
except KeyError:
|
|
# "FieldType" does not exist
|
|
# maybe something more intelligent necessary
|
|
return possBool
|
|
|
|
@staticmethod
|
|
def convertUmlauts(value):
|
|
"""Convert all html-Syntax-Umlauts to the correct symbols.
|
|
|
|
Necessary to make fdfgen generate files that are read correctly
|
|
by pdftk.
|
|
|
|
Returns:
|
|
value, &#number replaced by the correct unicode
|
|
|
|
"""
|
|
for entity, uni in [("Ä", "Ä"),
|
|
("Ö", "Ö"),
|
|
("Ü", "Ü"),
|
|
("ß", "ß"),
|
|
("ä", "ä"),
|
|
("ö", "ö"),
|
|
("ü", "ü")]:
|
|
value = value.replace(entity, uni)
|
|
return value
|
|
|
|
def __init__(self):
|
|
"""
|
|
Initialise a FormField.
|
|
|
|
With an empty dictionary for pdfinfo
|
|
and None for the other values.
|
|
|
|
"""
|
|
self.__pdfinfo = {}
|
|
|
|
def __getitem__(self, key):
|
|
"""Get an information about this field.
|
|
|
|
Returns:
|
|
self[key]
|
|
|
|
Raises:
|
|
KeyError: as usual if key is not a valid key
|
|
|
|
"""
|
|
if key in self:
|
|
assert self.__pdfinfo[key] is not None
|
|
return self.__pdfinfo[key]
|
|
|
|
def __setitem__(self, key, value):
|
|
"""Set an information about this field.
|
|
|
|
Do: self[key] = value.
|
|
|
|
If value is None, the key is removed.
|
|
|
|
"""
|
|
if value is None:
|
|
try:
|
|
del self.__pdfinfo[key]
|
|
except KeyError:
|
|
pass
|
|
# todo: add warning in some log file
|
|
else:
|
|
self.__pdfinfo[key] = value
|
|
|
|
def __delitem__(self, key):
|
|
"""Delete an information from this field.
|
|
|
|
Do: del self.__pdfinfo[key]
|
|
|
|
Raises:
|
|
KeyError: as usual for dicts
|
|
"""
|
|
del self.__pdfinfo[key]
|
|
|
|
def __contains__(self, item):
|
|
"""Return if item is a valid info."""
|
|
return item in self.__pdfinfo
|
|
|
|
def expanditer(self):
|
|
"""Return an iterator that iterates over all information.
|
|
|
|
Yielding those having lists once for each element.
|
|
|
|
For each information the name and the value in a tuple
|
|
is returned.
|
|
"""
|
|
for info in self.__pdfinfo:
|
|
# strings behave like lists, hence I cannot identify a
|
|
# difference
|
|
if str(self[info]) == self[info]:
|
|
yield (info, self[info])
|
|
else:
|
|
try:
|
|
for oneinfo in self[info]:
|
|
yield (info, oneinfo)
|
|
except TypeError:
|
|
# e.g. bool is not iterable
|
|
yield (info, self[info])
|
|
|
|
def iterkeys(self):
|
|
"""Return what you would expect of an iteration over a dict.
|
|
|
|
Returns:
|
|
an iterator that iterates over all information keys.
|
|
|
|
"""
|
|
return iter(self.__pdfinfo)
|
|
|
|
def __iter__(self):
|
|
"""Return all info:value pairs.
|
|
|
|
Returns:
|
|
iter(self.__pdfinfo.items()).
|
|
|
|
"""
|
|
return iter(self.__pdfinfo.items())
|
|
|
|
def __len__(self):
|
|
"""Return number of information(s)."""
|
|
return len(self.__pdfinfo)
|
|
|
|
def essentiallyempty(self):
|
|
"""Return if any information from the user is stored.
|
|
|
|
Returns:
|
|
True if any info starts not with "Field"
|
|
|
|
"""
|
|
return all([info.startswith("Field") for info in self.__pdfinfo])
|
|
|
|
def askUser(self, origpdf, update=False):
|
|
"""Ask the user for information about this form field.
|
|
|
|
Create a pdf where only this field is filled with some data
|
|
such that the user can easily identify which field we are
|
|
talking about.
|
|
Doing this a file origpdf-tmpfilled.pdf and field-tmp.fdf are created.
|
|
It is not checked if this is a problem.
|
|
|
|
Attributes:
|
|
origpdf (str): path to the pdf file that all is about
|
|
update (bool): if fields that have an information should be queried
|
|
|
|
Returns:
|
|
if the user wants to abort this and considers her/himself done
|
|
"""
|
|
if update or self.essentiallyempty():
|
|
# no data there or should be updated
|
|
# all information from dump_data_fields start with Field
|
|
try:
|
|
fdfinfo = [(self["FieldName"], "On")]
|
|
except KeyError:
|
|
raise ValueError("This field does not have a FieldName. This" +
|
|
" should not be possible in a pdf.")
|
|
else:
|
|
fdf = fdfgen.forge_fdf("", fdfinfo, [], [], [])
|
|
with open("field-tmp.fdf", "wb") as fdf_file: # includes close
|
|
fdf_file.write(fdf)
|
|
tmppdf = os.path.basename(origpdf) + "-tmpfilled.pdf"
|
|
pdfcreation = cmd.Popen(["pdftk", origpdf,
|
|
"fill_form", "field-tmp.fdf", "output",
|
|
tmppdf], stderr=cmd.PIPE)
|
|
# those pdfs are pseudo-encrypted. That makes pdftk issue
|
|
# a warning that one should respect copyright.
|
|
pdfcreation.wait(timeout=5) # wait a maximum of 5 seconds
|
|
# for pdftk to finish
|
|
# then evince can work
|
|
cmd.Popen(["evince", tmppdf], stderr=cmd.PIPE) # do not need
|
|
# the errors to show
|
|
print("The current field is called " + self["FieldName"])
|
|
|
|
# def buttonmessage():
|
|
# """Show a special message for buttons.
|
|
#
|
|
# Explains Button options.
|
|
#
|
|
# """
|
|
# if self["FieldType"] == "Button":
|
|
# print("It is a butten.",
|
|
# "'X', 'On', 'Yes' all say 'cross it'",
|
|
# " while '_', 'Off', 'No' all say",
|
|
# "'do not make a cross here'")
|
|
|
|
for message, info, specialmessage, converting in [
|
|
("Descriptive name", "Name", lambda: None,
|
|
lambda x: x),
|
|
("Long description", "Description", lambda: None,
|
|
lambda x: x),
|
|
# ("Standard value", "Stdvalue", buttonmessage,
|
|
# self.convertToBool),
|
|
# ("Special handling", "Special", lambda: None,
|
|
# lambda x: x)
|
|
]:
|
|
if info in self:
|
|
print("Value now:", self[info])
|
|
print("Enter nothing and this value is used.")
|
|
specialmessage()
|
|
try:
|
|
newvalue = converting(input(message + ":"))
|
|
except EOFError:
|
|
# user hit Ctrl+D
|
|
return False
|
|
try:
|
|
if newvalue.strip() != "":
|
|
self[info] = newvalue
|
|
# otherwise take old value
|
|
except AttributeError:
|
|
# strip is not a method (not a string)
|
|
if newvalue is not None:
|
|
self[info] = newvalue
|
|
# otherwise take old value
|
|
|
|
return True
|
|
|
|
def __str__(self):
|
|
"""Return a several line long string with all info of this field.
|
|
|
|
Information that are missing are not displayed.
|
|
|
|
"""
|
|
return "\n".join([str(info) + ": " + str(value)
|
|
for info, value in self.expanditer()])
|
|
|
|
def merge(self, otherField):
|
|
"""Merge two fields.
|
|
|
|
Add the information from otherField that are not in self to self.
|
|
|
|
Returns:
|
|
self after adding stuff
|
|
Raises:
|
|
ValueError: if otherField and self disagree on a value.
|
|
! self might already have changed !
|
|
|
|
"""
|
|
for info, value in otherField:
|
|
# otherField.__iter__() is called
|
|
try:
|
|
if self[info] != value:
|
|
raise ValueError("The two fields disagree on " + info
|
|
+ ": '" + str(self[info]) + "' != '"
|
|
+ str(value) + "'")
|
|
except KeyError:
|
|
pass # ok, info does not exist in self
|
|
# else: do nothing, info exists already
|
|
|
|
for info, value in otherField:
|
|
# info does not exist in self or is equal
|
|
self[info] = value
|
|
|
|
@staticmethod
|
|
def findByFieldName(fieldList, fieldName):
|
|
"""Return a FormField out of a list given the searched fieldName.
|
|
|
|
If there are several fields with this fieldName,
|
|
the first one is returned. This should not be the case though.
|
|
|
|
Raises:
|
|
KeyError: if no such FieldName exist.
|
|
|
|
"""
|
|
for f in fieldList:
|
|
try:
|
|
if f["FieldName"] == fieldName:
|
|
return f
|
|
except KeyError:
|
|
pass # should not happen but who knows?
|
|
raise KeyError("No field with FieldName '" + fieldName + "' exist.")
|