pdfformfill/formfield.py

334 lines
11 KiB
Python
Executable file

#! /usr/bin/python3
# -*- coding: UTF-8 -*-
"""Help deciphering the field names in pdf files.
FormFields model all information that exists about fields in pdf forms.
They include a method to go through the pdf form fields and ask the
user for suitable descriptions.
The information stored for each field is:
information from the pdf file
Name
Description
commands (as in config files)
"""
# todo: button variants are Off, On; this should be adjustable/
# adjusted automatically
# todo: disable focus on pdf - apparently not simple :(
import subprocess as cmd
import os.path
import fdfgen
class FormField:
    """Store all information for one pdf form field.

    The information is accessed like a dictionary (``field[key]``,
    ``field[key] = value``, ``del field[key]``, ``key in field``).

    Keys are:
        information from dump_data_fields (keys starting with "Field",
            e.g. "FieldName", "FieldType")
        informative name ("Name")
        long description ("Description")
        standard value ("Stdvalue")  todo: how to implement that?
        something about a special treatment ("Special")

    Assigning None to a key removes it, so a value stored through
    ``field[key] = value`` is never None.
    """

    # Characters whose numeric character references are decoded by
    # convertUmlauts.  The references are built from the code points
    # instead of being spelled out, so that a tool which decodes html
    # entities cannot silently turn the table into a no-op.
    _UMLAUTS = "ÄÖÜßäöü"

    def __init__(self):
        """Initialise a FormField without any information."""
        self.__pdfinfo = {}

    def convertToBool(self, possBool):
        """Convert possBool to True or False if this field is a checkbox.

        A field counts as checkbox if its "FieldType" is "Button".

        Returns:
            True for "yes", "on", "x", "true" and False for "no", "_",
            "off", "false" (case-insensitive) if this field is a Button.
            possBool unchanged if this field is no Button, has no
            "FieldType", or if it is unclear whether True or False is
            meant (including values that are no strings).
        """
        if "FieldType" not in self or self["FieldType"] != "Button":
            # maybe something more intelligent is necessary if
            # "FieldType" does not exist
            return possBool
        try:
            lowered = possBool.lower()
        except AttributeError:
            # not a string, hence nothing to interpret
            return possBool
        if lowered in ("yes", "on", "x", "true"):
            return True
        if lowered in ("no", "_", "off", "false"):
            return False
        return possBool

    @staticmethod
    def convertUmlauts(value):
        """Convert html-syntax umlauts in value to the correct symbols.

        Necessary to make fdfgen generate files that are read correctly
        by pdftk.

        Attributes:
            value (str): text that may contain decimal numeric character
                references for the German umlauts and sharp s

        Returns:
            value with the references for the characters
            Ä, Ö, Ü, ß, ä, ö, ü replaced by the characters themselves;
            everything else is left untouched
        """
        for char in FormField._UMLAUTS:
            entity = "&#{0};".format(ord(char))
            value = value.replace(entity, char)
        return value

    def __getitem__(self, key):
        """Get an information about this field.

        Returns:
            self[key]

        Raises:
            KeyError: as usual if key is not a valid key
        """
        value = self.__pdfinfo[key]  # raises KeyError for unknown keys
        # invariant kept by __setitem__: None is never stored
        assert value is not None
        return value

    def __setitem__(self, key, value):
        """Set an information about this field.

        Do: self[key] = value.
        If value is None, the key is removed; removing a key that does
        not exist is silently ignored.
        """
        if value is None:
            # todo: add warning in some log file if key did not exist
            self.__pdfinfo.pop(key, None)
        else:
            self.__pdfinfo[key] = value

    def __delitem__(self, key):
        """Delete an information from this field.

        Raises:
            KeyError: as usual for dicts
        """
        del self.__pdfinfo[key]

    def __contains__(self, item):
        """Return if item is a key for which an information is stored."""
        return item in self.__pdfinfo

    def expanditer(self):
        """Return an iterator over all information as (name, value).

        An information whose value is iterable (but no string) is
        yielded once for each of its elements; every other information
        is yielded once with its value.
        """
        for info, value in self.__pdfinfo.items():
            if isinstance(value, str):
                # strings are iterable but must stay in one piece
                yield (info, value)
                continue
            try:
                elements = iter(value)
            except TypeError:
                # e.g. bool is not iterable
                yield (info, value)
            else:
                for oneinfo in elements:
                    yield (info, oneinfo)

    def iterkeys(self):
        """Return an iterator over all information keys."""
        return iter(self.__pdfinfo)

    def __iter__(self):
        """Return an iterator over all (info, value) pairs.

        Note that, unlike for a dict, iterating yields pairs and not
        keys.
        """
        return iter(self.__pdfinfo.items())

    def __len__(self):
        """Return the number of stored information(s)."""
        return len(self.__pdfinfo)

    def essentiallyempty(self):
        """Return if no information from the user is stored.

        Returns:
            True if every stored key starts with "Field" (this includes
            the case that nothing is stored at all), False as soon as
            one key does not start with "Field".
        """
        return all(info.startswith("Field") for info in self.__pdfinfo)

    @staticmethod
    def _showPreview(origpdf, fieldname):
        """Open a copy of origpdf in which only fieldname is filled.

        Writes field-tmp.fdf and <basename of origpdf>-tmpfilled.pdf
        relative to the current directory, runs pdftk to fill the form
        and starts evince on the result.
        It is not checked if overwriting these files is a problem.

        Raises:
            subprocess.TimeoutExpired: if pdftk needs more than 5 seconds
        """
        fdf = fdfgen.forge_fdf("", [(fieldname, "On")], [], [], [])
        with open("field-tmp.fdf", "wb") as fdf_file:
            fdf_file.write(fdf)
        tmppdf = os.path.basename(origpdf) + "-tmpfilled.pdf"
        # Those pdfs are pseudo-encrypted.  That makes pdftk issue a
        # warning that one should respect copyright.  The output is
        # discarded instead of piped: a pipe that is never read can
        # block the child once the pipe buffer is full.
        pdfcreation = cmd.Popen(["pdftk", origpdf,
                                 "fill_form", "field-tmp.fdf", "output",
                                 tmppdf], stderr=cmd.DEVNULL)
        # pdftk has to finish before evince can work
        pdfcreation.wait(timeout=5)
        # evince keeps running, its errors do not need to show
        cmd.Popen(["evince", tmppdf], stderr=cmd.DEVNULL)

    def askUser(self, origpdf, update=False):
        """Ask the user for information about this form field.

        Create a pdf where only this field is filled with some data
        such that the user can easily identify which field we are
        talking about, then ask for "Name" and "Description" on stdin.
        An empty answer keeps the old value.
        Nothing is asked if the field already has information from the
        user and update is False.

        Attributes:
            origpdf (str): path to the pdf file that all is about
            update (bool): if fields that have an information should be
                queried as well

        Returns:
            False if the user aborted with end-of-file (Ctrl+D) and
            considers her/himself done, True otherwise

        Raises:
            ValueError: if this field has no "FieldName"
        """
        if not (update or self.essentiallyempty()):
            # information from the user exists and should not be updated
            return True
        try:
            fieldname = self["FieldName"]
        except KeyError as err:
            raise ValueError("This field does not have a FieldName. This" +
                             " should not be possible in a pdf.") from err
        self._showPreview(origpdf, fieldname)
        print("The current field is called " + fieldname)
        # todo: "Stdvalue" (converted with convertToBool, with an
        # explanation of the options for buttons) and "Special" are not
        # asked yet
        for message, info in (("Descriptive name", "Name"),
                              ("Long description", "Description")):
            if info in self:
                print("Value now:", self[info])
                print("Enter nothing and this value is used.")
            try:
                newvalue = input(message + ":")
            except EOFError:
                # user hit Ctrl+D
                return False
            if newvalue.strip() != "":
                self[info] = newvalue
            # otherwise take old value
        return True

    def __str__(self):
        """Return a several line long string with all info of this field.

        One line "info: value" per pair of expanditer().
        Information that are missing are not displayed.
        """
        return "\n".join(str(info) + ": " + str(value)
                         for info, value in self.expanditer())

    def merge(self, otherField):
        """Merge two fields.

        Add the information from otherField that are not in self to self.
        All information is compared before anything is added, so self is
        unchanged if a ValueError is raised.

        Returns:
            self after adding stuff

        Raises:
            ValueError: if otherField and self disagree on a value.
        """
        # iterating a FormField yields (info, value) pairs; take a
        # snapshot so that both passes see the same data
        pairs = list(otherField)
        for info, value in pairs:
            if info in self and self[info] != value:
                raise ValueError("The two fields disagree on " + str(info)
                                 + ": '" + str(self[info]) + "' != '"
                                 + str(value) + "'")
        for info, value in pairs:
            # info does not exist in self or is equal
            self[info] = value
        return self

    @staticmethod
    def findByFieldName(fieldList, fieldName):
        """Return a FormField out of a list given the searched fieldName.

        If there are several fields with this fieldName,
        the first one is returned. This should not be the case though.
        Fields without a "FieldName" are skipped.

        Raises:
            KeyError: if no such FieldName exist.
        """
        for field in fieldList:
            try:
                if field["FieldName"] == fieldName:
                    return field
            except KeyError:
                continue  # should not happen but who knows?
        raise KeyError("No field with FieldName '" + str(fieldName)
                       + "' exist.")