#! /usr/bin/python3
# -*- coding: UTF-8 -*-
"""Help deciphering the field names in pdf files.

FormFields model all information existing about fields in pdf forms.
Include a method to go through the list of pdf form fields and ask the
user for suitable descriptions.

The information stored for the fields are:
    information from the pdf file
    Name
    Description
    commands (as in config files)
"""
# todo: disable focus on pdf - apparently not simple :(

import os.path
import subprocess as cmd

from constants import NON_FORMFIELD, OFF, SUBINFO_SEP, ConfigError
import writetopdf


class FormField:
    """Store all information for one field.

    The information is accessed via [...] (``__getitem__``):
        information from dump_data_fields ("Field...")
        informative name (["Name"])
        long description (["Description"])

    Setting an information to None removes it, so a stored value is
    never None.
    """

    # Spellings (compared lower-cased) that convertToBool understands.
    _TRUE_WORDS = ("yes", "on", "x", "true")
    _FALSE_WORDS = ("no", "_", "off", "false")

    def __init__(self):
        """Initialise a FormField without any information."""
        self.__pdfinfo = {}

    def convertToBool(self, possBool):
        """Convert a value to True or False if this field is a checkbox.

        A field counts as a checkbox if its "FieldType" is "Button".

        Args:
            possBool: the value to interpret, usually a string.

        Returns:
            True or False if this field is a checkbox and possBool is one
            of the known spellings (case-insensitive). Otherwise possBool
            unchanged: if the field is no checkbox, has no "FieldType",
            possBool has no lower() method or its meaning is unclear.
        """
        try:
            if self["FieldType"] != "Button":
                return possBool
            lowered = possBool.lower()
        except (KeyError, AttributeError):
            # KeyError: "FieldType" does not exist
            # (maybe something more intelligent is necessary).
            # AttributeError: possBool is no string, lower() did not work.
            return possBool
        if lowered in self._TRUE_WORDS:
            return True
        if lowered in self._FALSE_WORDS:
            return False
        return possBool

    @staticmethod
    def convertUmlauts(value):
        """Convert html-syntax umlauts to the correct symbols.

        Necessary to make fdfgen generate files that are read correctly
        by pdftk.

        Only the German umlauts and the sharp s are handled. For each of
        them the decimal numeric character reference ("&#", the code
        point, ";") is replaced by the character itself.

        Args:
            value (str): text possibly containing such references.

        Returns:
            value with the references replaced by the unicode characters.
        """
        for uni in "ÄÖÜßäöü":
            # Build the reference from the code point instead of spelling
            # it out: a literal entity in this file is easily decoded by
            # accident, which silently turns the replacement into a no-op.
            entity = "&#" + str(ord(uni)) + ";"
            value = value.replace(entity, uni)
        return value

    @property
    def name(self):
        """Get the name of this field.

        The first existing information of "Name", "FieldName" and
        "FieldNameAlt" is used.

        Returns:
            A name of the FormField usable in error messages,
            "[Error: Unnamed]" if none of the three exists.
        """
        for key in ("Name", "FieldName", "FieldNameAlt"):
            if key in self:
                return self[key]
        return "[Error: Unnamed]"

    def __getitem__(self, key):
        """Get an information about this field.

        Returns:
            the value stored for key

        Raises:
            KeyError: as usual if key is not a valid key
        """
        value = self.__pdfinfo[key]
        # Internal invariant: __setitem__ never stores None.
        assert value is not None
        return value

    def __setitem__(self, key, value):
        """Set an information about this field.

        Do: self[key] = value.
        If value is None, the key is removed; removing a key that does
        not exist is silently ignored.
        """
        if value is None:
            # todo: add warning in some log file if key did not exist
            self.__pdfinfo.pop(key, None)
        else:
            self.__pdfinfo[key] = value

    def __delitem__(self, key):
        """Delete an information from this field.

        Raises:
            KeyError: as usual for dicts
        """
        del self.__pdfinfo[key]

    def __contains__(self, item):
        """Return if item is a valid info."""
        return item in self.__pdfinfo

    def expanditer(self):
        """Return an iterator that iterates over all information.

        Yields (name, value) tuples. An information whose value is an
        iterable other than a string is yielded once for each element.
        Strings and non-iterable values (e.g. bool) are yielded as they
        are.
        """
        for info in self.__pdfinfo:
            value = self[info]
            if isinstance(value, str):
                # strings are iterable, but must not be split into letters
                yield (info, value)
                continue
            try:
                elements = iter(value)
            except TypeError:
                # e.g. bool is not iterable
                yield (info, value)
            else:
                for oneinfo in elements:
                    yield (info, oneinfo)

    def iterkeys(self):
        """Return what you would expect of an iteration over a dict.

        Returns:
            an iterator that iterates over all information keys.
        """
        return iter(self.__pdfinfo)

    def __iter__(self):
        """Return all info:value pairs.

        Unlike a dict, iterating a FormField yields (info, value) tuples.

        Returns:
            iter(self.__pdfinfo.items()).
        """
        return iter(self.__pdfinfo.items())

    def __len__(self):
        """Return number of information(s)."""
        return len(self.__pdfinfo)

    def essentiallyempty(self):
        """Return if no information from the user is stored.

        Information whose name starts with "Field" is treated as not
        coming from the user.

        Returns:
            True if every stored info starts with "Field" (in particular
            if nothing is stored at all), False otherwise.
        """
        return all(info.startswith("Field") for info in self.__pdfinfo)

    def askUserOptions(self, origpdf):
        """Ask the user what the different options mean.

        Assumes that the field has one or more FieldStateOptions.
        Show the user the options one after another and ask what they
        mean. The information for a choice command is created.
        It is not checked if there exists already a Choice command.

        Args:
            origpdf (str): path to the pdf file that all is about

        Raises:
            EOFError, KeyboardInterrupt: if the user interrupted the input
            ValueError: if there is no FieldStateOption
        """
        if "FieldStateOption" not in self:
            raise ValueError("Asking for explanations for FieldStateOptions"
                             + " only makes sense if there are those options.")

        choicename = "c"
        self["Choice"] = choicename
        print("Enter values of options:")

        options = self["FieldStateOption"]
        # First the options that mark something: show each one in the pdf.
        for option in [opt for opt in options if opt not in OFF]:
            self.showPDFWithOneEntry(origpdf, option)
            userinput = input("Option " + option + ": ")
            if len(userinput) > 0:
                self[choicename + SUBINFO_SEP + option] = userinput
        # Then the "off" options: there is nothing to show for them.
        for option in [opt for opt in options if opt in OFF]:
            userinput = input("None marked: ")
            if len(userinput) > 0:
                self[choicename + SUBINFO_SEP + option] = userinput

        question = input("Type what the user is going to be asked"
                         + " when entering information for this field. "
                         + "(Empty input = Do not ask the user.) ")
        if len(question) == 0:
            # No prompt: store an Ifequal command instead.
            self[choicename + SUBINFO_SEP + "Ifequal"] = "no"
            self[choicename + SUBINFO_SEP + "no" + SUBINFO_SEP + "A"] = "A"
            self[choicename + SUBINFO_SEP + "no" + SUBINFO_SEP + "B"] = "B"
        else:
            self[choicename + SUBINFO_SEP + "Prompt"] = question

    def askUserName(self):
        """Ask the user for a name and a description.

        Input consisting only of whitespace leaves the information
        untouched. No pdf is shown displaying the field.
        """
        for key in ("Name", "Description"):
            userinput = input("Enter for this field the " + key + ": ")
            if userinput.strip() != "":
                self[key] = userinput

    def showPDFWithOneEntry(self, origpdf, value):
        """Show the origpdf with one value printed in this field (self).

        Creates the files "field-tmp.fdf" and "<basename of
        origpdf>-tmpfilled.pdf" in the current directory and opens the
        latter with evince without waiting for the viewer to be closed.

        Args:
            origpdf (str): path to the pdf file that all is about
            value: the value to print into this field

        Raises:
            ConfigError: if this field has no "FieldName"
        """
        essentialField = FormField()
        try:
            essentialField["FieldName"] = self["FieldName"]
        except KeyError as err:
            raise ConfigError("This field " + self.name
                              + " has no FieldName."
                              + " This should not be possible.") from err
        essentialField["Value"] = value
        tmpfdf = "field-tmp.fdf"
        tmppdf = os.path.basename(origpdf) + "-tmpfilled.pdf"
        writetopdf.fillpdfform([essentialField], origpdf, tmppdf, tmpfdf)
        # those pdfs are pseudo-encrypted. That makes pdftk issue
        # a warning that one should respect copyright.
        # then evince can work
        # The errors of the viewer are not needed. They are discarded
        # instead of piped: a pipe that nobody reads blocks the viewer as
        # soon as its buffer is full.
        cmd.Popen(["evince", tmppdf], stderr=cmd.DEVNULL)

    def askUser(self, origpdf, update=False):
        """Ask the user for information about this form field.

        Create a pdf where only this field is filled with some data such
        that the user can easily identify which field we are talking
        about. Doing this a file origpdf-tmpfilled.pdf and field-tmp.fdf
        are created. It is not checked if this is a problem.

        Args:
            origpdf (str): path to the pdf file that all is about
            update (bool): if fields that have an information
                should be queried

        Raises:
            EOFError or KeyboardInterrupt: if the user wants to abort
                this and considers her/himself done
        """
        if not (update or self.essentiallyempty()):
            return  # data is there and should not be updated
        if "FieldStateOption" in self:
            self.askUserOptions(origpdf)
        else:
            self.showPDFWithOneEntry(origpdf, "Text")
            print("Look at the field with the text 'Text'")
        self.askUserName()

    def __str__(self):
        """Return a several line long string with all info of this field.

        One line "info: value" per pair from expanditer().
        Information that are missing are not displayed.
        """
        return "\n".join(str(info) + ": " + str(value)
                         for info, value in self.expanditer())

    def merge(self, otherField):
        """Merge two fields.

        Add the information from otherField that are not in self to self.
        All information is compared before anything is written, so self
        is unchanged if a ValueError is raised.

        Args:
            otherField (FormField): the field whose information is added

        Returns:
            self after adding stuff

        Raises:
            ValueError: if otherField and self disagree on a value.
        """
        # Iterating a FormField yields (info, value) pairs.
        for info, value in otherField:
            if info in self and self[info] != value:
                raise ValueError("The two fields disagree on " + info
                                 + ": '" + str(self[info]) + "' != '"
                                 + str(value) + "'")
        for info, value in otherField:
            # info does not exist in self or is equal
            self[info] = value
        return self

    @staticmethod
    def findByFieldName(fieldList, fieldName,
                        include=(lambda x: (NON_FORMFIELD not in x))):
        """Return a FormField out of a list given the searched fieldName.

        If there are several fields with this fieldName, the first one is
        returned. This should not be the case though.

        Args:
            fieldList ([FormField]): list of formfields to search in
            fieldName (str): the fieldName looked for
            include (fct: FormField -> Bool): fct. that returns True if
                the argument should be looked for
                Default: only use FormFields that do not have the
                info NON_FORMFIELD.

        Returns:
            the first included field whose "FieldName" equals fieldName

        Raises:
            KeyError: if no such FieldName exist.
        """
        for f in fieldList:
            try:
                if include(f) and f["FieldName"] == fieldName:
                    return f
            except KeyError:
                pass  # should not happen but who knows?
        raise KeyError("No field with FieldName '" + fieldName + "' exist.")