#! /usr/bin/python3
"""Help deciphering the field names in pdf files.

A script that goes through the list of pdf form fields and asks the user for
suitable descriptions.
"""

import argparse
import subprocess as cmd
# import fdfgen


def parse(argv=None):
    """Parse the command line arguments.

    Args:
        argv (list of str, optional): arguments to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) reads the real command
            line.

    Returns:
        dict: the parsed arguments with the keys ``"pdffile"`` and
        ``"output"``.
    """
    parser = argparse.ArgumentParser(
        description="Go through the list of fields of a pdf form"
        + " and ask for each one for a helpful description.")
    parser.add_argument("pdffile", help="The pdf file with the form fields.")
    # nargs="?" is what makes the default effective: without it argparse
    # treats a positional argument as required and never uses `default`.
    parser.add_argument("output", nargs="?", default="form.form",
                        help="The file that includes all the information "
                        + "collected in this script.")
    return vars(parser.parse_args(argv))


def parseFieldDump(dump_text):
    """Turn the text printed by ``pdftk ... dump_data_fields`` into a list.

    The expected format is a sequence of records. Every record starts with a
    line ``---`` and is followed by lines of the form ``Key: value``.

    Args:
        dump_text (str): the complete text of the dump.

    Returns:
        list of dict: one dictionary per record, mapping each key to its
        value (str). A key that occurs several times within one record is
        mapped to the list of all its values in order of appearance.
        Empty input yields an empty list.

    Raises:
        ValueError: if the text does not start with ``---``, if it ends with
            ``---``, or if a line does not contain ``": "``.
    """
    lines = dump_text.splitlines()
    if not lines:  # no output at all means there are no form fields
        return []
    if lines[0] != "---":
        raise ValueError("Output of pdftk dump_data_fields should start "
                         + "with ---.")

    fields = []
    field = {}
    for line in lines[1:]:  # the first --- was checked above
        if line == "---":  # separator: the current record is complete
            fields.append(field)
            field = {}
            continue

        key, separator, value = line.partition(": ")
        if not separator:
            raise ValueError("The line '" + line + "' cannot be parsed. \n"
                             + "Apparently there is no ': '.")

        if key not in field:  # information is new
            field[key] = value
        elif isinstance(field[key], list):  # third or later occurrence
            field[key].append(value)
        else:  # second occurrence: promote the single value to a list
            field[key] = [field[key], value]

    if not field:  # the dump ended with ---
        raise ValueError("Output of pdftk dump_data_fields should end "
                         + "with a field.")
    fields.append(field)
    return fields


def listFields(pdf_file):
    """Use pdftk dump_data_fields to generate a list of all data fields.

    Args:
        pdf_file (str): pdf file with the form.

    Returns:
        list of dict: all form fields. Each field is a dictionary of
        information. Typically the keys are FieldType, FieldName,
        FieldNameAlt, FieldFlags, FieldJustification and FieldStateOption
        (the latter is a list if several options exist). The list is empty
        if pdftk reports no fields.

    Raises:
        ValueError: if the output of pdftk cannot be parsed.
        OSError: if pdftk cannot be started (e.g. it is not installed).
        subprocess.CalledProcessError: if pdftk exits with a non-zero
            status, e.g. because the pdf file does not exist.
    """
    command = ["pdftk", pdf_file, "dump_data_fields"]
    with cmd.Popen(command, stdout=cmd.PIPE,
                   universal_newlines=True) as dump:
        field_desc, _ = dump.communicate()  # stderr is not captured
    if dump.returncode != 0:
        # Without this check a failing pdftk would look like an empty form.
        raise cmd.CalledProcessError(dump.returncode, command)
    return parseFieldDump(field_desc)


def main():
    """Run the script: read the arguments and list the form fields."""
    args = parse()
    formfields = listFields(args["pdffile"])
    # next todos:
    # • ask the user about a single field by creating a pdf that has
    #   something written in this field
    # • ask the user for all fields
    # • save this information in a suitable file
    # • read from this file and do not ask for things that are already
    #   saved in this file
    return formfields


if __name__ == "__main__":
    main()