101 lines
3.2 KiB
Python
101 lines
3.2 KiB
Python
|
#! /usr/bin/python3
|
||
|
"""Help decrypting the field names in pdf files.
|
||
|
|
||
|
A script that goes through the list of pdf form fields and asks the
|
||
|
user for suitable descriptions.
|
||
|
"""
|
||
|
|
||
|
import argparse
|
||
|
import subprocess as cmd
|
||
|
# import fdfgen
|
||
|
|
||
|
|
||
|
def parse():
    """Parse the command line arguments.

    The arguments are read from ``sys.argv``.

    Returns:
        dict with one entry per argument:
            "pdffile": the pdf file with the form fields (required)
            "output": the file that includes all the information collected
                in this script; "form.form" if it is not given
    """
    parser = argparse.ArgumentParser(
        description="Go through the list of fields of a pdf form" +
                    " and ask for each one for a helpful description.")
    parser.add_argument("pdffile",
                        help="The pdf file with the form fields.")
    # nargs="?" makes this positional optional. Without it argparse ignores
    # the default and always demands the argument on the command line.
    parser.add_argument("output", nargs="?", default="form.form",
                        help="The file that includes all the information " +
                             "collected in this script.")
    return vars(parser.parse_args())
|
||
|
|
||
|
|
||
|
def listFields(pdf_file, pdftk="pdftk"):
    """Use pdftk dump_data_fields to generate a list of all data fields.

    Args:
        pdf_file (str): pdf file with the form
        pdftk (str): name or path of the pdftk executable to run

    Returns:
        list of all form fields. Each field is a dictionary of information.
        Typically those are:
            FieldType
            FieldName
            FieldNameAlt
            FieldFlags
            FieldJustification
            FieldStateOption (list of several options)
        A key that occurs once in a field maps to a string, a key that
        occurs several times maps to the list of its values.
        The list is empty if pdftk prints nothing at all.

    Raises:
        ValueError: if the output does not start with ---, if it ends
            with ---, or if a line cannot be parsed
        OSError: if pdftk cannot be started or exits with a non-zero status
    """
    with cmd.Popen([pdftk, pdf_file, "dump_data_fields"], stdout=cmd.PIPE,
                   universal_newlines=True) as dump:
        # Only stdout is piped, so the second result (stderr) is None.
        field_desc, _ = dump.communicate()
    if dump.returncode != 0:
        # Do not try to parse the output of a failed run.
        raise OSError("pdftk dump_data_fields failed with exit status " +
                      str(dump.returncode) + ".")
    return _parse_field_dump(field_desc)


def _parse_field_dump(field_desc):
    """Turn the text printed by pdftk dump_data_fields into field dicts."""
    lines = field_desc.splitlines()
    if not lines:
        # No output at all: there is no field to report.
        return []
    if lines[0] != "---":
        raise ValueError("Output of pdftk dump_data_fields should start " +
                         "with ---.")
    fields = []
    field = {}
    for line in lines[1:]:  # ignore first ---
        if line == "---":
            # A separator closes the current field and opens the next one.
            fields.append(field)
            field = {}
            continue
        # Split at the first ': ' only, the value may contain ': ' itself.
        key, separator, value = line.partition(": ")
        if not separator:
            raise ValueError("The line '" + line +
                             "' cannot be parsed. " +
                             "Apparently there is no ': '.")
        if key not in field:  # information is new
            field[key] = value
        elif isinstance(field[key], list):  # third or later entry
            field[key].append(value)
        else:  # second entry: turn the single value into a list
            field[key] = [field[key], value]
    if not field:
        # pdftk output ended with ---
        raise ValueError("Output of pdftk dump_data_fields should end " +
                         "with a field.")
    fields.append(field)
    return fields
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
|
||
|
# start
|
||
|
args = parse()
|
||
|
formfields = listFields(args["pdffile"])
|
||
|
# next todos:
|
||
|
# • ask user about a single field by creating a pdf that has something
|
||
|
# written in this field
|
||
|
# • ask user for all fields
|
||
|
# • save those information in a suitable file
|
||
|
# • read from this file and do not ask for things that are already
|
||
|
# saved in this file
|