restructuring: FormField on its own while reading/writing config data in readformdata.py
This commit is contained in:
parent
90eb325464
commit
0520c310e8
2 changed files with 186 additions and 150 deletions
|
@ -1,21 +1,24 @@
|
|||
#! /usr/bin/python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
"""Help decrypting the field names in pdf files.
|
||||
|
||||
A script that goes through the list of pdf form fields and asks the
|
||||
FormFields model all information existing about fields in pdf forms.
|
||||
|
||||
Include a method to go through the list of pdf form fields and ask the
|
||||
user for suitable descriptions.
|
||||
|
||||
The information stored for the fields are:
|
||||
name
|
||||
long description
|
||||
standard value
|
||||
special (hardcoded) treatment?, maybe code that tells the code how to use it
|
||||
information from the pdf file
|
||||
Name
|
||||
Description
|
||||
commands (as in config files)
|
||||
"""
|
||||
|
||||
# todo: Button Varianten sind Off, On, sollte anpassbar/ automatisch
|
||||
# angepasst werden
|
||||
# todo: disable focus on pdf - apparently not simple :(
|
||||
|
||||
import argparse
|
||||
import subprocess as cmd
|
||||
import os.path
|
||||
import fdfgen
|
||||
|
@ -228,27 +231,28 @@ class FormField():
|
|||
# the errors to show
|
||||
print("The current field is called " + self["FieldName"])
|
||||
|
||||
def buttonmessage():
|
||||
"""Show a special message for buttons.
|
||||
|
||||
Explains Button options.
|
||||
|
||||
"""
|
||||
if self["FieldType"] == "Button":
|
||||
print("It is a butten.",
|
||||
"'X', 'On', 'Yes' all say 'cross it'",
|
||||
" while '_', 'Off', 'No' all say",
|
||||
"'do not make a cross here'")
|
||||
# def buttonmessage():
|
||||
# """Show a special message for buttons.
|
||||
#
|
||||
# Explains Button options.
|
||||
#
|
||||
# """
|
||||
# if self["FieldType"] == "Button":
|
||||
# print("It is a butten.",
|
||||
# "'X', 'On', 'Yes' all say 'cross it'",
|
||||
# " while '_', 'Off', 'No' all say",
|
||||
# "'do not make a cross here'")
|
||||
|
||||
for message, info, specialmessage, converting in [
|
||||
("Descriptive name", "Name", lambda: None,
|
||||
lambda x: x),
|
||||
("Long description", "Description", lambda: None,
|
||||
lambda x: x),
|
||||
("Standard value", "Stdvalue", buttonmessage,
|
||||
self.convertToBool),
|
||||
("Special handling", "Special", lambda: None,
|
||||
lambda x: x)]:
|
||||
# ("Standard value", "Stdvalue", buttonmessage,
|
||||
# self.convertToBool),
|
||||
# ("Special handling", "Special", lambda: None,
|
||||
# lambda x: x)
|
||||
]:
|
||||
if info in self:
|
||||
print("Value now:", self[info])
|
||||
print("Enter nothing and this value is used.")
|
||||
|
@ -305,132 +309,3 @@ class FormField():
|
|||
for info, value in otherField:
|
||||
# info does not exist in self or is equal
|
||||
self[info] = value
|
||||
|
||||
|
||||
def parse():
    """Parse the command line arguments.

    The output file may be left out; in that case "form.form" is used.

    Results:
        dict with the entries
            "pdffile" (str): input pdf file
            "output" (str): output info file
            "update" (bool): True if -u/--update was given

    """
    parser = argparse.ArgumentParser(
        description="Go through the list of fields of a pdf form" +
        " and ask for each one for a helpful description.")
    parser.add_argument("pdffile",
                        help="The pdf file with the form fields.")
    # nargs="?" is what makes the default effective: a plain positional
    # argument is always required and its default would never be used.
    parser.add_argument("output", nargs="?", default="form.form",
                        help="The file that includes all the information " +
                        "collected in this script.")
    parser.add_argument("-u", "--update", help="Also ask for information " +
                        "about fields that already have information stored.",
                        action="store_true")
    return vars(parser.parse_args())
|
||||
|
||||
|
||||
def listFields(pdf_file, fieldListFile):  # NOQA
    """Use pdftk dump_data_fields to generate a list of all data fields.

    The output of pdftk and the content of fieldListFile are read one
    after the other.  Records are separated by lines consisting of "---";
    a record whose FieldName equals that of an earlier record is merged
    into the earlier one.  Blank lines are ignored.

    Attribute:
        pdf_file (str): pdf file with the form
        fieldListFile (str): file with same format with some data already
            entered; if the file does not exist, only the pdf is used

    Results:
        list of all form fields. Each field is a dictionary of information.
        Typically those are:
            FieldType
            FieldName
            FieldNameAlt
            FieldFlags
            FieldJustification
            FieldStateOption (list of several options)
        The list is empty if neither source contains any non-blank line.

    Raises
    ------
        ValueError: if a line cannot be parsed, if the data does not start
            with ---, if an empty record is found or if an information
            that is not a list appears twice in one record
        KeyError: if looking up 'FieldName' fails while comparing records
        some io error if pdftk has problems

    """
    dump = cmd.Popen(["pdftk", pdf_file, "dump_data_fields"], stdout=cmd.PIPE,
                     universal_newlines=True)
    # second result of communicate() would be the error output
    field_desc = dump.communicate()[0].splitlines()
    # includes both inputs: pdftk and file
    try:
        with open(fieldListFile) as data:
            field_desc += data.read().splitlines()
    except FileNotFoundError:
        pass  # nothing stored yet

    # Drop blank lines before looking at the first line, so that a leading
    # empty line cannot hide the --- header.
    field_desc = [row for row in field_desc if row.strip() != ""]
    if not field_desc:
        return []
    if field_desc[0] != "---":
        raise ValueError("Output of pdftk dump_data_fields should start " +
                         "with ---.")
    field_desc.append("---")  # to include the last record

    fields = []
    field = FormField()
    for line in field_desc[1:]:  # ignore first ---
        if line == "---":
            # assumes a freshly created FormField has length 0
            if len(field) == 0:
                raise ValueError("Several --- following each other.")
            for f in fields:
                try:
                    if f["FieldName"] == field["FieldName"]:
                        f.merge(field)
                        break
                except KeyError as ke:
                    raise KeyError("Some field has no 'FieldName': "
                                   + str(ke)) from ke
            else:
                # no field for merging found
                fields.append(field)
            field = FormField()  # start new Field
            continue

        parts = line.split(": ", maxsplit=1)
        try:
            value = FormField.convertUmlauts(parts[1])
        except IndexError as err:
            raise ValueError("The line '" + str(parts) +
                             "' cannot be parsed. " +
                             "Apparently there is no ': '.") from err
        info = parts[0]
        if info in field:  # several entries
            if info not in FormField.LISTINFOS:
                raise ValueError("The information " + info +
                                 " appeared twice in one field.")
            field[info].append(value)
        elif info in FormField.LISTINFOS:
            field[info] = [value]
        else:
            field[info] = value

    return fields
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Collect the fields, ask the user about them, then store the result.
    args = parse()
    formfields = listFields(args["pdffile"], args["output"])
    for entry in formfields:
        # askUser returns something false when the user wants to stop
        if not entry.askUser(args["pdffile"], update=args["update"]):
            break
    with open(args["output"], mode="w") as target:
        for entry in formfields:
            if entry.essentiallyempty():
                continue  # nothing worth storing for this field
            target.write("\n---\n")
            target.write(str(entry))
|
||||
# next todos:
|
||||
# • add possibility to abort this information input
|
||||
# • evince should display the right page somehow, maybe give the user
|
||||
# the possibility to say "this was on page 2, assume next one is on page 2
|
||||
# as well"
|
||||
# • save those information in a suitable file
|
||||
# • read from this file and do not ask for things that are already
|
||||
# saved in this file
|
161
readformdata.py
Normal file
161
readformdata.py
Normal file
|
@ -0,0 +1,161 @@
|
|||
#! /usr/bin/python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""Functions for getting data from files.
|
||||
|
||||
Constants:
|
||||
COMMENT_SYMBOL = "#"
|
||||
|
||||
TODO:
|
||||
part with FormField.LISTINFOS feels like hardcoded hack.
|
||||
"""
|
||||
|
||||
import subprocess as cmd
|
||||
import argparse
|
||||
from formfield import FormField
|
||||
|
||||
COMMENT_SYMBOL = "#"
|
||||
|
||||
def listFields(fields, newfile):  # NOQA
    """Use pdftk dump_data_fields output to add to a list of data fields.

    Lines starting with # will be ignored (comments).
    (# must be the first non-blank symbol in the line!)
    Blank lines are ignored as well.  Records are separated by lines
    consisting of "---"; a record whose FieldName equals that of a field
    already in the list is merged into that field.

    Attribute:
        fields [FormField]: list of FormFields to add to
            -> will be changed and returned
        newfile (str): pdf file or form-file with the form (data)
            If newfile is .pdf, use pdftk dump_data_fields
            If newfile is other, read as if it was output of pdftk
            dump_data_fields

    Results:
        list of all form fields. Each field is a dictionary of information.
        Typically those are:
            FieldType
            FieldName
            FieldNameAlt
            FieldFlags
            FieldJustification
            FieldStateOption (list of several options)
        If the input has no data lines at all, fields is returned
        unchanged.

    Raises
    ------
        ValueError: if a line cannot be parsed, if the data does not start
            with ---, if an empty record is found or if an information
            that is not a list appears twice in one record
        KeyError: if looking up 'FieldName' fails while comparing records
        some io error if file does not exist or pdftk has problems

    """
    if newfile.endswith(".pdf"):
        dump = cmd.Popen(["pdftk", newfile, "dump_data_fields"],
                         stdout=cmd.PIPE,
                         universal_newlines=True)
        field_desc, _ = dump.communicate()  # second result would be error
    else:
        with open(newfile) as inputfile:
            field_desc = inputfile.read()

    # ignore empty lines and comment lines
    field_desc = [row for row in field_desc.splitlines()
                  if row.strip() != "" and
                  not row.strip().startswith(COMMENT_SYMBOL)]
    if not field_desc:
        # e.g. a form-file that was written without any field in it
        return fields
    if field_desc[0] != "---":
        raise ValueError("Output of pdftk dump_data_fields should start " +
                         "with ---. (Config files as well.)")
    field_desc.append("---")  # to include last FormField

    field = FormField()
    for line in field_desc[1:]:  # ignore first ---
        if line == "---":
            # assumes a freshly created FormField has length 0
            if len(field) == 0:
                raise ValueError("Several --- following each other.")
            for f in fields:
                try:
                    if f["FieldName"] == field["FieldName"]:
                        f.merge(field)
                        break
                except KeyError as ke:
                    raise KeyError("Some field has no 'FieldName': "
                                   + str(ke)) from ke
            else:
                # no field for merging found
                fields.append(field)
            field = FormField()  # start new Field
            continue

        parts = line.split(": ", maxsplit=1)
        try:
            value = FormField.convertUmlauts(parts[1])
        except IndexError as err:
            raise ValueError("The line '" + str(parts) +
                             "' cannot be parsed. " +
                             "Apparently there is no ': '.") from err
        info = parts[0]
        if info in field:  # several entries
            if info not in FormField.LISTINFOS:
                raise ValueError("The information " + info +
                                 " appeared twice in one field.")
            field[info].append(value)
        elif info in FormField.LISTINFOS:
            field[info] = [value]
        else:
            field[info] = value

    return fields
|
||||
|
||||
|
||||
def parse():
    """Parse the command line arguments.

    The output file may be left out; in that case "form.form" is used.

    Results:
        dict with the entries
            "pdffile" (str): input pdf file
            "output" (str): output info file
            "update" (bool): True if -u/--update was given

    """
    parser = argparse.ArgumentParser(
        description="Go through the list of fields of a pdf form" +
        " and ask for each one for a helpful description.")
    parser.add_argument("pdffile",
                        help="The pdf file with the form fields.")
    # nargs="?" is what makes the default effective: a plain positional
    # argument is always required and its default would never be used.
    parser.add_argument("output", nargs="?", default="form.form",
                        help="The file that includes all the information " +
                        "collected in this script.")
    parser.add_argument("-u", "--update", help="Also ask for information " +
                        "about fields that already have information stored.",
                        action="store_true")
    return vars(parser.parse_args())
|
||||
|
||||
|
||||
def overviewFields(fields):
    """Build an overview text with one "'FieldName' : Name" line per field.

    Only fields that contain a "Name" entry are listed; the lines are
    joined with newlines.  The result can be written to a file to help
    someone keep the overview while writing the config for a form.

    """
    rows = []
    for entry in fields:
        if "Name" not in entry:
            continue  # no descriptive name stored for this field
        rows.append("'" + entry["FieldName"] + "' : " + entry["Name"])
    return "\n".join(rows)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Imported here because only the script part needs it.
    import os.path

    # start
    args = parse()
    formfields = listFields([], args["pdffile"])
    # On the first run there is no stored information yet; then only the
    # data from the pdf is used instead of failing on the missing file.
    if os.path.exists(args["output"]):
        formfields = listFields(formfields, args["output"])
    for fi in formfields:
        continu = fi.askUser(args["pdffile"], update=args["update"])
        if not continu:
            break
    with open(args["output"], mode="w") as outputfile:
        for fi in formfields:
            if not fi.essentiallyempty():
                outputfile.write("\n---\n")
                outputfile.write(str(fi))
    # Put the "ov_" prefix in front of the file name, not in front of the
    # whole path, so that an output file inside a directory works as well.
    out_dir, out_name = os.path.split(args["output"])
    with open(os.path.join(out_dir, "ov_" + out_name),
              mode="w") as outputfile:
        outputfile.write(overviewFields(formfields))
|
||||
|
||||
# next todos:
|
||||
# • evince should display the right page somehow, maybe give the user
|
||||
# the possibility to say "this was on page 2, assume next one is on page 2
|
||||
# as well"
|
Loading…
Reference in a new issue