restructuring: FormField on its own while reading/writing config data in readformdata.py

2018-02-15 21:43:25 +01:00 · 2018-02-15 21:43:25 +01:00 · 0520c310e8
commit 0520c310e8
parent 90eb325464
2 changed files with 186 additions and 150 deletions
--- a/formfield.py
+++ b/formfield.py
@@ -1,21 +1,24 @@
 #! /usr/bin/python3
+# -*- coding: UTF-8 -*-
+
 """Help decrypting the field names in pdf files.

-A script that goes through the list of pdf form fields and asks the
+FormField models all existing information about a field in a pdf form.
+
+Includes a method to go through the list of pdf form fields and ask the
 user for suitable descriptions.

 The information stored for the fields are:
-    name
-    long description
-    standard value
-    special (hardcoded) treatment?, maybe code that tells the code how to use it
+    information from the pdf file
+    Name
+    Description
+    commands (as in config files)
 """

 # todo: Button Varianten sind Off, On, sollte anpassbar/ automatisch
 # angepasst werden
 # todo: disable focus on pdf  - apparently not simple :(

-import argparse
 import subprocess as cmd
 import os.path
 import fdfgen
@@ -228,27 +231,28 @@ class FormField():
                # the errors to show
                print("The current field is called " + self["FieldName"])

-                def buttonmessage():
-                    """Show a special message for buttons.
-
-                    Explains Button options.
-
-                    """
-                    if self["FieldType"] == "Button":
-                        print("It is a butten.",
-                              "'X', 'On', 'Yes' all say 'cross it'",
-                              " while '_', 'Off', 'No' all say",
-                              "'do not make a cross here'")
+                # def buttonmessage():
+                #     """Show a special message for buttons.
+                #
+                #     Explains Button options.
+                #
+                #     """
+                #     if self["FieldType"] == "Button":
+                #         print("It is a butten.",
+                #               "'X', 'On', 'Yes' all say 'cross it'",
+                #               " while '_', 'Off', 'No' all say",
+                #               "'do not make a cross here'")

                for message, info, specialmessage, converting in [
                        ("Descriptive name", "Name", lambda: None,
                         lambda x: x),
                        ("Long description", "Description", lambda: None,
                         lambda x: x),
-                        ("Standard value", "Stdvalue", buttonmessage,
-                         self.convertToBool),
-                        ("Special handling", "Special", lambda: None,
-                         lambda x: x)]:
+                        # ("Standard value", "Stdvalue", buttonmessage,
+                        #  self.convertToBool),
+                        # ("Special handling", "Special", lambda: None,
+                        #  lambda x: x)
+                ]:
                    if info in self:
                        print("Value now:", self[info])
                        print("Enter nothing and this value is used.")
@@ -305,132 +309,3 @@ class FormField():
        for info, value in otherField:
            # info does not exist in self or is equal
            self[info] = value
-
-
-def parse():
-    """Parse the arguments.
-
-    Results:
-        input pdf file (["pdffile"]
-        output info file (["output"])
-
-    """
-    parser = argparse.ArgumentParser(
-        description="Go through the list of fields of a pdf form" +
-        " and ask for each one for a helpful description.")
-    parser.add_argument("pdffile",
-                        help="The pdf file with the form fields.")
-    parser.add_argument("output", default="form.form",
-                        help="The file that includes all the information " +
-                        "collected in this script.")
-    parser.add_argument("-u", "--update", help="Also ask for information " +
-                        "about fields that already have information stored.",
-                        action="store_true")
-    return vars(parser.parse_args())
-
-
-def listFields(pdf_file, fieldListFile):  # NOQA
-    """Use pdftk dump_data_fields to generate a list of all data fields.
-
-    Attribute:
-        pdf_file (str): pdf file with the form
-        fieldListFile (str): file with same format with some data already
-        entered
-
-    Results:
-        list of all form fields. Each field is a dictionary of information.
-        Typically those are:
-            FieldType
-            FieldName
-            FieldNameAlt
-            FieldFlags
-            FieldJustification
-            FieldStateOption (list of several options)
-
-    Raises
-    ------
-        ValueError: if a line cannot be parsed
-        some io error if file does not exist or pdftk has problems
-
-    """
-    dump = cmd.Popen(["pdftk", pdf_file, "dump_data_fields"], stdout=cmd.PIPE,
-                     universal_newlines=True)
-    # stdout=sp.PIPE
-    # field_desc, _ = dump.communicate()  # second result would be error
-    fields = []
-    field_desc = dump.communicate()[0].splitlines()
-    # includes both inputs: pdftk and file
-    try:
-        data = open(fieldListFile)
-    except FileNotFoundError:
-        pass
-    else:
-        field_desc += data.read().splitlines()
-        data.close()
-    field = FormField()
-    if field_desc[0] != "---":
-        raise ValueError("Output of pdftk dump_data_fields should start " +
-                         "with ---.")
-    field_desc = [l for l in field_desc if l.strip() != ""]
-    field_desc.append("---")
-    for line in field_desc[1:]:  # ignore first ---
-        if line == "---":
-            if len(field) < 0:
-                raise ValueError("Several --- following each other.")
-            # else add:
-            for f in fields:
-                try:
-                    if f["FieldName"] == field["FieldName"]:
-                        f.merge(field)
-                        break
-                except KeyError as ke:
-                    raise KeyError("Some field has no 'FieldName': "
-                                   + str(ke))
-            else:
-                # no field for merging found
-                fields.append(field)
-            field = FormField()  # start new Field
-        else:
-            line = line.split(": ", maxsplit=1)
-            try:
-                line[1] = FormField.convertUmlauts(line[1])
-            except IndexError:
-                raise ValueError("The line '" + str(line) +
-                                 "' cannot be parsed. " +
-                                 "Apparently there is no ': '.")
-            if line[0] in field:  # several entries
-                if line[0] in FormField.LISTINFOS:
-                    field[line[0]].append(line[1])
-                else:
-                    raise ValueError("The information " + line[0] +
-                                     "appeared twice in one field.")
-            else:
-                if line[0] in FormField.LISTINFOS:
-                    field[line[0]] = [line[1]]
-                else:
-                    field[line[0]] = line[1]
-
-    return fields
-
-
-if __name__ == "__main__":
-    # start
-    args = parse()
-    formfields = listFields(args["pdffile"], args["output"])
-    for fi in formfields:
-        continu = fi.askUser(args["pdffile"], update=args["update"])
-        if not continu:
-            break
-    with open(args["output"], mode="w") as outputfile:
-        for fi in formfields:
-            if not fi.essentiallyempty():
-                outputfile.write("\n---\n")
-                outputfile.write(str(fi))
-    # next todos:
-    # • add possibility to abort this information input
-    # • evince should display the right page somehow, maybe give the user
-    # the possibility to say "this was on page 2, assume next one is on page 2
-    # as well"
-    # • save those information in a suitable file
-    # • read from this file and do not ask for things that are already
-    # saved in this file
--- a/readformdata.py
+++ b/readformdata.py
@@ -0,0 +1,161 @@
+#! /usr/bin/python3
+# -*- coding: UTF-8 -*-
+"""Functions for getting data from files.
+
+Constants:
+    COMMENT_SYMBOL = "#"
+
+TODO:
+    The handling of FormField.LISTINFOS feels like a hard-coded hack.
+"""
+
+import subprocess as cmd
+import argparse
+from formfield import FormField
+
+COMMENT_SYMBOL = "#"
+
+def listFields(fields, newfile):  # NOQA
+    """Use pdftk dump_data_fields output to add to a list of data fields.
+
+    Lines starting with # will be ignored (comments).
+    (# must be the first non-blank symbol in the line!)
+
+    Arguments:
+        fields [FormField]: list of FormFields to add to
+        -> is modified in place and returned
+        newfile (str): pdf file or form file with the form (data)
+        If newfile ends with .pdf, run pdftk dump_data_fields on it.
+        Otherwise, read it as if it were output of pdftk dump_data_fields.
+
+    Results:
+        list of all form fields. Each field is a dictionary of information.
+        Typically those are:
+            FieldType
+            FieldName
+            FieldNameAlt
+            FieldFlags
+            FieldJustification
+            FieldStateOption (list of several options)
+
+    Raises
+    ------
+        ValueError: if a line cannot be parsed
+        some io error if file does not exist or pdftk has problems
+
+    """
+    if newfile.endswith(".pdf"):
+        dump = cmd.Popen(["pdftk", newfile, "dump_data_fields"],
+                         stdout=cmd.PIPE,
+                         universal_newlines=True)
+        field_desc, _ = dump.communicate()
+    else:
+        with open(newfile) as inputfile:
+            field_desc = inputfile.read()
+    field_desc = field_desc.splitlines()
+    # stdout=sp.PIPE
+    # field_desc, _ = dump.communicate()  # second result would be error
+    # fields = []
+    # field_desc now holds the lines of one input: pdftk output or a file
+    field_desc = [l for l in field_desc if l.strip() != "" and not
+                  l.strip().startswith(COMMENT_SYMBOL)]
+    if field_desc[0] != "---":
+        raise ValueError("Output of pdftk dump_data_fields should start " +
+                         "with ---. (Config files as well.)")
+    # empty lines and comment lines have already been dropped above
+    field_desc.append("---")  # to include last FormField
+    field = FormField()
+    for line in field_desc[1:]:  # ignore first ---
+        if line == "---":
+            if len(field) < 0:
+                raise ValueError("Several --- following each other.")
+            # else add:
+            for f in fields:
+                try:
+                    if f["FieldName"] == field["FieldName"]:
+                        f.merge(field)
+                        break
+                except KeyError as ke:
+                    raise KeyError("Some field has no 'FieldName': "
+                                   + str(ke))
+            else:
+                # no field for merging found
+                fields.append(field)
+            field = FormField()  # start new Field
+        else:
+            line = line.split(": ", maxsplit=1)
+            try:
+                line[1] = FormField.convertUmlauts(line[1])
+            except IndexError:
+                raise ValueError("The line '" + str(line) +
+                                 "' cannot be parsed. " +
+                                 "Apparently there is no ': '.")
+            if line[0] in field:  # several entries
+                if line[0] in FormField.LISTINFOS:
+                    field[line[0]].append(line[1])
+                else:
+                    raise ValueError("The information " + line[0] +
+                                     "appeared twice in one field.")
+            else:
+                if line[0] in FormField.LISTINFOS:
+                    field[line[0]] = [line[1]]
+                else:
+                    field[line[0]] = line[1]
+
+    return fields
+
+
+def parse():
+    """Parse the arguments.
+
+    Results:
+        input pdf file (["pdffile"])
+        output info file (["output"])
+
+    """
+    parser = argparse.ArgumentParser(
+        description="Go through the list of fields of a pdf form" +
+        " and ask for each one for a helpful description.")
+    parser.add_argument("pdffile",
+                        help="The pdf file with the form fields.")
+    parser.add_argument("output", default="form.form",
+                        help="The file that includes all the information " +
+                        "collected in this script.")
+    parser.add_argument("-u", "--update", help="Also ask for information " +
+                        "about fields that already have information stored.",
+                        action="store_true")
+    return vars(parser.parse_args())
+
+
+def overviewFields(fields):
+    """Create a list of FieldName:goodName.
+
+    Can be written to a file to help someone keep an overview while
+    writing the config for a form.
+
+    """
+    return "\n".join(["'" + f["FieldName"] + "' : " + f["Name"] for f in fields
+                      if "Name" in f])
+
+
+if __name__ == "__main__":
+    # start
+    args = parse()
+    formfields = listFields([], args["pdffile"])
+    formfields = listFields(formfields, args["output"])
+    for fi in formfields:
+        continu = fi.askUser(args["pdffile"], update=args["update"])
+        if not continu:
+            break
+    with open(args["output"], mode="w") as outputfile:
+        for fi in formfields:
+            if not fi.essentiallyempty():
+                outputfile.write("\n---\n")
+                outputfile.write(str(fi))
+    with open("ov_" + args["output"], mode="w") as outputfile:
+        outputfile.write(overviewFields(formfields))
+
+    # next todos:
+    # • evince should display the right page somehow, maybe give the user
+    # the possibility to say "this was on page 2, assume next one is on page 2
+    # as well"