fix umlaut handling
This commit is contained in:
parent
836c02060f
commit
9b2bc16c8a
1 changed files with 41 additions and 7 deletions
48
identify.py
48
identify.py
|
@ -13,6 +13,7 @@ The information stored for the fields are:
|
|||
|
||||
# todo: Button Varianten sind Off, On, sollte anpassbar/ automatisch
|
||||
# angepasst werden
|
||||
# todo: disable focus on pdf - apparently not simple :(
|
||||
|
||||
import argparse
|
||||
import subprocess as cmd
|
||||
|
@ -34,8 +35,13 @@ class FormField():
|
|||
standard value (["Stdvalue"]) todo: how to implement that?
|
||||
something about a special treatment (["Special"])
|
||||
|
||||
Constants:
|
||||
LISTINFOS: those infos can appear several times, hence store a list.
|
||||
|
||||
"""
|
||||
|
||||
LISTINFOS = ["FieldStateOption"]
|
||||
|
||||
def convertToBool(self, possBool):
|
||||
"""Convert value to True or or False if this field is a Checkbox.
|
||||
|
||||
|
@ -62,12 +68,34 @@ class FormField():
|
|||
# maybe something more intelligent necessary
|
||||
return possBool
|
||||
|
||||
@staticmethod
def convertUmlauts(value):
    """Convert HTML numeric entities for German umlauts to characters.

    Necessary to make fdfgen generate files that are read correctly
    by pdftk.

    Only the decimal character references of the seven characters
    Ä, Ö, Ü, ß, ä, ö and ü (e.g. "&#228;" for "ä") are replaced; any
    other text, including other entities, is left untouched.

    Args:
        value: the string that may contain "&#number;" references.

    Returns:
        value, &#number; replaced by the correct unicode character

    """
    # Build each entity from the character's code point instead of
    # spelling it out, so the table cannot degrade into a no-op
    # ("Ä" -> "Ä") when the source passes through an HTML decoder.
    for char in "ÄÖÜßäöü":
        value = value.replace("&#{};".format(ord(char)), char)
    return value
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Initialise a FormField.
|
||||
|
||||
With an empty dictionary for pdfinfo
|
||||
and None for the other values.
|
||||
|
||||
"""
|
||||
self.__pdfinfo = {}
|
||||
|
||||
|
@ -204,6 +232,7 @@ class FormField():
|
|||
"""Show a special message for buttons.
|
||||
|
||||
Explains Button options.
|
||||
|
||||
"""
|
||||
if self["FieldType"] == "Button":
|
||||
print("It is a butten.",
|
||||
|
@ -363,18 +392,23 @@ def listFields(pdf_file, fieldListFile): # NOQA
|
|||
field = FormField() # start new Field
|
||||
else:
|
||||
line = line.split(": ", maxsplit=1)
|
||||
if len(line) != 2:
|
||||
try:
|
||||
line[1] = FormField.convertUmlauts(line[1])
|
||||
except IndexError:
|
||||
raise ValueError("The line '" + str(line) +
|
||||
"' cannot be parsed. " +
|
||||
"Apparently there is no ': '.")
|
||||
if line[0] in field: # several entries
|
||||
try: # works if it is already a list
|
||||
if line[0] in FormField.LISTINFOS:
|
||||
field[line[0]].append(line[1])
|
||||
except AttributeError: # append does not exist for str :)
|
||||
field[line[0]] = [field[line[0]], line[1]]
|
||||
# include existing info
|
||||
else: # information is new
|
||||
field[line[0]] = line[1]
|
||||
else:
|
||||
raise ValueError("The information " + line[0] +
|
||||
"appeared twice in one field.")
|
||||
else:
|
||||
if line[0] in FormField.LISTINFOS:
|
||||
field[line[0]] = [line[1]]
|
||||
else:
|
||||
field[line[0]] = line[1]
|
||||
|
||||
return fields
|
||||
|
||||
|
|
Loading…
Reference in a new issue