fix umlaut handling

This commit is contained in:
Bela 2018-02-15 20:41:35 +01:00
parent 836c02060f
commit 9b2bc16c8a

View file

@ -13,6 +13,7 @@ The information stored for the fields are:
# todo: button variants are Off and On; this should be configurable or
# adjusted automatically
# todo: disable focus on pdf - apparently not simple :(
import argparse
import subprocess as cmd
@ -34,8 +35,13 @@ class FormField():
standard value (["Stdvalue"]) todo: how to implement that?
something about a special treatment (["Special"])
Constants:
LISTINFOS: those infos can appear several times, hence store a list.
"""
LISTINFOS = ["FieldStateOption"]
def convertToBool(self, possBool):
"""Convert value to True or or False if this field is a Checkbox.
@ -62,12 +68,34 @@ class FormField():
# maybe something more intelligent necessary
return possBool
@staticmethod
def convertUmlauts(value):
"""Convert all html-Syntax-Umlauts to the correct symbols.
Necessary to make fdfgen generate files that are read correctly
by pdftk.
Returns:
value, &#number replaced by the correct unicode
"""
for entity, uni in [("Ä", "Ä"),
("Ö", "Ö"),
("Ü", "Ü"),
("ß", "ß"),
("ä", "ä"),
("ö", "ö"),
("ü", "ü")]:
value = value.replace(entity, uni)
return value
def __init__(self):
"""
Initialise a FormField.
With an empty dictionary for pdfinfo
and None for the other values.
"""
self.__pdfinfo = {}
@ -204,6 +232,7 @@ class FormField():
"""Show a special message for buttons.
Explains Button options.
"""
if self["FieldType"] == "Button":
print("It is a butten.",
@ -363,18 +392,23 @@ def listFields(pdf_file, fieldListFile): # NOQA
field = FormField() # start new Field
else:
line = line.split(": ", maxsplit=1)
if len(line) != 2:
try:
line[1] = FormField.convertUmlauts(line[1])
except IndexError:
raise ValueError("The line '" + str(line) +
"' cannot be parsed. " +
"Apparently there is no ': '.")
if line[0] in field: # several entries
try: # works if it is already a list
if line[0] in FormField.LISTINFOS:
field[line[0]].append(line[1])
except AttributeError: # append does not exist for str :)
field[line[0]] = [field[line[0]], line[1]]
# include existing info
else: # information is new
field[line[0]] = line[1]
else:
raise ValueError("The information " + line[0] +
"appeared twice in one field.")
else:
if line[0] in FormField.LISTINFOS:
field[line[0]] = [line[1]]
else:
field[line[0]] = line[1]
return fields