pdfformfill/stamps.py

401 lines
13 KiB
Python

#! /usr/bin/python3
# -*- coding: UTF-8 -*-
r"""Create a pdf that has some text at a specified position.
Steps to add text to a pdf at any position:
1) Create a tex source file that uses the \put macro, which places content at
any position relative to a picture frame. (Stamp.createStampTexCode())
2) compile with latexmk -pdf (compileTex())
3) pdftk stamp overlays a pdf with another (stampPdf())
Constants:
TMPDIR: the tmp directory name in which files are created.
For testing run this file.
TODO:
Customization of font, size, color, etc. of the text stamped on the pdf.
Some of this is already possible because the text may contain TeX macros,
but loading additional packages or anything fancier is not possible.
"""
import os
import os.path
import shutil
import subprocess

from constants import ConfigError
TMPDIR = "tmp-stamp"
class MissingStampError(Exception):
    """Signal that a FormField carries no stamp information at all.

    Whether this is a real problem depends on the caller; a field without
    a stamp is often perfectly fine.

    This exception is only meant for the "no stamp at all" case. If stamp
    information exists but is incomplete (e.g. the posX is missing), a
    ConfigError should be raised instead.

    The class adds nothing to Exception; it exists only so that callers
    can catch this situation by type.
    """
class Stamp:
    """A text with a position.

    Intended to be used to print some text at a specific position of a pdf
    with latex. ("Stamped")

    Attributes:
        text (str): LaTeX code to be printed.
        page (int): page the text is printed on (the first page is 1).
        posX (int): how far right from upper left corner
        posY (int): how far above upper left corner (a negative number)
    """

    def __init__(self, text, page, posX, posY):
        """Create a Stamp from the given data.

        No plausibility checks done.
        """
        self.text = text
        self.page = page
        self.posX = posX
        self.posY = posY

    @classmethod
    def fromFormField(cls, formfield):
        """Create a Stamp with data from a FormField.

        Uses information:
            Value: as the text, only if this information exists, create a Stamp
            Stamp: only if this information exists, create a Stamp
            PosX: used as posX
            PosY: used as posY
            Page: used as page

        Raises:
            ConfigError:
                if a necessary information is missing or
                if an information that should be an integer is not
            MissingStampError:
                if one of the information "Stamp" or "Value" is missing

        Returns:
            a Stamp with the information saved in the FormField
        """
        if "Stamp" not in formfield or "Value" not in formfield:
            raise MissingStampError("FormField " + formfield.name +
                                    " has no stamp.")
        try:
            return cls(formfield["Value"],
                       int(formfield["Page"]),
                       int(formfield["PosX"]),
                       int(formfield["PosY"]))
        except KeyError as e:
            # keep the original exception attached as the cause
            raise ConfigError("A stamp is missing necessary information: " +
                              str(e)) from e
        except (TypeError, ValueError) as e:
            # TypeError: int() got a non-string, non-number value (e.g. None)
            # ValueError: int() got a string that is not an integer
            raise ConfigError("The position of a stamp is given by three" +
                              " integers. One is not an integer: " +
                              str(e)) from e

    @classmethod
    def fromFormFieldList(cls, formfields):
        """Create a list of the Stamps with data from FormFields.

        FormFields without stamp information are skipped silently.

        Raises:
            ConfigError:
                see fromFormField

        Returns:
            a list of stamps with the information saved in the FormFields
        """
        stamps = []
        for field in formfields:
            try:
                stamps.append(cls.fromFormField(field))
            except MissingStampError:
                # that's fine, there is no stamp
                # happens in most cases
                pass
        return stamps

    def createPutMakro(self):
        r"""Create a piece of LaTeX code.

        It puts the text at the specified X-Y position with a \put macro.
        (Does not care about the page.)

        Returns:
            the LaTeX code as a str
        """
        return r"\put({},{}){{{}}}".format(self.posX, self.posY, self.text)

    @staticmethod
    def createStampTexCode(stamps, totalpagenumber, texfilename):
        """Create a tex file for a pdf that has text at an arbitrary position.

        Overwrite texfilename if existing. The file is written as UTF-8
        because the generated preamble declares utf8 as input encoding.
        The passed list of stamps is not modified.

        Attributes:
            stamps ([Stamp]): Stamps to be included in the pdf.
            totalpagenumber: how many pages the file should have in total
            texfilename (str): the path to the created file.

        Raises:
            various IOErrors if something does not work with writing the
            file; to be handled by the calling function
        """
        beginPicture = r"\begin{picture}(0,0)(0,0)"
        endPicture = r"\end{picture}"
        # ~ (a space) is necessary to have something on the page to make
        # the page necessary and therefore existent
        newPage = r"\newpage~"

        # header
        # first page always has a \begin/\end{picture}, even if there
        # are no stamps. But who cares?, it does not show up in the pdf.
        parts = [r"\documentclass[a4paper]{article}",
                 r"\usepackage[margin=0cm]{geometry}",
                 r"\usepackage[T1]{fontenc}",
                 r"\usepackage[utf8]{inputenc}",
                 r"\begin{document}",
                 r"~", beginPicture]
        page = 1
        # sorted() works on a copy, so the caller's list keeps its order;
        # the sort is stable, stamps of one page keep their relative order
        for stamp in sorted(stamps, key=lambda s: s.page):
            if stamp.page != page:
                # close the current picture and advance to the stamp's page
                parts.append(endPicture +
                             (stamp.page - page) * newPage +
                             beginPicture)
                page = stamp.page
            parts.append(stamp.createPutMakro())
        # fill up with empty pages until totalpagenumber is reached
        parts.append(endPicture +
                     (totalpagenumber - page) * newPage +
                     r"\end{document}")

        # mode "w" creates the file or truncates an existing one; the
        # with-statement closes it even if writing fails
        with open(texfilename, mode="w", encoding="utf-8") as texfile:
            texfile.write("".join(parts))
def compileTex(texfile):
    """Compile a tex file with latexmk.

    Use directory where texfile is in for the temporary files and the pdf.
    The created pdf is texfile[without .tex].pdf.

    Attributes:
        texfile (str): the path to the to be compiled tex file

    Raises:
        IOError: if latexmk has some error
        subprocess.TimeoutExpired: if latexmk needs more than 5 seconds;
            the latexmk process is killed in that case
    """
    # a bare file name has an empty dirname; use the current directory then
    outdir = os.path.dirname(texfile) or "."
    # The output of latexmk is not used. Discard it instead of piping it:
    # waiting on a process whose pipes are never read blocks as soon as
    # the pipe buffer is full.
    errorcode = subprocess.call(["latexmk", "-pdf",
                                 texfile,
                                 "-outdir=" + outdir],
                                stdout=subprocess.DEVNULL,
                                stderr=subprocess.DEVNULL,
                                timeout=5)
    if errorcode != 0:
        raise IOError("latexmk compiling file '" + texfile +
                      "' created an error. (Errorcode: " + str(errorcode) + ")")
def createStamppdf(pdf, stamps, totalpagenumber):
    """Create a pdf with stamps.

    Create intermediate files in same directory as the pdf. The directory
    (including missing parent directories) is created if necessary.

    Attributes:
        pdf (str): the pdf to be created. Should have a file extension (.pdf).
        stamps ([Stamp]): what should be stamped on the pdf.
        totalpagenumber: how many pages the created pdf should have

    Raises:
        various IOErrors
    """
    directory = os.path.dirname(pdf)
    # an empty dirname means "current directory": nothing to create
    if directory:
        # an already existing directory is fine, just use it
        os.makedirs(directory, exist_ok=True)
    # the tex source is placed next to the pdf with the same base name
    texfilename = os.path.splitext(pdf)[0] + ".tex"
    Stamp.createStampTexCode(stamps, totalpagenumber, texfilename)
    compileTex(texfilename)
def getNumberOfPagespdf(pdf):
    """Get number of pages of pdf file.

    Run "pdftk <pdf> dump_data_utf8" and read the number of pages from the
    output line that starts with "NumberOfPages: ".

    Attributes:
        pdf (str): path to the pdf file

    Raises:
        FileNotFoundError: if the error output of pdftk contains "Error"
        ValueError: if pdftk prints no line with the number of pages

    Returns:
        the number of pages as an int
    """
    # pdftk output arrives as bytes, so the marker is a bytes object, too
    marker = b"NumberOfPages: "
    process = subprocess.Popen(["pdftk", pdf, "dump_data_utf8"],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    output, errors = process.communicate()
    if b"Error" in errors:
        # decode interprets the bytes as UTF8
        raise FileNotFoundError("Reading the number of pages of file " +
                                pdf + " failed:\n" + errors.decode())
    for entry in output.splitlines():
        if entry.startswith(marker):
            # everything behind the marker is the number
            return int(entry[len(marker):])
    raise ValueError("Reading the number of pages of file " +
                     pdf + " failed. There is no page number saved.")
def stampPdfWithStamps(stamps, pdf, newpdf=None):
    """Add texts at an arbitrary position of a pdf.

    Create if non-existent and use temporary directory TMPDIR. TMPDIR and
    everything in it is removed after a successful run; if an error is
    raised it is left in place.

    Attributes:
        stamps ([Stamp]): stamps to be added.
        pdf (str): path to the pdf that gets the text added.
        newpdf (str): path to the newly created pdf (if None, use
            pdf-stamped.pdf). If it is the same as pdf, the original pdf
            is replaced by the stamped one.

    Raises:
        various IOError
    """
    inplace = newpdf == pdf
    if newpdf is None or inplace:
        # Default; also used as intermediate file for in-place stamping so
        # that pdftk does not have to write onto its own input file
        outputpdf = os.path.splitext(pdf)[0] + "-stamped.pdf"
    else:
        outputpdf = newpdf
    totalpagenumber = getNumberOfPagespdf(pdf)
    stamppdffile = os.path.join(TMPDIR, os.path.basename(outputpdf))
    createStamppdf(stamppdffile, stamps, totalpagenumber)
    stamp = subprocess.Popen(["pdftk", pdf, "multistamp", stamppdffile,
                              "output", outputpdf], stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    _, error = stamp.communicate()
    if b"Error" in error:
        # decode so that the message shows text and not a bytes repr
        raise FileNotFoundError("Adding a stamp to file " + pdf + " failed:"
                                + "\n" + error.decode(errors="replace"))
    if inplace:
        # we do not need original: put the stamped file in its place.
        # In every other case outputpdf already is the final file.
        os.replace(outputpdf, pdf)
    # remove the temporary directory with all its content (like rm -r)
    shutil.rmtree(TMPDIR)
def stampPdf(formfields, pdf, newpdf=None):
    """Stamp a pdf with the stamps described by a list of formfields.

    The formfields are converted with Stamp.fromFormFieldList and the
    result is handed to stampPdfWithStamps; pdf and newpdf are passed
    through unchanged, see stampPdfWithStamps for their meaning.

    Raises:
        ConfigError: see Stamp.fromFormFieldList
    """
    stampPdfWithStamps(Stamp.fromFormFieldList(formfields), pdf,
                       newpdf=newpdf)
# test
if __name__ == "__main__":
    # a pdf that does not exist has to be reported as FileNotFoundError
    try:
        stampPdfWithStamps([], "notfound.pdf")
    except FileNotFoundError as e:
        print("Correct error:\n", e)
    teststamps = [Stamp("Uli", 2, 20, -20),
                  Stamp("Krähmer", 2, 20, -50),
                  Stamp("Toller Hecht", 3, 30, -100)]
    # only the tex code is created here, it is not compiled
    os.makedirs(TMPDIR, exist_ok=True)
    Stamp.createStampTexCode(teststamps, 4, TMPDIR + "/4seiten.tex")
    Stamp.createStampTexCode(teststamps, 2, TMPDIR + "/2seiten.tex")
    teststamps = teststamps[:2] + [Stamp("pdf filled with fillform.py", 1,
                                         10, -830)]
    stampPdfWithStamps(teststamps,
                       "../TU-Dokumente/abrechnung/abrechnung.pdf",
                       "../TU-Dokumente/abrechnung/abrechnung-marked.pdf")
    print("Now look at the pdf at "
          "../TU-Dokumente/abrechnung/abrechnung-marked.pdf")
    # the tests need rewriting or readformdata
    # rewriting is work
    # readformdata makes a circular import and breaks other things
    # NOTE: stampPdfWithStamps removes TMPDIR after a successful run, so the
    # directory has to be created again before the form file is written.
    # formfile = os.path.join(TMPDIR, "stamptest.form")
    # formtext = r"""---
    # FieldName: Unterschrift
    # Value: Ulrich Krähmer
    # Stamp: yes
    # PosX: 460
    # PosY: -764
    # Page: 2
    # ---
    # FieldName: Bonusprogramm
    # Value: Bahn\&More
    # Stamp: blub
    # PosX: 410
    # PosY: -492
    # Page: 2
    # ---
    # FieldName: Fehlerprone
    # Value: Fehler!!!
    # Stamp: Jes
    # PosX: 20
    # """
    # os.makedirs(TMPDIR, exist_ok=True)
    # with open(formfile, mode="w") as form:
    #     form.write(formtext)
    # fields = readformdata.listFields([], formfile)
    # try:
    #     stampPdf(fields,
    #              "../TU-Dokumente/abrechnung/abrechnung.pdf",
    #              newpdf="../TU-Dokumente/abrechnung/abrechnung-marked2.pdf")
    # except ConfigError as e:
    #     print("Correct Error:\n", e)
    # formtext += """Page: 1
    # PosY: -00
    # """
    # os.makedirs(TMPDIR, exist_ok=True)
    # with open(formfile, mode="w") as form:
    #     form.write(formtext)
    # fields = readformdata.listFields([], formfile)
    # stampPdf(fields,
    #          "../TU-Dokumente/abrechnung/abrechnung.pdf",
    #          newpdf="../TU-Dokumente/abrechnung/abrechnung-marked2.pdf")
    # print("Now look at the pdf at "
    #       "../TU-Dokumente/abrechnung/abrechnung-marked2.pdf")