#! /usr/bin/python3
# -*- coding: UTF-8 -*-
r"""Create a pdf that has some text at a specified position.

Steps to add text to a pdf at any position:
1) create a tex source file with the \put macro. This puts something at
   any position relative to a picture frame. (Stamp.createStampTexCode())
2) compile with latexmk -pdf (compileTex())
3) pdftk multistamp overlays a pdf with another (stampPdfWithStamps())

Constants:
    TMPDIR: the tmp directory name in which files are created.

For testing run this file.

TODO: Customization of font, size, color, etc. of text stamped on pdf.
Some of this is possible because the text can include tex macros but
including more packages or anything more fancy is not possible.
"""

import os
import os.path
import shutil
import subprocess

from constants import ConfigError

TMPDIR = "tmp-stamp"


class MissingStampError(Exception):
    """A FormField has no information about a Stamp.

    Not necessarily an actual error, depending on the calling situation.
    If there is information about a stamp but some information, e.g. the
    posX, is missing, then a ConfigError should be raised and no
    MissingStampError.

    No further implementation since Exception includes everything
    necessary.
    """


class Stamp:
    """A text with a position.

    Intended to be used to print some text at a specific position of a
    pdf with latex. ("Stamped")

    Attributes:
        text (str): LaTeX code to be printed.
        page (int): on which page?
        posX (int): how far right from upper left corner
        posY (int): how far above upper left corner (a negative number)
    """

    def __init__(self, text, page, posX, posY):
        """Create a Stamp from the given data.

        No plausibility checks done.
        """
        self.text = text
        self.page = page
        self.posX = posX
        self.posY = posY

    @classmethod
    def fromFormField(cls, formfield):
        """Create a Stamp with data from a FormField.

        Uses information:
            Value: used as the text
            Stamp: marks the field as one to be stamped
            PosX: used as posX
            PosY: used as posY
            Page: used as page
        A Stamp is only created if both "Stamp" and "Value" exist.

        Raises:
            ConfigError: if a necessary information is missing or if an
                information that should be an integer is not
            MissingStampError: if one of the informations "Stamp" or
                "Value" is missing

        Returns:
            a Stamp with the information saved in the FormField
        """
        if "Stamp" not in formfield or "Value" not in formfield:
            raise MissingStampError("FormField " + formfield.name
                                    + " has no stamp.")
        try:
            return cls(formfield["Value"],
                       int(formfield["Page"]),
                       int(formfield["PosX"]),
                       int(formfield["PosY"]))
        except KeyError as e:
            # chain the cause so the traceback shows which key was missing
            raise ConfigError("A stamp is missing necessary information: "
                              + str(e)) from e
        except ValueError as e:
            raise ConfigError("The position of a stamp is given by three"
                              + " integers. One is not an integer: "
                              + str(e)) from e

    @classmethod
    def fromFormFieldList(cls, formfields):
        """Create a list of the Stamps with data from FormFields.

        FormFields without stamp information are skipped silently.

        Raises:
            ConfigError: see fromFormField

        Returns:
            a list of stamps with the information saved in the FormFields
        """
        stamps = []
        for field in formfields:
            try:
                stamps.append(cls.fromFormField(field))
            except MissingStampError:
                # that's fine, there is no stamp
                # happens in most cases
                pass
        return stamps

    def createPutMakro(self):
        r"""Create a piece of LaTeX code: \put(posX,posY){text}.

        It puts the text at the specified X-Y position.
        (Does not care about the page.)
        """
        return "".join([r"\put(", str(self.posX), ",", str(self.posY),
                        "){", self.text, "}"])

    @staticmethod
    def createStampTexCode(stamps, totalpagenumber, texfilename):
        """Create a tex file for a pdf that has text at an arbitrary position.

        Overwrite texfilename if existing. The given list of stamps is
        not modified.

        Args:
            stamps ([Stamp]): Stamps to be included in the pdf.
            totalpagenumber: how many pages the file should have in total
            texfilename (str): the path to the created file.

        Raises:
            various IOErrors if something does not work with writing files
        """
        picturebegin = r"\begin{picture}(0,0)(0,0)"
        pictureend = r"\end{picture}"
        # ~ (a space) is necessary to have something on the page to make
        # the page necessary and therefore existent
        newpage = r"\newpage~"

        # header
        # first page always has a \begin/\end{picture}, even if there
        # are no stamps. But who cares?, it does not show up in the pdf.
        parts = [r"\documentclass[a4paper]{article}",
                 r"\usepackage[margin=0cm]{geometry}",
                 r"\usepackage[T1]{fontenc}",
                 r"\usepackage[utf8]{inputenc}",
                 r"\begin{document}",
                 "~" + picturebegin]

        page = 1
        # sorted() instead of list.sort(): stamps must be grouped by page,
        # but the caller's list should not be reordered as a side effect
        for stamp in sorted(stamps, key=lambda s: s.page):
            if stamp.page != page:
                parts.append(pictureend
                             + (stamp.page - page) * newpage
                             + picturebegin)
                page = stamp.page
            parts.append(stamp.createPutMakro())
        parts.append(pictureend
                     + (totalpagenumber - page) * newpage
                     + r"\end{document}")

        # "w" creates the file or truncates an existing one.
        # The tex header declares utf8 input, so write exactly that
        # encoding, independent of the locale.
        with open(texfilename, mode="w", encoding="utf-8") as texfile:
            texfile.write("".join(parts))


def compileTex(texfile, timeout=5):
    """Compile a tex file with latexmk.

    Use directory where texfile is in for the temporary files and the
    pdf. The created pdf is texfile[without .tex].pdf.

    Args:
        texfile (str): the path to the to be compiled tex file
        timeout: seconds to wait for latexmk before giving up

    Raises:
        IOError: if latexmk has some error
        subprocess.TimeoutExpired: if latexmk does not finish in time
            (latexmk is killed first)
    """
    # an empty dirname would produce the broken option "-outdir="
    outdir = os.path.dirname(texfile) or "."
    # Output is discarded. It must not go to an unread PIPE: wait() can
    # deadlock as soon as latexmk fills the pipe buffer.
    # nonstopmode + no stdin: a TeX error fails instead of waiting for
    # user input.
    compilation = subprocess.Popen(["latexmk", "-pdf",
                                    "-interaction=nonstopmode",
                                    "-outdir=" + outdir,
                                    texfile],
                                   stdin=subprocess.DEVNULL,
                                   stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL)
    try:
        errorcode = compilation.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        # do not leave a stray latexmk running
        compilation.kill()
        compilation.wait()
        raise
    if errorcode != 0:
        raise IOError("latexmk compiling file '" + texfile
                      + "' created an error. (Errorcode: "
                      + str(errorcode) + ")")


def createStamppdf(pdf, stamps, totalpagenumber):
    """Create a pdf with stamps.

    Create intermediate files in same directory as the pdf. The
    directory is created if it does not exist.

    Args:
        pdf (str): the pdf to be created. Should have a file extension
            (.pdf).
        stamps ([Stamp]): what should be stamped on the pdf.
        totalpagenumber: how many pages the pdf should have in total

    Raises:
        various IOErrors
    """
    directory = os.path.dirname(pdf)
    if directory:
        # also works for nested directories and if it already exists
        os.makedirs(directory, exist_ok=True)
    texfilename = os.path.splitext(pdf)[0] + ".tex"
    Stamp.createStampTexCode(stamps, totalpagenumber, texfilename)
    compileTex(texfilename)


def getNumberOfPagespdf(pdf):
    """Get number of pages of pdf file.

    Use pdftk dump_data_utf8 to read the number of pages. It is saved in
    the line starting with "NumberOfPages: ".

    Raises:
        FileNotFoundError: if pdftk reports an error (e.g. the pdf does
            not exist)
        ValueError: if no number of pages is given by pdftk

    Returns:
        the number of pages as int
    """
    prefix = b"NumberOfPages: "  # pdftk output is bytes
    pdftk = subprocess.Popen(["pdftk", pdf, "dump_data_utf8"],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    pdffiledata, error = pdftk.communicate()
    if b"Error" in error:
        # decode interprets the bytes as UTF8
        raise FileNotFoundError("Reading the number of pages of file "
                                + pdf + " failed:\n" + error.decode())
    for line in pdffiledata.splitlines():
        if line.startswith(prefix):
            return int(line[len(prefix):])
    raise ValueError("Reading the number of pages of file " + pdf
                     + " failed. There is no page number saved.")


def stampPdfWithStamps(stamps, pdf, newpdf=None):
    """Add texts at an arbitrary position of a pdf.

    Create if non-existent and use temporary directory TMPDIR. TMPDIR is
    removed afterwards, including everything in it.

    Args:
        stamps ([Stamp]): stamps to be added.
        pdf (str): path to the pdf that gets the text added.
        newpdf (str): path to the newly created pdf. If None, use
            pdf-stamped.pdf. If equal to pdf, the original is replaced
            by the stamped version.

    Raises:
        various IOError
    """
    if newpdf is None or newpdf == pdf:
        # pdftk cannot write to its own input file
        tmpnewpdf = os.path.splitext(pdf)[0] + "-stamped.pdf"
    else:
        tmpnewpdf = newpdf

    totalpagenumber = getNumberOfPagespdf(pdf)
    stamppdffile = os.path.join(TMPDIR, os.path.basename(tmpnewpdf))
    try:
        createStamppdf(stamppdffile, stamps, totalpagenumber)
        stamp = subprocess.Popen(["pdftk", pdf,
                                  "multistamp", stamppdffile,
                                  "output", tmpnewpdf],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        _, error = stamp.communicate()
        if b"Error" in error or stamp.returncode != 0:
            raise FileNotFoundError("Adding a stamp to file " + pdf
                                    + " failed:" + "\n"
                                    + error.decode(errors="replace"))
    finally:
        # remove the temporary files in any case, like rm -r
        shutil.rmtree(TMPDIR, ignore_errors=True)

    if newpdf is not None and tmpnewpdf != newpdf:
        # newpdf == pdf: we do not need the original
        os.replace(tmpnewpdf, newpdf)


def stampPdf(formfields, pdf, newpdf=None):
    """Stamp all stamps with values from a list of formfields.

    See comments at stampPdfWithStamps for pdf and newpdf.

    Raises:
        ConfigError: see Stamp.fromFormFieldList
    """
    stamps = Stamp.fromFormFieldList(formfields)
    stampPdfWithStamps(stamps, pdf, newpdf=newpdf)


# test
if __name__ == "__main__":
    try:
        stampPdfWithStamps([], "notfound.pdf")
    except FileNotFoundError as e:
        print("Correct error:\n", e)

    teststamps = [Stamp("Uli", 2, 20, -20),
                  Stamp("Krähmer", 2, 20, -50),
                  Stamp("Toller Hecht", 3, 30, -100)]
    os.makedirs(TMPDIR, exist_ok=True)
    Stamp.createStampTexCode(teststamps, 4,
                             os.path.join(TMPDIR, "4seiten.tex"))
    Stamp.createStampTexCode(teststamps, 2,
                             os.path.join(TMPDIR, "2seiten.tex"))

    teststamps = teststamps[:2] + [Stamp("pdf filled with fillform.py",
                                         1, 10, -830)]
    stampPdfWithStamps(teststamps,
                       "../TU-Dokumente/abrechnung/abrechnung.pdf",
                       "../TU-Dokumente/abrechnung/abrechnung-marked.pdf")
    print("Now look at the pdf at "
          "../TU-Dokumente/abrechnung/abrechnung-marked.pdf")

    # The following tests are disabled.
    # the tests need rewriting or readformdata
    # rewriting is work
    # readformdata makes a circular import and breaks other things
    #
    # Note: stampPdfWithStamps removes TMPDIR, so it has to be created
    # again before the form file can be written.
    #
    # os.makedirs(TMPDIR, exist_ok=True)
    # formfile = os.path.join(TMPDIR, "stamptest.form")
    # formtext = r"""---
    # FieldName: Unterschrift
    # Value: Ulrich Krähmer
    # Stamp: yes
    # PosX: 460
    # PosY: -764
    # Page: 2
    # ---
    # FieldName: Bonusprogramm
    # Value: Bahn\&More
    # Stamp: blub
    # PosX: 410
    # PosY: -492
    # Page: 2
    # ---
    # FieldName: Fehlerprone
    # Value: Fehler!!!
    # Stamp: Jes
    # PosX: 20
    # """
    # with open(formfile, mode="w") as form:
    #     form.write(formtext)
    # fields = readformdata.listFields([], formfile)
    # try:
    #     stampPdf(fields,
    #              "../TU-Dokumente/abrechnung/abrechnung.pdf",
    #              newpdf="../TU-Dokumente/abrechnung/abrechnung-marked2.pdf")
    # except ConfigError as e:
    #     print("Correct Error:\n", e)
    # formtext += """Page: 1
    # PosY: -00
    # """
    # os.makedirs(TMPDIR, exist_ok=True)
    # with open(formfile, mode="w") as form:
    #     form.write(formtext)
    # fields = readformdata.listFields([], formfile)
    # stampPdf(fields,
    #          "../TU-Dokumente/abrechnung/abrechnung.pdf",
    #          newpdf="../TU-Dokumente/abrechnung/abrechnung-marked2.pdf")
    # print("Now look at the pdf at "
    #       "../TU-Dokumente/abrechnung/abrechnung-marked2.pdf")