PDFlib Cookbook

cookbook

pdfua/merge_and_stamp_pdfua1

Merge PDF/UA documents including the document structure trees. Additional text is stamped onto each page and tagged as Artifact.

Download Java Code  Switch to PHP Code  Show Output 

/*
 * Merge PDF/UA documents including the document structure trees.
 * Additional text is stamped onto each page and tagged as Artifact.
 * The code below can also be used to import Tagged PDF which does not conform
 * to PDF/UA-1 by changing the option list of p.begin_document() as explained
 * in the comment preceding that call.
 *
 * Required software: PDFlib+PDI/PPS 10
 * Required data: PDF/UA documents, embeddable font
 */
package com.pdflib.cookbook.pdflib.pdfua;

import com.pdflib.pdflib;
import com.pdflib.PDFlibException;

public class merge_and_stamp_pdfua1 {
    /**
     * Merges all input documents into one tagged output document named after
     * this sample and stamps an Artifact-tagged text line onto every imported
     * page.
     *
     * Input documents or pages which cannot be opened are reported on stderr
     * and skipped. A page which cannot be fitted into the structure tree is
     * reported as well; the output page created for it stays empty.
     *
     * The process is terminated with System.exit(): status 0 if no exception
     * was caught, 1 otherwise.
     *
     * @param argv command line arguments (not used)
     */
    public static void main(String[] argv) {
        /* This is where the data files are. Adjust as necessary. */
        String searchpath = "../input";

        /* By default annotations are also imported. In some cases this
         * requires the Noto fonts for creating annotation appearance streams.
         * We therefore set the searchpath to also point to the font directory.
         */
        String fontpath = "../resource/font";

        String title = "merge_and_stamp_pdfua1";
        int exitcode = 0;

        pdflib p = null;
        String[] pdffiles = { "PDFUA-invoice.pdf" };

        try {
            p = new pdflib();

            p.set_option("errorpolicy=exception searchpath={" + searchpath + "}");

            p.set_option("searchpath={" + fontpath + "}");

            /*
             * Replace "pdfua=PDF/UA-1" with "tagged=true" to import
             * non-PDF/UA-1 documents.
             */
            p.begin_document(title + ".pdf",
                "pdfua=PDF/UA-1 lang=en tag={tagname=Document}");

            p.set_info("Creator", "PDFlib Cookbook");
            p.set_info("Title", title);

            /* A "Part" wrapper is only used when several documents are merged */
            boolean wrapinpart = pdffiles.length > 1;

            for (String pdffile : pdffiles) {
                /* Open the input PDF */
                int indoc = p.open_pdi_document(pdffile, "errorpolicy=return");
                if (indoc == -1) {
                    System.err.println("Error while opening '" + pdffile
                        + "' : " + p.get_errmsg());
                    continue;
                }

                /*
                 * Wrap each document inside a "Part" structure element. The
                 * braces keep the file name together as a single option value
                 * even if it contains spaces.
                 */
                int id_part = 0;
                if (wrapinpart) {
                    id_part = p.begin_item("Part", "Title={" + pdffile + "}");
                }

                int endpage = (int) p.pcos_get_number(indoc, "length:pages");

                /*
                 * The bookmark is attached to the first page of this document
                 * which could actually be opened, so a document whose first
                 * page is unusable still gets its bookmark.
                 */
                boolean bookmarkcreated = false;

                /* Loop over all pages of the input document */
                for (int pageno = 1; pageno <= endpage; pageno++) {
                    int page = p.open_pdi_page(indoc, pageno,
                        "errorpolicy=return");

                    if (page == -1) {
                        System.err.println("Error while opening '"
                            + pdffile + "' (page " + pageno + "): "
                            + p.get_errmsg());
                        continue;
                    }

                    /* Page size may be adjusted by fit_pdi_page() */
                    p.begin_page_ext(0, 0, "width=a4.width height=a4.height");

                    /* Create a bookmark with the file name */
                    if (!bookmarkcreated) {
                        p.create_bookmark(pdffile, "");
                        bookmarkcreated = true;
                    }

                    String additionaltag = selectAdditionalTag(p, page);

                    if (additionaltag != null) {
                        p.fit_pdi_page(page, 0, 0, "adjustpage "
                            + additionaltag);

                        /* Add a stamp on each page and tag it as Artifact */
                        p.fit_textline(pdffile + ", page " + pageno
                            + " of " + endpage, 20, 20,
                            "fontname=NotoSerif-Regular "
                                + "fillcolor=red fontsize=12 "
                                + "tag={tagname=Artifact}");
                    }
                    else {
                        System.err.println("Skipping page " + pageno + " of '"
                            + pdffile + "': " + p.get_errmsg());
                    }

                    p.close_pdi_page(page);

                    p.end_page_ext("");
                }
                p.close_pdi_document(indoc);

                if (wrapinpart) {
                    p.end_item(id_part);
                }
            }

            p.end_document("");
        }
        catch (PDFlibException e) {
            System.err.println("PDFlib exception occurred:");
            System.err.println("[" + e.get_errnum() + "] " + e.get_apiname() +
                ": " + e.get_errmsg());
            exitcode = 1;
        }
        catch (Exception e) {
            System.err.println(e);
            exitcode = 1;
        }
        finally {
            if (p != null) {
                p.delete();
            }
        }

        /*
         * Exit outside of the finally clause: calling System.exit() inside it
         * would discard any Throwable not handled above and report status 0.
         */
        System.exit(exitcode);
    }

    /**
     * Determines the additional "tag" option (if any) required for placing
     * an imported page with fit_pdi_page().
     *
     * @param p    the PDFlib object
     * @param page handle of the opened PDI page
     * @return the option string to append to the fit_pdi_page() option list
     *         (possibly empty), or null if the page cannot be fitted with or
     *         without an additional "P" tag
     * @throws PDFlibException if a PDFlib call fails
     */
    private static String selectAdditionalTag(pdflib p, int page)
            throws PDFlibException {
        int topleveltagcount = (int) p.info_pdi_page(page,
            "topleveltagcount", "");

        if (topleveltagcount == 0) {
            /*
             * The page doesn't contain any structure elements, i.e. it is
             * empty or contains only Artifacts. Some applications may decide
             * to skip such pages.
             *
             * We add an "Artifact" tag to work around an Acrobat bug.
             */
            return "tag={tagname=Artifact} ";
        }

        /*
         * Try to place the page without any additional tag; if this doesn't
         * work we insert another tag.
         */
        if (p.info_pdi_page(page, "fittingpossible", "") != 0) {
            return "";
        }

        String wrappertag = "tag={tagname=P} ";
        if (p.info_pdi_page(page, "fittingpossible", wrappertag) != 0) {
            return wrappertag;
        }

        /* Not even the extra tag makes fitting possible */
        return null;
    }
}