PDFlib Cookbook

cookbook

pdfua/merge_and_stamp_pdfua1

Merge PDF/UA documents including the document structure trees. Additional text is stamped onto each page and tagged as Artifact.

Download PHP Code  Switch to Java Code  Show Output 

<?php
/*
 *
 * Merge PDF/UA documents including the document structure trees.
 * Additional text is stamped onto each page and tagged as Artifact.
 * The code below can also be used to import Tagged PDF which does not conform
 * to PDF/UA-1 by changing the option list of $p->begin_document() as explained
 * in the comment.
 *
 * Required software: PDFlib+PDI/PPS 10
 * Required data: PDF/UA documents, embeddable font
 */

/*
 * Location of the input data files, resolved relative to this script
 * (two directory levels above the script's own directory).
 * Adjust as necessary.
 */
$searchpath = dirname(__DIR__, 2) . "/input";

/*
 * Annotations are imported by default. In some cases this requires the
 * Noto fonts for creating annotation appearance streams, so the font
 * directory is made available as an additional searchpath entry.
 */
$fontpath = dirname(__DIR__, 2) . "/resource/font";

/* Used as document title, download file name and in error messages */
$title = "merge_and_stamp_pdfua1";

/* PDF/UA input documents to merge, in output order */
$pdffiles = ["PDFUA-invoice.pdf"];

/*
 * Merge all documents listed in $pdffiles into a single PDF/UA-1 document
 * created in memory, stamp every page with "<file>, page N of M" (tagged
 * as Artifact), and send the result to the client as application/pdf.
 *
 * Reads:  $searchpath, $fontpath, $title, $pdffiles
 * Errors: any failure is reported as text and the script exits with
 *         status 1.
 */
try {
    $p = new pdflib();

    /* Throw exceptions on errors and register the input directory */
    $p->set_option("errorpolicy=exception searchpath={" . $searchpath . "}");

    /* Additionally point the searchpath to the font directory */
    $p->set_option("searchpath={" . $fontpath . "}");

    /* all strings are expected as UTF-8 */

    /* Replace "pdfua=PDF/UA-1" with "tagged=true" to import
     * non-PDF/UA-1 documents.
     * An empty file name means the output is generated in memory and
     * fetched with get_buffer() below.
     */
    $p->begin_document("", "pdfua=PDF/UA-1 lang=en tag={tagname=Document}");

    $p->set_info("Creator", "PDFlib Cookbook");
    $p->set_info("Title", $title);

    /* A "Part" wrapper per document is only created when merging
     * more than one input file.
     */
    $wrapinpart = count($pdffiles) > 1;

    foreach ($pdffiles as $pdffile) {
        $id_part = 0;

        /* Open the input PDF; errorpolicy=return lets us build our own
         * error message instead of getting a PDFlib exception.
         */
        $indoc = $p->open_pdi_document($pdffile, "errorpolicy=return");
        if ($indoc == 0) {
            throw new Exception("Error while opening '" . $pdffile .
                    "' : " . $p->get_errmsg());
        }

        /* Wrap each document inside a "Part" structure element.
         * The title is enclosed in braces so that file names containing
         * spaces or other option list delimiters remain a single value.
         */
        if ($wrapinpart) {
            $id_part = $p->begin_item("Part", "Title={" . $pdffile . "}");
        }

        $endpage = (int) $p->pcos_get_number($indoc, "length:pages");

        /* Loop over all pages of the input document */
        for ($pageno = 1; $pageno <= $endpage; $pageno++) {
            $page = $p->open_pdi_page($indoc, $pageno, "errorpolicy=return");
            if ($page == 0) {
                throw new Exception("Error while opening '" .
                    $pdffile . "' (page " . $pageno . "): " .
                    $p->get_errmsg());
            }

            /* Page size may be adjusted by fit_pdi_page() */
            $p->begin_page_ext(0, 0, "width=a4.width height=a4.height");

            /* Create a bookmark with the file name on the first page */
            if ($pageno == 1) {
                $p->create_bookmark($pdffile, "");
            }

            $fittingpossible = true;
            $additionaltag = "";

            $topleveltagcount =
                    (int) $p->info_pdi_page($page, "topleveltagcount", "");

            /* Note: the info_pdi_page() results below are compared loosely
             * against 0 on purpose, since the numeric type returned by the
             * binding is not visible here.
             */
            if ($topleveltagcount == 0) {
                /* The page doesn't contain any structure elements,
                 * i.e. it is empty or contains only Artifacts.
                 * Some applications may decide to skip such pages.
                 *
                 * We add an "Artifact" tag to work around an
                 * Acrobat bug.
                 */
                $additionaltag = "tag={tagname=Artifact} ";
            } elseif ($p->info_pdi_page($page, "fittingpossible", "") == 0) {
                /* Placing the page without any additional tag doesn't
                 * work; retry with an enclosing "P" tag.
                 */
                $additionaltag = "tag={tagname=P} ";
                if ($p->info_pdi_page($page,
                        "fittingpossible", $additionaltag) == 0) {
                    $fittingpossible = false;
                }
            }

            /* Give up if the page can't be placed at all */
            if (!$fittingpossible) {
                throw new Exception("Cannot import page " . $pageno .
                    " of '" . $pdffile . "': " . $p->get_errmsg());
            }

            $p->fit_pdi_page($page, 0, 0, "adjustpage " . $additionaltag);

            /* Add a stamp on each page and tag it as Artifact */
            $p->fit_textline(
                $pdffile . ", page " . $pageno . " of " . $endpage,
                20, 20,
                "fontname=NotoSerif-Regular " .
                "fillcolor=red fontsize=12 tag={tagname=Artifact}");

            $p->close_pdi_page($page);

            $p->end_page_ext("");
        }
        $p->close_pdi_document($indoc);

        /* Close the "Part" element opened for this document */
        if ($wrapinpart) {
            $p->end_item($id_part);
        }
    }

    $p->end_document("");

    /* Fetch the PDF generated in memory and deliver it inline */
    $buf = $p->get_buffer();
    $len = strlen($buf);

    header("Content-type: application/pdf");
    header("Content-Length: $len");
    header("Content-Disposition: inline; filename=" . $title . ".pdf");
    print $buf;
}
catch (PDFlibException $e) {
    /* Errors raised by PDFlib itself: report number, API name and message */
    echo("PDFlib exception occurred in " . $title . " sample:\n" .
        "[" . $e->get_errnum() . "] " . $e->get_apiname() . ": " .
        $e->get_errmsg() . "\n");
    exit(1);
}
catch (Throwable $e) {
    /* Everything else, including the Exceptions thrown above */
    echo($e);
    exit(1);
}

/* Drop the reference to the PDFlib object. This line is only reached on
 * success, because both catch handlers above terminate with exit(1).
 */
$p = 0;
?>