PDFlib Cookbook

cookbook

pdfua/merge_and_stamp_pdfua

Download PHP Code       Switch to Java Code      Show Output PDF

<?php
/* $Id: merge_and_stamp_pdfua.php,v 1.3 2014/02/03 17:10:24 rjs Exp $
 *
 * Merge PDF/UA documents including the document structure trees.
 * Additional text is stamped onto each page and tagged as Artifact.
 * The code below can also be used to import Tagged PDF which does not conform
 * to PDF/UA-1 by changing the option list of $p->begin_document() as explained
 * in the comment.
 *
 * Required software: PDFlib+PDI/PPS 9
 * Required data: PDF/UA documents, embeddable font
 */

/* Directory holding the input data files (three levels above this script,
 * in "input"). Adjust as necessary. */
$searchpath = dirname(dirname(__DIR__)) . "/input";

/* Sample name: used for the document title, the name of the delivered file
 * and in error messages. */
$title = "merge_and_stamp_pdfua";

/* Input documents which are merged in the order given here. */
$pdffiles = array("Whitepaper-PDFUA.pdf");

/*
 * Merge all documents listed in $pdffiles into a single Tagged PDF which is
 * created in memory, stamp every imported page with "<file>, page N of M",
 * and send the result to the client as application/pdf.
 *
 * Any failure is reported as text and terminates the script with status 1.
 */
try {
    $p = new pdflib();

    /* Let PDFlib report errors as exceptions by default and tell it where
     * the input files live. */
    $p->set_option(
        "errorpolicy=exception searchpath={" . $searchpath . "}");

    /* all strings are expected as UTF-8 */
    $p->set_option("stringformat=utf8");

    /* Replace "pdfua=PDF/UA-1" with "tagged=true" to import
     * non-PDF/UA-1 documents.
     *
     * The empty file name means the output is collected in memory; it is
     * retrieved with get_buffer() once the document is finished.
     */
    $p->begin_document("", "pdfua=PDF/UA-1 lang=en tag={tagname=Document}");

    $p->set_info("Creator", "PDFlib Cookbook");
    $p->set_info("Title", $title . ' $Revision: 1.3 $');

    /* A wrapping "Part" element is only created when several documents are
     * merged; decide this once instead of counting in every iteration. */
    $wrap_in_part = count($pdffiles) > 1;

    foreach ($pdffiles as $pdffile) {
        $id_part = 0;

        /* Open the input PDF. "errorpolicy=return" overrides the global
         * policy so that a failure can be reported with the file name. */
        $indoc = $p->open_pdi_document($pdffile, "errorpolicy=return");
        if ($indoc == 0) {
            throw new Exception("Error while opening '" . $pdffile .
                "' : " . $p->get_errmsg());
        }

        /* Wrap each document inside a "Part" structure element. The title
         * is enclosed in braces so that file names containing whitespace
         * do not break the option list. */
        if ($wrap_in_part) {
            $id_part = $p->begin_item("Part", "Title={" . $pdffile . "}");
        }

        $endpage = (int) $p->pcos_get_number($indoc, "length:pages");

        /* Loop over all pages of the input document */
        for ($pageno = 1; $pageno <= $endpage; $pageno++) {
            $page = $p->open_pdi_page($indoc, $pageno, "errorpolicy=return");
            if ($page == 0) {
                throw new Exception("Error while opening '" .
                    $pdffile . "' (page " . $pageno . "): " .
                    $p->get_errmsg());
            }

            /* Dummy page size; will be adjusted later by the "adjustpage"
             * option when the imported page is placed. */
            $p->begin_page_ext(10, 10, "");

            /* Create a bookmark with the file name on the first page of
             * each document */
            if ($pageno == 1) {
                $p->create_bookmark($pdffile, "");
            }

            $fittingpossible = true;
            $additionaltag = "";

            $topleveltagcount =
                (int) $p->info_pdi_page($page, "topleveltagcount", "");

            if ($topleveltagcount == 0) {
                /* The page doesn't contain any structure elements,
                 * i.e. it is empty or contains only Artifacts.
                 * Some applications may decide to skip such pages.
                 *
                 * We add an "Artifact" tag to work around an
                 * Acrobat bug.
                 */
                $additionaltag = "tag={tagname=Artifact} ";
            } elseif ($p->info_pdi_page($page, "fittingpossible", "") == 0) {
                /* Placing the page without any additional tag doesn't
                 * work; retry with an enclosing "P" tag and give up if
                 * that is rejected as well. */
                $additionaltag = "tag={tagname=P} ";
                if ($p->info_pdi_page($page,
                        "fittingpossible", $additionaltag) == 0) {
                    $fittingpossible = false;
                }
            }

            if (!$fittingpossible) {
                throw new Exception("Cannot import page " . $pageno .
                    " of '" . $pdffile . "': " . $p->get_errmsg());
            }

            /* Place the imported page at the origin and resize the output
             * page accordingly */
            $p->fit_pdi_page($page, 0, 0, "adjustpage " . $additionaltag);

            /* Add a stamp on each page and tag it as Artifact */
            $p->fit_textline(
                $pdffile . ", page " . $pageno . " of " . $endpage,
                20, 20,
                "fontname=DejaVuSerif encoding=unicode embedding " .
                "fillcolor=red fontsize=12 tag={tagname=Artifact}");

            $p->close_pdi_page($page);

            $p->end_page_ext("");
        }
        $p->close_pdi_document($indoc);

        /* Close the "Part" element opened for this document */
        if ($wrap_in_part) {
            $p->end_item($id_part);
        }
    }

    $p->end_document("");

    /* Fetch the finished PDF from memory and deliver it inline */
    $buf = $p->get_buffer();
    $len = strlen($buf);

    header("Content-type: application/pdf");
    header("Content-Length: $len");
    header("Content-Disposition: inline; filename=" . $title . ".pdf");
    print $buf;
}
catch (PDFlibException $e) {
    /* Errors raised by PDFlib itself: report number, API function and
     * message */
    echo("PDFlib exception occurred in " . $title . " sample:\n" .
        "[" . $e->get_errnum() . "] " . $e->get_apiname() . ": " .
        $e->get_errmsg() . "\n");
    exit(1);
}
catch (Exception $e) {
    /* Errors thrown by this script, e.g. for unusable input documents */
    echo($e);
    exit(1);
}

/* Drop the reference to the PDFlib object */
$p = 0;
?>