pdfua/merge_and_stamp_pdfua1
Merge PDF/UA documents including the document structure trees. Additional text is stamped onto each page and tagged as Artifact.
Download PHP Code Switch to Java Code Show Output
<?php
/*
*
* Merge PDF/UA documents including the document structure trees.
* Additional text is stamped onto each page and tagged as Artifact.
* The code below can also be used to import Tagged PDF which does not conform
* to PDF/UA-1 by changing the option list of $p->begin_document() as explained
* in the comment.
*
* Required software: PDFlib+PDI/PPS 10
* Required data: PDF/UA documents, embeddable font
*/
/* This is where the data files are. Adjust as necessary. */
$searchpath = dirname(__DIR__, 2) . "/input";

/* By default annotations are also imported. In some cases this
 * requires the Noto fonts for creating annotation appearance streams.
 * We therefore set the searchpath to also point to the font directory.
 */
$fontpath = dirname(__DIR__, 2) . "/resource/font";

/* Used for the document title, the download file name and error output. */
$title = "merge_and_stamp_pdfua1";

/* Input documents; they are merged in the order given here. */
$pdffiles = ["PDFUA-invoice.pdf"];

/* A "Part" wrapper per input document is only created when more than
 * one document is merged.
 */
$wrapinpart = count($pdffiles) > 1;

try {
    $p = new pdflib();

    /* The searchpath option takes a list of directory names. Each
     * directory is therefore wrapped in its own pair of braces so that a
     * path containing spaces is not split into several list entries.
     */
    $p->set_option("errorpolicy=exception searchpath={{" . $searchpath . "}}");
    $p->set_option("searchpath={{" . $fontpath . "}}");

    /* all strings are expected as UTF-8 */

    /* Replace "pdfua=PDF/UA-1" with "tagged=true" to import
     * non-PDF/UA-1 documents.
     */
    $p->begin_document("", "pdfua=PDF/UA-1 lang=en tag={tagname=Document}");

    $p->set_info("Creator", "PDFlib Cookbook");
    $p->set_info("Title", $title);

    foreach ($pdffiles as $pdffile) {
        $id_part = 0;

        /* Open the input PDF. "errorpolicy=return" overrides the global
         * exception policy for this call so that a failure can be
         * reported together with the file name.
         */
        $indoc = $p->open_pdi_document($pdffile, "errorpolicy=return");
        if ($indoc == 0) {
            throw new Exception("Error while opening '" . $pdffile .
                "' : " . $p->get_errmsg());
        }

        /* Wrap each document inside a "Part" structure element. The file
         * name is enclosed in braces so that spaces or other option list
         * delimiters in the name cannot break the option list.
         */
        if ($wrapinpart) {
            $id_part = $p->begin_item("Part", "Title={" . $pdffile . "}");
        }

        $endpage = (int) $p->pcos_get_number($indoc, "length:pages");

        /* Loop over all pages of the input document */
        for ($pageno = 1; $pageno <= $endpage; $pageno++) {
            $page = $p->open_pdi_page($indoc, $pageno, "errorpolicy=return");
            if ($page == 0) {
                throw new Exception("Error while opening '" .
                    $pdffile . "' (page " . $pageno . "): " .
                    $p->get_errmsg());
            }

            /* Page size may be adjusted by fit_pdi_page() */
            $p->begin_page_ext(0, 0, "width=a4.width height=a4.height");

            /* Create a bookmark with the file name on the first page of
             * each input document.
             */
            if ($pageno == 1) {
                $p->create_bookmark($pdffile, "");
            }

            $fittingpossible = true;
            $additionaltag = "";

            $topleveltagcount =
                (int) $p->info_pdi_page($page, "topleveltagcount", "");

            /* The results of info_pdi_page() are compared loosely against
             * 0 on purpose: the exact numeric type returned by the
             * binding is not relied upon here.
             */
            if ($topleveltagcount == 0) {
                /* The page doesn't contain any structure elements,
                 * i.e. it is empty or contains only Artifacts.
                 * Some applications may decide to skip such pages.
                 *
                 * We add an "Artifact" tag to work around an
                 * Acrobat bug.
                 */
                $additionaltag = "tag={tagname=Artifact} ";
            } elseif ($p->info_pdi_page($page, "fittingpossible", "") == 0) {
                /* Placing the page without any additional tag is not
                 * possible; retry with an enclosing "P" tag.
                 */
                $additionaltag = "tag={tagname=P} ";
                if ($p->info_pdi_page($page,
                        "fittingpossible", $additionaltag) == 0) {
                    $fittingpossible = false;
                }
            }

            if (!$fittingpossible) {
                /* Neither variant works: abort the whole merge. */
                throw new Exception("Cannot import page " . $pageno .
                    " of '" . $pdffile . "': " . $p->get_errmsg());
            }

            $p->fit_pdi_page($page, 0, 0, "adjustpage " . $additionaltag);

            /* Add a stamp on each page and tag it as Artifact */
            $p->fit_textline(
                $pdffile . ", page " . $pageno . " of " . $endpage,
                20, 20,
                "fontname=NotoSerif-Regular " .
                "fillcolor=red fontsize=12 tag={tagname=Artifact}");

            $p->close_pdi_page($page);
            $p->end_page_ext("");
        }

        $p->close_pdi_document($indoc);

        /* Close the "Part" element opened for this document, if any. */
        if ($wrapinpart) {
            $p->end_item($id_part);
        }
    }

    $p->end_document("");

    /* The document was generated in memory (empty file name passed to
     * begin_document()); fetch it and send it to the client as PDF.
     */
    $buf = $p->get_buffer();
    $len = strlen($buf);

    header("Content-type: application/pdf");
    header("Content-Length: $len");
    header("Content-Disposition: inline; filename=" . $title . ".pdf");
    print $buf;
}
catch (PDFlibException $e) {
    /* Report PDFlib errors with error number, API name and message. */
    echo("PDFlib exception occurred in " . $title . " sample:\n" .
        "[" . $e->get_errnum() . "] " . $e->get_apiname() . ": " .
        $e->get_errmsg() . "\n");
    exit(1);
}
catch (Throwable $e) {
    /* Any other failure, including the Exceptions thrown above. */
    echo($e);
    exit(1);
}

/* Drop the reference to the PDFlib object. */
$p = 0;
?>