PDFlib Cookbook

cookbook

pdfa/clone_pdfa

Clone PDF/A, PDF/UA and PDF/X standard documents.

Download PHP Code  Switch to Java Code  Show Output  Show Input (PLOP-datasheet-PDFA-1b.pdf) 

<?php
/*
 *
 * Clone PDF/A, PDF/UA and PDF/X standard documents
 * This is useful as basis for additional processing,
 * such as stamping, adding XMP metadata, adding page content, etc.
 *
 * The following aspects of the input document are cloned:
 * - PDF/A, PDF/UA and PDF/X version 
 * - PDF/A or PDF/X output intent (if present)
 * - document language (if present)
 * - all pages including page geometry, i.e. page boxes and Rotate key
 * - the structure elements (tags); if required, an additional
 *   tag is inserted on top of the imported page elements
 * - XMP document metadata
 *   This will generally also clone document info fields since these are
 *   synchronized with XMP in the majority of modern PDF documents.
 *
 * To demonstrate coordinate transformations which may be required
 * to add new page content this topic adds a stamp across all pages.
 *
 * Input documents may conform to any combination of PDF/A, PDF/UA 
 * and PDF/X simultaneously.
 *
 * Note: Except for the names of the input and output documents, the three
 * Cookbook topics clone_pdfa, clone_pdfua and clone_pdfx are exact copies.
 * The topic is included multiple times so that it can easily be found in
 * each of the PDF/A, PDF/UA and PDF/X categories.
 *
 * required software: PDFlib+PDI/PPS 10
 * required data: PDF/A, PDF/UA or PDF/X input document
 */

 /* This is where the data files are. Adjust as necessary. */
$searchpath = dirname(__DIR__, 2) . '/input';

/*
 * Annotations are imported by default, and creating their appearance
 * streams may require the Noto fonts. The font directory is therefore
 * kept as a second search path next to the data directory.
 */
$fontpath = dirname(__DIR__, 2) . '/resource/font';

/* Name of the input document and title of the generated document. */
$pdfinputfile = 'PLOP-datasheet-PDFA-1b.pdf';
$title = 'clone_pdfa';

/* Option list for begin_document(); the script appends to it step by step. */
$optlist = '';

/*
 * Standard flavors which can be cloned, one family per row. "none" is
 * accepted as well so that a document need not conform to all three
 * standards (presumably the value pCOS reports for a missing standard).
 */
$supportedflavors = [
    /* PDF/A */
    'PDF/A-1a:2005', 'PDF/A-1b:2005',
    'PDF/A-2a', 'PDF/A-2b', 'PDF/A-2u',
    'PDF/A-3a', 'PDF/A-3b', 'PDF/A-3u',

    /* PDF/X */
    'PDF/X-3:2003',
    'PDF/X-4', 'PDF/X-4p',
    'PDF/X-5g', 'PDF/X-5pg', 'PDF/X-5n',

    /* PDF/UA */
    'PDF/UA-1',

    /* no conformance claimed */
    'none',
];

/*
 * Clone the input document: open it with PDI, carry over its standard
 * conformance (PDF/A, PDF/UA, PDF/X), language, XMP metadata and output
 * intent to a new document generated in memory, copy every page together
 * with its geometry and tags, stamp each page, and finally send the
 * resulting PDF to the client.
 *
 * Every failure is reported as plain text and terminates the script with
 * exit status 1. The PDF HTTP headers are only sent after the complete
 * output has been generated.
 */
try {
    $p = new pdflib();

    /* Search the data directory as well as the font directory. */
    $p->set_option("searchpath={" . $searchpath . "}");
    $p->set_option("searchpath={" . $fontpath . "}");

    /* This means we must check return values of load_font() etc. */
    $p->set_option("errorpolicy=return");

    /*
     * Open the input PDF, preserve tags if present (for cloning
     * PDF/A-1/2/3a and PDF/UA)
     */
    $indoc = $p->open_pdi_document($pdfinputfile, "usetags=true");
    if ($indoc == 0) {
        throw new Exception("Error: " . $p->get_apiname() . ": "
                . $p->get_errmsg());
    }

    /*
     * Read PDF/A, PDF/UA and PDF/X version of the input document.
     * The keys are used both as pCOS path and as name of the matching
     * begin_document() option.
     */
    $flavors = array(
        "pdfa" => $p->pcos_get_string($indoc, "pdfa"),
        "pdfua" => $p->pcos_get_string($indoc, "pdfua"),
        "pdfx" => $p->pcos_get_string($indoc, "pdfx"),
    );

    /* Needed again below to decide whether a default language is required */
    $pdfuaversion = $flavors["pdfua"];

    /*
     * Refuse any flavor which is not in the list of supported flavors;
     * otherwise request the same flavor for the output document.
     */
    foreach ($flavors as $option => $version) {
        if (!in_array($version, $supportedflavors, true)) {
            throw new Exception("Error: Cannot clone " . $version
                    . " documents");
        }
        $optlist .= " " . $option . "=" . $version;
    }

    /*
     * Read language entry of the input document if present
     */
    if ($p->pcos_get_string($indoc, "type:/Root/Lang") == "string") {
        $inputlang = $p->pcos_get_string($indoc, "/Root/Lang");
        $optlist .= " lang=" . $inputlang;
    }
    else if ($pdfuaversion == "PDF/UA-1") {
        /* PDF/UA documents don't necessarily need the /Lang entry
         * in the Catalog, but PDFlib requires the "lang" option.
         * We supply a default language (which may be wrong) to
         * ensure that such documents can be cloned nevertheless.
         */
        if (!defined("DEFAULT_LANGUAGE")) {
            /* Guarded so that running this code twice raises no warning */
            define("DEFAULT_LANGUAGE", "en");
        }
        $optlist .= " lang=" . DEFAULT_LANGUAGE;
    }

    /*
     * Clone XMP metadata of input document if present. The XMP stream is
     * handed over to begin_document() via a virtual file (PVF); remember
     * whether it was created so that only an existing file is deleted.
     */
    $xmppvf = "/xmp/document.xmp";
    $xmpcreated = false;
    if ($p->pcos_get_string($indoc, "type:/Root/Metadata") == "stream") {
        $xmp = $p->pcos_get_stream($indoc, "", "/Root/Metadata");
        $p->create_pvf($xmppvf, $xmp, "");
        $optlist .= " metadata={filename=" . $xmppvf . "}";
        $xmpcreated = true;
    }

    /*
     * Read Tagged status of input document
     */
    $taggedinput = $p->pcos_get_string($indoc, "tagged") == "true";

    /* Tagged input gets a "Document" tag as root of the cloned structure */
    if ($taggedinput) {
        $optlist .= " tag={tagname=Document}";
    }

    /*
     * Create a new document and clone PDF/A, PDF/UA and PDF/X status.
     * The empty file name means that the output is generated in memory
     * and fetched with get_buffer() below.
     */
    if ($p->begin_document("", $optlist) == 0) {
        throw new Exception("Error: " . $p->get_apiname() . ": "
                . $p->get_errmsg());
    }

    $p->set_info("Creator", "PDFlib Cookbook");
    $p->set_info("Title", $title);

    /*
     * Clone PDF/A or PDF/X output intent
     */
    if ($p->process_pdi($indoc, -1, "action=copyoutputintent") == 0) {
        throw new Exception("Error: " . $p->get_apiname() . ": "
                . $p->get_errmsg());
    }

    /* Create a bookmark with the name of the input document */
    $p->create_bookmark($pdfinputfile, "");

    $endpage = (int) $p->pcos_get_number($indoc, "length:pages");

    /*
     * info_pdi_page() keywords for the lower left corner of the page,
     * indexed by rotation angle / 90. The table is identical for all
     * pages and is therefore built only once.
     */
    $lowerleftcorner = array(
        array("x1", "y1"), /* 0 degrees */
        array("x2", "y2"), /* 90 degrees */
        array("x3", "y3"), /* 180 degrees */
        array("x4", "y4"), /* 270 degrees */
    );

    /* Copy all pages of the input document */
    for ($pageno = 1; $pageno <= $endpage; $pageno++) {
        $page = $p->open_pdi_page($indoc, $pageno, "cloneboxes");

        if ($page == 0) {
            throw new Exception("Error opening page: " . $p->get_errmsg());
        }

        /*
         * Query the geometry of the cloned page. This is required to
         * account for translated or rotated pages if we want to add
         * more contents to the page.
         */
        $phi = $p->info_pdi_page($page, "rotate", "");

        /*
         * Select the lower left corner depending on the rotation angle
         */
        $corner = $lowerleftcorner[intval($phi / 90)];
        $x = $p->info_pdi_page($page, $corner[0], "");
        $y = $p->info_pdi_page($page, $corner[1], "");

        $fittingpossible = true;
        $additionaltag = "";

        if ($taggedinput) {
            $topleveltagcount = (int) $p->info_pdi_page($page,
                "topleveltagcount", "");

            if ($topleveltagcount == 0) {
                /*
                 * The page doesn't contain any structure elements, i.e.
                 * it is empty or contains only Artifacts. Some
                 * applications may decide to skip such pages.
                 *
                 * We add an "Artifact" tag to work around an Acrobat
                 * bug.
                 */
                $additionaltag = "tag={tagname=Artifact} ";
            }
            else if ($p->info_pdi_page($page, "fittingpossible", "") == 0) {
                /*
                 * Try to place the page without any additional tag; if
                 * this doesn't work we insert another tag.
                 */
                $additionaltag = "tag={tagname=P} ";
                if ($p->info_pdi_page($page, "fittingpossible",
                                                $additionaltag) == 0) {
                    $fittingpossible = false;
                }
            }
        }

        if (!$fittingpossible) {
            throw new Exception("Cannot fit page " . $pageno . " of '"
                . $pdfinputfile . "': " . $p->get_errmsg());
        }

        /* Page size may be adjusted by "cloneboxes" option */
        $p->begin_page_ext(0, 0, "width=a4.width height=a4.height");

        /* $additionaltag is only non-empty for tagged input */
        $fitopts = "cloneboxes " . $additionaltag;

        $p->fit_pdi_page($page, 0, 0, $fitopts);

        /*
         * Adjust the coordinate system to facilitate adding new
         * page content on top of the cloned page.
         */
        $p->translate($x, $y);
        $p->rotate($phi);

        $width = $p->info_pdi_page($page, "pagewidth", "");
        $height = $p->info_pdi_page($page, "pageheight", "");

        /*
         * Add some text on each page and tag it as Artifact.
         */
        $stampopts = "fontname=NotoSerif-Regular "
            . "textrendering=1 stamp=ll2ur "
            . "boxsize={" . $width . " " . $height . "}";

        if ($taggedinput) {
            $stampopts .= " tag={tagname=Artifact}";
        }

        $p->fit_textline("Cloned page", 0, 0, $stampopts);

        $p->end_page_ext("");

        $p->close_pdi_page($page);
    }

    $p->end_document("");

    /* Remove the virtual XMP file only if it has been created above */
    if ($xmpcreated) {
        $p->delete_pvf($xmppvf);
    }
    $p->close_pdi_document($indoc);

    /* Fetch the generated PDF from memory and send it to the client */
    $buf = $p->get_buffer();
    $len = strlen($buf);

    header("Content-type: application/pdf");
    header("Content-Length: $len");
    header("Content-Disposition: inline; filename=clone_pdfa.pdf");
    print $buf;
} catch (PDFlibException $e) {
    /* Error raised by PDFlib itself: report number, API name and message */
    echo("PDFlib exception occurred:\n".
        "[" . $e->get_errnum() . "] " . $e->get_apiname() .
        ": " . $e->get_errmsg() . "\n");
    exit(1);
} catch (Throwable $e) {
    /* Any other error, including the Exceptions thrown above */
    echo("PHP exception occurred: " . $e->getMessage() . "\n");
    exit(1);
}

/* Drop the reference to the PDFlib object */
$p = 0;

?>