PDFlib Cookbook

cookbook

interchange/import_xmp_from_pdf

Retrieve the XMP metadata from an imported document and write all document-level XMP metadata to the output PDF.

Download PHP Code  Switch to Java Code  Show Output 

<?php
/*
 * Import XMP from PDF:
 * Retrieve the XMP metadata from an imported PDF document and write all
 * document-level XMP metadata to the output PDF 
 * 
 * Maintain existing XMP metadata when processing documents: Read the XMP 
 * stream from the imported document with the pCOS path "/Root/Metadata",
 * create a PVF file from the XMP and feed the document-level metadata to the
 * output document.
 *
 * Required software: PDFlib+PDI/PPS 9
 * Required data: PDF document with XMP metadata
 */

/* This is where the data files are. Adjust as necessary. */
$searchpath = dirname(__FILE__,3)."/input";
$outfile = "";
$title = "Import XMP from PDF";

$pdffile = "PLOP-datasheet.pdf";

try {
    $p = new PDFlib();

    $p->set_option("searchpath={" . $searchpath . "}");

    /* This means we must check return values of load_font() etc. */
    $p->set_option("errorpolicy=return");
    $p->set_option("stringformat=utf8");
    
    /* Open the input PDF */
    $indoc = $p->open_pdi_document($pdffile, "");
    if ($indoc == 0)
        throw new Exception("Error: " + $p->get_errmsg());
    
    /* Check if any document-level metadata exists in the input document */
    $objtype = $p->pcos_get_string($indoc, "type:/Root/Metadata");
    if ($objtype =="stream")
    {
        /* If document-level metadata exists retrieve it using the pCOS
         * path "/Root/Metadata". (Similarly, you could retrieve any
         * existing XMP metadata on page, font, or image level, etc. using
         * the pCOS path "pages[...]/Metadata", "images[...]/Metadata",
         * "fonts[...]/Metadata", etc.)
         */
        $metadata = $p->pcos_get_stream($indoc, "", "/Root/Metadata");
    }

    if ($metadata != "")
    {
        /* If document-level metadata is available, store it in a
         * PDFlib virtual file (PVF)
         */
        $p->create_pvf("/pvf/metadata", $metadata, "");
                    
        /* Start the output document and copy the XMP metadata from the PVF
         * to it 
         */
        if ($p->begin_document($outfile,
            "metadata={filename=/pvf/metadata}") == 0)
            throw new Exception("Error: " + $p->get_errmsg());
        
        $p->delete_pvf("/pvf/metadata");
    }
    else
    {
        /* Start the output document without copying any metadata */
        if ($p->begin_document($outfile, "") == 0)
            throw new Exception("Error: " + $p->get_errmsg());
    }
     
    $p->set_info("Creator", "PDFlib Cookbook");
    $p->set_info("Title", $title);

    /* Retrieve the number of pages for the input document */
    $endpage = $p->pcos_get_number($indoc, "length:pages");

    /* Loop over all pages of the input document */
    for ($pageno = 1; $pageno <= $endpage; $pageno++)
    {
        $page = $p->open_pdi_page($indoc, $pageno, "");

        if ($page == 0)
            throw new Exception("Error: " + $p->get_errmsg());

        /* Page size may be adjusted by fit_pdi_page() */
        $p->begin_page_ext(0, 0, "width=a4.width height=a4.height");

        /* Place the imported page without performing
         * any changes on the output page
         */
        $p->fit_pdi_page($page, 0, 0, "adjustpage");
        
        $p->end_page_ext("");

        $p->close_pdi_page($page);
    }

    $p->end_document("");

    $buf = $p->get_buffer();
    $len = strlen($buf);

    header("Content-type: application/pdf");
    header("Content-Length: $len");
    header("Content-Disposition: inline; filename=import_xmp_from_pdf.pdf");
    print $buf;

} catch (PDFlibException $e) {
    echo("PDFlib exception occurred:\n".
        "[" . $e->get_errnum() . "] " . $e->get_apiname() .
        ": " . $e->get_errmsg() . "\n");
    exit(1);
} catch (Exception $e) {
    echo($e->getMessage());
    exit(1);
}

$p = 0;

?>