PDFlib Cookbook

cookbook

interchange/import_xmp_from_pdf

Retrieve the XMP metadata from an imported document and write all document-level XMP metadata to the output PDF.

Download Java Code  Switch to PHP Code  Show Output 

/*
 * Import XMP from PDF:
 * Retrieve the XMP metadata from an imported PDF document and write all
 * document-level XMP metadata to the output PDF 
 * 
 * Maintain existing XMP metadata when processing documents: Read the XMP 
 * stream from the imported document with the pCOS path "/Root/Metadata",
 * create a PVF file from the XMP and feed the document-level metadata to the
 * output document.
 *
 * Required software: PDFlib+PDI/PPS 9
 * Required data: PDF document with XMP metadata
 */
package com.pdflib.cookbook.pdflib.interchange;

import com.pdflib.pdflib;
import com.pdflib.PDFlibException;

/**
 * Cookbook sample: copies all pages of an input PDF into a new output PDF
 * and, if the input document contains a document-level XMP metadata stream
 * ("/Root/Metadata"), attaches that XMP to the output document as well.
 */
public class import_xmp_from_pdf
{
    /**
     * Runs the sample and terminates the JVM with exit code 0 on success
     * or 1 if any exception occurred.
     *
     * @param argv command-line arguments (not used)
     */
    public static void main (String argv[])
    {
        /* This is where the data files are. Adjust as necessary. */
        String searchpath = "../input";

        /* By default annotations are also imported. In some cases this
         * requires the Noto fonts for creating annotation appearance streams.
         * We therefore set the searchpath to also point to the font directory.
         */
        String fontpath = "../resource/font";
        String outfile = "import_xmp_from_pdf.pdf";
        String title = "Import XMP from PDF";

        /* Name of the PDFlib virtual file (PVF) which temporarily holds the
         * XMP data between reading it and starting the output document
         */
        String pvfname = "/pvf/metadata";

        pdflib p = null;
        String pdffile = "PLOP-datasheet.pdf";
        byte[] metadata = null;
        int indoc, pageno, endpage, page;
        int exitcode = 0;

        try {
            p = new pdflib();

            p.set_option("searchpath={" + searchpath + "}");

            p.set_option("searchpath={" + fontpath + "}");

            /* With this setting failures are reported via return values,
             * which is why the results of open_pdi_document(),
             * begin_document() and open_pdi_page() are checked against -1.
             */
            p.set_option("errorpolicy=return");

            /* Open the input PDF */
            indoc = p.open_pdi_document(pdffile, "");
            if (indoc == -1) {
                throw new Exception("Error: " + p.get_errmsg());
            }

            /* Check if any document-level metadata exists in the input
             * document
             */
            String objtype = p.pcos_get_string(indoc, "type:/Root/Metadata");
            if (objtype.equals("stream"))
            {
                /* If document-level metadata exists retrieve it using the pCOS
                 * path "/Root/Metadata". (Similarly, you could retrieve any
                 * existing XMP metadata on page, font, or image level, etc.
                 * using the pCOS path "pages[...]/Metadata",
                 * "images[...]/Metadata", "fonts[...]/Metadata", etc.)
                 */
                metadata = p.pcos_get_stream(indoc, "", "/Root/Metadata");
            }

            /* Only a non-empty XMP stream is passed on to the output */
            boolean havemetadata = metadata != null && metadata.length > 0;
            String docoptlist = "";

            if (havemetadata)
            {
                /* Store the XMP in a PVF so that it can be referenced by
                 * name in the "metadata" option of begin_document()
                 */
                p.create_pvf(pvfname, metadata, "");
                docoptlist = "metadata={filename={" + pvfname + "}}";
            }

            /* Start the output document; the option list is empty if there
             * is no metadata to copy
             */
            if (p.begin_document(outfile, docoptlist) == -1) {
                throw new Exception("Error: " + p.get_errmsg());
            }

            if (havemetadata)
            {
                /* The PVF is only needed for begin_document() */
                p.delete_pvf(pvfname);
            }

            p.set_info("Creator", "PDFlib Cookbook");
            p.set_info("Title", title);

            /* Retrieve the number of pages for the input document */
            endpage = (int) p.pcos_get_number(indoc, "length:pages");

            /* Loop over all pages of the input document */
            for (pageno = 1; pageno <= endpage; pageno++)
            {
                page = p.open_pdi_page(indoc, pageno, "");

                if (page == -1) {
                    throw new Exception("Error: " + p.get_errmsg());
                }

                /* Page size may be adjusted later by fit_pdi_page(). */
                p.begin_page_ext(0, 0, "width=a4.width height=a4.height");

                /* Place the imported page without performing
                 * any changes on the output page
                 */
                p.fit_pdi_page(page, 0, 0, "adjustpage");

                p.end_page_ext("");

                p.close_pdi_page(page);
            }

            /* All pages have been placed: close the input document so that
             * its handle is released explicitly
             */
            p.close_pdi_document(indoc);

            p.end_document("");

        } catch (PDFlibException e) {
            System.err.println("PDFlib exception occurred:");
            System.err.println("[" + e.get_errnum() + "] " + e.get_apiname() +
                ": " + e.get_errmsg());
            exitcode = 1;
        } catch (Exception e) {
            System.err.println(e);
            exitcode = 1;
        } finally {
            /* Always release the PDFlib object before leaving the JVM */
            if (p != null) {
                p.delete();
            }
            System.exit(exitcode);
        }
    }
}