pCOS Cookbook

cookbook

interchange/zugferd_retrieve_XML

Download Java Code     Show Output     Show Input PDF

package com.pdflib.cookbook.pcos.interchange;

import java.io.FileOutputStream;
import java.io.OutputStream;

import com.pdflib.IpCOS;
import com.pdflib.cookbook.pcos.pcos_cookbook_example;

/**
 * Retrieve the XML invoice data from a ZUGFeRD document.
 * <p>
 * Required software: pCOS interface 3 (pCOS 3.x, PDFlib+PDI/PPS 7.x, TET 2.2,
 * PLOP 3.x)<br>
 * Required data: PDF document
 * 
 * @version $Id: zugferd_retrieve_XML.java,v 1.4 2015/11/16 11:53:16 stm Exp $
 */
public class zugferd_retrieve_XML extends pcos_cookbook_example {

    /* This is where the data files are. Adjust as necessary. */
    private final static String SEARCH_PATH = "../input";

    /**
     * Prints the name of the input document and extracts its ZUGFeRD XML
     * invoice attachment to a file.
     *
     * @param p   pCOS object used to query the document
     * @param doc document handle that is passed on to the pCOS calls
     * @throws Exception if the document does not contain the expected
     *                   invoice attachment or the invoice cannot be written
     */
    public void example_code(IpCOS p, int doc) throws Exception {

        String filename = p.pcos_get_string(doc, "filename");
        System.out.println("Input file name: " + filename);

        get_zugferd_invoice(p, filename, doc);
    }

    /**
     * Reports a violation of the ZUGFeRD 1.0 standard. This method never
     * returns normally; it always throws.
     *
     * @param filename name of the offending input document
     * @param text     description of the problem, appended to the message
     * @throws Exception always, carrying the assembled error message
     */
    private void bad_format_exception(String filename, String text)
        throws Exception
    {
        throw new Exception(
            "Input document " + filename
            + " doesn't conform to the ZUGFeRD 1.0 standard ("
            + text + ")");
    }

    /**
     * Writes the given bytes to a file, closing the output stream even if
     * writing fails so that no file handle is leaked.
     *
     * @param file_name name of the file to create or overwrite
     * @param contents  bytes to write
     * @throws Exception if the file cannot be opened, written or closed
     */
    private void write_invoice_file(String file_name, byte[] contents)
        throws Exception
    {
        OutputStream os = new FileOutputStream(file_name);
        try {
            os.write(contents);
        }
        finally {
            os.close();
        }
    }

    /**
     * Searches the document's embedded files for the ZUGFeRD invoice and
     * writes its contents to a file named "ZUGFeRD-invoice.xml" (relative
     * path, as passed to {@link FileOutputStream}).
     * <p>
     * The search stops at the first attachment whose /UF entry equals the
     * invoice name. Attachments without a /UF string entry only produce a
     * warning on stderr and are skipped.
     *
     * @param p        pCOS object used to query the document
     * @param filename name of the input document, used in error messages
     * @param doc      document handle that is passed on to the pCOS calls
     * @throws Exception if the document has no attachments, if no attachment
     *                   carries the invoice name, if the matching attachment
     *                   has no /EF/UF stream, or if writing the file fails
     */
    private void get_zugferd_invoice(IpCOS p, String filename, int doc)
        throws Exception {
        /*
         * Name of invoice data is always "ZUGFeRD-invoice.xml" starting with
         * ZUGFeRD 1.0.
         */
        final String xml_invoice_name = "ZUGFeRD-invoice.xml";

        /*
         * ZUGFeRD demands that the invoice associated file is also stored
         * as a normal attachment, therefore we can access it through the
         * names/EmbeddedFiles pseudo object.
         */
        int attachment_count = (int) p.pcos_get_number(doc,
            "length:names/EmbeddedFiles");

        if (attachment_count <= 0) {
            /* bad_format_exception() always throws. */
            bad_format_exception(filename,
                "PDF document does not have any attachments");
        }

        boolean found = false;
        for (int i = 0; !found && i < attachment_count; i += 1) {
            String attachment = "names/EmbeddedFiles[" + i + "]";

            /*
             * PDF/A-3 requires the presence of the /UF entry in the
             * File Specification Dictionary
             */
            String objtype = p.pcos_get_string(doc, "type:"
                + attachment + "/UF");

            if (!"string".equals(objtype)) {
                System.err.println("Warning: /UF key is missing in "
                    + attachment);
                continue;
            }

            String name = p.pcos_get_string(doc, attachment + "/UF");

            if (!xml_invoice_name.equals(name)) {
                /* Some other attachment; keep looking. */
                continue;
            }

            /*
             * There must be a corresponding "/UF" entry in the
             * EF dictionary.
             */
            objtype = p.pcos_get_string(doc, "type:"
                + attachment + "/EF/UF");

            if (!"stream".equals(objtype)) {
                bad_format_exception(filename,
                    "/UF embedded file stream with XML file "
                    + "contents is missing");
            }

            /*
             * Get the contents of the invoice and write it to a file.
             */
            byte[] invoice_contents = p.pcos_get_stream(doc,
                "", attachment + "/EF/UF");

            write_invoice_file(xml_invoice_name, invoice_contents);

            found = true;
            System.out.println(
                "Wrote XML invoice attachment to file \""
                + xml_invoice_name + "\"");
        }

        if (!found) {
            bad_format_exception(filename,
                "Did not find an invoice attachment with name \""
                    + xml_invoice_name + "\"");
        }
    }

    /**
     * Creates the example; all arguments are forwarded unchanged to the
     * {@code pcos_cookbook_example} constructor.
     *
     * @param argv               command line arguments
     * @param readable_name      human-readable name of the example
     * @param search_path        directory where the data files are
     * @param full_rcs_file_name RCS file name keyword string
     * @param revision           RCS revision keyword string
     */
    public zugferd_retrieve_XML(String[] argv, String readable_name,
        String search_path, String full_rcs_file_name, String revision) {
        super(argv, readable_name, search_path, full_rcs_file_name, revision);
    }

    /**
     * Command line entry point: constructs the example and executes it.
     *
     * @param argv command line arguments, passed on to the example
     */
    public static void main(String argv[]) {
        zugferd_retrieve_XML example = new zugferd_retrieve_XML(argv,
            "Extract ZUGFeRD Invoice", SEARCH_PATH,
            "$RCSfile: zugferd_retrieve_XML.java,v $", "$Revision: 1.4 $");
        example.execute();
    }
}