pCOS Cookbook

cookbook

interchange/retrieve_associated_files updated

Retrieve associated files from a document.

Download Java Code  Show Output  Show Input (associated_files.pdf) 

/*
 * Retrieve associated files from a PDF document
 * 
 * Associated files are available in PDF 2.0 and PDF/A-3. They are
 * searched in the following locations:
 * - document (e.g. ZUGFeRD and Factur-X)
 * - document-level and page-level XMP metadata (PDF 2.0 only)
 * - structure tree root, i.e. the top of the structure element hierarchy (PDF 2.0 only)
 * - pages
 * - Form XObjects
 * - Image XObjects
 * - annotations
 * 
 * The following locations are also possible, but are not searched:
 * - structure elements
 * - marked content
 * - DPart (document part) node (PDF 2.0 only)
 * 
 * Searching regular document attachments (in EmbeddedFiles) is not sufficient
 * since associated files are not required to be listed there. Also, we are
 * interested in the location where the associated files are hooked up.
 *
 * Required software: pCOS interface 8 (PDFlib+PDI/PPS 9, TET 4.1, PLOP 5.0)
 * Required data: PDF document
 */

package com.pdflib.cookbook.pcos.interchange;

import com.pdflib.IpCOS;
import com.pdflib.cookbook.pcos.pcos_cookbook_example;

/**
 * Cookbook example that lists the associated files (the "AF" entries) which
 * are attached to various objects of a PDF document.
 *
 * <p>The following pCOS objects are probed for an "AF" array: the document
 * catalog, the document-level XMP metadata, the structure tree root, every
 * page and its XMP metadata, and every image, template (Form XObject) and
 * annotation of every page. For each associated file found, its file name,
 * relationship, description and embedded stream length are printed to
 * standard output.
 */
public class retrieve_associated_files extends pcos_cookbook_example {

    /* This is where the data files are. Adjust as necessary. */
    private static final String SEARCH_PATH = "../input";

    /**
     * Prints the name of the input document and then all associated files
     * that can be found in it.
     *
     * @param p   pCOS interface used for all queries
     * @param doc handle of the opened document
     * @throws Exception if a pCOS query fails
     */
    public void example_code(IpCOS p, int doc) throws Exception {
        String filename = p.pcos_get_string(doc, "filename");
        System.out.println("Input file name: " + filename);

        get_associated_files(p, filename, doc);
    }

    /**
     * Prints the associated files hooked up to a single PDF object.
     *
     * <p>Nothing is printed if the object has no "AF" entry. An error line is
     * printed (and the object is skipped) if "AF" exists but is not an array.
     *
     * @param p          pCOS interface used for all queries
     * @param ObjectName human-readable name of the object, used in the output
     * @param doc        handle of the opened document
     * @param ObjectPath pCOS path of the object whose "AF" entry is examined
     * @throws Exception if a pCOS query fails
     */
    private void dump_af(IpCOS p, String ObjectName, int doc, String ObjectPath) throws Exception {
        final String af_path = ObjectPath + "/AF";
        final String af_type = p.pcos_get_string(doc, "type:" + af_path);

        if ("null".equals(af_type)) {
            /* No associated files present */
            return;
        }

        if (!"array".equals(af_type)) {
            System.out.println("Error: '" + af_path + "' doesn't have type array");
            return;
        }

        System.out.println("Associated file(s) for " + ObjectName + ":");
        final int af_count = (int) p.pcos_get_number(doc, "length:" + af_path);

        for (int af = 0; af < af_count; af++) {
            final String entry_path = af_path + "[" + af + "]";

            System.out.println("\tfile " + af + ": " + entry_path);

            /* Prefer the Unicode file name; fall back to the plain file name. */
            if (!print_typed_value(p, doc, entry_path + "/UF", "string", "Unicode filename")) {
                print_typed_value(p, doc, entry_path + "/F", "string", "filename");
            }

            print_typed_value(p, doc, entry_path + "/AFRelationship", "name", "relationship");
            print_typed_value(p, doc, entry_path + "/Desc", "string", "description");

            final String stream_path = entry_path + "/EF/F";
            if ("stream".equals(p.pcos_get_string(doc, "type:" + stream_path))) {
                /* Retrieve contents of the associated file */
                byte[] contents = p.pcos_get_stream(doc, "", stream_path);
                System.out.println("\t\tlength=" + contents.length);
            }
        }
    }

    /**
     * Prints one labelled value if the object at the given path has the
     * expected pCOS type; otherwise prints nothing.
     *
     * @param p             pCOS interface used for all queries
     * @param doc           handle of the opened document
     * @param path          pCOS path of the value
     * @param expected_type pCOS type name the value must have to be printed
     * @param label         label placed in front of the value in the output
     * @return {@code true} if the value had the expected type and was printed
     * @throws Exception if a pCOS query fails
     */
    private boolean print_typed_value(IpCOS p, int doc, String path, String expected_type,
        String label) throws Exception {

        if (!expected_type.equals(p.pcos_get_string(doc, "type:" + path))) {
            return false;
        }

        System.out.println("\t\t" + label + "='" + p.pcos_get_string(doc, path) + "'");
        return true;
    }

    /**
     * Walks all supported locations of the document and prints the associated
     * files found at each of them.
     *
     * @param p        pCOS interface used for all queries
     * @param filename name of the input document (currently not used here)
     * @param doc      handle of the opened document
     * @throws Exception if a pCOS query fails
     */
    private void get_associated_files(IpCOS p, String filename, int doc)
        throws Exception {

        // Search associated files on document level
        dump_af(p, "document", doc, "/Root");

        // Search associated files on document-level XMP metadata
        dump_af(p, "document-level XMP", doc, "/Root/Metadata");

        // Search associated files on the structure tree root
        dump_af(p, "structure tree root", doc, "/Root/StructTreeRoot");

        // Search associated files on page level and page-level XMP
        final int page_count = (int) p.pcos_get_number(doc, "length:pages");

        for (int i = 0; i < page_count; i++) {
            final String page_path = "pages[" + i + "]";
            final String page_name = "page " + (i + 1);

            dump_af(p, page_name, doc, page_path);
            dump_af(p, page_name + " XMP", doc, page_path + "/Metadata");
        }

        // Search associated files on image level
        dump_af_for_page_items(p, doc, page_count, "images", "image");

        // Search associated files on template (Form XObject) level
        dump_af_for_page_items(p, doc, page_count, "templates", "template");

        // Search associated files on annotation level
        dump_af_for_page_items(p, doc, page_count, "annots", "annotation");
    }

    /**
     * Prints the associated files of every element of a per-page array
     * (e.g. the images or annotations of each page), page by page.
     *
     * @param p          pCOS interface used for all queries
     * @param doc        handle of the opened document
     * @param page_count number of pages to visit
     * @param collection name of the array below "pages[i]/" to iterate over
     * @param item_name  human-readable singular name of one array element,
     *                   used in the output together with its zero-based index
     * @throws Exception if a pCOS query fails
     */
    private void dump_af_for_page_items(IpCOS p, int doc, int page_count, String collection,
        String item_name) throws Exception {

        for (int i = 0; i < page_count; i++) {
            final String base = "pages[" + i + "]/" + collection;
            final int item_count = (int) p.pcos_get_number(doc, "length:" + base);

            for (int j = 0; j < item_count; j++) {
                dump_af(p, "page " + (i + 1) + ", " + item_name + " " + j, doc,
                    base + "[" + j + "]");
            }
        }
    }

    /**
     * Creates the example; all arguments are passed through to the
     * {@code pcos_cookbook_example} constructor.
     *
     * @param argv          command line arguments
     * @param readable_name human-readable name of this example
     * @param search_path   directory where the input data files are located
     */
    public retrieve_associated_files(String[] argv, String readable_name,
        String search_path) {
        super(argv, readable_name, search_path);
    }

    /**
     * Command line entry point: builds the example and runs it.
     *
     * @param argv command line arguments, passed on to the example
     */
    public static void main(String[] argv) {
        retrieve_associated_files example = new retrieve_associated_files(argv,
            "Retrieve associated files", SEARCH_PATH);
        example.execute();
    }
}