// Cookbook topic: interchange/retrieve_associated_files
// Retrieve associated files from a document.
// Sample input: associated_files.pdf
/*
* Retrieve associated files from a PDF document
*
* Associated files are available in PDF 2.0 and PDF/A-3. They are
* searched in the following locations:
* - document (e.g. ZUGFeRD and Factur-X)
* - document-level and page-level XMP metadata (PDF 2.0 only)
* - root of the structure element hierarchy, i.e. the structure tree root (PDF 2.0 only)
* - pages
* - Form XObjects
* - Image XObjects
* - annotations
*
* The following locations are also possible, but are not searched:
* - structure elements
* - marked content
* - DPart (document part) node (PDF 2.0 only)
*
* Searching regular document attachments (in EmbeddedFiles) is not sufficient
* since associated files are not required to be listed there. Also, we are
* interested in the location where the associated files are hooked up.
*
* Required software: pCOS interface 8 (PDFlib+PDI/PPS 9, TET 4.1, PLOP 5.0)
* Required data: PDF document
*/
package com.pdflib.cookbook.pcos.interchange;
import com.pdflib.IpCOS;
import com.pdflib.cookbook.pcos.pcos_cookbook_example;
/**
 * Cookbook example that lists the associated files ("AF" arrays) attached to
 * various objects of a PDF document: the catalog, the document-level XMP
 * metadata, the structure tree root, and every page together with its XMP
 * metadata, images, templates and annotations.
 */
public class retrieve_associated_files extends pcos_cookbook_example {
    /* Location of the input data files. Adjust as necessary. */
    private final static String SEARCH_PATH = "../input";

    /**
     * Creates the example; all arguments are handed to the base class
     * constructor unchanged.
     */
    public retrieve_associated_files(String[] argv, String readable_name,
            String search_path) {
        super(argv, readable_name, search_path);
    }

    /** Command-line entry point: builds the example and executes it. */
    public static void main(String argv[]) {
        retrieve_associated_files example = new retrieve_associated_files(argv,
                "Retrieve associated files", SEARCH_PATH);
        example.execute();
    }

    /**
     * Prints the name of the input file (read from the pCOS path "filename")
     * and then reports all associated files found in the document.
     *
     * @param p   pCOS interface used for all queries
     * @param doc handle of the opened document
     * @throws Exception if a pCOS query fails
     */
    public void example_code(IpCOS p, int doc) throws Exception {
        String input_name = p.pcos_get_string(doc, "filename");
        System.out.println("Input file name: " + input_name);
        get_associated_files(p, input_name, doc);
    }

    /**
     * Visits every location that is searched for associated files, in a
     * fixed order: document, document-level XMP, structure tree root, then
     * all pages (page plus page XMP), then the images, templates and
     * annotations of all pages.
     *
     * @param p        pCOS interface used for all queries
     * @param filename name of the input file (currently not used here)
     * @param doc      handle of the opened document
     * @throws Exception if a pCOS query fails
     */
    private void get_associated_files(IpCOS p, String filename, int doc)
            throws Exception {
        // Document level, document-level XMP metadata, structure tree root
        dump_af(p, "document", doc, "/Root");
        dump_af(p, "document-level XMP", doc, "/Root/Metadata");
        dump_af(p, "structure tree root", doc, "/Root/StructTreeRoot");

        // Page level and page-level XMP metadata
        final int page_total = (int) p.pcos_get_number(doc, "length:pages");
        for (int page = 0; page < page_total; page++) {
            String page_path = "pages[" + page + "]";
            String page_label = "page " + (page + 1);
            dump_af(p, page_label, doc, page_path);
            dump_af(p, page_label + " XMP", doc, page_path + "/Metadata");
        }

        // Image level, template (Form XObject) level, annotation level;
        // each kind is handled in its own complete pass over all pages.
        dump_page_objects(p, doc, page_total, "images", ", image ");
        dump_page_objects(p, doc, page_total, "templates", ", template ");
        dump_page_objects(p, doc, page_total, "annots", ", annotation ");
    }

    /**
     * Reports the associated files of every element of one per-page array
     * (e.g. "images") for all pages.
     *
     * @param p          pCOS interface used for all queries
     * @param doc        handle of the opened document
     * @param page_total number of pages to visit
     * @param collection name of the array below "pages[n]/"
     * @param item_label text placed between the page number and the element
     *                   index in the reported object name
     * @throws Exception if a pCOS query fails
     */
    private void dump_page_objects(IpCOS p, int doc, int page_total,
            String collection, String item_label) throws Exception {
        for (int page = 0; page < page_total; page++) {
            String array_path = "pages[" + page + "]/" + collection;
            int item_total = (int) p.pcos_get_number(doc, "length:" + array_path);
            for (int item = 0; item < item_total; item++) {
                // Page numbers are printed 1-based, element indices 0-based
                dump_af(p, "page " + (page + 1) + item_label + item, doc,
                        array_path + "[" + item + "]");
            }
        }
    }

    /**
     * Prints all entries of the "AF" array below the given object, if any.
     * Nothing is printed when the "AF" entry has type "null"; an error line
     * is printed when it exists but is not an array.
     *
     * @param p          pCOS interface used for all queries
     * @param ObjectName human-readable name of the object for the output
     * @param doc        handle of the opened document
     * @param ObjectPath pCOS path of the object that may carry "AF"
     * @throws Exception if a pCOS query fails
     */
    private void dump_af(IpCOS p, String ObjectName, int doc, String ObjectPath) throws Exception {
        final String array_path = ObjectPath + "/AF";
        final String array_type = p.pcos_get_string(doc, "type:" + array_path);

        if (array_type.equals("null")) {
            /* No associated files present */
            return;
        }
        if (!array_type.equals("array")) {
            System.out.println("Error: '" + array_path + "' doesn't have type array");
            return;
        }

        System.out.println("Associated file(s) for " + ObjectName + ":");

        final int file_total = (int) p.pcos_get_number(doc, "length:" + array_path);
        for (int index = 0; index < file_total; index++) {
            dump_file_spec(p, doc, index, array_path + "[" + index + "]");
        }
    }

    /**
     * Prints the details of a single "AF" array element: file name,
     * relationship, description and the length of the embedded stream,
     * each only if the corresponding entry has the expected type.
     *
     * @param p         pCOS interface used for all queries
     * @param doc       handle of the opened document
     * @param index     position of the element within its "AF" array
     * @param spec_path pCOS path of the array element
     * @throws Exception if a pCOS query fails
     */
    private void dump_file_spec(IpCOS p, int doc, int index, String spec_path) throws Exception {
        System.out.println("\tfile " + index + ": " + spec_path);

        // The Unicode filename wins; the plain filename is only a fallback
        boolean has_unicode_name =
                print_entry(p, doc, spec_path + "/UF", "string", "\t\tUnicode filename='");
        if (!has_unicode_name) {
            print_entry(p, doc, spec_path + "/F", "string", "\t\tfilename='");
        }

        print_entry(p, doc, spec_path + "/AFRelationship", "name", "\t\trelationship='");
        print_entry(p, doc, spec_path + "/Desc", "string", "\t\tdescription='");

        String stream_path = spec_path + "/EF/F";
        if (p.pcos_get_string(doc, "type:" + stream_path).equals("stream")) {
            /* Retrieve contents of the associated file */
            // NOTE(review): the empty second argument is presumably an
            // option list - confirm against the pCOS API reference
            byte[] data = p.pcos_get_stream(doc, "", stream_path);
            System.out.println("\t\tlength=" + data.length);
        }
    }

    /**
     * Prints {@code prefix + value + "'"} for the object at the given path,
     * but only if the object's pCOS type equals the expected type.
     *
     * @param p             pCOS interface used for all queries
     * @param doc           handle of the opened document
     * @param path          pCOS path of the entry to print
     * @param expected_type pCOS type name the entry must have
     * @param prefix        text printed in front of the value
     * @return true if the entry had the expected type and was printed
     * @throws Exception if a pCOS query fails
     */
    private boolean print_entry(IpCOS p, int doc, String path,
            String expected_type, String prefix) throws Exception {
        String actual_type = p.pcos_get_string(doc, "type:" + path);
        if (!actual_type.equals(expected_type)) {
            return false;
        }
        System.out.println(prefix + p.pcos_get_string(doc, path) + "'");
        return true;
    }
}