TET Cookbook

cookbook

image/image_resources

Resource-based image extractor based on PDFlib TET.

Download Java Code  Show Output  Show Input (TET-datasheet.pdf) 

package com.pdflib.cookbook.tet.image;

import com.pdflib.TET;
import com.pdflib.TETException;

/**
 * Resource-based image extractor based on PDFlib TET
 * <p>
 * Required software: TET 5
 * <p>
 * Required data: PDF document
 * 
 */
public class image_resources {
    /**
     * Global option list
     */
    static final String globaloptlist = "searchpath={{../input}}";

    /**
     * Document-specific option list
     */
    static final String docoptlist = "";

    /**
     * Page-specific option list e.g. "imageanalysis={merge={gap=2}"
     */
    static final String pageoptlist = "";

    public static void main(String argv[]) {
        TET tet = null;

        try {
            if (argv.length != 1) {
                throw new Exception("usage: image_resources <filename>");
            }

            String outfilebase = argv[0];
            if (outfilebase.substring(outfilebase.length() - 4).equalsIgnoreCase(".pdf"))
                outfilebase = outfilebase.substring(0, outfilebase.length() - 4);

            tet = new TET();

            tet.set_option(globaloptlist);

            int doc = tet.open_document(argv[0], docoptlist);

            if (doc == -1) {
                throw new Exception(
                        "Error " + tet.get_errnum() + " in " + tet.get_apiname() + "(): " + tet.get_errmsg());
            }

            /*
             * Images will only be merged upon opening a page. In order to enumerate all
             * merged image resources we open all pages before extracting the images.
             */

            /* Get number of pages in the document */
            int n_pages = (int) tet.pcos_get_number(doc, "length:pages");

            /* Loop over pages to trigger image merging */
            for (int pageno = 1; pageno <= n_pages; ++pageno) {
                int page = tet.open_page(doc, pageno, pageoptlist);

                if (page == -1) {
                    print_tet_error(tet, pageno);
                    continue; /* process next page */
                }

                if (tet.get_errnum() != 0) {
                    print_tet_error(tet, pageno);
                }

                tet.close_page(page);
            }

            /* Get the number of pages in the document */
            int n_images = (int) tet.pcos_get_number(doc, "length:images");

            /* Loop over image resources */
            for (int imageid = 0; imageid < n_images; imageid++) {
                String imageoptlist;
                /* Skip images which hve been consumed by merging */
                int mergetype = (int) tet.pcos_get_number(doc, "images[" + imageid + "]/mergetype");

                /* Skip images which have been flagged by the "small image" filter */
                if (mergetype == 2)
                    continue;

                /* Skip small images (see suboptions heightrange/sizerange/widthrange of "imageanalysis") */
                if (tet.pcos_get_number(doc, "images[" + imageid + "]/small") == 1)
                    continue;

                /* Report image details: pixel geometry, color space etc. */
                report_image_info(tet, doc, imageid);

                /* Write image data to file */
                imageoptlist = "filename={" + outfilebase + "_I" + imageid + "}";

                if (tet.write_image_file(doc, imageid, imageoptlist) == -1) {
                    System.out.println(
                            "\nError [" + tet.get_errnum() + " in " + tet.get_apiname() + "(): " + tet.get_errmsg());
                    continue; /* try next image */
                }
            }

            tet.close_document(doc);
        } catch (TETException e) {
            System.err.println("TET exception occurred in image_resources sample:");
            System.err.println("[" + e.get_errnum() + "] " + e.get_apiname() + ": " + e.get_errmsg());
        } catch (Exception e) {
            System.err.println(e);
        } finally {
            if (tet != null) {
                tet.delete();
            }
        }
    }

    /**
     * Report image info.
     * 
     * Print the following information for each image:
     * 
     * - page and image number - pCOS id (required for indexing the images[] array)
     * - physical size of the placed image on the page - pixel size of the
     * underlying PDF image - number of components, bits per component,and
     * colorspace - mergetype if different from "normal", i.e. "artificial"
     * (=merged) or "consumed"
     * 
     * @param tet     The TET object
     * @param doc     The document handle
     * @param imageid The image ID
     */
    private static void report_image_info(TET tet, int doc, int imageid) throws com.pdflib.TETException {
        int width, height, bpc, cs, components, mergetype, stencilmask, maskid;
        String csname;

        width = (int) tet.pcos_get_number(doc, "images[" + imageid + "]/Width");
        height = (int) tet.pcos_get_number(doc, "images[" + imageid + "]/Height");
        bpc = (int) tet.pcos_get_number(doc, "images[" + imageid + "]/bpc");
        cs = (int) tet.pcos_get_number(doc, "images[" + imageid + "]/colorspaceid");
        components = (int) tet.pcos_get_number(doc, "colorspaces[" + cs + "]/components");

        System.out.print("image " + imageid);
        System.out.print(": " + width + "x" + height + " pixel, ");

        csname = tet.pcos_get_string(doc, "colorspaces[" + cs + "]/name");

        System.out.print(components + "x" + bpc + " bit " + csname);

        if (csname.equals("Indexed")) {
            int basecs = 0;
            String basecsname;
            basecs = (int) tet.pcos_get_number(doc, "colorspaces[" + cs + "]/baseid");
            basecsname = tet.pcos_get_string(doc, "colorspaces[" + basecs + "]/name");
            System.out.print(" " + basecsname);
        }

        /* Check whether this image has been created by merging smaller images */
        mergetype = (int) tet.pcos_get_number(doc, "images[" + imageid + "]/mergetype");
        if (mergetype == 1)
            System.out.print(", mergetype=artificial");

        stencilmask = (int) tet.pcos_get_number(doc, "images[" + imageid + "]/stencilmask");
        if (stencilmask == 1)
            System.out.print(", used as stencil mask");

        /* Check whether the image has an attached mask */
        maskid = (int) tet.pcos_get_number(doc, "images[" + imageid + "]/maskid");
        if (maskid != -1)
            System.out.print(", masked with image " + maskid);

        System.out.println("");
    }

    /**
     * Report a TET error.
     * 
     * @param tet    The TET object
     * @param pageno The page number on which the error occurred
     */
    private static void print_tet_error(TET tet, int pageno) {
        System.err.println("Error " + tet.get_errnum() + " in " + tet.get_apiname() + "() on page " + pageno + ": "
                + tet.get_errmsg());
    }
}