TET Cookbook

cookbook

image/image_count

Count images in a PDF according to various interpretations.

Download Java Code  Show Output  Show Input (FontReporter.pdf) 

/*
 * Count images in a PDF according to various interpretations.
 *
 * Required software: TET 5.2
 * 
 * Required data: PDF document
 *
 */

package com.pdflib.cookbook.tet.image;

import com.pdflib.TETException;
import com.pdflib.TET;

/*
 * Command-line sample that counts the images of a PDF document in several
 * ways:
 *   - the number of image resources reported by pCOS ("length:images")
 *     before and after all pages have been processed,
 *   - the image resources grouped by their "mergetype" (normal,
 *     artificial/merged, consumed),
 *   - the number of placed images returned by get_image_info(), together
 *     with how many of them carry the artifact, annotation, pattern, or
 *     softmask attribute.
 *
 * Usage: image_count <filename>
 */
public class image_count {
    /*
     * Global option list
     */
    static final String globaloptlist = "searchpath={{../input}} ";

    /*
     * Document-specific option list
     */
    static final String docoptlist = "";

    /*
     * Page-specific option list
     */
    static final String pageoptlist = "";

    /*
     * Here you can insert basic image extract options.
     * Note: this option list is not referenced by the code in this class.
     */
    static final String baseimageoptlist = "";

    /*
     * Program entry point.
     *
     * Opens the document named by the single command-line argument, walks
     * over all pages to count the placed images, then inspects the image
     * resources and prints the statistics to stdout. Errors are reported on
     * stderr and result in exit status 1.
     *
     * @param argv Command-line arguments; exactly one is expected: the name
     *             of the PDF document
     */
    public static void main(String argv[]) {
        TET tet = null;

        /*
         * Exit status of the program. The catch blocks only record the
         * failure here; System.exit() is called after the finally block,
         * because calling it inside a catch block would terminate the JVM
         * before the finally block gets a chance to call tet.delete().
         */
        int exitcode = 0;

        try {
            /* image counts for normal, artificial, and consumed images */
            int stats[] = { 0, 0, 0 };

            if (argv.length != 1) {
                throw new Exception("usage: image_count <filename>");
            }

            tet = new TET();

            tet.set_option(globaloptlist);

            int doc = tet.open_document(argv[0], docoptlist);

            if (doc == -1) {
                throw new Exception(
                        "Error " + tet.get_errnum() + " in " + tet.get_apiname() + "(): " + tet.get_errmsg());
            }

            int n_images = (int) tet.pcos_get_number(doc, "length:images");
            System.out.println("Number of image resources before merging: " + n_images);

            /* get number of pages in the document */
            int n_pages = (int) tet.pcos_get_number(doc, "length:pages");

            /* loop over pages in the document (triggers image merging) */
            int placed_images = 0;
            int artifact_images = 0;
            int pattern_images = 0;
            int annotation_images = 0;
            int softmask_images = 0;

            for (int pageno = 1; pageno <= n_pages; ++pageno) {
                int page = tet.open_page(doc, pageno, pageoptlist);

                if (page == -1) {
                    print_tet_error(tet, pageno);
                    continue; /* try next page */
                }

                /*
                 * The page could be opened, but an error number is set:
                 * report it and keep processing the page.
                 */
                if (tet.get_errnum() != 0) {
                    print_tet_error(tet, pageno);
                }

                /*
                 * Retrieve all images on the page. An image may carry
                 * several attribute flags at once, so the flags are tested
                 * independently of each other.
                 */
                while (tet.get_image_info(page) == 1) {
                    placed_images++;

                    if ((tet.attributes & TET.ATTR_ARTIFACT) != 0) {
                        artifact_images++;
                    }
                    if ((tet.attributes & TET.ATTR_ANNOTATION) != 0) {
                        annotation_images++;
                    }
                    if ((tet.attributes & TET.ATTR_PATTERN) != 0) {
                        pattern_images++;
                    }
                    if ((tet.attributes & TET.ATTR_SOFTMASK) != 0) {
                        softmask_images++;
                    }
                }
                tet.close_page(page);
            }

            /* query the resource count again now that all pages were processed */
            n_images = (int) tet.pcos_get_number(doc, "length:images");
            System.out.println("Number of image resources after merging (all types): " + n_images);

            /* loop over image resources in the document */
            int image_resources = 0;
            for (int imageid = 0; imageid < n_images; imageid++) {
                int mergetype = (int) tet.pcos_get_number(doc, "images[" + imageid + "]/mergetype");

                /*
                 * Only the values 0 (normal), 1 (artificial) and 2
                 * (consumed) have a slot in stats[]. Report anything else
                 * instead of aborting the whole report with an
                 * ArrayIndexOutOfBoundsException.
                 */
                if (mergetype < 0 || mergetype >= stats.length) {
                    System.err.println(
                            "Unexpected mergetype " + mergetype + " for image resource " + imageid + " (ignored)");
                    continue;
                }

                stats[mergetype]++;

                /* normal and artificial images are the relevant ones */
                if (mergetype == 0 || mergetype == 1) {
                    image_resources++;
                }
            }

            System.out.println("  normal images: " + stats[0]);
            System.out.println("  artificial (merged) images: " + stats[1]);
            System.out.println("  consumed images: " + stats[2]);

            System.out.println("Number of relevant (normal or artificial) image resources: " + image_resources);

            System.out.println("Number of placed images: " + placed_images);
            System.out.println("  including " + artifact_images + " artifact image(s)");
            System.out.println("  including " + annotation_images + " annotation image(s)");
            System.out.println("  including " + pattern_images + " pattern image(s)");
            System.out.println("  including " + softmask_images + " softmask image(s)");

            tet.close_document(doc);
        } catch (TETException e) {
            System.err.println("TET exception occurred in image_count sample:");
            System.err.println("[" + e.get_errnum() + "] " + e.get_apiname() + ": " + e.get_errmsg());
            exitcode = 1;
        } catch (Exception e) {
            System.err.println(e);
            exitcode = 1;
        } finally {
            if (tet != null) {
                tet.delete();
            }
        }

        /* signal failure to the caller only after the TET object was deleted */
        if (exitcode != 0) {
            System.exit(exitcode);
        }
    }

    /*
     * Report a TET error on stderr, using the error number, API function
     * name and error message currently available from the TET object.
     * 
     * @param tet    The TET object
     * @param pageno The page number on which the error occurred
     */
    private static void print_tet_error(TET tet, int pageno) {
        System.err.println("Error " + tet.get_errnum() + " in " + tet.get_apiname() + "() on page " + pageno + ": "
                + tet.get_errmsg());
    }
}