PDFlib
PDFlib

image_resources

Resource-based image extractor based on PDFlib TET

Required software: TET 4

Download Java Code     Show Output     Show Input PDF

package com.pdflib.cookbook.tet.image;


import com.pdflib.TET;

import com.pdflib.TETException;


/**

 * Resource-based image extractor based on PDFlib TET

 * <p>

 * Required software: TET 4

 * <p>

 * Required data: PDF document

 *

 * @version $Id: image_resources.java,v 1.2 2010/07/27 11:25:51 stm Exp $

 */

public class image_resources

{

    /**

     * Global option list

     */

    static final String globaloptlist = "searchpath={{../input}} ";

   

    /**

     * Document-specific option list

     */

    static final String docoptlist = "";

   

    /**

     * Page-specific option list

     */

    static final String pageoptlist = "";

   

    /**

     * Here you can insert basic image extract options (more below)

     */

    static final String baseimageoptlist = "";



    public static void main (String argv[])

    {

        TET tet = null;

       

        try

        {

            if (argv.length != 1)

            {

                throw new Exception( "usage: image_resources <filename>");

            }


            String outfilebase = argv[0];


            tet = new TET();


            tet.set_option(globaloptlist);


            int doc = tet.open_document(argv[0], docoptlist);


            if (doc == -1)

            {

                throw new Exception("Error " + tet.get_errnum() + " in "

                        + tet.get_apiname() + "(): " + tet.get_errmsg());

            }

           

            /*

             * Images will only be merged upon opening a page.

             * In order to enumerate all merged image resources

             * we open all pages before extracting the images.

             */


            /* get number of pages in the document */

            int n_pages = (int) tet.pcos_get_number(doc, "length:pages");


            /* loop over pages in the document */

            for (int pageno = 1; pageno <= n_pages; ++pageno)

            {

                int page = tet.open_page(doc, pageno, pageoptlist);


                if (page == -1)

                {

                    print_tet_error(tet, pageno);

                    continue;      /* try next page */

                }


                if (tet.get_errnum() != 0)

                {

                    print_tet_error(tet, pageno);

                }


                tet.close_page(page);

            }


            /* get number of image resources in the document */

            int n_images = (int) tet.pcos_get_number(doc, "length:images");


            /* loop over image resources in the document */

            for (int imageid = 0; imageid < n_images; imageid++)

            {

                /* examine image type */

                int mergetype = (int) tet.pcos_get_number(doc,

                            "images["+ imageid + "]/mergetype");


                /* skip images which have been consumed by merging */

                if (mergetype == 0 || mergetype == 1)

                {

                    report_image_info(tet, doc, imageid);


                    /*

                     * Fetch the image data and write it to a disk file. The

                     * output filenames are generated from the input filename by

                     * appending the image ID.

                     */

                    String imageoptlist =

                        "filename={" + outfilebase + "_I" + imageid + "}";


                    if (tet.write_image_file(doc, imageid, imageoptlist) == -1)

                    {

                        print_tet_error(tet, 0);

                        continue;              /* process next image */

                    }

                }

            }


            tet.close_document(doc);

        }

        catch (TETException e)

        {

            System.err.println(

                "TET exception occurred in image_resources sample:");

            System.err.println("[" + e.get_errnum() + "] " + e.get_apiname() +

                            ": " + e.get_errmsg());

        }

        catch (Exception e)

        {

            System.err.println(e.getMessage());

        }

        finally

        {

            if (tet != null) {

                tet.delete();

            }

        }

    }


    /**

     * Report image info.

     *

     * Print the following information for each image:

     *

     * - page and image number

     * - pCOS id (required for indexing the images[] array)

     * - physical size of the placed image on the page

     * - pixel size of the underlying PDF image

     * - number of components, bits per component,and colorspace

     * - mergetype if different from "normal", i.e. "artificial" (=merged)

     *   or "consumed"

     *   

     * @param tet The TET object

     * @param doc The document handle

     * @param imageid The image ID

     */

    private static void report_image_info(TET tet, int doc, int imageid)

        throws com.pdflib.TETException

    {

        int width, height, bpc, cs, mergetype;


        width = (int) tet.pcos_get_number(doc,

                    "images[" + imageid + "]/Width");

        height = (int) tet.pcos_get_number(doc,

                    "images[" + imageid + "]/Height");

        bpc = (int) tet.pcos_get_number(doc,

                    "images[" + imageid + "]/bpc");

        cs = (int) tet.pcos_get_number(doc,

                    "images[" + imageid + "]/colorspaceid");


        System.out.print("image I" + imageid)

        System.out.print(", " + width + "x" + height + " pixel, ");

       

        if (cs != -1)

        {

            System.out.print(

                (int) tet.pcos_get_number(doc, "colorspaces["

                + cs + "]/components") + "x" + bpc + " bit " +

                tet.pcos_get_string(doc, "colorspaces[" + cs

                + "]/name"));

        }

        else {

            /* cs==-1 may happen for some JPEG 2000 images. bpc,

             * colorspace name and number of components are not

             * available in this case.

             */

            System.out.print("JPEG2000");

        }


        mergetype = (int) tet.pcos_get_number(doc,

                    "images["+ imageid + "]/mergetype");


        /* mergetype==0 means normal image */

        if (mergetype != 0)

        {

            System.out.print(", mergetype=");

            if (mergetype == 1)

                System.out.print("artificial");

            else

                System.out.print("consumed");

        }

        System.out.println("");

    }


    /**

     * Report a TET error.

     *

     * @param tet The TET object

     * @param pageno The page number on which the error occurred

     */

    private static void print_tet_error(TET tet, int pageno)

    {

        System.err.println("Error " + tet.get_errnum() + " in  "

                + tet.get_apiname() + "() on page " + pageno + ": "

                + tet.get_errmsg());

    }

}