/* Resource-based image extractor based on PDFlib TET
 *
 * $Id: image_resources.c,v 1.3 2010/07/21 13:34:16 rjs Exp $
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "tetlib.h"

/* Global option list: handed to TET_set_option() in main() before the
 * document is opened. The default supplies a "searchpath" entry naming
 * the relative directory ../data.
 */
static const char *globaloptlist =
    "searchpath={{../data}}";

/* Document-specific option list: handed to TET_open_document() in main().
 * Empty by default.
 */
static const char *docoptlist = "";

/* Page-specific option list: handed to TET_open_page() in main() for
 * every page of the document. Empty by default.
 */
static const char *pageoptlist = "";

/* Basic image extraction options. main() prepends this string to the
 * per-image option list (which adds the "filename" option) before
 * calling TET_write_image_file(). Empty by default.
 */
static const char *baseimageoptlist = "";


/* Forward declaration; the function is defined after main(). */
static void report_image_info(TET *tet, int doc, int imageid);

/*
 * Extract all image resources of a single PDF document.
 *
 * Usage: image_resources <filename>
 *
 * All pages are opened once (and closed again) so that the image
 * resources are fully enumerated, including merged ones. Afterwards every
 * image whose mergetype is 0 or 1 is reported on stdout and written to a
 * file named "<filename>_I<imageid>".
 *
 * Returns 0 on success and 2 on a usage error, when no TET object could
 * be created, when the document could not be opened, or when a TET
 * exception was caught. Failures on individual pages or images are
 * reported but do not abort processing or change the exit status.
 */
int main(int argc, char **argv)
{
    TET *tet;
    const char *outfilebase;
    int exitcode = 0;

    if (argc != 2)
    {
        fprintf(stderr, "usage: image_resources <filename>\n");
        return(2);
    }

    if ((tet = TET_new()) == NULL)
    {
        fprintf(stderr, "image_resources: out of memory\n");
        return(2);
    }

    /* output file names are derived from the input file name */
    outfilebase = argv[1];

    TET_TRY (tet)
    {
        int pageno, n_pages;
        int imageid, n_images;
        int doc;

        TET_set_option(tet, globaloptlist);

        doc = TET_open_document(tet, argv[1], 0, docoptlist);

        if (doc == -1)
        {
            fprintf(stderr, "Error %d in %s(): %s\n",
                TET_get_errnum(tet), TET_get_apiname(tet), TET_get_errmsg(tet));
            TET_EXIT_TRY(tet);
            TET_delete(tet);
            return(2);
        }

        /* Images will only be merged upon opening a page.
         * In order to enumerate all merged image resources
         * we open all pages before extracting the images.
         */

        /* get number of pages in the document */
        n_pages = (int) TET_pcos_get_number(tet, doc, "length:pages");

        /* loop over pages in the document */
        for (pageno = 1; pageno <= n_pages; ++pageno)
        {
            int page;

            page = TET_open_page(tet, doc, pageno, pageoptlist);

            if (page == -1)
            {
                fprintf(stderr, "Error %d in %s() on page %d: %s\n",
                    TET_get_errnum(tet), TET_get_apiname(tet), pageno,
                    TET_get_errmsg(tet));
                continue;                        /* try next page */
            }

            /* the page was opened, but an error number is still set:
             * report it and carry on
             */
            if (TET_get_errnum(tet) != 0)
            {
                fprintf(stderr, "Error %d in %s() on page %d: %s\n",
                    TET_get_errnum(tet), TET_get_apiname(tet), pageno,
                    TET_get_errmsg(tet));
            }

            TET_close_page(tet, page);
        }

        /* get number of image resources in the document */
        n_images = (int) TET_pcos_get_number(tet, doc, "length:images");

        /* loop over image resources in the document */
        for (imageid = 0; imageid < n_images; imageid++)
        {
            char imageoptlist[1024];
            int optlen;

            /* examine image type */
            int mergetype = (int) TET_pcos_get_number(tet, doc,
                        "images[%d]/mergetype", imageid);

            /* skip images which have been consumed by merging */
            if (mergetype != 0 && mergetype != 1)
                continue;

            report_image_info(tet, doc, imageid);

            /*
             * Fetch the image data and write it to a disk file. The
             * output filenames are generated from the input filename
             * by appending the image ID.
             *
             * The input filename comes from the command line and has no
             * length limit, so the option list is built with a bounded
             * write and a truncated result is rejected instead of being
             * passed on.
             */
            optlen = snprintf(imageoptlist, sizeof imageoptlist,
                        "%s filename={%s_I%d}",
                        baseimageoptlist, outfilebase, imageid);

            if (optlen < 0 || (size_t) optlen >= sizeof imageoptlist)
            {
                fprintf(stderr,
                    "image_resources: option list for image I%d too long\n",
                    imageid);
                continue;                        /* process next image */
            }

            if (TET_write_image_file(tet, doc, imageid, imageoptlist) == -1)
            {
                printf("\nError %d in %s(): %s\n",
                    TET_get_errnum(tet), TET_get_apiname(tet),
                    TET_get_errmsg(tet));
                continue;                        /* process next image */
            }
        }

        TET_close_document(tet, doc);
    }

    TET_CATCH (tet)
    {
        fprintf(stderr, "Error %d in %s(): %s\n",
            TET_get_errnum(tet), TET_get_apiname(tet), TET_get_errmsg(tet));

        /* an exception aborted processing: do not report success */
        exitcode = 2;
    }

    TET_delete(tet);

    return exitcode;
}

/* Print one line of information about an image resource to stdout:
 * - pCOS id of the image (the index into the images[] array)
 * - pixel size (width x height) of the underlying PDF image
 * - number of components, bits per component, and colorspace name,
 *   or "JPEG2000" if no colorspace id is available for the image
 * - mergetype if different from "normal" (0): "artificial" for 1,
 *   "consumed" for any other value
 *
 * tet      TET object used for all pCOS queries
 * doc      document handle the image belongs to
 * imageid  index of the image in the document's images[] array
 */
static void
report_image_info(TET *tet, int doc, int imageid)
{
    int width, height, bpc, cs, mergetype;

    width = (int) TET_pcos_get_number(tet, doc,
                        "images[%d]/Width", imageid);
    height = (int) TET_pcos_get_number(tet, doc,
                        "images[%d]/Height", imageid);
    bpc = (int) TET_pcos_get_number(tet, doc,
                        "images[%d]/bpc", imageid);
    cs = (int) TET_pcos_get_number(tet, doc,
                        "images[%d]/colorspaceid", imageid);

    printf("image I%d: %dx%d pixel, ", imageid, width, height);

    if (cs != -1)
    {
        /* query the number first; the string is fetched directly in the
         * printf() call so it is consumed before any further API call
         */
        int components = (int) TET_pcos_get_number(tet, doc,
                        "colorspaces[%d]/components", cs);

        printf("%dx%d bit %s", components, bpc,
                TET_pcos_get_string(tet, doc,
                        "colorspaces[%d]/name", cs));
    }
    else
    {
        /* cs==-1 may happen for some JPEG 2000 images. bpc,
         * colorspace name and number of components are not
         * available in this case.
         *
         * No newline here: the optional mergetype suffix and the
         * terminating newline are emitted below for every image.
         */
        printf("JPEG2000");
    }

    mergetype = (int) TET_pcos_get_number(tet, doc,
                        "images[%d]/mergetype", imageid);

    /* mergetype==0 means normal image */
    if (mergetype != 0)
    {
        printf(", mergetype=");

        if (mergetype == 1)
            printf("artificial");
        else
            printf("consumed");
    }
    printf("\n");
}
