/* Simple PDF glyph dumper based on PDFlib TET
 *
 * $Id: glyphinfo.c,v 1.5 2010/07/15 10:34:11 rjs Exp $
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "tetlib.h"

/* Global option list, handed to TET_set_option(): a "searchpath" list
 * with two directory entries. */
static const char *globaloptlist =
    "searchpath={"
        "{../data} "
        "{../../../resource/cmap}"
    "}";

/* Document-specific option list: passed to TET_open_document() and echoed
 * in the "[ Document options: ... ]" line written for every page.
 * Empty here, i.e. no document options are supplied. */
static const char *docoptlist = "";

/* Page-specific option list: passed to TET_open_page() for each page and
 * echoed in the "[ Page options: ... ]" line written for every page. */
static const char *pageoptlist = "granularity=word";

int main(int argc, char **argv)
{
    TET *tet;
    FILE *outfp;
    volatile int pageno = 0;

    if (argc != 3)
    {
        fprintf(stderr, "usage: glyphinfo <infilename> <outfilename>\n");
        return(2);
    }

    if ((tet = TET_new()) == (TET *) 0)
    {
        fprintf(stderr, "glyphinfo: out of memory\n");
        return(2);
    }

    if ((outfp = fopen(argv[2], "w")) == NULL)
    {
	fprintf(stderr, "Couldn't open output file '%s'\n", argv[2]);
	TET_delete(tet);
	return(2);
    }


    TET_TRY (tet)
    {
        int n_pages;
        int doc;

        TET_set_option(tet, globaloptlist);

        doc = TET_open_document(tet, argv[1], 0, docoptlist);

        if (doc == -1)
        {
            fprintf(stderr, "Error %d in %s(): %s\n",
                TET_get_errnum(tet), TET_get_apiname(tet), TET_get_errmsg(tet));
            TET_EXIT_TRY(tet);
            TET_delete(tet);
            return(2);
        }

        /* get number of pages in the document */
        n_pages = (int) TET_pcos_get_number(tet, doc, "length:pages");

	/* Write UTF-8 BOM */
	fprintf(outfp, "%c%c%c\n", 0xef, 0xbb, 0xbf);

	/* loop over pages in the document */
        for (pageno = 1; pageno <= n_pages; ++pageno)
        {
            const char *text;
            int page;
            int len;

            page = TET_open_page(tet, doc, pageno, pageoptlist);

            if (page == -1)
            {
                fprintf(stderr, "Error %d in %s() on page %d: %s\n",
                    TET_get_errnum(tet), TET_get_apiname(tet), pageno,
                    TET_get_errmsg(tet));
                continue;                        /* try next page */
            }

	    /* Administrative information */
	    fprintf(outfp, "[ Document: '%s' ]\n",
		TET_pcos_get_string(tet, doc, "filename"));

	    fprintf(outfp, "[ Document options: '%s' ]\n",
		docoptlist);

	    fprintf(outfp, "[ Page options: '%s' ]\n",
		pageoptlist);

	    fprintf(outfp, "[ ----- Page %d ----- ]\n", pageno);


            /* Retrieve all text fragments */
            while ((text = TET_get_text(tet, page, &len)) != 0)
            {
                const TET_char_info *ci;

		fprintf(outfp, "[%s]\n", text);  /* print the retrieved text */

                /* Loop over all glyphs and print their details */
                while ((ci = TET_get_char_info(tet, page)) != NULL)
                {
		    const char *fontname;

		    /* Fetch the font name with pCOS (based on its ID) */
		    fontname = TET_pcos_get_string(tet, doc,
		    		"fonts[%d]/name", ci->fontid);

		    /* Print the character */
		    fprintf(outfp, "U+%04X", ci->uv);

		    /* ...and its ASCII representation if appropriate */
		    if (ci->uv >= 0x20 && ci->uv <= 0x7F)
			fprintf(outfp, " '%c'", (unsigned char) ci->uv);

		    /* Print font name, size, and position */
		    fprintf(outfp, " %s size=%.2f x=%.2f y=%.2f",
			fontname, ci->fontsize, ci->x, ci->y);

		    /* Examine the "type" member */
		    if (ci->type == TET_CT_SEQ_START)
			fprintf(outfp, " ligature_start");

		    else if (ci->type == TET_CT_SEQ_CONT)
			fprintf(outfp, " ligature_cont");

		    /* Separators are only inserted for granularity > word */
		    else if (ci->type == TET_CT_INSERTED)
			fprintf(outfp, " inserted");

		    /* Examine the bit flags in the "attributes" member */
		    if (ci->attributes != TET_ATTR_NONE)
		    {
			if (ci->attributes & TET_ATTR_SUB)
			    fprintf(outfp, "/sub");
			if (ci->attributes & TET_ATTR_SUP)
			    fprintf(outfp, "/sup");
			if (ci->attributes & TET_ATTR_DROPCAP)
			    fprintf(outfp, "/dropcap");
			if (ci->attributes & TET_ATTR_SHADOW)
			    fprintf(outfp, "/shadow");
			if (ci->attributes & TET_ATTR_DEHYPHENATION_PRE)
			    fprintf(outfp, "/dehyphenation_pre");
			if (ci->attributes & TET_ATTR_DEHYPHENATION_ARTIFACT)
			    fprintf(outfp, "/dehyphenation_artifact");
			if (ci->attributes & TET_ATTR_DEHYPHENATION_POST)
			    fprintf(outfp, "/dehyphenation_post");
		    }

		    fprintf(outfp, "\n");
                }

		fprintf(outfp, "\n");
            }

            if (TET_get_errnum(tet) != 0)
            {
                fprintf(stderr, "Error %d in %s() on page %d: %s\n",
                    TET_get_errnum(tet), TET_get_apiname(tet), pageno,
                    TET_get_errmsg(tet));
            }

            TET_close_page(tet, page);
        }

        TET_close_document(tet, doc);
    }

    TET_CATCH (tet)
    {
        if (pageno == 0)
            fprintf(stderr, "Error %d in %s(): %s\n",
                TET_get_errnum(tet), TET_get_apiname(tet), TET_get_errmsg(tet));
        else
            fprintf(stderr, "Error %d in %s() on page %d: %s\n",
                TET_get_errnum(tet), TET_get_apiname(tet), pageno,
                TET_get_errmsg(tet));
    }

    TET_delete(tet);
    fclose(outfp);

    return 0;
}
