#!/usr/bin/perl
#
# Simple PDF glyph dumper based on PDFlib TET
#

use PDFlib::TET;
use strict;

# global option list
my $globaloptlist = "searchpath={{../data} {../../../resource/cmap}}";

# document-specific option list
my $docoptlist = "";

# page-specific option list
my $pageoptlist = "granularity=word";

my $pageno = 0;

eval  {
    my $tet;

    if ($#ARGV !=1) {
        die("usage: glyphinfo.pl <infilename> <outfilename>\n");
    }


    $tet = new PDFlib::TET;

    open(OUTFP, "> $ARGV[1]") ||
        die("Couldn't open output file '" . $ARGV[1] . "'\n");
    binmode(OUTFP, ":utf8");

    my $n_pages;
    my $doc;

    $tet->set_option($globaloptlist);

    $doc = $tet->open_document($ARGV[0], $docoptlist);

    if ($doc == -1) {
        die("Error ". $tet->get_errnum() . " in " . $tet->get_apiname()
            . "(): " . $tet->get_errmsg() . "\n");
    }

    # get number of pages in the document
    $n_pages = $tet->pcos_get_number($doc, "length:pages");

    # Write UTF-8 BOM
        print OUTFP chr(0xFEFF);

    # loop over pages in the document
    for ($pageno = 1; $pageno <= $n_pages; ++$pageno)
    {
        my $text;
        my $page;
        my $len;
        my $previouscolorid = -1;

        $page = $tet->open_page($doc, $pageno, $pageoptlist);

        if ($page == -1) {
            print("Error ". $tet->get_errnum() ." in ". $tet->get_apiname()
                . "(): " . $tet->get_errmsg() . "\n");
            next;                        # try next page
        }

        # Administrative information
        printf OUTFP "[ Document: '%s' ]\n",
            $tet->pcos_get_string($doc, "filename");

        printf OUTFP "[ Document options: '%s' ]\n",
            $docoptlist;

        printf OUTFP "[ Page options: '%s' ]\n",
            $pageoptlist;

        printf OUTFP "[ ----- Page %d ----- ]\n", $pageno;


        # Retrieve all text fragments
        while (defined($text = $tet->get_text($page)) )
        {
            my $ci;

            printf OUTFP "[%s]\n", $text;  # print the retrieved text 

            # Loop over all glyphs and print their details
            while ($ci = $tet->get_char_info($page))
            {
                my $fontname;

                # Fetch the font name with pCOS (based on its ID)
                $fontname = $tet->pcos_get_string($doc,
                        "fonts[" . $ci->{"fontid"} . "]/name");


                # Print the Unicode value ...
                printf OUTFP "U+%04X", $ci->{"uv"};

                # ...and the character itself if it is ASCII
                if ($ci->{"uv"} >= 0x20 && $ci->{"uv"} <= 0x7F) {
                    printf OUTFP " '%c'", $ci->{"uv"};
                } else {
                    printf OUTFP " ???";

                }

                # Print font name, size, and position
                printf OUTFP " %s size=%.2f x=%.2f y=%.2f",
                    $fontname, $ci->{"fontsize"}, $ci->{"x"}, $ci->{"y"};

                # Print the color id
                printf OUTFP " colorid=%d", $ci->{"colorid"};

                # Check whether the text color changed
                if ($ci->{"colorid"} != $previouscolorid)
                {
                    print_color_value($tet, $doc, $ci->{"colorid"});
                    $previouscolorid = $ci->{"colorid"};
                }

                # Examine the "type" member
                if ($ci->{"type"} == PDFlib::TET::CT_SEQ_START) {
                    printf OUTFP " ligature_start";
                }
                elsif ($ci->{"type"} == PDFlib::TET::CT_SEQ_CONT) {
                    printf OUTFP " ligature_cont";
                }

                # Separators are only inserted for granularity > word
                elsif ($ci->{"type"} == PDFlib::TET::CT_INSERTED) {
                    printf OUTFP " inserted";
                }

                # Examine the bit flags in the "attributes" member
                if ($ci->{"attributes"} != PDFlib::TET::ATTR_NONE)
                {
                    if ($ci->{"attributes"} & PDFlib::TET::ATTR_SUB) {
                        printf OUTFP "/sub";
                    }
                    if ($ci->{"attributes"} & PDFlib::TET::ATTR_SUP) {
                        printf OUTFP "/sup";
                    }
                    if ($ci->{"attributes"} & PDFlib::TET::ATTR_DROPCAP) {
                        printf OUTFP "/dropcap";
                    }
                    if ($ci->{"attributes"} & PDFlib::TET::ATTR_SHADOW) {
                        printf OUTFP "/shadow";
                    }
                    if ($ci->{"attributes"} & PDFlib::TET::ATTR_DEHYPHENATION_PRE) {
                        printf OUTFP "/dehyphenation_pre";
                    }
                    if ($ci->{"attributes"} & PDFlib::TET::ATTR_DEHYPHENATION_ARTIFACT) {
                        printf OUTFP "/dehyphenation_artifact";
                    }
                    if ($ci->{"attributes"} & PDFlib::TET::ATTR_DEHYPHENATION_POST) {
                        printf OUTFP "/dehyphenation_post";
                    }
                    if ($ci->{"attributes"} & PDFlib::TET::ATTR_ARTIFACT) {
                        printf OUTFP "/Artifact";
                    }
                }

                printf OUTFP "\n";
            }

            printf OUTFP "\n";
        }

        if ($tet->get_errnum() != 0) {
            print("Error ". $tet->get_errnum() . " in " . 
                    $tet->get_apiname() . "(): on page $pageno" 
                    . $tet->get_errmsg() . "\n");
        }

        $tet->close_page($page);
    }

    $tet->close_document($doc);
};

if ($@) {
    printf("TET Exception occurred:\n");
    if ($pageno == 0) {
        printf("Error $@\n");
    } else {
        printf("Error $@ on page $pageno\n");
    }
    exit(1);
}

sub print_color_value{
    (my $tet, my $doc, my $colorid) = ($_[0], $_[1], $_[2]);
    my $i;
    my $len;

    # We handle only the fill color, but ignore the stroke color.
    # The stroke color can be retrieved analogously with the
    # keyword "stroke".

    my $colorinfo = $tet->get_color_info($doc, $colorid, "usage=fill");

    if ($colorinfo->{"colorspaceid"} == -1 && $colorinfo->{"patternid"} == -1)
    {
        printf OUTFP " (not filled)";
        return;
    }

    printf OUTFP " (";

    if ($colorinfo->{"patternid"} != -1)
    {
        my $patterntype =  $tet->pcos_get_number($doc, "patterns[%d]/PatternType",
                    $colorinfo->{"patternid"});

        if ($patterntype == 1)	# Tiling pattern 
        {
            my $painttype =  $tet->pcos_get_number($doc, "patterns[%d]/PaintType",
                        $colorinfo->{"patternid"});
            if ($painttype == 1)
            {
                printf OUTFP "colored Pattern)";
                return;
            }
            elsif ($painttype == 2)
            {
                 printf OUTFP "uncolored Pattern, base color: ";
                 # FALLTHROUGH to colorspaceid output 
            }
        }
        elsif ($patterntype == 2)	# Shading pattern 
        {
            my $shadingtype =  $tet->pcos_get_number($doc,
                        sprintf("patterns[%d]/Shading/ShadingType",
                        $colorinfo->{"patternid"}));

            printf OUTFP "shading Pattern, ShadingType=%d)", $shadingtype;
            return;
        }
    }

    my $csname = $tet->pcos_get_string($doc, sprintf("colorspaces[%d]/name",
            $colorinfo->{"colorspaceid"}));

    printf OUTFP "%s", $csname;

    # Emit more details depending on the colorspace type 
    if ($csname eq "ICCBased")
    {
        my $iccprofileid = $tet->pcos_get_number($doc,
                                sprintf("colorspaces[%d]/iccprofileid",
                                $colorinfo->{"colorspaceid"}));

        my $errormessage = $tet->pcos_get_string($doc,
                        sprintf("iccprofiles[%d]/errormessage", $iccprofileid));

        # Check whether the embedded profile is damaged 
        if ($errormessage)
        {
            printf OUTFP " (%s)", $errormessage;
        }
        else
        {
            my $profilename =
                $tet->pcos_get_string($doc,
                    sprintf("iccprofiles[%d]/profilename", $iccprofileid));
            printf OUTFP " '%s'", $profilename;

            my $profilecs = $tet->pcos_get_string($doc,
                    sprintf("iccprofiles[%d]/profilecs",
                                    $iccprofileid));
            printf OUTFP " '%s'", $profilecs;
        }
    }
    elsif ($csname eq "Separation")
    {
        my $colorantname =
            $tet->pcos_get_string($doc, sprintf("colorspaces[%d]/colorantname",
                    $colorinfo->{"colorspaceid"}));
        printf OUTFP " '%s'", $colorantname;
    }
    elsif ($csname eq "DeviceN")
    {
        printf OUTFP " ";
        $len =  @{$colorinfo->{"components"}};

        for ($i=0; $i < $len; $i++)
        {
            my $colorantname =
                $tet->pcos_get_string($doc,
                    sprintf("colorspaces[%d]/colorantnames[%d]",
                            $colorinfo->{"colorspaceid"}, $i));

            printf OUTFP "%s", $colorantname;

            if ($i ne $len)
            {
                printf OUTFP "/";
            }
        }
    }
    elsif ($csname eq  "Indexed")
    {
        my $baseid =
             $tet->pcos_get_number($doc, "colorspaces[%d]/baseid",
                    $colorinfo->{"colorspaceid"});

        $csname = $tet->pcos_get_string($doc, 
                sprintf("colorspaces[%d]/name", $baseid));

        printf OUTFP " %s", $csname;

    }

    printf OUTFP " ";
    $len =  @{$colorinfo->{"components"}};
    for ($i=0; $i < $len; $i++)
    {
        printf OUTFP "%g", $colorinfo->{"components"}[$i];

        if ($i != $len-1)
        {
            printf OUTFP "/";
        }
    }
    printf OUTFP ")";
}
