/*
 * Simple PDF glyph dumper based on PDFlib TET
 */

#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#include "tet.hpp"

using namespace std;
using namespace pdflib;

namespace
{

/*
 * Conversion helpers between UTF-8 byte strings and wstring. They are only
 * declared here; the definitions follow main() at the end of this file.
 */
wstring get_wstring(const TET& tet, const string& utf8_string);
string get_utf8string(const TET& tet, const wstring& wide_string);

/* global option list; passed to TET::set_option() in main() */
const wstring globaloptlist = L"searchpath={{../data} "
        L"{../../../resource/cmap}}";

/*
 * document-specific option list; passed to TET::open_document() in main()
 * and echoed in the output header
 */
const wstring docoptlist = L"";

/*
 * page-specific option list; passed to TET::open_page() in main()
 * and echoed in the output header
 */
const wstring pageoptlist = L"granularity=word";

/*
 * Print color space and color value details of a glyph's fill color.
 *
 * os       narrow output stream which receives the description; all wide
 *          strings retrieved from TET are converted to UTF-8 before output
 * tet      TET object used for the color query and the pCOS lookups
 * doc      document handle used for the color query and the pCOS lookups
 * colorid  color id to describe (main() passes TET_char_info::colorid)
 *
 * The description is appended to the current output line as a parenthesized
 * suffix which starts with a blank, e.g. " (not filled)", or the color space
 * name followed by color space details and the '/'-separated components.
 */
void
print_color_value(ostream& os, TET& tet, int doc, int colorid)
{
    wostringstream pcos_path;

    /* We handle only the fill color, but ignore the stroke color.
     * The stroke color can be retrieved analogously with the
     * keyword "stroke".
     */
    const TET_color_info *colorinfo =
            tet.get_color_info(doc, colorid, L"usage=fill");

    /* Neither a color space nor a pattern: the glyph is not filled at all */
    if (colorinfo->colorspaceid == -1 && colorinfo->patternid == -1)
    {
        os << " (not filled)";
        return;
    }

    os << " (";

    if (colorinfo->patternid != -1)
    {
        pcos_path.str(L"");
        pcos_path << L"patterns[" << colorinfo->patternid << L"]/PatternType";
        int const patterntype =
            static_cast<int>(tet.pcos_get_number(doc, pcos_path.str()));

        if (patterntype == 1)   /* Tiling pattern */
        {
            pcos_path.str(L"");
            pcos_path << L"patterns[" << colorinfo->patternid << L"]/PaintType";
            int const painttype =
                static_cast<int>(tet.pcos_get_number(doc, pcos_path.str()));

            if (painttype == 1)
            {
                os << "colored Pattern)";
                return;
            }
            else if (painttype == 2)
            {
                os << "uncolored Pattern, base color: ";
                /* FALLTHROUGH to colorspaceid output */
            }
        }
        else if (patterntype == 2)      /* Shading pattern */
        {
            pcos_path.str(L"");
            pcos_path << L"patterns[" << colorinfo->patternid
                   << L"]/Shading/ShadingType";
            int const shadingtype =
                static_cast<int>(tet.pcos_get_number(doc, pcos_path.str()));

            /* The closing parenthesis must be a narrow literal: a wide
             * literal written to a narrow stream is printed as a pointer
             * value (and is rejected by the compiler as of C++20).
             */
            os << "shading Pattern, ShadingType=" << shadingtype << ")";
            return;
        }
    }

    pcos_path.str(L"");
    pcos_path << L"colorspaces[" << colorinfo->colorspaceid << L"]/name";
    wstring csname = tet.pcos_get_string(doc, pcos_path.str());

    os << get_utf8string(tet, csname);

    /* Emit more details depending on the colorspace type */
    if (csname == L"ICCBased")
    {
        pcos_path.str(L"");
        pcos_path << L"colorspaces[" << colorinfo->colorspaceid
                    << L"]/iccprofileid";
        int const iccprofileid =
                static_cast<int>(tet.pcos_get_number(doc, pcos_path.str()));

        pcos_path.str(L"");
        pcos_path << L"iccprofiles[" << iccprofileid << L"]/errormessage";
        wstring const errormessage = tet.pcos_get_string(doc, pcos_path.str());

        /* Check whether the embedded profile is damaged */
        if (errormessage.length() > 0)
        {
            os << " (" << get_utf8string(tet, errormessage) << ")";
        }
        else
        {
            pcos_path.str(L"");
            pcos_path << L"iccprofiles[" << iccprofileid << L"]/profilename";
            wstring const profilename =
                        tet.pcos_get_string(doc, pcos_path.str());
            os << " '" << get_utf8string(tet, profilename) << "'";

            pcos_path.str(L"");
            pcos_path << L"iccprofiles[" << iccprofileid << L"]/profilecs";
            wstring const profilecs =
                    tet.pcos_get_string(doc, pcos_path.str());
            os << " '" << get_utf8string(tet, profilecs) << "'";
        }
    }
    else if (csname == L"Separation")
    {
        pcos_path.str(L"");
        pcos_path << L"colorspaces[" << colorinfo->colorspaceid
                << L"]/colorantname";
        wstring const colorantname = tet.pcos_get_string(doc, pcos_path.str());
        os << " '" << get_utf8string(tet, colorantname) << "'";
    }
    else if (csname == L"DeviceN")
    {
        os << " ";

        /* List the colorant names separated by '/' */
        for (int i = 0; i < colorinfo->n; i++)
        {
            pcos_path.str(L"");
            /* The array index must be terminated with ']' to form a
             * well-formed pCOS path.
             */
            pcos_path << L"colorspaces[" << colorinfo->colorspaceid
                    << L"]/colorantnames[" << i << L"]";
            wstring const colorantname =
                    tet.pcos_get_string(doc, pcos_path.str());

            os << get_utf8string(tet, colorantname);

            if (i != colorinfo->n - 1)
                os << "/";
        }
    }
    else if (csname == L"Indexed")
    {
        /* Additionally print the name of the base color space */
        pcos_path.str(L"");
        pcos_path << L"colorspaces[" << colorinfo->colorspaceid
                << L"]/baseid";
        int const baseid =
                static_cast<int>(tet.pcos_get_number(doc, pcos_path.str()));

        pcos_path.str(L"");
        pcos_path << L"colorspaces[" << baseid << L"]/name";
        csname = tet.pcos_get_string(doc, pcos_path.str());

        os << " " << get_utf8string(tet, csname);
    }

    /* Finally print the color components separated by '/' */
    os << " ";
    for (int i = 0; i < colorinfo->n; i++)
    {
        os << colorinfo->components[i];

        if (i != colorinfo->n - 1)
            os << "/";
    }
    os << ")";
}

} // end of anonymous namespace

int main(int argc, char **argv)
{
    int pageno = 0;
    try
    {
        TET tet;

        if (argc != 3)
        {
            wcerr << L"usage: glyphinfo <infilename> <outfilename>" << endl;
            return(2);
        }

        /*
         * Create an output stream for the glyph information.
         */
        ofstream ofp(argv[2], ios_base::binary);

        if (!ofp)
        {
            wcerr << L"Couldn't open output file " << argv[2] << endl;
            return 2;
        }

        /* And first write a BOM */
        ofp << "\xef\xbb\xbf";

        /* Hex values in uppercase */
        ofp << std::uppercase;

        tet.set_option(globaloptlist);

        /*
         * Caution: For simplicity we assume that the program arguments are
         * encoded as UTF-8, which might not be true in all cases!
         */
        wstring const doc_name(get_wstring(tet, string(argv[1])));
        const int doc = tet.open_document(doc_name, docoptlist);

        if (doc == -1)
        {
            wcerr << L"Error " << tet.get_errnum()
                << L" in " << tet.get_apiname() << L"(): "
                << tet.get_errmsg() << endl;
            return 2;
        }

        /* get number of pages in the document */
        const int n_pages = (int) tet.pcos_get_number(doc, L"length:pages");

        /* loop over pages in the document */
        for (pageno = 1; pageno <= n_pages; ++pageno)
        {
            wstring text;
            const int page = tet.open_page(doc, pageno, pageoptlist);
            int previouscolorid = -1;

            if (page == -1)
            {
                wcerr << L"Error " << tet.get_errnum()
                    << L" in " << tet.get_apiname()
                    << L"(): " << tet.get_errmsg() << endl;
                continue;                        // try next page
            }

            // Administrative information
            ofp << "[ Document: '"
                        << get_utf8string(tet, tet.pcos_get_string(doc, L"filename"))
                        << "' ]" << endl;

            ofp << "[ Document options: '" << get_utf8string(tet, docoptlist)
                        << "' ]" << endl;

            ofp << "[ Page options: '" << get_utf8string(tet, pageoptlist)
                        << "' ]" << endl;

            ofp << "[ ----- Page " << pageno
                        << " ----- ]" << endl;

            // Retrieve all text fragments
            while ((text = tet.get_text(page)) != L"")
            {
                const TET_char_info *ci;

                ofp << "[" << get_utf8string(tet, text) << "]" << endl;

                // Loop over all glyphs and print their details
                while ((ci = tet.get_char_info(page)) != NULL)
                {
                    wostringstream path;

                    // Fetch the font name with pCOS (based on its ID)
                    path << L"fonts[" << ci->fontid << L"]/name";
                    wstring fontname = tet.pcos_get_string(doc, path.str());

                    // Print the character
                    ofp << "U+";
                    ofp.setf(ios_base::hex, ios_base::basefield);
                    ofp.fill('0');
                    ofp.width(4);
                    ofp << ci->uv;
                    ofp.setf(ios_base::dec, ios_base::basefield);

                    // ...and its ASCII representation if appropriate
                    if (ci->uv >= 0x20 && ci->uv <= 0x7F)
                        ofp << " '" << (char) ci->uv << "'";
                    else
                        ofp << " ???";
                        

                    // Print font name, size, and position
                    ofp.setf(ios_base::fixed, ios_base::floatfield);
                    ofp.precision(2);
                    ofp << " " << get_utf8string(tet, fontname)
                        << " size=" << ci->fontsize
                        << " x=" << ci->x
                        << " y=" << ci->y;

                    /* Print the color id */
                    ofp << " colorid=" << ci->colorid;

                    /* Check whether the text color changed */
                    if (ci->colorid != previouscolorid)
                    {
                        print_color_value(ofp, tet, doc, ci->colorid);
                        previouscolorid = ci->colorid;
                    }

                    // Examine the "type" member
                    if (ci->type == TET::CT_SEQ_START)
                        ofp << " ligature_start";

                    else if (ci->type == TET::CT_SEQ_CONT)
                        ofp << " ligature_cont";

                    // Separators are only inserted for granularity > word
                    else if (ci->type == TET::CT_INSERTED)
                        ofp << " inserted";

                    /* Examine the bit flags in the "attributes" member */
                    if (ci->attributes != TET::ATTR_NONE)
                    {
                        if (ci->attributes & TET::ATTR_SUB)
                            ofp << "/sub";
                        if (ci->attributes & TET::ATTR_SUP)
                            ofp << "/sup";
                        if (ci->attributes & TET::ATTR_DROPCAP)
                            ofp << "/dropcap";
                        if (ci->attributes & TET::ATTR_SHADOW)
                            ofp << "/shadow";
                        if (ci->attributes & TET::ATTR_DEHYPHENATION_PRE)
                            ofp << "/dehyphenation_pre";
                        if (ci->attributes & TET::ATTR_DEHYPHENATION_ARTIFACT)
                            ofp << "/dehyphenation_artifact";
                        if (ci->attributes & TET::ATTR_DEHYPHENATION_POST)
                            ofp << "/dehyphenation_post";
                        if (ci->attributes & TET::ATTR_ARTIFACT)
                            ofp << "/Artifact";
                    }
                    ofp << endl;
                }
                ofp << endl;
            }

            if (tet.get_errnum() != 0)
            {
                ofp << "Error " << tet.get_errnum() << " in "
                    << get_utf8string(tet, tet.get_apiname()) << "(): "
                    << get_utf8string(tet, tet.get_errmsg()) << endl;
            }

            tet.close_page(page);
        }

        tet.close_document(doc);

        ofp.close();
    }
    catch (TET::Exception &ex)
    {
        if (pageno == 0)
        {
            wcerr << L"Error " << ex.get_errnum()
                << L" in " << ex.get_apiname()
                << L"(): " << ex.get_errmsg() << endl;
        }
        else
        {
            wcerr << L"Error " << ex.get_errnum()
                << L" in " << ex.get_apiname()
                << L"() on page " << pageno
                << L": " << ex.get_errmsg() << endl;
        }
        return 2;
    }
    catch (exception &e) {
        wcerr << L"C++ exception occurred: " << e.what() << endl;
        return 99;
    }
    catch (...) {
        wcerr << L"Generic C++ exception occurred!" << endl;
        return 99;
    }

    return 0;
}

namespace
{
    /*
     * Get a wstring for the given UTF-8 string.
     *
     * tet          TET object whose convert_to_unicode() does the conversion
     * utf8_string  input text; passed to TET with input format "auto"
     *
     * The output format requested from TET (UTF-16 or UTF-32) is chosen to
     * match the size of wchar_t on the current platform.
     *
     * Throws std::logic_error if wchar_t is neither 2 nor 4 bytes wide.
     */
    wstring get_wstring(const TET& tet, const string& utf8_string)
    {
        const size_t size = sizeof(wstring::value_type);

        /* Converted text as raw bytes, "size" bytes per code unit */
        string wide_bytes;

        switch (size)
        {
        case 2:
            wide_bytes = tet.convert_to_unicode(L"auto", utf8_string,
                                                    L"outputformat=utf16");
            break;

        case 4:
            wide_bytes = tet.convert_to_unicode(L"auto", utf8_string,
                                                    L"outputformat=utf32");
            break;

        default:
            throw std::logic_error("Unsupported wchar_t size");
        }

        /*
         * Copy the bytes into properly typed storage. Reading the char
         * buffer through a wchar_t pointer would violate the aliasing rules
         * and does not guarantee suitable alignment.
         */
        const size_t n_chars = wide_bytes.length() / size;
        wstring result(n_chars, L'\0');

        if (n_chars > 0)
            std::memcpy(&result[0], wide_bytes.data(), n_chars * size);

        return result;
    }

    /*
     * Convert a wstring to UTF-8.
     *
     * The wide string's storage is handed to TET::convert_to_unicode() as a
     * byte string. It is declared as UTF-16 or UTF-32 depending on the size
     * of wchar_t; any other size raises std::logic_error.
     */
    string get_utf8string(const TET& tet, const wstring& wide_string)
    {
        const size_t unit_size = sizeof(wstring::value_type);

        // View the wide characters as a plain sequence of bytes.
        const char *raw = reinterpret_cast<const char *>(wide_string.c_str());
        const string raw_bytes(raw, wide_string.length() * unit_size);

        // Select the TET input format which matches the wchar_t width.
        wstring inputformat;
        if (unit_size == 2)
            inputformat = L"utf16";
        else if (unit_size == 4)
            inputformat = L"utf32";
        else
            throw std::logic_error("Unsupported wchar_t size");

        return tet.convert_to_unicode(inputformat, raw_bytes,
                                        L"outputformat=utf8");
    }
} // end anonymous namespace

