#!/usr/bin/env ruby
# TET sample application for dumping PDF information in the XML language TETML

require 'TET'

# global option list
globaloptlist = "searchpath={{../data} {../../../resource/cmap}}"

# document-specific option list
basedocoptlist = ""

# page-specific option list
# Remove the tetml= option if you don't need font and geometry information
pageoptlist = "granularity=word tetml={glyphdetails={all}}"

# Set this to true to generate the TETML output in memory and have this
# script write it to the output file itself. It must be a real boolean:
# in Ruby only false and nil are falsy, so the integer 0 would still
# select the in-memory branch.
inmemory = false

# Declared up front so the ensure clause can tell whether a TET object was
# ever created (e.g. the usage check fails before TET.new is reached).
tet = nil

begin
    if ARGV.length != 2
        raise("usage: tetml.rb <pdffilename> <tetmlfilename>\n")
    end

    pdffilename, tetmlfilename = ARGV

    tet = TET.new

    tet.set_option(globaloptlist)

    # In-memory mode passes an empty tetml option list; otherwise the output
    # file name is handed over through the document option list.
    docoptlist =
        if inmemory
            "tetml={} #{basedocoptlist}"
        else
            "tetml={filename={#{tetmlfilename}}} #{basedocoptlist}"
        end

    doc = tet.open_document(pdffilename, docoptlist)

    # A handle of -1 is treated as "document could not be opened".
    if doc == -1
        raise(format("Error %d in %s(): %s\n",
                     tet.get_errnum, tet.get_apiname, tet.get_errmsg))
    end

    n_pages = tet.pcos_get_number(doc, "length:pages")

    # loop over pages in the document
    1.upto(n_pages) do |pageno|
        tet.process_page(doc, pageno, pageoptlist)
    end

    # This could be combined with the last page-related call
    tet.process_page(doc, 0, "tetml={trailer}")

    if inmemory
        # Retrieve the generated TETML data from memory. Since we have
        # only a single call the result will contain the full TETML.
        tetml = tet.get_tetml(doc, "")
        unless tetml
            raise("tetml: couldn't retrieve TETML data\n")
        end

        # Fetch the data before touching the output file, and use the block
        # form so the file handle is closed even if the write fails.
        File.open(tetmlfilename, "w") do |fp|
            fp.print tetml
        end
    end

    tet.close_document(doc)

rescue TETException => pe
    print pe.backtrace.join("\n")
    printf("TET exception occurred in tetml:\n")
    print "[#{pe.get_errnum}] #{pe.get_apiname}: #{pe.get_errmsg}\n"
rescue StandardError => e
    # StandardError rather than Exception, so that SystemExit, Interrupt and
    # similar process-level exceptions are not swallowed here.
    print e.backtrace.join("\n") + "\n" + e.to_s + "\n"
ensure
    tet.delete if tet
end
