#!/usr/bin/env ruby
#
# Simple PDF glyph dumper based on PDFlib TET
#

require 'TET'

# Global option list, handed to set_option() once the TET object exists
globaloptlist = 'searchpath={{../data} {../../../resource/cmap}}'

# Option list used when opening the document (no options set)
docoptlist = ''

# Option list used when opening each page
pageoptlist = 'granularity=word'


# Writes a parenthesized description of a glyph's fill color to outfp.
#
# Only the fill color is examined; the stroke color is ignored (it could
# be queried the same way with the keyword "stroke").
#
# outfp   - output stream; only printf is called on it
# tet     - TET object used for get_color_info and the pCOS queries
# doc     - document handle passed through to the TET calls
# colorid - color id to describe
#
# Returns nil when the description ends early ("not filled", colored
# tiling pattern, shading pattern) and 0 otherwise.
def print_color_value(outfp, tet, doc, colorid)
  fill = tet.get_color_info(doc, colorid, "usage=fill")

  # Neither a color space nor a pattern: nothing is filled
  if fill.colorspaceid == -1 && fill.patternid == -1
    outfp.printf " (not filled)"
    return
  end

  outfp.printf " ("

  unless fill.patternid == -1
    pattern_path = "patterns[#{fill.patternid}]"

    case tet.pcos_get_number(doc, "#{pattern_path}/PatternType")
    when 1 # Tiling pattern
      case tet.pcos_get_number(doc, "#{pattern_path}/PaintType")
      when 1
        outfp.printf "colored Pattern)"
        return
      when 2
        # No return here: the base color is described by the
        # color space output below
        outfp.printf "uncolored Pattern, base color: "
      end
    when 2 # Shading pattern
      shading = tet.pcos_get_number(doc, "#{pattern_path}/Shading/ShadingType")
      outfp.printf "shading Pattern, ShadingType=%d)", shading
      return
    end
  end

  cs_path = "colorspaces[#{fill.colorspaceid}]"
  csname = tet.pcos_get_string(doc, "#{cs_path}/name")
  outfp.printf "%s", csname

  # Emit more details depending on the color space type
  case csname
  when "ICCBased"
    profile_id = tet.pcos_get_number(doc, "#{cs_path}/iccprofileid").to_i
    profile_path = "iccprofiles[#{profile_id}]"

    # A non-empty error message is printed instead of the profile details
    damage = tet.pcos_get_string(doc, "#{profile_path}/errormessage")
    if damage == ""
      outfp.printf " '%s'", tet.pcos_get_string(doc, "#{profile_path}/profilename")
      outfp.printf " '%s'", tet.pcos_get_string(doc, "#{profile_path}/profilecs")
    else
      outfp.printf " (%s)", damage
    end
  when "Separation"
    outfp.printf " '%s'", tet.pcos_get_string(doc, "#{cs_path}/colorantname")
  when "DeviceN"
    outfp.printf " "

    # One colorant name per color component, separated by "/"
    fill.components.length.times do |i|
      outfp.printf "/" unless i.zero?
      outfp.printf "%s", tet.pcos_get_string(doc, "#{cs_path}/colorantnames[#{i}]")
    end
  when "Indexed"
    base_id = tet.pcos_get_number(doc, "#{cs_path}/baseid").to_i
    outfp.printf " %s", tet.pcos_get_string(doc, "colorspaces[#{base_id}]/name")
  end

  # Component values, separated by "/"
  outfp.printf " "
  fill.components.length.times do |i|
    outfp.printf "/" unless i.zero?
    outfp.printf "%g", fill.components[i]
  end
  outfp.printf ")"

  0
end

# Page currently being processed. It lives outside the page loop so the
# rescue clause below can report it; 0 means no page has been opened yet.
pageno = 0

# Main script: dumps the text of every page of ARGV[0] into ARGV[1],
# followed by one line of details for each glyph (Unicode value, font,
# size, position, color, type and attributes).
begin
  if ARGV.length != 2
    raise("usage: glyphinfo.rb <infilename> <outfilename>\n")
  end

  tet = TET.new

  outfp = File.new(ARGV[1], "w")

  tet.set_option(globaloptlist)

  doc = tet.open_document(ARGV[0], docoptlist)

  if doc == -1
    raise "Error #{tet.get_errnum} in #{tet.get_apiname}(): #{tet.get_errmsg}"
  end

  # get number of pages in the document
  n_pages = tet.pcos_get_number(doc, "length:pages").to_i

  # Write UTF-8 BOM
  outfp.printf "\xEF\xBB\xBF" # Byte Order Mark

  # loop over pages in the document
  1.upto(n_pages) do |current_page|
    # Block parameters are always block-local, so the outer variable is
    # assigned explicitly to keep it visible to the rescue clause.
    pageno = current_page

    previouscolorid = -1
    page = tet.open_page(doc, pageno, pageoptlist)

    if page == -1
      print "Error #{tet.get_errnum} in #{tet.get_apiname}() " \
            "on page #{pageno}: #{tet.get_errmsg}\n"
      next # try next page
    end

    # Administrative information
    outfp.printf "[ Document: '%s' ]\n", tet.pcos_get_string(doc, "filename")
    outfp.printf "[ Document options: '%s' ]\n", docoptlist
    outfp.printf "[ Page options: '%s' ]\n", pageoptlist
    outfp.printf "[ ----- Page %d ----- ]\n", pageno

    # Retrieve all text fragments
    while (text = tet.get_text(page))
      outfp.printf "[%s]\n", text # print the retrieved text

      # Loop over all glyphs and print their details
      while (ci = tet.get_char_info(page))
        # Fetch the font name with pCOS (based on its ID)
        fontname = tet.pcos_get_string(doc, "fonts[#{ci.fontid}]/name")

        # Print the Unicode value
        outfp.printf "U+%04X", ci.uv

        # ...and the character itself if it is ASCII
        if ci.uv >= 0x20 && ci.uv <= 0x7F
          outfp.printf " '%c'", ci.uv
        else
          outfp.printf " ???"
        end

        # Print font name, size, and position
        outfp.printf " %s size=%.2f x=%.2f y=%.2f",
                     fontname, ci.fontsize, ci.x, ci.y

        # Print the color id
        outfp.printf " colorid=%d", ci.colorid

        # Describe the color only when it differs from the previous glyph's
        if ci.colorid != previouscolorid
          print_color_value(outfp, tet, doc, ci.colorid)
          previouscolorid = ci.colorid
        end

        # Examine the "type" member
        if ci.type == TET::CT_SEQ_START
          outfp.printf " ligature_start"
        elsif ci.type == TET::CT_SEQ_CONT
          outfp.printf " ligature_cont"
        # Separators are only inserted for granularity > word
        elsif ci.type == TET::CT_INSERTED
          outfp.printf " inserted"
        end

        # Examine the bit flags in the "attributes" member
        if ci.attributes != TET::ATTR_NONE
          outfp.printf "/sub" if (ci.attributes & TET::ATTR_SUB) != 0
          outfp.printf "/sup" if (ci.attributes & TET::ATTR_SUP) != 0
          outfp.printf "/dropcap" if (ci.attributes & TET::ATTR_DROPCAP) != 0
          outfp.printf "/shadow" if (ci.attributes & TET::ATTR_SHADOW) != 0
          outfp.printf "/dehyphenation_pre" if (ci.attributes & TET::ATTR_DEHYPHENATION_PRE) != 0
          outfp.printf "/dehyphenation_artifact" if (ci.attributes & TET::ATTR_DEHYPHENATION_ARTIFACT) != 0
          outfp.printf "/dehyphenation_post" if (ci.attributes & TET::ATTR_DEHYPHENATION_POST) != 0
          outfp.printf "/Artifact" if (ci.attributes & TET::ATTR_ARTIFACT) != 0
        end

        outfp.printf "\n"
      end

      outfp.printf "\n"
    end

    # A non-zero error number once get_text stops returning text is reported
    if tet.get_errnum != 0
      print "Error #{tet.get_errnum} in #{tet.get_apiname}() " \
            "on page #{pageno}: #{tet.get_errmsg}\n"
    end

    tet.close_page(page)
  end

  tet.close_document(doc)

rescue TETException => pe
  print pe.backtrace.join("\n") + "\n"
  print "Error [#{pe.get_errnum}] #{pe.get_apiname}: #{pe.get_errmsg}"
  print " on page #{pageno}" if pageno != 0
  print "\n"
rescue StandardError => e
  # StandardError rather than Exception, so that Interrupt and SystemExit
  # still terminate the script
  print e.backtrace.join("\n") + "\n" + e.to_s + "\n"
ensure
  # Release the output file and the TET object on every exit path
  outfp.close if outfp && !outfp.closed?
  tet.delete if tet
end
