/*---------------------------------------------------------------------------*
 |          Copyright (c) 2005-2022 PDFlib GmbH. All rights reserved.        |
 +---------------------------------------------------------------------------+
 |    This software may not be copied or distributed except as expressly     |
 |    authorized by PDFlib GmbH's general license agreement or a custom      |
 |    license agreement signed by PDFlib GmbH.                               |
 |    For more information about licensing please refer to www.pdflib.com.   |
 *---------------------------------------------------------------------------*/

/*
 * TET Java class
 */

package com.pdflib;

/** Text and Image Extraction Toolkit (TET):
    Toolkit for extracting Text, Images, and Metadata from PDF Documents.
    <p>
    Note that this is only a syntax summary. For complete information
    please refer to the TET API reference manual which is available
    in the "doc" directory of the TET distribution.
    @author Rainer Schaaf
    @version 6.0
*/

public final class TET implements IpCOS {

    // The initialization code for loading the TET shared library.
    // The library name will be transformed into something platform-
    // specific by the VM, e.g. libtet_java.so or tet_java.dll.

    static {
	try {
	    System.loadLibrary("tet_java");
	} catch (UnsatisfiedLinkError e) {
	    System.err.println(
	"Cannot load the TET shared library/DLL for Java.\n" +
	"Make sure to properly install the native TET library.\n\n" +
	"For your information, the current value of java.library.path is:\n" +
	 System.getProperty("java.library.path") + "\n");

	    throw e;
	}
    }

    // ------------------------------------------------------------------------
    // public functions
    /** Release a document handle and all internal resources related to that document
        @param doc doc
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final void close_document(int doc)
    throws TETException
    {
	TET_close_document(tet, doc);
    }

    /** Release a page handle and all related resources.
        @param page page
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final void close_page(int page)
    throws TETException
    {
	TET_close_page(tet, page);
    }

    /** Convert a string in an arbitrary encoding to a Unicode string in various formats.
        @return  The converted Unicode string.
        @param inputformat inputformat
        @param inputstring inputstring
        @param optlist optlist
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final String convert_to_unicode(String inputformat, byte[] inputstring, String optlist)
    throws TETException
    {
	return TET_convert_to_unicode(tet, inputformat, inputstring, optlist);
    }

    /** Create a named virtual read-only file from data provided in memory.
        @param filename filename
        @param data data
        @param optlist optlist
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final void create_pvf(String filename, byte[] data, String optlist)
    throws TETException
    {
	TET_create_pvf(tet, filename, data, optlist);
    }

    /** Delete a named virtual file and free its data structures.
        @return  -1 if the virtual file exists but is locked, and 1 otherwise.
        @param filename filename
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final int delete_pvf(String filename)
    throws TETException
    {
	return TET_delete_pvf(tet, filename);
    }

    /** Get the name of the API function which caused an exception or failed.
        @return  Name of an API function.
    */
    public final String get_apiname()
    {
	return TET_get_apiname(tet);
    }

    /** Get the text of the last thrown exception or the reason for a failed function call.
        @return  Text containing the description of the most recent error condition.
    */
    public final String get_errmsg()
    {
	return TET_get_errmsg(tet);
    }

    /** Get the number of the last thrown exception or the reason for a failed function call.
        @return  Error number of the most recent error condition.
    */
    public final int get_errnum()
    {
	return TET_get_errnum(tet);
    }

    /** Write image data to memory.
        @return  Data representing the image according to the specified options.
        @param doc doc
        @param imageid imageid
        @param optlist optlist
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final byte[] get_image_data(int doc, int imageid, String optlist)
    throws TETException
    {
	return TET_get_image_data(tet, doc, imageid, optlist);
    }

    /** Get the next text fragment from a page's content.
        @return  A string containing the next text fragment on the page.
        @param page page
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final String get_text(int page)
    throws TETException
    {
	return TET_get_text(tet, page);
    }

    /** Query properties of a virtual file or the PDFlib Virtual Filesystem (PVF).
        @return  The value of some file parameter as requested by keyword.
        @param filename filename
        @param keyword keyword
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final double info_pvf(String filename, String keyword)
    throws TETException
    {
	return TET_info_pvf(tet, filename, keyword);
    }

    /** Open a disk-based or virtual PDF document for content extraction.
        @return  -1 on error, or a document handle otherwise.
        @param filename filename
        @param optlist optlist
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final int open_document(String filename, String optlist)
    throws TETException
    {
	return TET_open_document(tet, filename, optlist);
    }

    /** Open a page for text extraction.
        @return  A handle for the page, or -1 in case of an error.
        @param doc doc
        @param pagenumber pagenumber
        @param optlist optlist
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final int open_page(int doc, int pagenumber, String optlist)
    throws TETException
    {
	return TET_open_page(tet, doc, pagenumber, optlist);
    }

    /** Get the value of a pCOS path with type number or boolean.
        @return  The numerical value of the object identified by the pCOS path.
        @param doc doc
        @param path path
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final double pcos_get_number(int doc, String path)
    throws TETException
    {
	return TET_pcos_get_number(tet, doc, path);
    }

    /** Get the value of a pCOS path with type name, number, string, or boolean.
        @return  A string with the value of the object identified by the pCOS path.
        @param doc doc
        @param path path
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final String pcos_get_string(int doc, String path)
    throws TETException
    {
	return TET_pcos_get_string(tet, doc, path);
    }

    /** Get the contents of a pCOS path with type stream, fstream, or string.
        @return  The unencrypted data contained in the stream or string.
        @param doc doc
        @param optlist optlist
        @param path path
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final byte[] pcos_get_stream(int doc, String optlist, String path)
    throws TETException
    {
	return TET_pcos_get_stream(tet, doc, optlist, path);
    }

    /** Set one or more global options for TET.
        @param optlist optlist
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final void set_option(String optlist)
    throws TETException
    {
	TET_set_option(tet, optlist);
    }

    /** Write image data to disk.
        @return  -1 on error, or the image format otherwise (see IF_TIFF etc.)
        @param doc doc
        @param imageid imageid
        @param optlist optlist
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final int write_image_file(int doc, int imageid, String optlist)
    throws TETException
    {
	return TET_write_image_file(tet, doc, imageid, optlist);
    }

    /** Process a page and create TETML output.
        @return  Always 1. PDF problems are reported in a TETML Exception element.
        @param doc doc
        @param pageno pageno
        @param optlist optlist
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final int process_page(int doc, int pageno, String optlist)
    throws TETException
    {
	return TET_process_page(tet, doc, pageno, optlist);
    }

    /** Retrieve TETML data from memory.
        @return  A byte array containing the next chunk of TETML data.
        @param doc doc
        @param optlist optlist
        @exception com.pdflib.TETException
	TET output cannot be finished after an exception.
    */
    public final byte[] get_tetml(int doc, String optlist)
    throws TETException
    {
	return TET_get_tetml(tet, doc, optlist);
    }

     // ------------------------------------------------------------------------
    // public char_info/color_info/image_info fields

    /** UTF-32 Unicode value of the current character. */
    public int uv;

    /** Character type; see {@link com.pdflib.TET#CT_NORMAL} etc. for possible values. */
    public int type;

    /** Indicates whether the glyph could be mapped to Unicode. */
    public boolean unknown;

    /** Glyph attributes; see {@link com.pdflib.TET#ATTR_NONE} etc. for possible values. */
    public int attributes;

    /** x position of the glyph's or image's reference point.
    <p>
    x/y describe the position of the glyph's or image's reference point.
    <p>
    Text: The reference point is the lower left corner of the glyph box for
    horizontal writing mode, and the top center point for vertical writing
    mode. For artificial characters the x, y coordinates will be those of
    the end point of the most recent real character.
    <p>
    Images: The reference point is the lower left corner of the image.
    */
    public double x;

    /** y position of the glyph's or image's reference point.
    @see com.pdflib.TET#x
    */
    public double y;

    /** Width of glyph or image.
    <p>
    Text: Width of the corresponding glyph (for both horizontal and vertical
    writing mode). For artificial characters the width will be 0.
    <p>
    Images: Width of the image on the page in points, measured
    along the image's edges
    */
    public double width;

    /** Height of glyph or image.
    @see com.pdflib.TET#width
    */
    public double height;

    /** Direction of inline text progression or direction of the pixel rows.
    <p>
    Text: Direction of inline text progression in degrees measured
    counter-clockwise.
    For horizontal writing mode this is the direction of the text baseline;
    for vertical writing mode it is the digression from the standard -90&deg;
    direction. The angle will be in the range -180&deg; &lt; alpha &lt;= +180&deg;. For
    standard horizontal text as well as for standard text in vertical writing
    mode the angle will be 0&deg;.
    <p>
    Images: Direction of the pixel rows. The angle will be in the range
    -180&deg; &lt; alpha &lt;= +180&deg;. For upright images alpha will be 0&deg;.
    */
    public double alpha;

    /** Text slanting angle or direction of pixel columns relative to the perpendicular of alpha.
    <p>
    Text: Text slanting angle in degrees (counter-clockwise), relative to
    the perpendicular of alpha.
    The angle will be 0&deg; for upright text, and
    negative for italicized (slanted) text. The angle will be in the range
    -180&deg; &lt; beta &lt;= 180&deg;, but different from &plusmn;90&deg;. If abs(beta) &gt; 90&deg;
    the text is mirrored at the baseline.
    <p>
    Images: Direction of the pixel columns, relative to the perpendicular of
    alpha. The angle will be in the range -180&deg; &lt; beta &lt;= +180&deg;, but different
    from &plusmn;90&deg;. For upright images beta will be in the range -90&deg;
    &lt; beta &lt; +90&deg;.  If abs(beta) &gt; 90&deg; the image will be mirrored at the
    baseline.
    */
    public double beta;

    /** Index of the image in the pCOS pseudo object images[].
    <p>
    Detailed image properties can be retrieved via the entries in this
    pseudo object.
    */
    public int imageid;

    /** Index of the font in the fonts[] pseudo object.
    <p>
    fontid is never negative.
    */
    public int fontid;

    /** Size of the font (always positive).
    <p>
    The relation of this value to the actual height of glyphs is not fixed,
    but may vary with the font design. For most fonts the font size is chosen
    such that it encompasses all ascenders (including accented characters)
    and descenders.
    */
    public double fontsize;

    /** Text rendering mode; see {@link com.pdflib.TET#TR_FILL} etc. for possible values. */
    public int textrendering;

    /** Color id of the fill and stroke color.
    */
    public int colorid;


    /** Colorspace id or -1.
    */
    public int colorspaceid;


    /** Pattern id or -1.
    */
    public int        patternid;

    /** Color components.
    */
    public double      components[];


    // ------------------------------------------------------------------------
    // public constants
    
    /*
     * Values of the "type" field filled by get_char_info()
     */

    /** Type reported in {@link com.pdflib.TET#type} by {@link com.pdflib.TET#get_char_info}: normal character represented by exactly one glyph. */
    public final static int CT_NORMAL		= 0;

    /** Type reported in {@link com.pdflib.TET#type} by {@link com.pdflib.TET#get_char_info}: start of a sequence, e.g. ligature. */
    public final static int CT_SEQ_START	= 1;

    /** Type reported in {@link com.pdflib.TET#type} by {@link com.pdflib.TET#get_char_info}: continuation of a sequence. */
    public final static int CT_SEQ_CONT		= 10;

    /** Type reported in {@link com.pdflib.TET#type} by {@link com.pdflib.TET#get_char_info}: inserted word, line, or paragraph separator. */
    public final static int CT_INSERTED		= 12;


    /*
     * Bit values for the "attributes" field filled by get_char_info()
     */

    /** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_char_info}: no attribute set. */
    public final static int ATTR_NONE        			= 0x00000000;
 
	/** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_char_info}: subscript. */
    public final static int ATTR_SUB          			= 0x00000001;
    
	/** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_char_info}: superscript. */    
    public final static int ATTR_SUP          			= 0x00000002;
    
	/** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_char_info}: initial large letter. */
    public final static int ATTR_DROPCAP      			= 0x00000004;
    
	/** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_char_info}: shadowed text. */
    public final static int ATTR_SHADOW       			= 0x00000008;

    /** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_char_info}: character before hyphenation. */
    public final static int ATTR_DEHYPHENATION_PRE      = 0x00000010;

    /** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_char_info}: hyphenation character, i.e. soft hyphen (unrelated to Tagged PDF Artifact). */
    public final static int ATTR_DEHYPHENATION_ARTIFACT = 0x00000020;

    /** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_char_info}: character after hyphenation. */
    public final static int ATTR_DEHYPHENATION_POST     = 0x00000040;

    /** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_char_info} and {@link com.pdflib.TET#get_image_info}: text or image marked as Artifact (irrelevant content) in Tagged PDF. */
    public final static int ATTR_ARTIFACT                = 0x00000100;
    

	/*
	 * The following bit values are only used in the "attributes" field filled by get_image_info()
	 */
 
    /** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_image_info}: image extracted from an annotation (appearance stream). */
    public final static int ATTR_ANNOTATION              = 0x00000200;
    
    /** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_image_info}: image extracted from a pattern. */
    public final static int ATTR_PATTERN                 = 0x00000400;
    
    /** Property reported in {@link com.pdflib.TET#attributes} by {@link com.pdflib.TET#get_image_info}: image extracted from from a soft mask in a graphics state (defined in a Transparency Group XObject). */
    public final static int ATTR_SOFTMASK                = 0x00000800;



    /*
     * Values for the "textrendering" field filled by get_char_info()
     */
    
    /** Text rendering mode reported in {@link com.pdflib.TET#textrendering} by {@link com.pdflib.TET#get_char_info}: fill text. */
    public final static int TR_FILL             = 0;
    
    /** Text rendering mode reported in {@link com.pdflib.TET#textrendering} by {@link com.pdflib.TET#get_char_info}: stroke text (outline). */
    public final static int TR_STROKE           = 1;
    
    /** Text rendering mode reported in {@link com.pdflib.TET#textrendering} by {@link com.pdflib.TET#get_char_info}: fill and stroke text. */
    public final static int TR_FILLSTROKE       = 2;
    
    /** Text rendering mode reported in {@link com.pdflib.TET#textrendering} by {@link com.pdflib.TET#get_char_info}: invisible text. */
    public final static int TR_INVISIBLE        = 3;
    
    /** Text rendering mode reported in {@link com.pdflib.TET#textrendering} by {@link com.pdflib.TET#get_char_info}: fill text and add it to the clipping path. */
    public final static int TR_FILL_CLIP        = 4;
    
    /** Text rendering mode reported in {@link com.pdflib.TET#textrendering} by {@link com.pdflib.TET#get_char_info}: stroke text and add it to the clipping path. */
    public final static int TR_STROKE_CLIP      = 5;
    
    /** Text rendering mode reported in {@link com.pdflib.TET#textrendering} by {@link com.pdflib.TET#get_char_info}: fill and stroke text and add it to the clipping path. */
    public final static int TR_FILLSTROKE_CLIP  = 6;
    
    /** Text rendering mode reported in {@link com.pdflib.TET#textrendering} by {@link com.pdflib.TET#get_char_info}: add text to the clipping path. */
    public final static int TR_CLIP             = 7;


    /*
     * Image formats returned by write_image_file()
     */
    
    /** Image format returned by {@link com.pdflib.TET#write_image_file}: image/tiff, *.tif. */
    public final static int IF_TIFF  = 10;
    
    /** Image format returned by {@link com.pdflib.TET#write_image_file}: format image/jpeg, *.jpg. */
    public final static int IF_JPEG  = 20;
    
    /** Image format returned by {@link com.pdflib.TET#write_image_file}: image/jp2, *.jp2. */
    public final static int IF_JP2   = 31;
    
    /** Image format returned by {@link com.pdflib.TET#write_image_file}: image/jpx, *.jpf. */
    public final static int IF_JPF   = 32;
    
    /** Image format returned by {@link com.pdflib.TET#write_image_file}: raw JPEG 2000 code stream, *.j2k. */
    public final static int IF_J2K   = 33;
    
    /** Image format returned by {@link com.pdflib.TET#write_image_file}: image/x-jbig2, *.jbig2. */  
    public final static int IF_JBIG2 = 50; 



    // ------------------------------------------------------------------------
    // public functions

    /** Create a new TET object.
	@exception com.pdflib.TETException May throw an exception in case
	of memory shortage.
     */
    public TET() throws TETException
    {
	tet = TET_new();
    }


    /** Delete a TET context and release all its internal resources.
	This should be called for cleanup when processing is done,
	and after a TETException occurred.
        This method may also be called by the finalizer, but it is safe to
	issue multiple calls.
     */
    public final void delete()
    {
	TET_delete(tet);
	tet = (long) 0;
    }

     /** Get detailed information for the next character in the most recent
	text fragment; the results are reported in public fields.
        @param page page
        @return Bindig-specific error or success code.
	@exception com.pdflib.TETException May throw an exception for
	various reasons.
     */
    public final int get_char_info(int page) throws TETException
    {
	return TET_get_char_info(tet, page);
    }

    /** Get detailed information for a color id which has been retrieved with
        TET_get_char_info(); the results are reported in public fields.
	@exception com.pdflib.TETException May throw an exception for
        @param doc doc
        @param colorid colorid
        @param keyword keyword
        @return Details about the requested color space and color.
	various reasons.
     */
    public final int get_color_info(int doc, int colorid, String keyword) throws TETException
    {
	return TET_get_color_info(tet, doc, colorid, keyword);
    }

    /** Retrieve information about the next image on the page (but not the
        actual pixel data); the results are reported in public fields.
        @param page page
        @return Details about the next image on the page.
	@exception com.pdflib.TETException May throw an exception for
	various reasons.
     */
    public final int get_image_info(int page) throws TETException
    {
	return TET_get_image_info(tet, page);
    }

    // ------------------------------------------------------------------------
    // IpCOS-specific functions
    
    /**
     * Open a disk-based or virtual PDF document via the IpCOS interface.
     *  
     * @param filename	The full path name of the PDF file to be opened.
     * 		The file will be searched by means of the SearchPath resource.
     * @param optlist	An option list specifying document options.
     * 
     * @return  A document handle.
     * @exception java.lang.Exception	see manual
     */
    public int pcos_open_document(String filename, String optlist) throws Exception
    {
        return open_document(filename, optlist);
    }
    
    /** Close PLOP input document via the IpCOS interface.
     * 
     * @param doc	A valid document handle obtained with open_document().
     * @param optlist	An option list specifying document options.
     * 
     * @exception java.lang.Exception	see manual
     */
    public void pcos_close_document(int doc, String optlist) throws Exception
    {
        close_document(doc);
    }

    // ------------------------------------------------------------------------
    // private functions

    private long tet;

    protected final void finalize()
    {
	TET_delete(tet);
	tet = (long) 0;
    }

    private final static native void TET_delete(long jtet);

    // this must _not_ be "static" since it will fill the char_info fields
    private final native int TET_get_char_info(long jtet, int jpage)
	    throws TETException;

    // this must _not_ be "static" since it will fill the color_info fields
    private final native int TET_get_color_info(long jtet, int jdoc, int jcolorid,
            String jkeyword) throws TETException;

    // this must _not_ be "static" since it will fill the image_info fields
    private final native int TET_get_image_info(long jtet, int jpage)
	    throws TETException;

    private final static native long TET_new() throws
            TETException;

    private final static native void TET_close_document(long jtet, int jdoc) throws TETException;
    private final static native void TET_close_page(long jtet, int jpage) throws TETException;
    private final static native String TET_convert_to_unicode(long jtet, String jinputformat, byte[] jinputstring, String joptlist) throws TETException;
    private final static native void TET_create_pvf(long jtet, String jfilename, byte[] jdata, String joptlist) throws TETException;
    private final static native int TET_delete_pvf(long jtet, String jfilename) throws TETException;
    private final static native String TET_get_apiname(long jtet);
    private final static native String TET_get_errmsg(long jtet);
    private final static native int TET_get_errnum(long jtet);
    private final static native byte[] TET_get_image_data(long jtet, int jdoc, int jimageid, String joptlist) throws TETException;
    private final static native String TET_get_text(long jtet, int jpage) throws TETException;
    private final static native double TET_info_pvf(long jtet, String jfilename, String jkeyword) throws TETException;
    private final static native int TET_open_document(long jtet, String jfilename, String joptlist) throws TETException;
    private final static native int TET_open_page(long jtet, int jdoc, int jpagenumber, String joptlist) throws TETException;
    private final static native double TET_pcos_get_number(long jtet, int jdoc, String jpath) throws TETException;
    private final static native String TET_pcos_get_string(long jtet, int jdoc, String jpath) throws TETException;
    private final static native byte[] TET_pcos_get_stream(long jtet, int jdoc, String joptlist, String jpath) throws TETException;
    private final static native void TET_set_option(long jtet, String joptlist) throws TETException;
    private final static native int TET_write_image_file(long jtet, int jdoc, int jimageid, String joptlist) throws TETException;
    private final static native int TET_process_page(long jtet, int jdoc, int jpageno, String joptlist) throws TETException;
    private final static native byte[] TET_get_tetml(long jtet, int jdoc, String joptlist) throws TETException;
}
