<?php
/* 
 * Simple PDF text extractor based on PDFlib TET
 */


/* Option list handed to set_option() on the TET object */
$globaloptlist = 'searchpath={{../data} {../../data} {../../../resource/cmap}}';

/* Option list handed to open_document(); empty here */
$docoptlist = '';

/* Option list handed to open_page() for every page */
$pageoptlist = 'granularity=page';

/* Written to the output file after each chunk of text returned by
 * get_text(). Pick whatever suits the application; with granularity=word
 * a space character may be the better choice.
 */
$separator = "\n";

/* Page currently being processed. The exception handler only mentions a
 * page number when this is greater than 0.
 */
$pageno = 0;

/*
 * Open the input PDF, walk over all of its pages and write the text of
 * each page to the output file, followed by $separator after every chunk.
 * Failures reported through return values are printed (page level) or
 * abort the script (file/document level); exceptions abort the script.
 */
try {
    $infilename = "TET-datasheet.pdf";
    $outfilename = "TET-datasheet.txt";

    $tet = new TET();

    if (!$outfp = fopen($outfilename, "wb")) {
        die("Couldn't open output file '" . $outfilename . "'\n");
    }

    $tet->set_option($globaloptlist);

    $doc = $tet->open_document($infilename, $docoptlist);

    /* -1 signals that the document could not be opened */
    if ($doc == -1) {
        die("Error ". $tet->get_errnum() . " in " . $tet->get_apiname()
            . "(): " . $tet->get_errmsg() . "\n");
    }

    /* get number of pages in the document */
    $n_pages = $tet->pcos_get_number($doc, "length:pages");

    /* loop over pages in the document */
    for ($pageno = 1; $pageno <= $n_pages; ++$pageno) {

        $page = $tet->open_page($doc, $pageno, $pageoptlist);

        if ($page == -1) {
            print("Error ". $tet->get_errnum() ." in ". $tet->get_apiname()
                . "(): " . $tet->get_errmsg() . "\n");
            /* Skip the unusable page and try the next one. A bare "next"
             * is not a PHP statement; it is evaluated as an undefined
             * constant and never skips the rest of the loop body.
             */
            continue;
        }

        /* Retrieve all text fragments; this is actually not required
         * for granularity=page, but must be used for other granularities.
         * The loose comparison is intentional: it ends the loop on an
         * empty string as well as on null.
         */
        while (($text = $tet->get_text($page)) != "") {

            fwrite($outfp, $text);  /* print the retrieved text */

            /* print a separator between chunks of text */
            fwrite($outfp, $separator);
        }

        /* get_text() stops on errors too; report one if it occurred */
        if ($tet->get_errnum() != 0) {
            print("Error ". $tet->get_errnum() . " in " .
                    $tet->get_apiname() . "() on page $pageno: "
                    . $tet->get_errmsg() . "\n");
        }

        $tet->close_page($page);
    }

    /* No page is being processed any more; keeps the exception handler
     * from blaming a non-existent page (n_pages + 1) for later failures.
     */
    $pageno = 0;

    fclose($outfp);

    $tet->close_document($doc);
}

catch (TETException $e) {
    /* mention the page only while a page was actually being processed */
    $addpage = "";
    if ($pageno > 0) $addpage = " on page " . $pageno;
    die("TET exception occurred in extractor sample:\n" .
        "[" . $e->get_errnum() . "] " . $e->get_apiname() .
        $addpage  . ": " . $e->get_errmsg() . "\n");
}
catch (Throwable $e) {
    die(get_class($e) . " occurred in extractor sample:\n" . $e->getMessage() . "\n");
}

/* drop the script's reference to the TET object */
$tet = 0;
?>
