<?php
/* 
 * Page-based image extractor based on PDFlib TET
 */


/* global option list */
$globaloptlist = "searchpath={{../data} {../../data} }";

/* document-specific option list */
$docoptlist = "";

/* page-specific option list, e.g.
 * "imageanalysis={merge={gap=1}}"
 */
$pageoptlist = "";

/* Current page number. It stays 0 until the page loop starts, so the
 * exception handler below can tell whether a page was being processed.
 */
$pageno = 0;

/*
 * Main driver: open the input document, walk over all pages, and for each
 * image placed on a page print its properties and placement geometry and
 * write the image data (plus an attached mask, if any) to a file named
 * <basename>_p<page>_<count>_I<imageid>[_mask_I<maskid>].
 */
try {
    $infilename = "TET-datasheet.pdf";

    $tet = new TET();

    $outfilebase = $infilename;
    /* strip .pdf suffix if present */
    if (strlen($outfilebase) > 4 &&
        strcasecmp(substr($outfilebase, -4), ".pdf") == 0) {
        $outfilebase = substr($outfilebase, 0, -4);
    }

    $tet->set_option($globaloptlist);

    $doc = $tet->open_document($infilename, $docoptlist);

    if ($doc == -1) {
        die("Error " . $tet->get_errnum() . " in " . $tet->get_apiname()
            . "(): " . $tet->get_errmsg() . "\n");
    }

    /* Get number of pages in the document */
    $n_pages = $tet->pcos_get_number($doc, "length:pages");

    /* Loop over pages and extract images */
    for ($pageno = 1; $pageno <= $n_pages; ++$pageno) {
        /* per-page running counter, used in the output file names */
        $imagecount = 0;

        $page = $tet->open_page($doc, $pageno, $pageoptlist);

        if ($page == -1) {
            print("Error " . $tet->get_errnum() . " in " . $tet->get_apiname()
                . "(): " . $tet->get_errmsg() . "\n");
            /* Try the next page. PHP spells this "continue"; the Perl-style
             * "next" is not a PHP statement and would not skip the page.
             */
            continue;
        }

        /* Retrieve all images on the page */
        while ($ti = $tet->get_image_info($page)) {
            $imagecount++;

            /* Report image details: pixel geometry, color space, etc */
            report_image_info($tet, $doc, $ti->imageid);

            /* Report placement geometry; the trailing %s carries the
             * optional ", Artifact" marker.
             */
            printf("  placed on page %d at position (%g, %g): " .
                "%dx%dpt, alpha=%g, beta=%g%s\n",
                $pageno, $ti->x, $ti->y,
                $ti->width, $ti->height, $ti->alpha, $ti->beta,
                ($ti->attributes & TET::ATTR_ARTIFACT) != 0 ? ", Artifact" : "");

            /* Write image data to file */
            $imageoptlist = " filename {" . $outfilebase . "_p" .
                $pageno . "_" . $imagecount . "_I" . $ti->imageid . "}";
            if ($tet->write_image_file($doc, $ti->imageid, $imageoptlist) == 0) {
                print("Error " . $tet->get_errnum() . " in " .
                    $tet->get_apiname() . "(): " . $tet->get_errmsg() . "\n");
                continue;  /* process next image */
            }

            /* Check whether the image has a mask attached */
            $maskid = $tet->pcos_get_number($doc,
                "images[" . $ti->imageid . "]/maskid");
            if ($maskid != -1) {
                print "  masked with ";
                report_image_info($tet, $doc, $maskid);

                $imageoptlist = " filename {" . $outfilebase . "_p" .
                    $pageno . "_" . $imagecount . "_I" . $ti->imageid .
                    "_mask_I" . $maskid . "}";
                /* Write the mask image itself, i.e. the id fetched via pCOS
                 * above (not a property of the image info object).
                 */
                if ($tet->write_image_file($doc, $maskid, $imageoptlist) == 0) {
                    print("Error " . $tet->get_errnum() . " in " .
                        $tet->get_apiname() . "(): " . $tet->get_errmsg() . "\n");
                }
            }
        }

        /* The loop above also ends when get_image_info() fails, so check
         * the error number to tell a failure from "no more images".
         */
        if ($tet->get_errnum() != 0) {
            print("Error " . $tet->get_errnum() . " in " .
                $tet->get_apiname() . "() on page $pageno: "
                . $tet->get_errmsg() . "\n");
        }

        $tet->close_page($page);
    }

    $tet->close_document($doc);
}
catch (TETException $e) {
    /* mention the page number only if the page loop had already started */
    $addpage = "";
    if ($pageno > 0) {
        $addpage = " on page " . $pageno;
    }
    die("TET exception occurred in images_per_page sample:\n" .
        "[" . $e->get_errnum() . "] " . $e->get_apiname() .
        $addpage . ": " . $e->get_errmsg() . "\n");
}
catch (Throwable $e) {
    die(get_class($e) . " occurred in images_per_page sample:\n" .
        $e->getMessage() . "\n");
}

/* drop the reference to the TET object */
$tet = 0;
/*
 * Print a one-line description of an image resource, terminated by a
 * newline. All values are queried through pCOS paths on the open document:
 * - pCOS id of the image (the index into the images[] array)
 * - pixel width and height of the image
 * - number of color components, bits per component, and color space name;
 *   for an "Indexed" color space the name of its base color space follows
 * - ", mergetype=artificial" if images[]/mergetype is 1
 * - ", used as stencil mask" if images[]/stencilmask is 1
 *   (i.e. /ImageMask in PDF)
 *
 * $tet      object providing pcos_get_number() and pcos_get_string()
 * $doc      document handle passed through to the pCOS calls
 * $imageid  pCOS id of the image to describe
 */
function report_image_info($tet, $doc, $imageid) {
    /* small shortcuts so that the pCOS paths stand out below */
    $number = function ($path) use ($tet, $doc) {
        return $tet->pcos_get_number($doc, $path);
    };
    $string = function ($path) use ($tet, $doc) {
        return $tet->pcos_get_string($doc, $path);
    };

    $image = "images[" . $imageid . "]";

    /* pixel geometry and color depth */
    $pixelwidth = $number($image . "/Width");
    $pixelheight = $number($image . "/Height");
    $bitspercomponent = $number($image . "/bpc");

    /* the color space is a separate pCOS object referenced by id */
    $csid = $number($image . "/colorspaceid");
    $colorspace = "colorspaces[" . $csid . "]";
    $ncomponents = $number($colorspace . "/components");

    echo sprintf("image I%d: %dx%d pixel, ", $imageid, $pixelwidth, $pixelheight);

    $colorspacename = $string($colorspace . "/name");
    echo sprintf("%gx%g bit %s", $ncomponents, $bitspercomponent, $colorspacename);

    /* an indexed color space additionally names its base color space */
    if ("Indexed" == $colorspacename) {
        $baseid = $number($colorspace . "/baseid");
        echo sprintf(" %s", $string("colorspaces[" . $baseid . "]/name"));
    }

    /* mergetype 1 is reported as "artificial" */
    if ($number($image . "/mergetype") == 1) {
        echo ", mergetype=artificial";
    }

    if ($number($image . "/stencilmask") == 1) {
        echo ", used as stencil mask";
    }

    echo "\n";
}


