     H DEBUG(*YES) DFTACTGRP(*NO) BNDDIR('TETLIB/TETLIB')
     H COPYRIGHT('(c) PDFlib GmbH (www.pdflib.com)')
      **************************************************************************
      *   Simple PDF text extractor based on PDFlib TET
      *
      *   Note: All strings passed into PDFlib are unicode strings with varying
      *         Use the %UCS2 build in function to convert a single byte string
      *         All strings returned from PDFlib are unicode strings with varyin
      *         Use the %CHAR build in function to convert a unicode string to a
      **************************************************************************
     d/copy QRPGLESRC,TETLIB
     d/copy QRPGLESRC,IFSIO
      **************************************************************************
      * global option list
     d globaloptlist   c                   %ucs2('-
     d                                     searchpath={{../data}}')

      * document-specific option list
     d docoptlist      c                   %ucs2('')

      * page-specific option list
     d pageoptlist     c                   %ucs2('')

      * here you can insert basic image extract options (more below) */
     d baseimageoptlist...
     d                 c                   %ucs2('')

     d tet             s               *
     d pageno          s             10i 0
     d infile          s            256    varying
     d imageoptlist    s           1024c   varying
     d n_pages         s             10i 0
     d doc             s             10i 0
     d pdfpage         s             10i 0
     d ti              s               *
     d error           s            100
     d error_dsp       s             52
     d x_position      s              8s 3
     d y_position      s              8s 3
     d width           s             10i 0
     d height          s             10i 0
     d alpha           s              8s 3
     d beta            s              8s 3
     d imagecount      s             10i 0
     d maskid          s             10i 0
     d Artifact_text   s             10
     d resBitand       s             10i 0
     d valArtifact     s             10i 0
     d outfd           s             10i 0
     d result          s             10i 0
     d outfilename     s            256    varying
     d crlf            s              2    inz(x'0D0A')
     d errnoValue      s             10i 0

      * prototypes
     d report_image_info...
     d                 pr
     d tet                             *
     d doc                           10i 0
     d img_id                        10i 0

     d edit_g          pr            20
     d position                       8s 3

     D IFSerrno        PR            10I 0

     d utf8_write      pr
     d  tet                            *   value
     d  outfd                        10i 0 value
     d  out                                value like(r_text_long_u)
      **************************************************************************
     c     *entry        plist
     c                   parm                    parm1           128
     c                   parm                    parm2           128
      /free
          if parm1 = *BLANKS or parm2 = *BLANKS;
              error='usage: CALL PGM(IMAGE_PP) PARM(<infile> <outfile>)';
              exsr exit;
          endif;

          outfilename = %trim(parm2);

          infile = %trim(parm1);

          tet=TET_new;
          if tet=*null;
              error='image_extr: out of memory';
              exsr exit;
          endif;

          outfd = open(outfilename:
                       O_WRONLY+O_CREAT+O_TRUNC+O_EXCL :
                       S_IRWXU+S_IRWXG);

          if outfd < 0;
              errnoValue = IFSerrno();
              TET_delete(tet);
              error = 'Couldn''t open output file ' +
                      '''' + outfilename + '''';
              exsr exit;
          endif;

          monitor;

              TET_set_option(tet:globaloptlist);

              doc=TET_open_document(tet:%ucs2(%trim(parm1)):docoptlist);
              if doc=-1;
                  exsr dspError;
                  exsr exit;
              endif;

            // get number of pages in the document
              n_pages=TET_pcos_get_number(tet:doc:%ucs2('length:pages'));

            // loop over pages in the document
              for pageno=1 to n_pages;
                  pdfpage=TET_open_page(tet:doc:pageno:pageoptlist);
                  if pdfpage=-1;
                      exsr dspError;
                      iter;
                  endif;

                  imagecount = 0;

                  // Retrieve all images on the page
                  dou 1=2;
                      ti = TET_get_image_info(tet: pdfpage);
                      if ti=*null;
                          leave;
                      endif;
                      TET_image_info_p=ti;

                      imagecount += 1;

                      // Report image details: pixel geometry, color space, etc.
                      report_image_info(tet: doc: img_id);

                      // Report placement geometry
                      x_position = img_x;
                      y_position = %dech(img_y: 8: 3);
                      width = img_width;
                      height = img_height;
                      alpha = img_alpha;
                      beta = img_beta;

                      utf8_write(tet : outfd : %ucs2(', placed on page '));
                      utf8_write(tet : outfd : %ucs2(%char(pageno)));
                      utf8_write(tet : outfd : %ucs2(' at position ('));
                      utf8_write(tet : outfd :
                                 %ucs2(%trim(edit_g(x_position))));
                      utf8_write(tet : outfd : %ucs2(', '));
                      utf8_write(tet : outfd :
                                 %ucs2(%trim(edit_g(y_position))));
                      utf8_write(tet : outfd : %ucs2('):'));

                      resBitand = %BITAND(attributes : TET_ATTR_ARTIFACT);
                      valArtifact = TET_ATTR_ARTIFACT;
                      if (%BITAND(attributes : TET_ATTR_ARTIFACT) =
                            TET_ATTR_ARTIFACT);
                        Artifact_text = ', Artifact';
                      else;
                        Artifact_text = '';
                      ENDIF;

                      utf8_write(tet : outfd : %ucs2(' '));
                      utf8_write(tet : outfd : %ucs2(%char(width)));
                      utf8_write(tet : outfd : %ucs2('x'));
                      utf8_write(tet : outfd : %ucs2(%char(height)));
                      utf8_write(tet : outfd : %ucs2('pt, alpha='));
                      utf8_write(tet : outfd : %ucs2(%trim(edit_g(alpha))));
                      utf8_write(tet : outfd : %ucs2(', beta='));
                      utf8_write(tet : outfd : %ucs2(%trim(edit_g(beta))));
                      utf8_write(tet : outfd : %ucs2(Artifact_text));

                      // Write image data to file
                      imageoptlist = 'filename={' +
                                       %trim(infile) +
                                       '_p' + %char(pageno) +
                                       '_' + %char(imagecount) + '_I' +
                                       %char(img_id) + '}';

                      if TET_write_image_file(tet: doc :
                                              img_id: imageoptlist) = -1;
                          exsr dspError;
                          iter;
                      endif;

                      // Check whether the image has a mask attached...
                      maskid = TET_pcos_get_number(tet: doc:
                                                   %ucs2('images[' +
                                                           %char(img_id) +
                                                           ']/maskid'));

                      // ...and retrieve it if present
                      if (maskid <> -1);
                          result = write(outfd : %addr(crlf) : 2);
                          utf8_write(tet : outfd : %ucs2('  masked with '));

                          report_image_info(tet: doc: maskid);
                          imageoptlist = 'filename={' + %trim(infile) +
                                                      '_p' + %char(pageno) +
                                                      '_' + %char(imagecount) +
                                                      '_I' + %char(img_id) +
                                                      '_mask_I' +
                                                      %char(maskid) + '}';

                          if (TET_write_image_file(tet:
                                                   doc:
                                                   maskid:
                                                   imageoptlist) = -1);
                              exsr dspError;
                              iter;
                          endif;
                      endif;
                      result = write(outfd : %addr(crlf) : 2);
                  enddo;

                  if TET_get_errnum(tet) <>0;
                      exsr dspError;
                  endif;

                  TET_close_page(tet: pdfpage);
              endfor;
              TET_close_document(tet: doc);
          on-error;
              exsr dspError;
          endmon;

          exsr exit;
       // **********************************************************************
       //  unmonitored error occured
          begsr *pssr;

              if tet<>*null;
                  TET_delete(tet);
              endif;

              error_dsp = 'General program failure.';
              dsply error_dsp;
          endsr '*CANCL';
       // **********************************************************************
          begsr exit;
              if tet<>*null;
                  TET_delete(tet);
                  tet = *null;
              endif;
              if error<>'';
                error_dsp = error;
                dsply error_dsp;
              endif;

              *inlr = *on;
              return ;
          endsr;
       // **********************************************************************
          begsr dspError;
              error='Error '+ %char(TET_get_errnum(tet))+' in '+
                              %char(TET_get_apiname(tet))+'(): '+
                              %char(TET_get_errmsg(tet)) ;
              error_dsp = error;
              dsply error_dsp;
              error ='';
          endsr;
      /END-FREE
      *********************************************************************************************
     P report_image_info...
     P                 B
     d report_image_info...
     D                 PI
     d tet                             *
     d doc                           10i 0
     d img_id                        10i 0

      * Variables
     d width           s             10i 0
     d height          s             10i 0
     d bpc             s             10i 0
     d cs              s             10i 0
     d components      s             10i 0
     d mergetype       s             10i 0
     d stencilmask     s             10i 0
     d csname          s           1024c
     d txt             s             50
     d basecs          s             10i 0
     d basecsname      s           1024c
      /FREE
          width = TET_pcos_get_number(tet: doc: %ucs2('images[' +
                                                        %char(img_id) +
                                                        ']/Width'));
          height = TET_pcos_get_number(tet: doc: %ucs2('images[' +
                                                         %char(img_id) +
                                                         ']/Height'));
          bpc = TET_pcos_get_number(tet: doc: %ucs2('images[' + %char(img_id) +
                                                       ']/bpc'));
          cs = TET_pcos_get_number(tet: doc: %ucs2('images[' + %char(img_id) +
                                                     ']/colorspaceid'));
          components = TET_pcos_get_number(tet: doc: %ucs2('colorspaces[' +
                                                             %char(cs) +
                                                             ']/components'));

          txt = 'image ' + %char(img_id) + ': ' + %char(width) + 'x' +
                  %char(height) + ' pixel,';

          csname = TET_pcos_get_string(tet: doc: %ucs2('colorspaces[' +
                                                         %char(cs) + ']/name'));

          txt = %trimr(txt) + ' ' + %char(components) + 'x' +
                  %char(bpc) + ' bit ' + %char(csname);

          if %trim(csname) = 'Indexed';
              basecs = TET_pcos_get_number(tet: doc: %ucs2('colorspaces[' +
                                                             %char(cs) +
                                                             ']/baseid'));
              basecsname = TET_pcos_get_string(tet: doc: %ucs2('colorspaces[' +
                                                                 %char(basecs) +
                                                                 ']/name'));
              txt = %trimr(txt) + ' ' + %char(%trim(basecsname));
          endif;

          // Check whether the image has been created by merging smaller images
          mergetype = TET_pcos_get_number(tet: doc: %ucs2('images[' +
                                                            %char(img_id) +
                                                            ']/mergetype'));
          if mergetype = 1;
               txt = %trimr(txt) + ', mergetype=artificial';
          endif;

          stencilmask = TET_pcos_get_number(tet: doc: %ucs2('images[' +
                                                              %char(img_id) +
                                                              ']/stencilmask'));

          if (stencilmask <> 0);
              txt = %trimr(txt) + ', used as stencil mask';
          endif;

          utf8_write(tet : outfd : %ucs2(%trimr(txt)));
      /END-FREE
     P                 E
      *********************************************************************************************
     p edit_g          B
     d edit_g          PI            20
     d position                       8s 3

     d result          s             20
      * Variables
      /FREE
          result = %trimr(%trim(%editw(position: '      .   ')):'0');
          if result = '';
              result = '0';
          endif;

          return result;
      /END-FREE
     P                 E
      *********************************************************************************************
     P IFSerrno        B
     D IFSerrno        PI            10I 0
     D p_errno         S               *
     D retval          S             10I 0 based(p_errno)
     c                   eval      p_errno = Geterrno
     c                   return    retval
     P                 E
      *********************************************************************************************
     p utf8_write      b

     d utf8_write      pi
     d  tet                            *   value
     d  outfd                        10i 0 value
     d  out                                value like(r_text_long_u)

     d out_fix         s          16383c
     d rtn_value       s          32767    based(rtn_value_p)
     d len_out         s             10i 0
     d rtnlen          s             10i 0
     d convert_value   s          65535    based(convert_value_p)
     d result          s             10i 0

      /free
          out_fix = out;
          rtn_value_p = %addr(out_fix);
          len_out = %len(out);

          convert_value_p = TET_convert_to_unicode (
                              tet :
                              %ucs2('utf16') :
                              rtn_value :
                              len_out * 2 :
                              rtnlen :
                              %ucs2('outputformat=utf8'));

          result = write(outfd : convert_value_p : len_out);
      /end-free
     p utf8_write      e
