     H DEBUG(*YES) DFTACTGRP(*NO) ACTGRP(*NEW) BNDDIR('TETLIB/TETLIB')
     H COPYRIGHT('(c) PDFlib GmbH (www.pdflib.com)')
      *********************************************************************************************
      *   Simple PDF text extractor based on PDFlib TET
      *
      *   Note: All strings passed into PDFlib are Unicode strings with varying length.
      *         Use the %UCS2 built-in function to convert a single-byte string into a Unicode string.
      *         All strings returned from PDFlib are Unicode strings with varying length.
      *         Use the %CHAR built-in function to convert a Unicode string to a single-byte string.
      *********************************************************************************************
     d/copy QRPGLESRC,TETLIB
     d/copy QRPGLESRC,IFSIO
      *********************************************************************************************
      * global option list (passed to TET_set_option): sets the searchpath
      * and outputformat options
     d globaloptlist   c                   %ucs2('-
     d                                     searchpath={../../../resource/cmap} -
     d                                     outputformat=utf8')

      * document-specific option list (passed to TET_open_document); empty
     d docoptlist      c                   %ucs2('')

      * page-specific option list (passed to TET_open_page)
     d pageoptlist     c                   %ucs2('granularity=page')

      * TET object pointer returned by TET_new; *null means creation failed
     d tet             s               *
      * descriptor returned by open() for the output file; negative on failure
     d outfd           s             10i 0
      * document handle returned by TET_open_document; -1 on failure
     d doc             s             10i 0
      * index of the page loop (DO ... pageno), starting at 1
     d pageno          s             10i 0
      * page count obtained from TET_pcos_get_number('length:pages')
     d n_pages         s             10i 0
      * page handle returned by TET_open_page; -1 on failure
     d pdfpage         s             10i 0
      * third argument of TET_get_text; not read anywhere else in this program
     d len             s             10i 0
      * suffix appended to the output base name
     d suffix          c                   '.txt'
      * full output file name (outfilebase + suffix) passed to open()
     d outfilename     s            256    varying
      * trimmed value of the second program parameter
     d outfilebase     s            256    varying
      * message shown with DSPLY by the main line, *pssr and the exit subroutine
     d error           s             52
      * message buffer used by the dsperror subroutine
     d txt             s             52
      * overlay on the storage addressed by text_p (set from TET_get_text);
      * the field itself is not referenced in the calculations shown here
     d text            s          16383    based(text_p)
      *********************************************************************************************
     c     *entry        plist
     c                   parm                    parm1           128
     c                   parm                    parm2           128
      * Main line of the extractor.
      *   parm1 = name of the input PDF document
      *   parm2 = base name of the output file ('.txt' is appended)
      * Every page of the document is opened with granularity=page and the
      * text returned by TET_get_text is written to the output file.
      *
      * %parms is tested first so that a call with fewer than two
      * parameters shows the usage text instead of failing on a reference
      * to a parameter that was not passed.
     c                   if        %parms<2 or
     c                             parm1=*BLANKS or parm2=*BLANKS
     c                   eval      error='usage: CALL PGM(EXTRACTOR) '+
     c                                    'PARM(<in> <out>)'
     c                   exsr      exit
     c                   endif
      *********************************************************************************************
      * create the TET object
     c                   eval      tet=TET_new
     c                   if        tet=*null
     c                   eval      error='extractor: out of memory'
     c                   exsr      exit
     c                   endif
      *
     c                   monitor
      *
     c                   eval      outfilebase=%trim(parm2)
     c                   eval      outfilename=outfilebase+suffix

      * Create the output file or truncate an existing one. O_EXCL is not
      * used: together with O_CREAT it makes open() fail when the file
      * already exists, which would defeat O_TRUNC on every re-run.
     c                   eval      outfd=open(outfilename:
     c                                     O_WRONLY+O_CREAT+O_TRUNC :
     c                                     S_IRWXU+S_IRWXG)
     c                   if        outfd<0
     c                   eval      error='Couldn''t open "'+outfilename+'".'
     c                   callp     TET_delete(tet)
     c                   exsr      exit
     c                   endif
      *
     c                   callp     TET_set_option(tet:globaloptlist)
     c                   eval      doc=TET_open_document(tet:
     c                                     %ucs2(%trim(parm1)):docoptlist)
     c                   if        doc=-1
      * build the message before the TET object is deleted
     c                   eval      error='Error '+
     c                               %char(TET_get_errnum(tet))+' in '+
     c                               %char(TET_get_apiname(tet))+'(): '+
     c                               %char(TET_get_errmsg(tet))
     c                   callp     TET_delete(tet)
     c                   callp     close(outfd)
     c                   exsr      exit
     c                   endif
      * get number of pages in the document
     c                   eval      n_pages=TET_pcos_get_number(tet:doc:
     c                                     %ucs2('length:pages'))

      * loop over pages in the document
     c                   do        n_pages       pageno
     c                   eval      pdfpage=TET_open_page(tet:doc:pageno:
     c                                         pageoptlist)
     c                   if        pdfpage=-1
     c                   eval      error='Error '+
     c                               %char(TET_get_errnum(tet))+' in '+
     c                               %char(TET_get_apiname(tet))+'() on page '+
     c                               %char(pageno)+' '+
     c                               %char(TET_get_errmsg(tet))
     c     error         dsply
      * The page error has been reported; clear it so that the exit
      * subroutine does not display it a second time at program end.
     c                   eval      error=*blanks
     c                   iter                                                   Try next page
     c                   endif
      * Retrieve all text fragments; this is actually not required
      * for granularity=page, but must be used for other granularities.
     c                   do        *hival
     c                   eval      text_p=TET_get_text(tet:pdfpage:len)
     c                   if        text_p=*null
     c                   leave
     c                   endif
      * print the retrieved text; the byte count is taken from the
      * null-terminated string at text_p
     c                   callp     write(outfd : text_p :
     c                                    %len(%str(text_p)))
     c                   enddo

     c                   callp     TET_close_page(tet: pdfpage)
     c                   enddo
      * all pages processed: close the document inside the monitor group
      * so that a TET exception is reported by dsperror
     c                   callp     TET_close_document(tet:doc)
      * Error Handling
     c                   on-error
     c                   exsr      dsperror
     c                   endmon
      *
     c                   callp     close(outfd)
     c                   callp     TET_delete(tet)
      *
     c                   exsr      exit
      *********************************************************************************************
      * unmonitored error occurred
     c     *pssr         begsr
      * Program error subroutine: release the TET object if one exists,
      * show a generic failure message and cancel the program.
     c                   if        not (tet=*null)
     c                   callp     TET_delete(tet)
     c                   endif
      *
     c                   eval      error='General program failure.'
     c     error         dsply
      * '*CANCL' as return point cancels the program
     c                   endsr     '*CANCL'
      *********************************************************************************************
     c     dsperror      begsr
      * Report a TET exception caught by the monitor group: a fixed
      * heading first, then error number, API name and error message
      * obtained from the TET object.
     c                   eval      txt='TET exception occured in extractor:'
     c     txt           dsply
      *
     c                   eval      txt=%char(TET_get_errnum(tet))+' in '+
     c                                 %char(TET_get_apiname(tet))+'(): '+
     c                                 %char(TET_get_errmsg(tet))
     c     txt           dsply
     c                   endsr
      *********************************************************************************************
     c     exit          begsr
      * Common program end: show a pending error message (prefixed with
      * 'Error: '), set on LR and return to the caller.
     c                   if        not (error=*blanks)
     c                   eval      error='Error: '+error
     c     error         dsply
     c                   endif
      *
     c                   eval      *inlr=*on
     c                   return
     c                   endsr