     H DEBUG(*YES) DFTACTGRP(*NO) BNDDIR('TETLIB/TETLIB')
     H COPYRIGHT('(c) PDFlib GmbH (www.pdflib.com)')
      *********************************************************************************************
      *   Extract text from PDF and filter according to font name and size.
      *   This can be used to identify headings in the document and create a
      *   table of contents.
      *
      *   Note: All strings passed into PDFlib are unicode strings with varying length.
      *         Use the %UCS2 build in function to convert a single byte string into a unicode string.
      *         All strings returned from PDFlib are unicode strings with varying length.
      *         Use the %CHAR build in function to convert a unicode string to a single byte string.
      *********************************************************************************************
     d/copy QRPGLESRC,TETLIB
     d/copy QRPGLESRC,IFSIO
      *********************************************************************************************
      * global option list
     d globaloptlist   c                   %ucs2('-
     d                                     searchpath={../../../resource/cmap}')

      * document-specific option list
     d docoptlist      c                   %ucs2('')

      * page-specific option list
     d pageoptlist     c                   %ucs2('granularity=line')

      * Search text with at least this size (use 0 to catch all sizes)
     d fontsize...
     d trigger         c                   10

      * Catch text where the font name contains this string
      * (use empty string to catch all font names)
     d fontname...
     d trigger         c                   %ucs2('Bold')

     d tet             s               *
     d pageno          s             10i 0
     d n_pages         s             10i 0
     d doc             s             10i 0
     d pdfpage         s             10i 0
     d len             s             10i 0
     d text            s          16383c   based(text_p)
     d text_write      s          16383c
     d fontname        s           1024c   varying
     d error           s             52
     d txt             s             52
     d fontsize        s              7  2
     d outfilename     s            256    varying
     d outfilebase     s            256    varying
     d outfd           s             10i 0
     d suffix          c                   '.txt'
     d crlf            s              2    inz(x'0D0A')
      *********************************************************************************************
     d utf8_write      pr
     d  tet                            *   value
     d  outfd                        10i 0 value
     d  out                                value like(r_text_long_u)
      *********************************************************************************************
     c     *entry        plist
     c                   parm                    parm1           128
     c                   parm                    parm2           128

     c                   if        parm1 = *BLANKS or parm2 = *BLANKS
     c                   eval      error='usage: CALL PGM(FONTFILTER) '+
     c                                    'PARM(<in> <out>)'
     c                   exsr      exit
     c                   endif
      *
     c                   eval      tet=TET_new
     c                   if        tet=*null
     c                   eval      error='fontfilter: out of memory'
     c                   exsr      exit
     c                   endif
      *
     c                   monitor
     c                   eval      outfilebase=%trim(parm2)
     c                   eval      outfilename=outfilebase+suffix

     c                   eval      outfd=open(outfilename:
     c                                     O_WRONLY+O_CREAT+O_TRUNC+O_EXCL :
     c                                     S_IRWXU+S_IRWXG)
     c                   if        outfd<0
     c                   eval      error='Couldn''t open "'+outfilename+'".'
     c                   callp     TET_delete(tet)
     c                   exsr      exit
     c                   endif

     c                   callp     TET_set_option(tet:globaloptlist)
     c                   eval      doc=TET_open_document(tet:
     c                                     %ucs2(%trim(parm1)):docoptlist)
     c                   if        doc=-1
     c                   eval      error='Error '+
     c                               %char(TET_get_errnum(tet))+' in '+
     c                               %char(TET_get_apiname(tet))+'(): '+
     c                               %char(TET_get_errmsg(tet))
     c                   callp     TET_delete(tet)
     c                   exsr      exit
     c                   endif
      *
      * get number of pages in the document
     c                   eval      n_pages=TET_pcos_get_number(tet:doc:
     c                                     %ucs2('length:pages'))

     c                   do        n_pages       pageno
     c                   eval      pdfpage=TET_open_page(tet:doc:pageno:
     c                                         pageoptlist)
     c                   if        pdfpage=-1
     c                   eval      error='Error '+
     c                               %char(TET_get_errnum(tet))+' in '+
     c                               %char(TET_get_apiname(tet))+'() onpage '+
     c                               %char(pageno)+' '+
     c                               %char(TET_get_errmsg(tet))
     c     error         dsply
     c                   iter                                                   Try next page
     c                   endif

      * Retrieve all text fragments for the page
     c                   do        *hival
     c                   eval      text_p=TET_get_text(tet:pdfpage:len)
     c
     c                   if        text_p=*null
     c                   leave
     c                   endif
     c
     c                   eval      text_write = text

      * Loop over all characters
     c                   do        *hival
     c                   eval      tet_char_info_p=
     c                                  TET_get_char_info(tet:pdfpage)
     c                   if        tet_char_info_p = *null
     c                   leave
     c                   endif
      * We need only the font name and size; the text
      * position could be fetched from ci->x and ci->y
     c                   eval      fontname=TET_pcos_get_string(tet:doc:
     c                               %ucs2('fonts['+%char(ci_fontid)+']/name'))

      * Check whether we found a match
      * C only: some versions of strstr don't allow empty
      * strings, so we better check
     c                   if        ci_fontsize >= fontsizetrigger and
     c                                %scan(fontnametrigger : fontname) > 0
     c                   callp     utf8_write(tet :
     c                                        outfd : %ucs2('['))

     c                   eval(h)   fontsize=ci_fontsize

      * print the retrieved text
     c                   callp     utf8_write(tet : outfd : fontname)
     c                   callp     utf8_write(tet :
     c                                        outfd : %ucs2(' '))
     c                   callp     utf8_write(tet :
     c                                        outfd : %ucs2(%char(fontsize)))
     c                   callp     utf8_write(tet :
     c                                        outfd : %ucs2('] '))
     c                   callp     write(outfd : %addr(text_write) :
     c                                    %len(%str(%addr(text_write))))
      * new line
     c                   callp     write(outfd : %addr(crlf) : 2)

      * In this sample we check only the first character of
      * each fragment.
     c                   leave
     c                   endif
     c                   enddo
     c                   enddo
     c                   callp     TET_close_page(tet:page)

     c                   enddo
     c                   callp     TET_close_document(tet:doc)

      * Error Handling
     c                   on-error
     c                   exsr      dsperror
     c                   endmon
     c                   callp     TET_delete(tet)
     c                   exsr      exit
      *********************************************************************************************
      * unmonitored error occured
     c     *pssr         begsr
     c                   if        tet<>*null
     c                   callp     TET_delete(tet)
     c                   endif
     c                   eval      error='General program failure.'
     c     error         dsply
     c                   endsr     '*CANCL'
      *********************************************************************************************
     c     dsperror      begsr
     c                   eval      txt='TET exception occured in fontfilter:'
     c     txt           dsply
     c                   eval      txt=
     c                               %char(TET_get_errnum(tet))+' in '+
     c                               %char(TET_get_apiname(tet))+'(): '+
     c                               %char(TET_get_errmsg(tet))
     c     txt           dsply
     c                   endsr
      *********************************************************************************************
     c     exit          begsr
     c                   if        error<>*blanks
     c                   eval      error='Error: '+error
     c     error         dsply
     c                   endif
     c                   seton                                        lr
     c                   return
     c                   endsr
      *********************************************************************************************
     p utf8_write      b

     d utf8_write      pi
     d  tet                            *   value
     d  outfd                        10i 0 value
     d  out                                value like(r_text_long_u)

     d out_fix         s          16383c
     d rtn_value       s          32767    based(rtn_value_p)
     d len_out         s             10i 0
     d rtnlen          s             10i 0
     d convert_value   s          65535    based(convert_value_p)
     d result          s             10i 0

      /free
          out_fix = out;
          rtn_value_p = %addr(out_fix);
          len_out = %len(out);

          convert_value_p = TET_convert_to_unicode (
                              tet :
                              %ucs2('utf16') :
                              rtn_value :
                              len_out * 2 :
                              rtnlen :
                              %ucs2('outputformat=utf8'));

          result = write(outfd : convert_value_p : len_out);
      /end-free
     p utf8_write      e