PDFlib Cookbook

cookbook

textflow/widows_and_orphans

Format a Textflow into fitboxes that span multiple pages, first without and then with an algorithm that avoids widows and orphans.

Download PHP Code  Switch to Java Code  Show Output 

<?php
/*
 * Widows and orphans:
 * Format a Textflow into fitboxes that span multiple pages, first without
 * any special action and then with an algorithm that avoids widows and
 * orphans
 *
 * Required software: PDFlib/PDFlib+PDI/PPS 10
 * Required data: none
 */

/* Directory that holds the input data files. Adjust as necessary. */
$searchpath = dirname(__FILE__, 3) . "/input";

$outfile = "widows_and_orphans.pdf";
$title = "Widows and Orphans";

try {
    $p = new pdflib();

    $p->set_option("searchpath={" . $searchpath . "}");

    /* With this setting return values of load_font() etc. must be checked. */
    $p->set_option("errorpolicy=return");

    if ($p->begin_document("", "") == 0) {
        throw new Exception("Error: " . $p->get_errmsg());
    }

    $p->set_info("Creator", "PDFlib Cookbook");
    $p->set_info("Title", $title);

    /* Page geometry and header size shared by all sample pages. */
    $pagewidth = 500;
    $pageheight = 500;
    $header_fontsize = 24;

    /* Inline options placed between two consecutive paragraphs. */
    $paragraph_break = "<nextline leading=80%><nextparagraph leading=100%>";

    /*
     * Three paragraphs that, combined with the font size given in the
     * sample table below, provoke an orphan.
     */
    $orphan_text = implode($paragraph_break, [
        str_repeat('This text provokes an "orphan" at the end of the first fitbox. ', 10),
        str_repeat('Unless action is taken, the first line of this paragraph appears '
            . 'as an orphan at the end of the first fitbox. ', 2),
        str_repeat('This is text after the paragraph with the orphan. ', 2),
    ]);

    /*
     * Three paragraphs that, combined with the font size given in the
     * sample table below, provoke a widow.
     */
    $widow_text = implode($paragraph_break, [
        str_repeat('This text provokes a "widow" at the start of the second fitbox. ', 7),
        str_repeat('Unless action is taken, the last line of this paragraph appears '
            . 'as a widow at the start of the second fitbox. ', 2),
        str_repeat('This is text after the paragraph with the widow. ', 2),
    ]);

    /*
     * Each sample is formatted twice: first without any special action,
     * then with the algorithm that avoids widows and orphans.
     */
    $samples = [
        [
            "text" => $orphan_text,
            "fontsize" => 13.1,
            "plain_title" => "Orphan",
            "aware_title" => "Orphan avoided",
        ],
        [
            "text" => $widow_text,
            "fontsize" => 13.6,
            "plain_title" => "Widow",
            "aware_title" => "Widow avoided",
        ],
    ];

    foreach ($samples as $sample) {
        format_text_simple($p, $sample["text"], $sample["fontsize"],
            $pagewidth, $pageheight,
            $header_fontsize,
            $sample["plain_title"]);

        format_text_widow_orphan_aware($p, $sample["text"], $sample["fontsize"],
            $pagewidth, $pageheight,
            $header_fontsize,
            $sample["aware_title"]);
    }

    $p->end_document("");

    /* Send the generated PDF from the in-memory buffer to the client. */
    $buf = $p->get_buffer();

    header("Content-type: application/pdf");
    header("Content-Length: " . strlen($buf));
    header("Content-Disposition: inline; filename=" . $outfile);
    print $buf;

} catch (PDFlibException $e) {
    echo sprintf("PDFlib exception occurred:\n[%s] %s: %s\n",
        $e->get_errnum(), $e->get_apiname(), $e->get_errmsg());
    exit(1);
} catch (Throwable $e) {
    echo "PHP exception occurred: " . $e->getMessage() . "\n";
    exit(1);
}

/**
 * Format the textflow into the fitboxes over multiple pages, without doing
 * anything to avoid widows and orphans.
 *
 * Every page gets a centered header line "<title> (page <n>)" and one
 * fitbox of half the page width and half the page height, centered on the
 * page. New pages are created as long as fit_textflow() reports "_boxfull".
 *
 * @param object $p               PDFlib object with an open document
 * @param string $text            Textflow contents (may contain inline options)
 * @param mixed  $fontsize        font size for the Textflow text
 * @param mixed  $pagewidth       page width
 * @param mixed  $pageheight      page height
 * @param mixed  $header_fontsize font size of the header line
 * @param string $title           header text; the page number is appended
 *
 * @throws Exception if the Textflow cannot be created, or if the fitbox is
 *                   too small to hold any text ("_boxempty")
 */
function format_text_simple($p,
    $text, $fontsize,
    $pagewidth, $pageheight,
    $header_fontsize, $title) {

    $optlist = "fontname=NotoSerif-Regular "
        . "fontsize=" . $fontsize . " "
        . "alignment=left adjustmethod=nofit";

    $tf = $p->create_textflow($text, $optlist);
    if ($tf == 0) {
        throw new Exception("Error: Unable to create textflow: " .
            $p->get_errmsg());
    }

    /* The fitbox covers half of the page in each direction ... */
    $fitbox_width = $pagewidth * 0.5;
    $fitbox_height = $pageheight * 0.5;

    /* ... and is centered on the page. */
    $llx = ($pagewidth - $fitbox_width) / 2;
    $lly = ($pageheight - $fitbox_height) / 2;
    $urx = $llx + $fitbox_width;
    $ury = $lly + $fitbox_height;

    $header_options = "fontname=NotoSerif-Regular fontsize="
        . $header_fontsize . " position={center} boxsize={"
        . $pagewidth . " " . $header_fontsize . "}";

    /*
     * Simple algorithm to format the Textflow into as many fitboxes
     * as necessary: one fitbox per page, until the result is no longer
     * "_boxfull".
     */
    $pagecount = 0;
    do {
        $pagecount += 1;

        $p->begin_page_ext($pagewidth, $pageheight, "");

        $p->fit_textline($title . " (page " . $pagecount . ")",
            0, $pageheight - 2 * $header_fontsize,
            $header_options);

        $result = $p->fit_textflow($tf, $llx, $lly, $urx, $ury,
                        "showborder=true");

        $p->end_page_ext("");
    } while ($result === "_boxfull");

    $p->delete_textflow($tf);

    /*
     * "_boxempty" means that the fitbox could not hold any text at all.
     * Report this instead of silently dropping the remaining text.
     */
    if ($result === "_boxempty") {
        throw new Exception(
            "Error: Textflow fitbox too small to hold any text");
    }
}

/**
 * Format the textflow into the fitboxes over multiple pages, while
 * applying an algorithm that avoids widows and orphans.
 *
 * Orphans are avoided with the Textflow option "minlinecount=2". Widows
 * are avoided by fitting the current and the following fitbox in blind
 * mode first: if the first paragraph of the following fitbox consists of
 * a single line, the current fitbox is filled with one line less.
 *
 * Page layout (header line, centered fitbox of half the page size) is the
 * same as in format_text_simple().
 *
 * @param object $p               PDFlib object with an open document
 * @param string $text            Textflow contents (may contain inline options)
 * @param mixed  $fontsize        font size for the Textflow text
 * @param mixed  $pagewidth       page width
 * @param mixed  $pageheight      page height
 * @param mixed  $header_fontsize font size of the header line
 * @param string $title           header text; the page number is appended
 *
 * @throws Exception if the Textflow cannot be created, or if the fitbox is
 *                   too small to hold any text ("_boxempty")
 */
function format_text_widow_orphan_aware($p,
    $text, $fontsize,
    $pagewidth, $pageheight,
    $header_fontsize, $title) {

    /*
     * Same option list as in format_text_simple(), but with
     * minlinecount=2 to avoid orphans.
     */
    $optlist = "minlinecount=2 fontname=NotoSerif-Regular "
        . "fontsize=" . $fontsize . " "
        . "alignment=left adjustmethod=nofit";

    $tf = $p->create_textflow($text, $optlist);
    if ($tf == 0) {
        throw new Exception("Error: Unable to create textflow: " .
                    $p->get_errmsg());
    }

    /* The fitbox covers half of the page in each direction ... */
    $fitbox_width = $pagewidth * 0.5;
    $fitbox_height = $pageheight * 0.5;

    /* ... and is centered on the page. */
    $llx = ($pagewidth - $fitbox_width) / 2;
    $lly = ($pageheight - $fitbox_height) / 2;
    $urx = $llx + $fitbox_width;
    $ury = $lly + $fitbox_height;

    $header_options = "fontname=NotoSerif-Regular fontsize="
        . $header_fontsize . " position={center} boxsize={"
        . $pagewidth . " " . $header_fontsize . "}";

    $pagecount = 0;
    do {
        $pagecount += 1;

        $p->begin_page_ext($pagewidth, $pageheight, "");

        /*
         * Fit the remaining Textflow into the first fitbox in blind
         * mode (option blind=true), i.e. without creating any real output.
         * Orphans are already avoided by "minlinecount=2" in the Textflow
         * option list. Query the number of lines in the fitbox by using
         * keyword "boxlinecount".
         */
        $result = $p->fit_textflow($tf, $llx, $lly, $urx, $ury, "blind=true");

        $boxlinecount = (int) $p->info_textflow($tf, "boxlinecount");

        /*
         * Count how many times the textflow must be rewound: At least
         * one time for the first blind fit, and another time if a second
         * blind fit is performed because the fitbox is full.
         */
        $rewindcount = 1;

        if ($result === "_boxfull") {

            /*
             * Fit the next part of the Textflow into the second fitbox in
             * blind mode. We don't care here that the second fitbox will
             * actually be placed on the next page in the output file.
             */
            $p->fit_textflow($tf, $llx, $lly, $urx, $ury, "blind=true");

            /*
             * Query the number of lines of the first paragraph in the
             * second fitbox. If the count is equal to one, we found a
             * single-line "widow". In order to avoid that, we reduce the
             * number of lines for the first fitbox by one.
             */
            $firstparalinecount = (int) $p->info_textflow($tf,
                "firstparalinecount");
            if ($firstparalinecount === 1) {
                $boxlinecount -= 1;
            }

            $rewindcount += 1;
        }

        /*
         * Never request fewer than one line: a limit of zero lines could
         * not place any text, so the Textflow would make no progress.
         */
        $maxlines = max(1, $boxlinecount);

        /* Place header line */
        $p->fit_textline($title . " (page " . $pagecount . ")",
            0, $pageheight - 2 * $header_fontsize,
            $header_options);

        /*
         * Now do the actual output on the page.
         * Rewind the Textflow one or two steps (determined by
         * rewindcount) and set the calculated maximum number of lines.
         */
        $result = $p->fit_textflow($tf, $llx, $lly, $urx, $ury,
            "rewind=-" . $rewindcount
            . " maxlines=" . $maxlines . " showborder=true");

        $p->end_page_ext("");
    } while ($result === "_boxfull");

    $p->delete_textflow($tf);

    /*
     * "_boxempty" means that the fitbox could not hold any text at all.
     * Report this instead of silently dropping the remaining text.
     */
    if ($result === "_boxempty") {
        throw new Exception(
            "Error: Textflow fitbox too small to hold any text");
    }
}

?>