PDFlib Cookbook

cookbook

pdf_import/split_document

Split a PDF document into separate output documents, where each output document contains a range of one or more pages from the input document.

Download Java Code  Switch to PHP Code  Show Output  Show Output 

/*
 * Split PDF document:
 * Split a PDF document into separate output documents, where each output
 * document contains a range of one or more pages from the input document.
 *
 * Interactive elements (e.g. bookmarks) will be dropped.
 *
 * Required software: PDFlib+PDI/PPS 9
 * Required data: PDF input document
 */

package com.pdflib.cookbook.pdflib.pdf_import;

import com.pdflib.pdflib;
import com.pdflib.PDFlibException;

/**
 * Cookbook sample: splits a PDF input document into several output documents.
 *
 * <p>Each output document receives up to {@code SUBDOC_PAGES} consecutive
 * pages of the input document; the last output document may hold fewer.
 * Output files are named {@code split_document_<n>.pdf}, with {@code n}
 * starting at 1.
 */
public class split_document {
    /**
     * Opens the input document, writes one output document per page range and
     * finally terminates the JVM via {@link System#exit(int)}: exit code 0 on
     * success, 1 if an exception was caught and reported on {@code System.err}.
     *
     * @param argv command-line arguments (not used)
     */
    public static void main(String argv[]) {
        /* This is where the data files are. Adjust as necessary. */
        String searchpath = "../input";

        /* By default annotations are also imported. In some cases this
         * requires the Noto fonts for creating annotation appearance streams.
         * We therefore set the searchpath to also point to the font directory.
         */
        String fontpath = "../resource/font";
        String outfile_basename = "split_document";
        String title = "Split PDF Document";
        String infile = "PDFlib-datasheet.pdf";

        /*
         * Document will be split into sub-documents where each document has
         * this many pages (except the last sub-document potentially).
         */
        final int SUBDOC_PAGES = 2;

        pdflib p = null;
        int exitcode = 0;

        try {
            p = new pdflib();

            p.set_option("searchpath={" + searchpath + "}");

            p.set_option("searchpath={" + fontpath + "}");

            /* This means we must check return values of load_font() etc. */
            p.set_option("errorpolicy=return");

            int indoc = p.open_pdi_document(infile, "");
            if (indoc == -1) {
                throw new Exception("Error: " + p.get_errmsg());
            }

            /*
             * Determine the number of pages in the input document and compute
             * the number of output documents: one per full group of
             * SUBDOC_PAGES pages, plus one more for a trailing partial group.
             */
            int page_count = (int) p.pcos_get_number(indoc, "length:pages");
            int outdoc_count = page_count / SUBDOC_PAGES
                    + (page_count % SUBDOC_PAGES > 0 ? 1 : 0);

            /*
             * "page" is the zero-based index of the next input page to copy.
             * It is declared in the outer loop so that it keeps advancing
             * across sub-documents.
             */
            for (int outdoc_counter = 0, page = 0;
                    outdoc_counter < outdoc_count; outdoc_counter += 1) {
                String outfile = outfile_basename + "_" + (outdoc_counter + 1)
                    + ".pdf";

                /*
                 * Open new sub-document.
                 */
                if (p.begin_document(outfile, "") == -1) {
                    throw new Exception("Error: " + p.get_errmsg());
                }

                p.set_info("Creator", "PDFlib Cookbook");
                p.set_info("Title", title);
                p.set_info("Subject", "Sub-document " + (outdoc_counter + 1)
                    + " of " + outdoc_count + " of input document '" + infile
                    + "'");

                /*
                 * Copy at most SUBDOC_PAGES pages into this sub-document,
                 * stopping early when the input document is exhausted.
                 */
                for (int i = 0; page < page_count && i < SUBDOC_PAGES;
                                                    page += 1, i += 1) {
                    /* Page size may be adjusted by fit_pdi_page() */
                    p.begin_page_ext(0, 0, "width=a4.width height=a4.height");

                    /* The page number passed here is one-based. */
                    int pagehdl = p.open_pdi_page(indoc, page + 1, "");
                    if (pagehdl == -1) {
                        throw new Exception("Error: " + p.get_errmsg());
                    }

                    /*
                     * Place the imported page on the output page, and adjust
                     * the page size
                     */
                    p.fit_pdi_page(pagehdl, 0, 0, "adjustpage");
                    p.close_pdi_page(pagehdl);

                    p.end_page_ext("");
                }

                /* Close the current sub-document */
                p.end_document("");
            }

            /* Close the input document */
            p.close_pdi_document(indoc);
        }
        catch (PDFlibException e) {
            System.err.println("PDFlib exception occurred:");
            System.err.println("[" + e.get_errnum() + "] " + e.get_apiname()
                + ": " + e.get_errmsg());
            exitcode = 1;
        }
        catch (Exception e) {
            System.err.println(e);
            exitcode = 1;
        }
        finally {
            /*
             * Only clean up here. System.exit() must not be called inside
             * this finally block: a java.lang.Error thrown in the try block
             * matches neither catch clause, and exiting from here would
             * discard it and terminate with the still-zero exit code.
             */
            if (p != null) {
                p.delete();
            }
        }

        /*
         * Reached on success and after a caught exception. An uncaught Error
         * skips this statement and propagates out of main() instead.
         */
        System.exit(exitcode);
    }
}