pCOS Cookbook

cookbook

interactive/check_weblink

Check HTTP response codes for the Web links in a PDF document.

Download Java Code  Show Output  Show Input (pCOS-path-reference.pdf) 

/*
 * Check HTTP response codes for the Web links in a PDF document
 * 
 * Required software: pCOS interface 8 (PDFlib+PDI/PPS 9, TET 4.1, PLOP 5.0)
 * Required data: PDF document
 */
package com.pdflib.cookbook.pcos.interactive;

import com.pdflib.IpCOS;
import com.pdflib.cookbook.pcos.pcos_cookbook_example;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.net.UnknownHostException;

/**
 * Lists the Web links (Link annotations with a URI action) found on each page
 * of a PDF document and reports the HTTP status obtained by sending a HEAD
 * request to each target.
 */
public class check_weblink extends pcos_cookbook_example {
    /* This is where the data files are. Adjust as necessary. */
    private final static String SEARCH_PATH = "../input";

    /* Maximum time in milliseconds to wait for the connection to be established. */
    private final static int CONNECT_TIMEOUT_MS = 1000;

    /*
     * Maximum time in milliseconds to wait for the server's answer. Without a
     * read timeout a server which accepts the connection but never responds
     * would block the link check forever.
     */
    private final static int READ_TIMEOUT_MS = 5000;

    /* Redirect status codes for which HttpURLConnection defines no constant. */
    private final static int HTTP_TEMPORARY_REDIRECT = 307;
    private final static int HTTP_PERMANENT_REDIRECT = 308;

    /**
     * Walks over all annotations on all pages of the document and checks the
     * target of each Web link.
     *
     * @param p   pCOS object used to query the document
     * @param doc handle of the document to examine
     * @throws Exception if a pCOS query fails; network problems are reported
     *                   on standard output and do not abort the check
     */
    public void example_code(IpCOS p, int doc) throws Exception {

        System.out.println("Link check for file: '"
            + p.pcos_get_string(doc, "filename") + "'");

        // Get number of pages in the document
        int pagecount = (int) p.pcos_get_number(doc, "length:pages");

        for (int page = 0; page < pagecount; page++) {
            String annots = "pages[" + page + "]/Annots";

            // Get number of annotations on the page
            int anncount = (int) p.pcos_get_number(doc, "length:" + annots);
            if (anncount == 0) {
                continue;
            }

            System.out.printf("Page %d:\n", page + 1);

            // Iterate over all annotations
            for (int ann = 0; ann < anncount; ann++) {
                String annot = annots + "[" + ann + "]";

                String subtype = p.pcos_get_string(doc, annot + "/Subtype");
                if (!subtype.equals("Link")) {
                    continue;
                }

                // Check whether this is a link with a URI (web link)
                String objtype = p.pcos_get_string(doc, "type:" + annot + "/A/URI");
                if (objtype.equals("string")) {
                    check_link(p.pcos_get_string(doc, annot + "/A/URI"));
                }
            }
        }
    }

    /**
     * Sends a HEAD request to the given destination and prints the
     * destination followed by the status code and message. For redirects
     * the new location is printed as well. Failures are mapped to
     * HTTP-like pseudo status lines instead of being propagated.
     *
     * @param destination the URI string taken from the link annotation
     */
    private static void check_link(String destination) {
        System.out.printf("  %s\n", destination);

        HttpURLConnection conn = null;
        try {
            URLConnection raw = new URL(destination).openConnection();

            // URIs with other schemes (e.g. mailto:) yield a different
            // connection type; test for it instead of provoking a
            // ClassCastException.
            if (!(raw instanceof HttpURLConnection)) {
                System.out.println("    [405] Not a Web link");
                return;
            }

            conn = (HttpURLConnection) raw;
            conn.setRequestMethod("HEAD");
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(READ_TIMEOUT_MS);

            int responseCode = conn.getResponseCode();
            String responseMessage = conn.getResponseMessage();
            System.out.printf("    [%d] %s", responseCode, responseMessage);

            if (is_redirect(responseCode)) {
                // get redirect URL from "Location" header field
                String newUrl = conn.getHeaderField("Location");
                if (newUrl != null) {
                    System.out.printf("; redirect to %s", newUrl);
                }
            }
            System.out.println("");
        } catch (MalformedURLException e) {
            System.out.println("    [400] Bad Request: " + e.getMessage());
        } catch (UnknownHostException e) {
            System.out.println("    [404] Not Found");
        } catch (SocketTimeoutException e) {
            System.out.println("    [408] Request Timeout: " + e.getMessage());
        } catch (Exception e) {
            // Some exceptions carry no message; fall back to the class name
            // so that the output never reads "[500] null".
            String message = e.getMessage();
            System.out.println("    [500] "
                + (message != null ? message : e.getClass().getName()));
        } finally {
            // Release the connection once the link has been checked
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Tells whether a status code denotes a redirect whose target is
     * delivered in the "Location" header field.
     *
     * @param responseCode HTTP status code returned by the server
     * @return true for 301, 302, 303, 307 and 308
     */
    private static boolean is_redirect(int responseCode) {
        switch (responseCode) {
            case HttpURLConnection.HTTP_MOVED_PERM:
            case HttpURLConnection.HTTP_MOVED_TEMP:
            case HttpURLConnection.HTTP_SEE_OTHER:
            case HTTP_TEMPORARY_REDIRECT:
            case HTTP_PERMANENT_REDIRECT:
                return true;
            default:
                return false;
        }
    }

    /**
     * Passes all arguments on to the cookbook example base class.
     *
     * @param argv          command line arguments
     * @param readable_name human-readable name of this example
     * @param search_path   directory where the input files are looked up
     *                      (presumably; handled by the base class)
     */
    public check_weblink(String[] argv, String readable_name, String search_path) {
        super(argv, readable_name, search_path);
    }

    /**
     * Command line entry point: creates the example and runs it through the
     * base class's execute() method.
     *
     * @param argv command line arguments, handed to the base class unchanged
     */
    public static void main(String[] argv) {
        check_weblink example = new check_weblink(argv, "check weblink", SEARCH_PATH);
        example.execute();
    }

}