Create an alphabetically sorted "back-of-the-book" index.

Download XSLT Code     Show Output     Show Input TETML File

<?xml version="1.0" encoding="UTF-8"?>


           Copyright (c) 2008-2015 PDFlib GmbH. All rights reserved.

    This software may not be copied or distributed except as expressly

    authorized by PDFlib GmbH's general license agreement or a custom

    license agreement signed by PDFlib GmbH.

    For more information about licensing please refer to

    Purpose: Create an alphabetically sorted "back-of-the-book" index


    Required input: TETML in "word" or "wordplus" mode.


    Stylesheet parameters: none


    Version: $Id: index.xsl,v 1.3 2015/11/30 11:45:02 stm Exp $


<xsl:stylesheet version="1.0" xmlns:xsl=""



    <xsl:output method="text" />


    <!-- Minimum word length for inclusion in index -->

    <xsl:param name="min-length">4</xsl:param>



        Index required by the "Muenchian" method. We index the Word elements

        based on the content of the Text subelements.


    <xsl:key name="words" match="tet:TET/tet:Document/tet:Pages//tet:Word" use="tet:Text" />


    <!-- Characters that may appear at the beginning of a word -->

    <xsl:variable name="allowed-chars"

        select="'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvw'" />



        Index for the unique letters


    <xsl:key name="letter" match="tet:TET/tet:Document/tet:Pages//tet:Word"



                        'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />


    <xsl:template match="/">

        <!-- Make sure that word information is present in the input TETML. -->

        <xsl:if test="tet:TET/tet:Document/tet:Pages//tet:Content[not(@granularity = 'word')]">

                <xsl:message terminate="yes">

                        <xsl:text>Stylesheet index.xsl processing TETML for document '</xsl:text>

                        <xsl:value-of select="tet:TET/tet:Document/@filename" />

                        <xsl:text>': this stylesheet requires word info in TETML. </xsl:text>

                        <xsl:text>Create the input in page mode "word" or "wordplus".</xsl:text>



        <xsl:text>Alphabetical list of words in the document along with their page number:&#xa;&#xa;</xsl:text>



            Group by first letter, sort by first letter.



            select="tet:TET/tet:Document/tet:Pages//tet:Word[generate-id() =







                select="translate(tet:Text, 'abcdefghijklmnopqrstuvwxyz',

                               'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />





    <xsl:template match="tet:Word" mode="index-letters">

        <!-- Get the group key -->

        <xsl:variable name="key"

            select="translate(substring(tet:Text, 1, 1),


                                      'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />


        <!-- Suppress groups that are not in the allowed set of characters -->

        <xsl:if test="string-length(translate($key, $allowed-chars, '')) = 0">


            <!-- Select all words that start with the current group letter -->

            <xsl:variable name="letter-group"

                select="key('letter', $key)

                   [generate-id() = generate-id(key('words', tet:Text)[1])]" />



                Filter out words that are not long enough or that start with

                a disallowed character.


            <xsl:variable name="allowed-words"

                select="$letter-group[string-length(tet:Text) &gt;= $min-length and

                               string-length(translate(substring(tet:Text, 1, 1), $allowed-chars, '')) = 0]"/>


            <!-- Suppress empty groups -->

            <xsl:if test="count($allowed-words) &gt; 0">

                <!-- Output label for current index group -->

                <xsl:value-of select="$key" />



                <xsl:apply-templates select="$allowed-words" mode="index-words">




                                            'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />







    <xsl:template match="tet:Word" mode="index-words">

        <!-- Find all occurences of index term -->

        <xsl:variable name="occurences" select="key('words', tet:Text)" />


        <!-- Output text of index term -->

        <xsl:value-of select="tet:Text" />

        <xsl:text> </xsl:text>


        <!-- Output page numbers where the term occurs -->

        <xsl:for-each select="$occurences/ancestor::tet:Page">

            <!-- Separate multiple page numbers by blanks -->

            <xsl:if test="position() != 1">

                <xsl:text> </xsl:text>


            <xsl:value-of select="@number" />



        <!-- Terminate word entry with new-line -->