tetml/tetml2html
Convert TETML to HTML.
Download XSLT Code Show Output
<?xml version="1.0"?>
<!--
Copyright (c) 2008-2017 PDFlib GmbH. All rights reserved.
This software may not be copied or distributed except as expressly
authorized by PDFlib GmbH's general license agreement or a custom
license agreement signed by PDFlib GmbH.
For more information about licensing please refer to www.pdflib.com.
Purpose: convert TETML to HTML
Required input:
TET TETML in wordplus mode. The script includes information about the
images for each page. To make the links for the images work
correctly, the images must be extracted together with TETML. With the
TET command line tool this can be accomplished like this:
tet -i -m wordplus <input PDF document>
Stylesheet parameters:
debug: 0: no debug info, >0: increasingly verbose
bookmark-toc: 0: no table of contents generated from PDF bookmarks
1: generate table of contents from PDF bookmarks if
bookmarks are present
toc-generate: 0: no table of contents
1: generate table of contents for headings recognized
by font size and font name, unless a table of
contents was generated from bookmarks
toc-exclude-min, toc-exclude-max:
Specify a range of pages to exclude from the generation of the HTML
table of contents. This can be used to prevent duplicate entries if
also entries in the PDF table of contents are detected as headings
because of their font size.
h<n>.min-size, h<n>.max-size, h<n>.font-name with n=1..5:
"Para" elements must include at least one character whose size is greater
than or equal to the h<n>.min-size parameter and less than the
h<n>.max-size parameter to be recognized as a h1..h5 heading.
If h<n>.font-name is not the empty string, additionally the font name
must match.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:tet="http://www.pdflib.com/XML/TET5/TET-5.0" exclude-result-prefixes="tet"
>
<!-- Serialize the result as indented HTML. -->
<xsl:output method="html" indent="yes" />
<!-- Debug verbosity: templates emit extra markup when this is above 0,
and per-word / per-anchor traces when it is above 1. -->
<xsl:param name="debug">0</xsl:param>
<!-- When above 0 and the document has a Bookmarks element, the table of
contents is built from the bookmarks. -->
<xsl:param name="bookmark-toc">1</xsl:param>
<!-- When above 0 (and no bookmark TOC was produced), the table of contents
is built from Para elements recognized as headings. -->
<xsl:param name="toc-generate">1</xsl:param>
<!-- Inclusive range of page numbers that are skipped when the heading-based
table of contents is generated. -->
<xsl:param name="toc-exclude-min">-1</xsl:param>
<xsl:param name="toc-exclude-max">-1</xsl:param>
<!-- Heading recognition settings for h1..h5: glyph size bounds and an
optional font name. An empty font name disables the font check. -->
<xsl:param name="h1.min-size">30</xsl:param>
<xsl:param name="h1.max-size">10000</xsl:param>
<xsl:param name="h1.font-name">ThesisAntiqua-Bold</xsl:param>
<xsl:param name="h2.min-size">24</xsl:param>
<xsl:param name="h2.max-size">30</xsl:param>
<xsl:param name="h2.font-name" >TheSansExtraLight-Italic</xsl:param>
<xsl:param name="h3.min-size">14</xsl:param>
<xsl:param name="h3.max-size">24</xsl:param>
<xsl:param name="h3.font-name">ThesisAntiqua-Bold</xsl:param>
<xsl:param name="h4.min-size">8</xsl:param>
<xsl:param name="h4.max-size">14</xsl:param>
<xsl:param name="h4.font-name">ThesisAntiqua-Bold</xsl:param>
<!-- Unused heading level, values set to make matching impossible
(the minimum size is larger than the maximum size) -->
<xsl:param name="h5.min-size">10001</xsl:param>
<xsl:param name="h5.max-size">10000</xsl:param>
<xsl:param name="h5.font-name" />
<!-- Base name of the input PDF, derived from the Document's filename
attribute by the pdf-basename template. Used as the prefix of image file
names in the PlacedImage template. -->
<xsl:variable name="pdf-basename">
<xsl:call-template name="pdf-basename">
<xsl:with-param name="full-pdf-name"
select="/tet:TET/tet:Document/@filename" />
</xsl:call-template>
</xsl:variable>
<!-- The Resources element under Pages; the PlacedImage template looks up
Image and ColorSpace entries here. -->
<xsl:variable name="resources"
select="/tet:TET/tet:Document/tet:Pages/tet:Resources" />
<!-- Bookmark elements indexed by the id of the Destination they point to. -->
<xsl:key name="bookmark-by-destination" match="tet:Bookmark" use="@destination" />
<!-- Destination elements indexed by the id of the A anchor they refer to. -->
<xsl:key name="destination-by-anchor-id" match="tet:Destination" use="@anchor" />
<!-- Destination elements indexed by their own id. -->
<xsl:key name="destination-by-id" match="tet:Destination" use="@id" />
<!-- key for A elements of type 'start' or 'rect'; the contains() test on the
'|'-delimited list acts as a set membership check on @type -->
<xsl:key name="anchor-by-id" match="tet:A[contains('|start|rect|', concat('|', @type, '|'))]" use="@id" />
<xsl:template match="/">
  <!-- Entry point of the transformation.
       1. Aborts with a message unless every Content element was produced
          with word granularity and geometry information.
       2. Emits the HTML skeleton (doctype, head with title and CSS).
       3. Emits a table of contents, preferring PDF bookmarks over headings
          detected by font size and name.
       4. Emits the body for every page. -->
  <xsl:if
    test="tet:TET/tet:Document/tet:Pages/tet:Page/tet:Content[not(@granularity = 'word') or not(@geometry = 'true')]">
    <xsl:message terminate="yes">
      <xsl:text>Stylesheet tetml2html.xsl processing TETML for document '</xsl:text>
      <xsl:value-of select="tet:TET/tet:Document/@filename" />
      <xsl:text>': this stylesheet requires TETML in wordplus mode. </xsl:text>
      <xsl:text>Create the input in page mode "wordplus".</xsl:text>
    </xsl:message>
  </xsl:if>

  <!-- The doctype is written as raw text; the markup characters must be
       escaped here so that the stylesheet itself stays well-formed XML. -->
  <xsl:text disable-output-escaping="yes">&lt;!DOCTYPE html&gt;</xsl:text>
  <html>
    <head>
      <title>
        <xsl:text>HTML version of </xsl:text>
        <xsl:value-of select="tet:TET/tet:Document/@filename" />
      </title>
      <style type="text/css">
        .dropcap { float:left; font-size:88px; line-height:88px;
                   padding-top:3px; padding-right:3px; }
        <!-- The text-shadow CSS element is not honored by IE -->
        .shadowed { text-shadow: 2px 2px 3px #000; }
        h2.toc { text-indent: 20px; }
        h3.toc { text-indent: 40px; }
        h4.toc { text-indent: 60px; }
        h5.toc { text-indent: 80px; }
        table, td, th { border: 1px solid gray }
      </style>
    </head>
    <body>
      <xsl:choose>
        <!-- Bookmark-based table of contents takes precedence. -->
        <xsl:when test="$bookmark-toc &gt; 0 and tet:TET/tet:Document/tet:Bookmarks">
          <xsl:apply-templates select="tet:TET/tet:Document/tet:Bookmarks" />
        </xsl:when>
        <!-- Otherwise build it from detected headings, skipping the pages in
             the inclusive range toc-exclude-min .. toc-exclude-max. -->
        <xsl:when test="$toc-generate &gt; 0">
          <xsl:apply-templates
            select="tet:TET/tet:Document/tet:Pages/tet:Page[not(@number &gt;= $toc-exclude-min and @number &lt;= $toc-exclude-max)]"
            mode="toc" />
        </xsl:when>
      </xsl:choose>
      <xsl:apply-templates select="tet:TET/tet:Document/tet:Pages/tet:Page"
        mode="body" />
    </body>
  </html>
</xsl:template>
<!-- Group of templates for generating the Table of Contents. These templates
are all defined with mode "toc". They generate links to anchors for all the Para
elements that are identified as headings. -->
<xsl:template match="tet:Page" mode="toc">
  <!-- Emits one table-of-contents entry for every Para on the page that is
       recognized as a heading. Normal Paras produce nothing.

       A Para qualifies as h<n> when at least one of its glyphs has a size
       that is greater than or equal to h<n>.min-size and strictly less than
       h<n>.max-size, and (if h<n>.font-name is not empty) uses that font.
       The upper bound is exclusive so that this pass agrees with the
       tet:Para template that generates the link targets; with an inclusive
       bound a glyph sized exactly at max-size would yield a TOC link whose
       target id is never written. The first matching level wins. -->
  <xsl:variable name="fonts"
    select="/tet:TET/tet:Document/tet:Pages/tet:Resources/tet:Fonts/tet:Font" />
  <xsl:for-each select="tet:Content/tet:Para">
    <xsl:variable name="glyphs" select="tet:Box/tet:Word/tet:Box/tet:Glyph" />
    <xsl:variable name="toc-heading">
      <xsl:choose>
        <xsl:when
          test="$glyphs[@size &gt;= $h1.min-size and @size &lt; $h1.max-size
                and ($h1.font-name = '' or $fonts[@name = $h1.font-name]/@id = @font)]">h1</xsl:when>
        <xsl:when
          test="$glyphs[@size &gt;= $h2.min-size and @size &lt; $h2.max-size
                and ($h2.font-name = '' or $fonts[@name = $h2.font-name]/@id = @font)]">h2</xsl:when>
        <xsl:when
          test="$glyphs[@size &gt;= $h3.min-size and @size &lt; $h3.max-size
                and ($h3.font-name = '' or $fonts[@name = $h3.font-name]/@id = @font)]">h3</xsl:when>
        <xsl:when
          test="$glyphs[@size &gt;= $h4.min-size and @size &lt; $h4.max-size
                and ($h4.font-name = '' or $fonts[@name = $h4.font-name]/@id = @font)]">h4</xsl:when>
        <xsl:when
          test="$glyphs[@size &gt;= $h5.min-size and @size &lt; $h5.max-size
                and ($h5.font-name = '' or $fonts[@name = $h5.font-name]/@id = @font)]">h5</xsl:when>
        <!-- no xsl:otherwise as normal Paras are suppressed in the TOC -->
      </xsl:choose>
    </xsl:variable>
    <xsl:if test="string($toc-heading) != ''">
      <!-- The current node is still the Para, which toc-entry relies on. -->
      <xsl:call-template name="toc-entry">
        <xsl:with-param name="toc-heading" select="string($toc-heading)" />
      </xsl:call-template>
    </xsl:if>
  </xsl:for-each>
</xsl:template>
<!-- Generate an entry for the provided Para element as the specified heading
element $toc-heading (h1..h5) -->
<xsl:template name="toc-entry">
  <!-- Writes a table-of-contents line for the current Para.
       toc-heading: name of the HTML element to create (h1..h5).
       The entry carries class "toc" and links to the id generated for the
       current node; its text is taken from the Para's Word/Text elements. -->
  <xsl:param name="toc-heading" />
  <xsl:element name="{$toc-heading}">
    <xsl:attribute name="class">toc</xsl:attribute>
    <a href="#{generate-id()}">
      <xsl:apply-templates select="tet:Box/tet:Word/tet:Text | tet:Word/tet:Text" />
    </a>
  </xsl:element>
</xsl:template>
<!-- Templates to generate a table of contents from the Bookmarks elements -->
<xsl:template match="tet:Bookmarks">
<!-- Start the bookmark-based table of contents with the top-level Bookmark
children; nested bookmarks are handled by the tet:Bookmark template. -->
<xsl:apply-templates select="tet:Bookmark" />
</xsl:template>
<xsl:template match="tet:Bookmark">
  <!-- Writes one table-of-contents heading for this bookmark and then
       processes its nested bookmarks.
       The heading level equals the nesting depth of the bookmark, capped at
       6 because HTML offers no deeper heading element. The link target is
       the generated id of the A anchor reached through the bookmark's
       Destination. -->
  <xsl:variable name="depth" select="count(ancestor-or-self::tet:Bookmark)" />
  <xsl:variable name="heading-level">
    <xsl:choose>
      <xsl:when test="$depth &lt;= 6">
        <xsl:value-of select="concat('h', $depth)" />
      </xsl:when>
      <xsl:otherwise>h6</xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="destination" select="key('destination-by-id', @destination)" />
  <xsl:variable name="anchor" select="key('anchor-by-id', $destination/@anchor)" />
  <xsl:element name="{$heading-level}">
    <xsl:attribute name="class">toc</xsl:attribute>
    <a href="#{generate-id($anchor)}">
      <xsl:value-of select="tet:Title" />
    </a>
  </xsl:element>
  <xsl:apply-templates select="tet:Bookmark" />
</xsl:template>
<!-- Central functions to recursively iterate over the children of
Content elements.
Always change the select statements in process-first-content-child
and process-content-child-siblings in a consistent manner. -->
<xsl:template name="process-first-content-child">
  <!-- Starts the traversal of a page: applies templates to the first Para,
       Table or List (in document order) found below the Content element of
       the current node. The templates for those elements continue with
       their following siblings. -->
  <xsl:apply-templates
    select="(tet:Content/*[self::tet:Para or self::tet:Table or self::tet:List])[1]" />
</xsl:template>
<xsl:template name="process-content-child-siblings">
  <!-- Continues the traversal: applies templates to the nearest following
       sibling of the current node that is a Para, Table or List. -->
  <xsl:apply-templates
    select="(following-sibling::tet:Para | following-sibling::tet:Table | following-sibling::tet:List)[1]" />
</xsl:template>
<!-- Group of templates to generate the text body of the document. The headings
are identified in the same manner as in toc mode, only that in this case the anchors
are generated through "id" attributes for the h1, h2, ... elements. -->
<xsl:template match="tet:Page" mode="body">
  <!-- Writes the HTML for one page: an optional debug banner with page-level
       exceptions, the text content, and a list of the images placed on the
       page. -->
  <xsl:if test="$debug &gt; 0">
    <hr />
    <i>
      <xsl:value-of
        select="concat('[Page ', @number, ' of ', ancestor::tet:Document[1]/@filename, ']')" />
    </i>
    <xsl:apply-templates select="tet:Exception" />
  </xsl:if>

  <xsl:choose>
    <!-- Usual case: walk the Para / Table / List children of Content. -->
    <xsl:when test="not(tet:Content/tet:Word)">
      <xsl:call-template name="process-first-content-child" />
    </xsl:when>
    <!-- Content holds Words directly, so there are no Para or Table
         children; emit all of them as one paragraph. -->
    <xsl:otherwise>
      <div>
        <xsl:apply-templates
          select="(tet:Content/tet:A | tet:Content/tet:Word)[1]" />
      </div>
    </xsl:otherwise>
  </xsl:choose>

  <!-- Images may sit at any nesting depth (for example inside nested
       Lists), so collect all descendants into one unordered list. -->
  <xsl:if test="descendant::tet:PlacedImage">
    <div>
      <span style="font-style:italic">
        <xsl:value-of select="concat('Images on page ', @number, ':')" />
      </span>
      <ul>
        <xsl:apply-templates select="descendant::tet:PlacedImage" mode="body" />
      </ul>
    </div>
  </xsl:if>
</xsl:template>
<!-- Print out exceptions in an eye-catching color -->
<xsl:template match="tet:Exception">
  <!-- Shows the exception text in red. The message consists of a fixed
       label, a line feed, and the exception text in double quotes. -->
  <div style="color: red">
    <xsl:value-of
      select="concat('Exception occurred at page level:&#10;&quot;', ., '&quot;')" />
  </div>
</xsl:template>
<!-- Generate a heading element for the provided Para element as the specified
heading element $heading-type (h1..h5) -->
<xsl:template name="heading">
  <!-- Creates a heading element for the current Para.
       heading-type: name of the element to create (h1..h5).
       The element gets the generated id of the current node, which is the
       target of the links written in toc mode, and contains the text of the
       Para's words. -->
  <xsl:param name="heading-type" />
  <xsl:element name="{$heading-type}">
    <xsl:attribute name="id">
      <xsl:value-of select="generate-id(.)" />
    </xsl:attribute>
    <xsl:apply-templates select="(tet:Box/tet:Word | tet:Word)/tet:Text" />
  </xsl:element>
</xsl:template>
<!-- Recurse to process next Para child -->
<xsl:template name="process-para-child-siblings">
  <!-- Starts processing the content of the current Para: applies templates
       to the first A or Word element (in document order) that is either a
       direct child or the child of a Box child. The A and Word templates
       continue from there. -->
  <xsl:apply-templates
    select="(*[self::tet:A or self::tet:Word] | tet:Box/*[self::tet:A or self::tet:Word])[1]" />
</xsl:template>
<xsl:template match="tet:Para">
  <!-- Writes a paragraph either as a heading (h1..h5) or as a plain div and
       then continues with the next Para / Table / List sibling.

       A Para is a heading of level n when at least one glyph has a size
       that is greater than or equal to h<n>.min-size and strictly less than
       h<n>.max-size, and (if h<n>.font-name is not empty) uses that font.
       The first matching level wins. Headings receive the generated id of
       the Para so that table-of-contents links can point to them.

       The less-than operators are written as character entities because a
       literal one is not allowed inside an XML attribute value. -->
  <xsl:variable name="fonts"
    select="/tet:TET/tet:Document/tet:Pages/tet:Resources/tet:Fonts/tet:Font" />
  <xsl:variable name="glyphs" select="tet:Box/tet:Word/tet:Box/tet:Glyph" />
  <xsl:variable name="container">
    <xsl:choose>
      <xsl:when
        test="$glyphs[@size &gt;= $h1.min-size and @size &lt; $h1.max-size
              and ($h1.font-name = '' or $fonts[@name = $h1.font-name]/@id = @font)]">h1</xsl:when>
      <xsl:when
        test="$glyphs[@size &gt;= $h2.min-size and @size &lt; $h2.max-size
              and ($h2.font-name = '' or $fonts[@name = $h2.font-name]/@id = @font)]">h2</xsl:when>
      <xsl:when
        test="$glyphs[@size &gt;= $h3.min-size and @size &lt; $h3.max-size
              and ($h3.font-name = '' or $fonts[@name = $h3.font-name]/@id = @font)]">h3</xsl:when>
      <xsl:when
        test="$glyphs[@size &gt;= $h4.min-size and @size &lt; $h4.max-size
              and ($h4.font-name = '' or $fonts[@name = $h4.font-name]/@id = @font)]">h4</xsl:when>
      <xsl:when
        test="$glyphs[@size &gt;= $h5.min-size and @size &lt; $h5.max-size
              and ($h5.font-name = '' or $fonts[@name = $h5.font-name]/@id = @font)]">h5</xsl:when>
      <xsl:otherwise>div</xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <xsl:element name="{$container}">
    <!-- Only headings are link targets; plain paragraphs get no id. -->
    <xsl:if test="string($container) != 'div'">
      <xsl:attribute name="id">
        <xsl:value-of select="generate-id()" />
      </xsl:attribute>
    </xsl:if>
    <xsl:call-template name="process-para-child-siblings" />
  </xsl:element>

  <xsl:call-template name="process-content-child-siblings" />
</xsl:template>
<xsl:template match="tet:List">
  <!-- Writes a List as an unordered HTML list and then continues with the
       next Para / Table / List sibling.

       A anchor elements can appear between the Item and PlacedImage
       children. HTML does not allow anchors between li elements, nor a ul
       wrapped in an a element, so the anchors that are bookmark targets are
       written as empty a elements in front of the list. -->

  <!-- Step 1: from the 'start' anchors in this List, via their Destinations,
       to the Bookmarks that point to them. -->
  <xsl:variable name="list-anchors" select="(tet:A | tet:Item/tet:A)[@type = 'start']" />
  <xsl:variable name="list-destinations"
    select="key('destination-by-anchor-id', $list-anchors/@id)" />
  <xsl:variable name="list-bookmarks"
    select="key('bookmark-by-destination', $list-destinations/@id)" />

  <!-- Step 2: walk back from those Bookmarks to obtain every A element that
       is associated with them. -->
  <xsl:variable name="bookmarked-destinations"
    select="key('destination-by-id', $list-bookmarks/@destination)" />
  <xsl:variable name="bookmarked-anchors"
    select="key('anchor-by-id', $bookmarked-destinations/@anchor)" />

  <xsl:for-each select="$bookmarked-anchors">
    <a id="{generate-id(.)}" />
  </xsl:for-each>

  <ul>
    <xsl:apply-templates select="tet:Item" />
  </ul>
  <xsl:call-template name="process-content-child-siblings" />
</xsl:template>
<xsl:template match="tet:Item">
<!-- One list item: the content comes from the Item's Body child. -->
<li>
<xsl:apply-templates select="tet:Body" />
</li>
</xsl:template>
<!-- This starts the recursive iterating over the Body's children -->
<xsl:template match="tet:Body">
<!-- Only the first Para, Table or List child is selected here; the
templates for those elements call process-content-child-siblings to move
on to the following ones. -->
<xsl:apply-templates select="(tet:Para | tet:Table | tet:List)[1]" />
</xsl:template>
<xsl:template match="tet:A">
  <!-- Handles one anchor inside a paragraph and continues with the element
       that follows it.
       * A 'start' anchor that belongs to a URI annotation on the same page
         or to a bookmarked destination turns the following words into a
         link (up to the matching 'stop' anchor, or to the end of the
         paragraph if there is none).
       * A 'rect' anchor becomes an empty a element carrying a generated id.
       * Any other anchor is skipped. -->
  <xsl:variable name="anchor-id" select="@id" />
  <xsl:if test="$debug &gt; 1">
    <hr />
    <i>
      <xsl:value-of select="concat('A id: ', $anchor-id, ' type: ', @type)" />
    </i>
    <hr />
  </xsl:if>
  <xsl:choose>
    <xsl:when test="@type = 'start'
      and (@id = ancestor::tet:Page[1]/tet:Annotations/tet:Annotation[tet:Action[@type = 'URI']]/@anchor
      or @id = /tet:TET/tet:Document/tet:Destinations/tet:Destination[key('bookmark-by-destination', @id)]/@anchor)">
      <!-- First 'stop' anchor with the same id after this anchor, looked up
           among siblings, inside following sibling Boxes, and inside the
           Boxes that follow the parent. -->
      <xsl:variable name="stop-anchor"
        select="(following-sibling::tet:A | following-sibling::tet:Box/tet:A | ../following-sibling::tet:Box/tet:A)
                [@type = 'stop' and @id = $anchor-id][1]" />
      <xsl:choose>
        <xsl:when test="$stop-anchor">
          <!-- The link text consists of the words that still have the stop
               anchor after them. -->
          <xsl:call-template name="link">
            <xsl:with-param name="anchor-id" select="$anchor-id" />
            <xsl:with-param name="link-text"
              select="(following-sibling::tet:Word | following-sibling::tet:Box/tet:Word | ../following-sibling::tet:Box/tet:Word)
                      [(following-sibling::tet:A | following-sibling::tet:Box/tet:A | ../following-sibling::tet:Box/tet:A)[@type = 'stop' and @id = $anchor-id][1]]/tet:Text" />
          </xsl:call-template>
          <xsl:if test="$debug &gt; 1">
            <hr />
            <i>
              <xsl:value-of select="concat('A id: ', $anchor-id, ' type: ', 'stop')" />
            </i>
            <hr />
          </xsl:if>
          <!-- Continue with the first A or Word after the stop anchor. -->
          <xsl:apply-templates
            select="($stop-anchor/following-sibling::*[self::tet:A or self::tet:Word]
                    | $stop-anchor/following-sibling::tet:Box/*[self::tet:A or self::tet:Word]
                    | $stop-anchor/../following-sibling::tet:Box/*[self::tet:A or self::tet:Word])[1]" />
        </xsl:when>
        <!-- No stop anchor: the rest of the paragraph becomes the link text
             and the paragraph is finished. -->
        <xsl:otherwise>
          <xsl:call-template name="link">
            <xsl:with-param name="anchor-id" select="$anchor-id" />
            <xsl:with-param name="link-text"
              select="(following-sibling::tet:Word | following-sibling::tet:Box/tet:Word | ../following-sibling::tet:Box/tet:Word)/tet:Text" />
          </xsl:call-template>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <xsl:otherwise>
      <!-- A 'rect' anchor is written as a self-contained link target;
           every other anchor produces no output. -->
      <xsl:if test="@type = 'rect'">
        <a id="{generate-id(.)}" />
      </xsl:if>
      <!-- In both cases continue with the next A or Word element. -->
      <xsl:apply-templates
        select="(following-sibling::*[self::tet:A or self::tet:Word]
                | following-sibling::tet:Box/*[self::tet:A or self::tet:Word]
                | ../following-sibling::tet:Box/*[self::tet:A or self::tet:Word])[1]" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Process the sequence of words before the desired anchor type,
and recurse with the anchor that terminates the Word sequence -->
<xsl:template name="process-words-and-anchor">
  <!-- To be called with a Word as the current node. Outputs the text of the
       current Word and of all following Words that precede the next
       relevant anchor, then continues processing with that anchor.

       anchor-type: the type ('start' or 'rect') of the anchor that ends the
       word sequence.

       An anchor is relevant when it belongs to a URI annotation on the same
       page or to a destination that a bookmark points to. The anchor is
       searched in all three places where the caller also looks for it:
       among the following siblings, inside following sibling Boxes, and
       inside the Boxes that follow the parent. Looking in only some of these
       places would leave the anchor unidentified and the preceding words
       would be lost. -->
  <xsl:param name="anchor-type" />
  <xsl:variable name="anchor"
    select="(following-sibling::tet:A | following-sibling::tet:Box/tet:A | ../following-sibling::tet:Box/tet:A)
            [@type = $anchor-type
             and (@id = ancestor::tet:Page[1]/tet:Annotations/tet:Annotation[tet:Action[@type = 'URI']]/@anchor
                  or @id = /tet:TET/tet:Document/tet:Destinations/tet:Destination[key('bookmark-by-destination', @id)]/@anchor)][1]" />
  <!-- Several A elements can share an id (start and stop), so the node is
       identified by its generated id. -->
  <xsl:variable name="unique-anchor-id" select="generate-id($anchor)" />
  <!-- Process Text elements of Words before the anchor -->
  <xsl:apply-templates
    select="(. | following-sibling::tet:Word | following-sibling::tet:Box/tet:Word | ../following-sibling::tet:Box/tet:Word)
            [following-sibling::tet:A[generate-id() = $unique-anchor-id]
             or following-sibling::tet:Box/tet:A[generate-id() = $unique-anchor-id]
             or ../following-sibling::tet:Box/tet:A[generate-id() = $unique-anchor-id]]/tet:Text" />
  <!-- Recurse with anchor -->
  <xsl:apply-templates select="$anchor" />
</xsl:template>
<xsl:template match="tet:Word">
  <!-- Handles a run of words that starts with the current Word.
       If a relevant anchor follows (one that belongs to a URI annotation on
       the same page or to a bookmarked destination) and it is of type
       'start' or 'rect', the words in front of it are written and
       processing continues with that anchor. Otherwise the current Word and
       all following Words end the paragraph. -->
  <xsl:if test="$debug &gt; 1">
    <hr />
    <i>
      <xsl:value-of select="concat('Word: ', tet:Text)" />
    </i>
    <hr />
  </xsl:if>
  <!-- Type of the first relevant anchor after this Word; empty string if
       there is none. -->
  <xsl:variable name="next-anchor-type"
    select="string((following-sibling::tet:A | following-sibling::tet:Box/tet:A | ../following-sibling::tet:Box/tet:A)
            [@id = ancestor::tet:Page[1]/tet:Annotations/tet:Annotation[tet:Action[@type = 'URI']]/@anchor
             or @id = /tet:TET/tet:Document/tet:Destinations/tet:Destination[key('bookmark-by-destination', @id)]/@anchor][1]/@type)" />
  <xsl:choose>
    <xsl:when test="$next-anchor-type = 'start' or $next-anchor-type = 'rect'">
      <xsl:call-template name="process-words-and-anchor">
        <xsl:with-param name="anchor-type" select="$next-anchor-type" />
      </xsl:call-template>
    </xsl:when>
    <!-- Trailing sequence of Words that terminates the paragraph. -->
    <xsl:otherwise>
      <xsl:apply-templates
        select="(. | following-sibling::tet:Word | following-sibling::tet:Box/tet:Word | ../following-sibling::tet:Box/tet:Word)/tet:Text" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- To be called with the current element being an A element. This
generates an HTML <a> element with either a href attribute for a
URI link or with an "id" attribute if this is an anchor for a
bookmark reference. -->
<xsl:template name="link">
  <!-- To be called with an A element as the current node.
       anchor-id: the id of that anchor.
       link-text: node-set of Text elements that form the linked text.

       Nothing is written when link-text is empty. Otherwise the text is
       written in one of three forms:
       * as a heading with an a element carrying a generated id, if a
         bookmark points to a destination of this anchor;
       * as an a element with an href, if an annotation on the same page
         refers to this anchor and its Action has a URI attribute;
       * as plain text in all other cases. -->
  <xsl:param name="anchor-id" />
  <xsl:param name="link-text" />
  <xsl:if test="$debug &gt; 1">
    <hr />
    <i>
      <xsl:value-of
        select="concat('Link for A id: ', $anchor-id, ' word count: ', count($link-text))" />
    </i>
    <hr />
  </xsl:if>
  <xsl:if test="count($link-text) &gt; 0">
    <xsl:text> </xsl:text>
    <!-- Bookmarks that point to any Destination associated with the anchor -->
    <xsl:variable name="anchor-destinations"
      select="key('destination-by-anchor-id', $anchor-id)" />
    <xsl:variable name="anchor-bookmarks"
      select="key('bookmark-by-destination', $anchor-destinations/@id)" />
    <xsl:variable name="uri"
      select="ancestor::tet:Page[1]/tet:Annotations/tet:Annotation[@anchor = $anchor-id]/tet:Action/@URI" />
    <xsl:choose>
      <xsl:when test="$anchor-bookmarks">
        <!-- Heading level = nesting depth of the first bookmark found,
             capped at 6 because HTML has no deeper heading element. -->
        <xsl:variable name="depth"
          select="count($anchor-bookmarks[1]/ancestor-or-self::tet:Bookmark)" />
        <xsl:variable name="heading-level">
          <xsl:choose>
            <xsl:when test="$depth &lt;= 6">
              <xsl:value-of select="concat('h', $depth)" />
            </xsl:when>
            <xsl:otherwise>h6</xsl:otherwise>
          </xsl:choose>
        </xsl:variable>
        <xsl:element name="{$heading-level}">
          <!-- The generated id of the current A element is the link target
               used by the bookmark-based table of contents. -->
          <a id="{generate-id(.)}">
            <xsl:apply-templates select="$link-text" />
          </a>
        </xsl:element>
      </xsl:when>
      <xsl:when test="$uri">
        <a href="{$uri}">
          <xsl:apply-templates select="$link-text" />
        </a>
      </xsl:when>
      <xsl:otherwise>
        <xsl:apply-templates select="$link-text" />
      </xsl:otherwise>
    </xsl:choose>
  </xsl:if>
</xsl:template>
<xsl:template match="tet:Table">
<!-- Write the Table as an HTML table with one tbody that holds all rows,
then continue with the next Para / Table / List sibling. -->
<table>
<tbody>
<xsl:apply-templates select="tet:Row" />
</tbody>
</table>
<xsl:call-template name="process-content-child-siblings" />
</xsl:template>
<xsl:template match="tet:Row">
<!-- One table row containing the row's Cell children. -->
<tr>
<xsl:apply-templates select="tet:Cell" />
</tr>
</xsl:template>
<!-- Process tables also recursively -->
<xsl:template match="tet:Cell">
  <!-- Writes one table cell. A colSpan attribute on the Cell is passed on
       as the HTML colspan attribute.

       The cell content is traversed like page content: only the first
       child is selected here and each child template moves on to its next
       sibling. List is part of the selection for consistency with that
       sibling traversal, which already visits List elements; without it a
       List that is the first child of a Cell would be skipped. -->
  <td>
    <xsl:if test="@colSpan">
      <xsl:attribute name="colspan">
        <xsl:value-of select="@colSpan" />
      </xsl:attribute>
    </xsl:if>
    <xsl:apply-templates select="(tet:Para | tet:Table | tet:List)[1]" />
  </td>
</xsl:template>
<!-- Print information about a placed image on the page, together with a link
to the actual image. As the images created by TET are mostly not conforming to HTML,
we do not put the images inline on the HTML page. -->
<xsl:template mode="body" match="tet:PlacedImage">
  <!-- Writes a list item describing one placed image: a link to the image
       file followed by its dimensions, bit depth and color space.
       The file name is built from the PDF base name, an underscore, the
       image id and the extractedAs attribute of the Image resource. -->
  <xsl:variable name="id" select="@image" />
  <xsl:variable name="image" select="$resources/tet:Images/tet:Image[@id = $id]" />
  <xsl:variable name="space"
    select="$resources/tet:ColorSpaces/tet:ColorSpace[@id = $image/@colorspace]" />
  <li>
    <a href="{concat($pdf-basename, '_', $id, $image/@extractedAs)}">
      <xsl:value-of select="$id" />
    </a>
    <xsl:text>: Dimensions </xsl:text>
    <xsl:value-of select="concat($image/@width, 'x', $image/@height, ', ', $image/@bitsPerComponent)" />
    <xsl:text> bits per component, colorspace '</xsl:text>
    <xsl:value-of select="$space/@name" />
    <xsl:text>' with </xsl:text>
    <xsl:value-of select="$space/@components" />
    <xsl:text> component(s)</xsl:text>
  </li>
</xsl:template>
<xsl:template match="tet:Text">
  <!-- Writes the text of one word, preceded by a space, with optional
       formatting derived from the glyphs in the Box siblings that follow
       the Text element:
       * dropcap: the first character is wrapped in a span of class
         "dropcap" if the first glyph of a Box has dropcap set to true;
       * shadow: the whole word is wrapped in a span of class "shadowed" if
         any glyph has shadow set to true;
       * sup / sub: the whole word is wrapped in sup or sub if any glyph has
         the corresponding attribute set to true. Both cannot be active at
         once, so superscript is arbitrarily chosen to take precedence.
       The wrappers are built from the inside out. -->
  <xsl:text> </xsl:text>
  <xsl:variable name="glyphs" select="following-sibling::tet:Box/tet:Glyph" />

  <!-- Innermost layer: plain text, or text with a dropcap first letter. -->
  <xsl:variable name="with-dropcap">
    <xsl:choose>
      <xsl:when test="not(following-sibling::tet:Box/tet:Glyph[1][@dropcap = 'true'])">
        <xsl:value-of select="." />
      </xsl:when>
      <xsl:otherwise>
        <span class="dropcap">
          <xsl:value-of select="substring(., 1, 1)" />
        </span>
        <xsl:value-of select="substring(., 2)" />
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- Middle layer: optional shadow wrapper. -->
  <xsl:variable name="with-shadow">
    <xsl:choose>
      <xsl:when test="not($glyphs[@shadow = 'true'])">
        <xsl:copy-of select="$with-dropcap" />
      </xsl:when>
      <xsl:otherwise>
        <span class="shadowed">
          <xsl:copy-of select="$with-dropcap" />
        </span>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- Outermost layer: superscript wins over subscript. -->
  <xsl:choose>
    <xsl:when test="$glyphs[@sup = 'true']">
      <sup>
        <xsl:copy-of select="$with-shadow" />
      </sup>
    </xsl:when>
    <xsl:when test="$glyphs[@sub = 'true']">
      <sub>
        <xsl:copy-of select="$with-shadow" />
      </sub>
    </xsl:when>
    <xsl:otherwise>
      <xsl:copy-of select="$with-shadow" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Retrieve the basename of the PDF document. The assumption is that it has
a four-character ".pdf" suffix that is stripped off. Then the string behind the last
"/" or "\" is taken -->
<xsl:template name="pdf-basename">
  <!-- Returns the file name of the PDF without directories and without its
       last four characters, which are assumed to be the ".pdf" suffix.
       full-pdf-name: file name of the PDF, possibly with a directory part
       that uses "/" or "\" as separator. -->
  <xsl:param name="full-pdf-name" />
  <!-- Turn backslashes into forward slashes so that only one separator has
       to be handled. -->
  <xsl:variable name="unix-style" select="translate($full-pdf-name, '\\', '/')" />
  <xsl:call-template name="strip-dirs">
    <!-- Keep characters 1 .. length - 4, i.e. drop the suffix. -->
    <xsl:with-param name="path"
      select="substring($unix-style, 1, string-length($unix-style) - 4)" />
  </xsl:call-template>
</xsl:template>
<xsl:template name="strip-dirs">
  <!-- Returns the part of path that follows the last "/".
       path: a path that uses "/" as separator.
       The template removes one leading directory per recursion step and
       stops as soon as nothing is left behind the first "/" (which is also
       the case when there is no "/" at all); the path is then returned as
       it is. -->
  <xsl:param name="path" />
  <xsl:variable name="remainder" select="substring-after($path, '/')" />
  <xsl:choose>
    <xsl:when test="$remainder != ''">
      <xsl:call-template name="strip-dirs">
        <xsl:with-param name="path" select="$remainder" />
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$path" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
</xsl:stylesheet>