-#! /usr/bin/perl
-
-# $Cambridge: exim/doc/doc-docbook/TidyHTML-spec,v 1.4 2006/04/04 14:03:49 ph10 Exp $
-
-# Script to tidy up the spec HTML files that are generated by xmlto. The
-# following changes are made:
-#
-# 1. Tidy the index.html file by splitting the very long lines.
-# 2. Create reverse links from chapter and section titles back to the TOC.
-# 3. Tidy the ix01.html file - the actual index - by splitting long lines.
-# 4. Insert links from the letter divisions to the top of the Index.
-# 5. Turn <div class="literallayout"><p> into <div class="literallayout"> and
-# a matching </p></div> into </div> to get rid of unwanted vertical white
-# space.
-# 6. Before each occurrence of </td> insert a non-breaking space (&nbsp;) so
-# that the table's cell is a little bit wider than the text itself.
-
-# Work inside the directory that holds the generated HTML. Stop at once if it
-# is missing: every file below is opened by relative name and rewritten in
-# place, so carrying on from the wrong directory could damage unrelated files.
-
-chdir "spec_html" or die "Failed to change directory to spec_html: $!\n";
-
-# Counter used to number the ids that are added to the TOC entries.
-
-$tocref = 1;
-
-# Read in the index.html file. It's really the TOC.
-
-open(my $toc_in, "<", "index.html")
-  or die "Failed to open index.html for reading: $!\n";
-@toc = <$toc_in>;
-close($toc_in);
-
-# Insert a newline after every > except when it is preceded by 'class="quote"',
-# because the whole toc is generated as one humungous line that is hard to
-# check. We have to avoid it in the quote case because that puts a space into
-# the output, and similarly for the </span> that comes afterwards. The easy way
-# out is just not to do it for any </span> occurrence. Unfortunately, Perl does
-# not implement lookbehinds where the alternatives are of different lengths, so
-# we have to take two passes: the first adds the newlines, the second takes
-# them away again after </span>.
-
-foreach my $line (@toc)
-  {
-  $line =~ s/(?<!class="quote")>\s*/>\n/g;
-  $line =~ s/<\/span>\n/<\/span>/g;
-  }
-
-# Split the lines so that each one is a separate element in the vector. The
-# lookbehind keeps the newline at the end of each piece.
-
-@toc = map { split /(?<=\n)/, $_ } @toc;
-
-# We want to create reverse links from each chapter and section title back to
-# the relevant place in the TOC. Scan the TOC for the relevant entries. Add
-# an id to each entry, and create tables that remember the file names and the
-# new link ids:
-#
-#   %backref  maps "file.html" or "file.html#fragment" to the new TOC id
-#   @chlist   gets the file name of every matching entry, in TOC order (a
-#             file is listed once for each entry that points into it)
-
-foreach my $line (@toc)
-  {
-  next unless $line =~ /^<a href="((?:ch|ix)\d+\.html)(#[^"]+)?">/;
-
-  my $chix = $1;
-
-  # The fragment is absent on entries that point at a whole file.
-  my $ss = defined $2 ? $2 : "";
-
-  my $id = sprintf "%04d", $tocref++;
-
-  $line =~ s/<a/<a id="toc$id"/;
-  $backref{"$chix$ss"} = "toc$id";
-  push @chlist, $chix;
-  }
-
-# Write out the modified index.html file. The file is truncated as soon as it
-# is opened, so check the write and the close as well: buffered output errors
-# (a full disk, for example) may only be reported at that point.
-
-open(my $toc_out, ">", "index.html")
-  or die "Failed to open index.html for writing: $!\n";
-print {$toc_out} @toc or die "Failed to write index.html: $!\n";
-close($toc_out) or die "Failed to close index.html: $!\n";
-
-# Now scan each of the other page files and insert the reverse links. While
-# we are at it, we tidy up <div class="literallayout"> by removing unwanted
-# paragraph marks, which generate unwanted vertical space. We also insert
-# &nbsp; before </td> to push table cells apart from each other.
-
-# @chlist gets one entry for every matching TOC line, so a file name can occur
-# in it many times. Remember which files have been handled so that each one is
-# read, modified and written only once.
-
-my %file_done;
-
-foreach my $file (@chlist)
-  {
-  next if $file_done{$file}++;
-
-  open(my $page_in, "<", $file)
-    or die "Failed to open $file for reading: $!\n";
-  my @text = <$page_in>;
-  close($page_in);
-
-  # Insert a newline after certain elements, and split the lines so that each
-  # one is a separate element in the vector. This makes it easier to recognize
-  # these elements.
-
-  foreach my $line (@text)
-    {
-    $line =~ s/<p>\s*(?!\n)/<p>\n/g;
-    $line =~ s/<\/p>\s*(?!\n)/<\/p>\n/g;
-    $line =~ s/<\/div>\s*(?!\n)/<\/div>\n/g;
-    $line =~ s/<div([^>]*)>(?!\n)/<div$1>\n/g;
-    }
-
-  @text = map { split /(?<=\n)/, $_ } @text;
-
-  # Set while we are inside a literallayout <div> whose opening <p> has been
-  # removed, so that only the matching </p> is removed as well.
-
-  my $thisdiv = 0;
-
-  for (my $i = 0; $i < scalar(@text); $i++)
-    {
-    # A title line: an empty <a id="..."></a> anchor followed by the title
-    # text and the closing heading tag. Turn the anchor into a link back to
-    # the TOC that encloses the title, keeping the original id.
-
-    if ($text[$i] =~ /^(.*?)<a( xmlns="[^"]+")? id="([^"]+)"><\/a>(.+?)<\/h(.*)$/)
-      {
-      my($pre, $opt, $id, $title, $post) = ($1, $2, $3, $4, $5);
-
-      # Section reference
-      my($ref) = $backref{"$file#$id"};
-
-      # If not found, try for a chapter reference
-      $ref = $backref{"$file"} if !defined $ref;
-
-      # Adjust the line
-      $text[$i]= "$pre<a$opt href=\"index.html#$ref\" id=\"$id\">$title</a></h$post";
-      }
-
-    # Opening of a literallayout <div> immediately followed by <p>: blank
-    # out the <p> line (and step over it).
-
-    elsif ($text[$i] =~ /^<div [^>]*?class="literallayout">$/ && $text[$i+1] eq "<p>\n")
-      {
-      $text[++$i] = "";
-      $thisdiv = 1;
-      }
-
-    # The matching </p> directly before the closing </div>: blank it out.
-
-    elsif ($thisdiv && $text[$i] eq "</p>\n" && $text[$i+1] eq "</div>\n")
-      {
-      $text[$i] = "";
-      $thisdiv = 0;
-      }
-
-    # Pad the end of a table cell. The padding is written as the &nbsp;
-    # entity because ordinary white space in front of </td> is collapsed
-    # when the page is rendered and would not widen the cell. It also means
-    # that a padded line does not match this pattern a second time.
-
-    elsif ($text[$i] =~ /^\s*<\/td>/)
-      {
-      $text[$i] = "&nbsp;$text[$i]";
-      }
-    }
-
-  # The file is truncated as soon as it is opened for writing, so check the
-  # write and the close too; buffered output errors may only show up there.
-
-  open(my $page_out, ">", $file)
-    or die "Failed to open $file for writing: $!\n";
-  print {$page_out} @text or die "Failed to write $file: $!\n";
-  close($page_out) or die "Failed to close $file: $!\n";
-  }
-
-# Now process the ix01.html file
-
-open(my $ix_in, "<", "ix01.html")
-  or die "Failed to open ix01.html for reading: $!\n";
-my @index = <$ix_in>;
-close($ix_in);
-
-# Insert a newline after every > because the whole index is generated as one
-# humungous line that is hard to check. Then split the lines so that each one
-# is a separate element in the vector.
-
-foreach my $line (@index) { $line =~ s/>\s*/>\n/g; }
-@index = map { split /(?<=\n)/, $_ } @index;
-
-# We want to add a list of letters at the top of the index, and link back
-# to them from each letter heading. First find the index title (the first
-# line that consists of just </h2>) and remember where to insert the list
-# of letters.
-
-my $listindex;
-for my $n (0 .. $#index)
-  {
-  if ($index[$n] =~ /^<\/h2>$/)
-    {
-    $listindex = $n;
-    last;
-    }
-  }
-
-if (defined $listindex)
-  {
-  # Now scan through for the letter headings and build the cross references,
-  # while also building up the list to insert. A letter heading is a line
-  # holding a single character followed by </h3>; the link is opened at the
-  # start of the line before it and closed at the end of the heading line.
-  # Each heading (id "<letter>B") and its entry in the list (id "<letter>T")
-  # link to one another.
-
-  my $list = "<h4>\n";
-  for my $n ($listindex .. $#index)
-    {
-    if ($index[$n] =~ /^(.)<\/h3>$/)
-      {
-      my $letter = $1;
-      $index[$n-1] =~ s/^/<a id="${letter}B" href="#${letter}T">/;
-      $index[$n] =~ s/$/<\/a>/;
-      $list .= "<a id=\"${letter}T\" href=\"#${letter}B\"> $letter</a>\n";
-      }
-    }
-
-  # Now we know which letters we have, we can insert the list. It goes in
-  # immediately before the </h2> line found above.
-
-  $list .= "</h4>\n";
-  splice @index, $listindex, 0, $list;
-  }
-else
-  {
-  # Without the title line there is nowhere sensible to put the list, so
-  # leave the letter headings alone rather than putting an empty list at
-  # the very start of the file.
-
-  warn "No </h2> line found in ix01.html: letter list not inserted\n";
-  }
-
-# Write out the modified ix01.html file. The file is truncated as soon as it
-# is opened, so check the write and the close as well.
-
-open(my $ix_out, ">", "ix01.html")
-  or die "Failed to open ix01.html for writing: $!\n";
-print {$ix_out} @index or die "Failed to write ix01.html: $!\n";
-close($ix_out) or die "Failed to close ix01.html: $!\n";
-
-
-# End