X-Git-Url: https://git.exim.org/users/jgh/exim.git/blobdiff_plain/168e428fc4dfcf7f4d377d137743d8332784fa35..a466095c0f9c7f48b1c9f857b5a17cab69fecd28:/doc/doc-docbook/TidyHTML-spec
diff --git a/doc/doc-docbook/TidyHTML-spec b/doc/doc-docbook/TidyHTML-spec
index 8459bcf17..4e7b8a2ca 100755
--- a/doc/doc-docbook/TidyHTML-spec
+++ b/doc/doc-docbook/TidyHTML-spec
@@ -1,6 +1,6 @@
#! /usr/bin/perl
-# $Cambridge: exim/doc/doc-docbook/TidyHTML-spec,v 1.1 2005/06/16 10:32:31 ph10 Exp $
+# $Cambridge: exim/doc/doc-docbook/TidyHTML-spec,v 1.4 2006/04/04 14:03:49 ph10 Exp $
# Script to tidy up the spec HTML files that are generated by xmlto. The
# following changes are made:
@@ -9,8 +9,13 @@
# 2. Create reverse links from chapter and section titles back to the TOC.
# 3. Tidy the ix01.html file - the actual index - by splitting long lines.
# 4. Insert links from the letter divisions to the top of the Index.
+# 5. Turn <div class="literallayout"><p> into <div class="literallayout"> and
+# a matching </p></div> into </div> to get rid of unwanted vertical white
+# space.
+# 6. Before each occurrence of </td> insert &nbsp;&nbsp; so that the table's cell
+# is a little bit wider than the text itself.
-chdir "spec.html";
+chdir "spec_html";
$tocref = 1;
@@ -20,11 +25,23 @@ open(IN, "index.html") || die "Failed to open index.html for reading: $!\n";
@toc = ;
close(IN);
-# Insert a newline after every > because the whole toc is generated as one
-# humungous line that is hard to check. Then split the lines so that each one
-# is a separate element in the vector.
+# Insert a newline after every > except when it is preceded by 'class="quote"',
+# because the whole toc is generated as one humungous line that is hard to
+# check. We have to avoid it in the quote case because that puts a space into
+# the output, and similarly for the </span> that comes afterwards. Easy way out
+# is just not to do it for all </span> occurrences. Unfortunately, Perl does
+# not implement lookbehinds where the alternatives are of different lengths, so
+# we have to take two passes.
+
+
+foreach $line (@toc)
+ {
+ $line =~ s/(?\s*/>\n/g;
+ $line =~ s/<\/span>\n/<\/span>/g;
+ }
+
+# Split the lines so that each one is a separate element in the vector.
-foreach $line (@toc) { $line =~ s/>\s*/>\n/g; }
for ($i = 0; $i < scalar(@toc); $i++)
{ splice @toc, $i, 1, (split /(?<=\n)/, $toc[$i]); }
@@ -52,7 +69,10 @@ open (OUT, ">index.html") || die "Failed to open index.html for writing: $!\n";
print OUT @toc;
close(OUT);
-# Now scan each of the other page files and insert the reverse links.
+# Now scan each of the other page files and insert the reverse links. While
+# we are at it, we tidy up <div class="literallayout"> blocks by removing
+# unwanted paragraph marks, which generate unwanted vertical space. We also
+# insert &nbsp;&nbsp; before </td> to push table cells apart from each other.
foreach $file (@chlist)
{
@@ -60,9 +80,26 @@ foreach $file (@chlist)
@text =
;
close(IN);
+ # Insert a newline after certain elements, and split the lines so that each
+ # one is a separate element in the vector. This makes it easier to recognize
+ # these elements.
+
foreach $line (@text)
{
- if ($line =~ /^(.*?)<\/a>(.+?)<\/h(.*)$/)
+ $line =~ s/\s*(?!\n)/
\n/g;
+ $line =~ s/<\/p>\s*(?!\n)/<\/p>\n/g;
+ $line =~ s/<\/div>\s*(?!\n)/<\/div>\n/g;
+ $line =~ s/
]*)>(?!\n)/
\n/g;
+ }
+
+ for ($i = 0; $i < scalar(@text); $i++)
+ { splice @text, $i, 1, (split /(?<=\n)/, $text[$i]); }
+
+ $thisdiv = 0;
+
+ for ($i = 0; $i < scalar(@text); $i++)
+ {
+ if ($text[$i] =~ /^(.*?)
<\/a>(.+?)<\/h(.*)$/)
{
my($pre, $opt, $id, $title, $post) = ($1, $2, $3, $4, $5);
@@ -73,7 +110,22 @@ foreach $file (@chlist)
$ref = $backref{"$file"} if !defined $ref;
# Adjust the line
- $line = "$pre$title$title]*?class="literallayout">$/ && $text[$i+1] eq "
\n")
+ {
+ $text[++$i] = "";
+ $thisdiv = 1;
+ }
+ elsif ($thisdiv && $text[$i] eq "
\n" && $text[$i+1] eq "
\n")
+ {
+ $text[$i] = "";
+ $thisdiv = 0;
+ }
+ elsif ($text[$i] =~ /^\s*<\/td>/)
+ {
+ $text[$i] = " $text[$i]";
}
}