X-Git-Url: https://git.exim.org/users/jgh/exim.git/blobdiff_plain/168e428fc4dfcf7f4d377d137743d8332784fa35..90b6341f7282beed1175e942a113c30c212425c9:/doc/doc-docbook/TidyHTML-spec diff --git a/doc/doc-docbook/TidyHTML-spec b/doc/doc-docbook/TidyHTML-spec index 8459bcf17..4e7b8a2ca 100755 --- a/doc/doc-docbook/TidyHTML-spec +++ b/doc/doc-docbook/TidyHTML-spec @@ -1,6 +1,6 @@ #! /usr/bin/perl -# $Cambridge: exim/doc/doc-docbook/TidyHTML-spec,v 1.1 2005/06/16 10:32:31 ph10 Exp $ +# $Cambridge: exim/doc/doc-docbook/TidyHTML-spec,v 1.4 2006/04/04 14:03:49 ph10 Exp $ # Script to tidy up the spec HTML files that are generated by xmlto. The # following changes are made: @@ -9,8 +9,13 @@ # 2. Create reverse links from chapter and section titles back to the TOC. # 3. Tidy the ix01.html file - the actual index - by splitting long lines. # 4. Insert links from the letter divisions to the top of the Index. +# 5. Turn

into

and +# a matching

into
to get rid of unwanted vertical white +# space. +# 6. Before each occurrence of insert   so that the table's cell +# is a little bit wider than the text itself. -chdir "spec.html"; +chdir "spec_html"; $tocref = 1; @@ -20,11 +25,23 @@ open(IN, "index.html") || die "Failed to open index.html for reading: $!\n"; @toc = ; close(IN); -# Insert a newline after every > because the whole toc is generated as one -# humungous line that is hard to check. Then split the lines so that each one -# is a separate element in the vector. +# Insert a newline after every > except when it is preceded by 'class="quote"', +# because the whole toc is generated as one humungous line that is hard to +# check. We have to avoid it in the quote case because that puts a space into +# the output, and similarly for the the comes afterwards. Easy way out +# is just not to do it for all occurrences. Unfortunately, Perl does +# not implement lookbehinds where the alternatives are of different lengths, so +# we have to take two passes. + + +foreach $line (@toc) + { + $line =~ s/(?\s*/>\n/g; + $line =~ s/<\/span>\n/<\/span>/g; + } + +# Split the lines so that each one is a separate element in the vector. -foreach $line (@toc) { $line =~ s/>\s*/>\n/g; } for ($i = 0; $i < scalar(@toc); $i++) { splice @toc, $i, 1, (split /(?<=\n)/, $toc[$i]); } @@ -52,7 +69,10 @@ open (OUT, ">index.html") || die "Failed to open index.html for writing: $!\n"; print OUT @toc; close(OUT); -# Now scan each of the other page files and insert the reverse links. +# Now scan each of the other page files and insert the reverse links. While +# we are at it, we tidy up
by removing unwanted +# paragraph marks, which generate unwanted vertical space. We also insert +#   before to push table cells apart from each other. foreach $file (@chlist) { @@ -60,9 +80,26 @@ foreach $file (@chlist) @text = ; close(IN); + # Insert a newline after certain elements, and split the lines so that each + # one is a separate element in the vector. This makes it easier to recognize + # these elements. + foreach $line (@text) { - if ($line =~ /^(.*?)<\/a>(.+?)<\/h(.*)$/) + $line =~ s/

\s*(?!\n)/

\n/g; + $line =~ s/<\/p>\s*(?!\n)/<\/p>\n/g; + $line =~ s/<\/div>\s*(?!\n)/<\/div>\n/g; + $line =~ s/]*)>(?!\n)/\n/g; + } + + for ($i = 0; $i < scalar(@text); $i++) + { splice @text, $i, 1, (split /(?<=\n)/, $text[$i]); } + + $thisdiv = 0; + + for ($i = 0; $i < scalar(@text); $i++) + { + if ($text[$i] =~ /^(.*?)<\/a>(.+?)<\/h(.*)$/) { my($pre, $opt, $id, $title, $post) = ($1, $2, $3, $4, $5); @@ -73,7 +110,22 @@ foreach $file (@chlist) $ref = $backref{"$file"} if !defined $ref; # Adjust the line - $line = "$pre$title$title]*?class="literallayout">$/ && $text[$i+1] eq "

\n") + { + $text[++$i] = ""; + $thisdiv = 1; + } + elsif ($thisdiv && $text[$i] eq "

\n" && $text[$i+1] eq "
\n") + { + $text[$i] = ""; + $thisdiv = 0; + } + elsif ($text[$i] =~ /^\s*<\/td>/) + { + $text[$i] = " $text[$i]"; } }