2 # $Cambridge: exim/doc/doc-scripts/g2h,v 1.2 2005/01/27 10:25:35 ph10 Exp $
4 # This is a script that turns the SGCAL source of Exim's documentation into
5 # HTML. It can be used for both the filter document and the main Exim
6 # specification. The syntax is
8 # g2h [-split no|section|chapter] <source file> <title>
10 # Previously, -split section was used for the filter document, and -split
11 # chapter for the main specification. However, the filter document has gained
12 # some chapters, so they are both split by chapter now. Only one -split can be
15 # A number of assumptions about the style of the input markup are made.
17 # The HTML is written into the directory html/ using the source file base
20 # Written by Philip Hazel
21 # Starting 21-Dec-2001
22 # Last modified 26-Nov-2003
24 #############################################################################
28 ##################################################
29 # Open an output file #
30 ##################################################
33 open (OUT, ">$_[0]") || die "Can't open $_[0]\n";
37 print OUT "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";
39 print OUT "<html>\n<head>\n<title>$doctitle" .
40 (($thischapter > 0)? " chapter $thischapter" : "") .
41 (($thissection > 0)? " section $thissection" : "") .
42 "</title>\n</head>\n" .
43 "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
44 "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";
46 # Forward/backward links when chapter splitting
50 print OUT "<font size=2>\n";
51 printf OUT ("<a href=\"${file_base}_%s.html\">Previous</a> \n",
52 $thischapter - 1) if $thischapter > 1;
53 printf OUT ("<a href=\"${file_base}_%s.html\">Next</a> \n",
54 $thischapter + 1) if $thischapter < $maxchapter;
55 print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";
56 print OUT " " x 6, "($doctitle)\n</font><hr>\n";
59 # Forward/backward links when section splitting
63 print OUT "<font size=2>\n";
64 printf OUT ("<a href=\"${file_base}_%s.html\">Previous</a> \n",
65 $thissection - 1) if $thissection > 1;
66 printf OUT ("<a href=\"${file_base}_%s.html\">Next</a> \n",
67 $thissection + 1) if $thissection < $maxsection;
68 print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";
69 print OUT " " x 6, "($doctitle)\n</font><hr>\n";
72 # Save the final component of the current file name (for TOC creation)
74 $_[0] =~ /^(?:.*)\/([^\/]+)$/;
80 ##################################################
81 # Close an output file #
82 ##################################################
84 # The first argument is one of:
86 # "CHAP" a chapter is ending
87 # "SECT" a section is ending
88 # "" the whole thing is ending
90 # In the first two cases $thischapter and $thissection contain the new chapter
91 # and section numbers, respectively. In the third case, we can deduce what is
92 # ending from the flags. The variables contain the current values.
97 print OUT "<hr>\n" if !$lastwasrule;
102 print OUT "<font size=2>\n";
103 printf OUT ("<a href=\"${file_base}_%s.html\">Previous</a> ",
104 $thischapter - 2) if ($thischapter > 2);
105 print OUT "<a href=\"${file_base}_$thischapter.html\">Next</a> ";
106 print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";
107 print OUT " " x 6, "($doctitle)\n</font>\n";
112 print OUT "<font size=2>\n";
113 printf OUT ("<a href=\"${file_base}_%s.html\">Previous</a> ",
114 $thissection - 2) if ($thissection > 2);
115 print OUT "<a href=\"${file_base}_$thissection.html\">Next</a> ";
116 print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";
117 print OUT " " x 6, "($doctitle)\n</font>\n";
124 print OUT "<font size=2>\n";
125 printf OUT ("<a href=\"${file_base}_%s.html\">Previous</a> ",
126 $thischapter - 1) if ($thischapter > 1);
127 print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";
128 print OUT " " x 6, "($doctitle)\n</font>\n";
132 print OUT "<font size=2>\n";
133 printf OUT ("<a href=\"${file_base}_%s.html\">Previous</a> ",
134 $thissection - 1) if ($thissection > 1);
135 print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";
136 print OUT " " x 6, "($doctitle)\n</font>\n";
140 print OUT "</body>\n</html>\n";
146 ##################################################
147 # Handle an index line #
148 ##################################################
150 # This function returns an empty string so that it can be called as part
151 # of an s operator when handling index items within paragraphs. The two
154 # the text to index, already converted to HTML
155 # 1 for the concept index, 0 for the options index
159 my($hash) = $_[1]? \%cindex : \%oindex;
162 # Up the index count, and compute the reference to the file and the
167 "${file_base}_$thischapter.html#IX$index_count"
169 "${file_base}_$thissection.html#IX$index_count"
173 # Create the index key, which consists of the text with all the HTML
174 # coding and any leading quotation marks removed. Turn the primary/secondary
175 # splitting string "||" into ":".
180 $key =~ s/<[^>]+>//g;
181 $key =~ s/&#(\d+);/chr($1)/eg;
185 # Turn all spaces in the text into &nbsp; so that they don't ever split.
186 # However, there may be spaces in the HTML that already exists in the
187 # text, so we have to avoid changing spaces inside <>.
# The lookahead accepts a space only when the next angle bracket after it
# is "<" (or there is none), which means the space lies outside any tag.
# The replacement has to be the entity itself: substituting an ordinary
# space would leave $text unchanged and index entries could still wrap.
189 $text =~ s/ (?=[^<>]*(?:<|$))/&nbsp;/g;
191 # If this is the first encounter with this index key, we create a
192 # straightforward reference.
194 if (!defined $$hash{$key})
196 $$hash{$key} = "<a href=\"$ref\">$text</a>";
199 # For the second and subsequent encounters, add "[2]" etc. to the
200 # index text. We find out the number by counting occurrences of "<a"
201 # in the existing string.
206 $number++ while $$hash{$key} =~ /<a/g;
207 $$hash{$key} .= " <a href=\"$ref\">[$number]</a>";
210 # Place the name in the current output
212 print OUT "<a name=\"IX$index_count\"></a>\n";
218 ##################################################
219 # Handle emphasis bars #
220 ##################################################
222 # Set colour green for text marked with "emphasis bars", keeping
223 # track in case the matching isn't perfect.
230 return "<font color=green>\n";
242 ##################################################
243 # Convert marked-up text #
244 ##################################################
246 # This function converts text from SGCAL markup to HTML markup, with a couple
249 # 1. We don't touch $t because that is handled by the .display code.
251 # 2. The text may contain embedded .index, .em, and .nem directives. We
252 # handle .em and .nem, but leave .index because it must be done during
253 # paragraph outputting.
255 # In a non-"rm" display, we turn $rm{ into cancelling of <tt>. Otherwise
256 # it is ignored - in practice it is only used in that special case.
258 # The order in which things are done in this function is highly sensitive!
262 my($rmspecial) = $_[1];
264 # Escape all & characters (they aren't involved in markup) but for the moment
265 # use &+ instead of &# so that we can handle # characters in the text.
269 # Turn SGCAL literals into HTML literals that don't look like SGCAL
270 # markup, so won't be touched by what follows. Again, use + instead of #.
273 $s =~ s/@([^@])/"&+".sprintf("%0.3d",ord($1)).";"/eg;
275 # Now turn any #s that are markup into spaces, and convert the previously
276 # created literals to the correct form.
279 $s =~ s/&\+(\d+);/&#$1;/g;
281 # Some simple markup that doesn't involve argument text.
283 $s =~ s/\$~//g; # turn $~ into nothing
284 $s =~ s/__/_/g; # turn __ into _
# The typographic characters below are emitted as HTML entities, not as raw
# non-ASCII characters: the page header written by this script declares no
# character encoding, so raw bytes would render differently from browser to
# browser. This is safe at this point because the & escaping described at
# the top of the function has already been done.
285 $s =~ s/--(?=$|\s|\d)/&ndash;/mg; # turn -- into endash in text or number range
286 $s =~ s/\(c\)/&copy;/g; # turn (c) into copyright symbol
290 # $s =~ s/`([^']+)'/``$1''/g;
# Turn `xxx' into a quoted string using numeric character references. The
# numeric form matters: the index code decodes &#NNN; with chr() when it
# builds keys, and the index sort strips a leading \x93 (chr(147)), so the
# opening quote must be &#147; for quoted index entries to sort correctly.
292 $s =~ s/`([^']+)'/&#147;$1&#148;/g;
294 # This is a fudge for some specific usages of $<; can't just do a global
295 # as it occurs in things like "$<variable name>" as well.
297 $s =~ s/(\d)\$<-/$1-/g; # turn 0$<- into 0-
298 $s =~ s/\$<//g; # other $< is ignored
300 # Turn <<...>> into equivalent SGCAL markup that doesn't involve the use of
301 # < and >, and then escape the remaining < and > characters in the text.
303 $s =~ s/<<([^>]*?)>>/<\$it{$1}>/g; # turn <<xxx>> into <$it{xxx}>
309 $s =~ s/\$sm\{//g; # turn $sm{ into nothing
310 $s =~ s/\$smc\{//g; # turn $smc{ into nothing
311 $s =~ s/\$smi\{//g; # turn $smi{ into nothing
313 $s =~ s/\$tt\{([^\}]*?)\}/<tt>$1<\/tt>/g; # turn $tt{xxx} into <tt>xxx</tt>
314 $s =~ s/\$it\{([^\}]*?)\}/<em>$1<\/em>/g; # turn $it{xxx} into <em>xxx</em>
315 $s =~ s/\$bf\{([^\}]*?)\}/<b>$1<\/b>/g; # turn $bf{xxx} into <b>xxx</b>
317 $s =~ s/\$cb\{([^\}]*?)\}/<tt><b>$1<\/b><\/tt>/g; # turn $cb{xxx} into
318 # <tt><b>xxx</b></tt>
320 $s =~ s/\\\\([^\\]*?)\\\\/<font size=-1>$1<\/font>/g; # turn \\xxx\\ into
322 $s =~ s/\\\?([^?]*?)\?\\/<a href="$1">$1<\/a>/g; # turn \?URL?\ into URL
324 $s =~ s/\\\(([^)]*?)\)\\/<i>$1<\/i>/g; # turn \(xxx)\ into <i>xxx</i>
325 $s =~ s/\\\"([^\"]*?)\"\\/<tt>$1<\/tt>/g; # turn \"xxx"\ into <tt>xxx</tt>
328 $s =~ s/\\\$([^\$]*?)\$\\/<tt>\$$1<\/tt>/g; # turn \$xxx$\ into <tt>$xxx</tt>
329 $s =~ s/\\\-([^\\]*?)\-\\/<i>-$1<\/i>/g; # turn \-xxx-\ into -italic
330 $s =~ s/\\\*\*([^*]*?)\*\*\\/<b>$1<\/b>/g; # turn \**xxx**\ into <b>xxx</b>
331 $s =~ s/\\\*([^*]*?)\*\\/<i>$1<\/i>/g; # turn \*xxx*\ into italic
# NOTE(review): the character class here is [^*], the same as in the
# \*xxx*\ rule just above, although this rule is delimited by %. Because the
# match is non-greedy it still ends at the first %\ it can reach; the only
# effect is that a bold span may not contain "*". Confirm whether [^%] was
# intended before changing it - the order and form of these rules matter.
332 $s =~ s/\\%([^*]*?)%\\/<b>$1<\/b>/g; # turn \%xxx%\ into bold
333 $s =~ s/\\([^\\]*?)\\/<tt>$1<\/tt>/g; # turn \xxx\ into <tt>xxx</tt>
334 $s =~ s/::([^\$]*?)::/<i>$1:<\/i>/g; # turn ::xxx:: into italic:
335 $s =~ s/\$\*\$/\*/g; # turn $*$ into *
341 $s =~ s/\$rm\{([^\}]*?)\}/<\/tt>$1<tt>/g; # turn $rm{xxx} into </tt>xxx<tt>
345 $s =~ s/\$rm\{([^\}]*?)\}/$1/g; # turn $rm{xxx} into xxx
348 # There is one case where the terminating } of an escape sequence is
349 # in another paragraph - this follows $sm{ - it can be fixed by
350 # removing any stray } in a paragraph that contains no { chars.
# NOTE(review): the guard below is a bare pattern match, so it is applied
# to $_ and not to $s. The comment above speaks of the paragraph being
# converted, which is $s; $_ holds whatever the caller last left in it.
# Confirm which of the two is intended before relying on this behaviour.
352 $s =~ s/\}//g if !/\{/;
354 # Remove any null flags ($$)
358 # If the paragraph starts with $c\b, remove it.
362 # If the paragraph starts with $e\b, indent it slightly.
364 $s =~ s/^\$e\b/ /;
366 # Handle .em, and .nem directives that occur within the paragraph
368 $s =~ s/\.em\s*\n/&setinem(1)/eg;
369 $s =~ s/\.nem\s*\n/&setinem(0)/eg;
371 # Explicitly included HTML
373 $s =~ s/\[\(([^)]+)\)\]/<$1>/g; # turn [(...)] into <...>
375 # Finally, do the substitutions and return the modified text.
377 $s =~ s/~~(\w+)/$var_value{$1}/eg;
384 ##################################################
385 # Start/end a paragraph #
386 ##################################################
388 # We want to leave paragraphs unterminated until we know that a horizontal
389 # rule does not follow, to avoid getting space inserted before the rule,
390 # which doesn't look good. So we have this function to help control things.
391 # If the argument is 1 we are starting a new paragraph; if it is 0 we want
392 # to force the ending of any incomplete paragraph.
409 ##################################################
410 # Handle a "paragraph" #
411 ##################################################
413 # Read a paragraph of text, which may contain many lines and may contain
414 # .index, .em, and .nem directives within it. We may also encounter
415 # ".if ~~html" within paragraphs. Process those directives,
416 # convert the markup, and output the rest as an HTML paragraph.
419 sub handle_paragraph{
424 if (/^\.if\s+~~html\b/)
427 $par =~ s/\s+$//; # lose unwanted whitespace and newlines
430 elsif ($htmlcond && /^\.else\b/)
432 while (<IN>) { last if /^\.fi\b/; }
436 elsif ($htmlcond && /^\.fi\b/)
442 last if /^\s*$/ || (/^\./ && !/^\.index\b/ && !/^\.em\b/ && !/^\.nem\b/);
445 $par = &handle_text($par, 0);
447 # We can't handle .index until this point, when we do it just before
448 # outputting the paragraph.
453 $par =~ s/\.index\s+([^\n]+)\n/&handle_index($1, 1)/eg;
460 ##################################################
461 # Handle a non-paragraph directive #
462 ##################################################
464 # The directives .index, .em, and .nem can also appear within paragraphs,
465 # and are then handled within the handle_paragraph() code.
467 sub handle_directive{
468 my($new_lastwasitem) = 0;
472 if (/^\.r?set\b/ || /^\.(?:\s|$)/) {} # ignore .(r)set and comments
474 elsif (/^\.justify\b/) {} # and .justify
476 elsif (/^\.newline\b/) { print OUT "<br>\n"; }
478 elsif (/^\.blank\b/ || /^\.space\b/) { print OUT "<br>\n"; }
480 elsif (/^\.rule\b/) { &setpar(0); print OUT "<hr>\n"; $lastwasrule = 1; }
482 elsif (/^\.index\s+(.*)/) { &handle_index(&handle_text($1), 1); }
484 # Emphasis is handled by colour
489 print OUT "<font color=green>" if ! $inem;
496 print OUT "</font>" if $inem;
500 # Ignore tab setting stuff - we use tables instead.
502 elsif (/^\.tabs(?:et)?\b/) {}
504 # .tempindent is used only to align some of the expansion stuff nicely;
505 # just ignore it. It is used in conjunction with .push/.pop.
507 elsif (/^\.(tempindent|push|pop)\b/) {}
509 # There are some instances of .if ~~sys.fancy in the source. Some of those
510 # that are not inside displays are two-part things, in which case we just keep
511 # the non-fancy part. For diagrams, however, they are in three parts:
514 # <aspic drawing stuff for PostScript and PDF>
516 # <ascii art for txt and Texinfo>
518 # <HTML instructions for including a gif>
521 # In this case, we skip to the third part.
523 elsif (/^\.if\s+~~sys\.fancy/ || /^\.else\b/)
526 { last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/; }
530 while (<IN>) { last if /^\.else\b/ || /^\.fi\b/; }
534 # Similarly, for .if !~~sys.fancy, take the non-fancy part.
536 elsif (/^\.if\s+!\s*~~sys.fancy/) {}
538 # There are some explicit tests for ~~html for direct HTML inclusions
540 elsif (/^\.if\s+~~html\b/) {}
542 # There are occasional requirements to do things differently for Texinfo/HTML
543 # and PS/txt versions. The latter are produced by SGCAL, so that's what the
546 elsif (/\.if\s+~~sgcal/)
548 while (<IN>) { last if /\.else\b/ || /\.fi\b/; }
551 # Also there is a texinfo flag
553 elsif (/^\.if\s+~~texinfo\b/)
556 { last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/; }
559 # Ignore any other .if, .else, or .fi directives
561 elsif (/^\.if\b/ || /^\.fi\b/ || /^\.else\b/) {}
565 elsif (/^\.indent\b/) {}
567 # Various flavours of numberpars map to corresponding list types.
569 elsif (/^\.numberpars\b/)
574 if ($rest =~ /(?:\$\.|\" \")/)
576 unshift @endlist, "ul";
577 unshift @listtype, "";
578 print OUT "<ul>\n<li>";
582 $nptype = ($rest =~ /roman/)? "a" : "1";
583 unshift @endlist, "ol";
584 unshift @listtype, " TYPE=\"$nptype\"";
585 print OUT "<ol>\n<li$listtype[0]>";
592 print OUT "</li>\n<li$listtype[0]>";
598 print OUT "</li>\n</$endlist[0]>\n";
603 # .display asis can use <pre> which uses a typewriter font.
604 # Otherwise, we have to do our own line breaking. Turn tabbed lines
605 # into an HTML table. There will always be a .tabs line first.
607 elsif (/^\.display\b/)
614 # For non asis displays, start a paragraph, and set up to put an
615 # explicit break after every line.
621 $indent = "<tt> </tt>";
624 # For asis displays, use <pre> and no explicit breaks
630 $indent = " ";
633 # Now read through until we hit .endd (or EOF, but that shouldn't happen)
634 # and process the lines in the display.
640 # The presence of .tabs[et] starts a table
645 print OUT "<table cellspacing=0 cellpadding=0>\n";
648 # Some displays have an indent setting - ignore
650 elsif (/^\.indent\b/) {}
652 # Some displays have .blank inside them
659 # Some displays have emphasis inside them
663 print OUT "<font color=green>" if ! $inem;
669 print OUT "</font>" if $inem;
673 # There are occasional instances of .if [!]~~sys.fancy inside displays.
674 # In both cases we want the non-fancy alternative. (The only thing that
675 # matters in practice is noticing .tabs[et] actually.) Assume the syntax
678 elsif (/^\.if\s+~~sys.fancy/ || /^\.else\b/)
682 last if /^\.fi\b/ || /^\.else/;
686 elsif (/^\.if\s+!\s*~~sys.fancy/) {}
690 # Ignore .newline and .linelength
692 elsif (/^\.newline\b/ || /^\.linelength\b/) {}
696 elsif (/^\.(\s|$)/) {}
698 # There shouldn't be any other directives inside displays
702 print "*** Ignored directive inside .display: $_";
705 # Handle a data line within a display. If it's an asis display, the only
706 # conversion is to escape the HTML characters. Otherwise, process the
720 $_ = &handle_text($_, !$rm);
721 $_ = "<tt>$_</tt>" if !$rm && $_ ne "";
724 # In a table, break fields at $t. For non-rm we must break the
725 # <tt> group as well.
731 s/\s*\$t\s*/ <\/td><td>/g;
735 s/\s*\$t\s*/ <\/tt><\/td><td><tt>/g;
738 print OUT "<tr><td> $_</td></tr>\n";
741 # Otherwise, output straight, with <br> for non asis displays
746 print OUT "$indent$_$eol\n";
749 } # Loop for display contents
751 # Finish off the table and the <pre> - leave a paragraph open
753 print OUT "</table>\n" if $intable;
754 print OUT "</pre>\n" if $asis;
757 # Handle configuration option definitions
759 elsif (/^\.startconf\s+(.*)/)
766 my($option, $type, $default) =
767 /^\.conf\s+(\S+)\s+("(?:[^"]|"")+"|\S+)\s+("(?:[^"]|"")+"|.*)/;
769 $option =~ s/\@_/_/g; # Underscore will be quoted in option name
771 # If $type ends with $**$, add ",expanded" as there doesn't seem to be
772 # a dagger character generally available.
774 $type =~ s/^"([^"]+)"/$1/;
775 $type =~ s/\$\*\*\$/, expanded/;
777 # Default may be quoted, and it may also have quotes that are required,
780 $default =~ s/^"(.*)"$/$1/;
781 $default =~ s/""/"/g;
782 $default = &handle_text($default, 0);
786 &handle_index($option, 0);
787 print OUT "<h3>$option</h3>\n" .
788 "<i>Use:</i> $confuse<br>" .
789 "<i>Type:</i> $type<br><i>Default:</i> $default<br>\n";
792 elsif (/^\.endconf\b/)
794 print OUT "<hr><br>\n";
798 # Handle "items" - used for expansion items and the like. We force the
799 # item text into bold, and put a rule between items.
801 elsif (/^\.startitems\b/) {}
803 elsif (/^\.item\s+(.*)/)
807 $arg =~ s/^"(.*)"$/$1/;
808 $arg = &handle_text($arg, 0);
810 # If there are two .items in a row, we don't want to put in the
811 # separator line or start a new paragraph.
822 print OUT "<b>$arg</b>\n";
823 $new_lastwasitem = 1;
826 elsif (/^\.enditems\b/)
828 print OUT "<hr><br>\n";
832 # Handle command line option items
834 elsif (/^\.startoptions\b/) {}
836 elsif (/^\.option\s+(.*)/)
839 $arg =~ s/"([^"]*)"/$1/g;
844 # For indexing, we want to take up to the first # or < in the line,
847 my($name) = $arg =~ /^([^#<]+)/;
848 $name = &handle_text($name, 0);
849 &handle_index("-$name", 0);
851 # Output as heading, after the index
853 $arg = &handle_text($arg, 0);
854 print OUT "<h3>-$arg</h3>\n";
857 elsif (/^\.endoptions\b/)
859 print OUT "<hr><br>\n";
862 # Found an SGCAL directive that isn't dealt with. Oh dear.
866 print "*** Unexpected SGCAL directive: line $. ignored:\n";
870 # Remember if last was a .item, and read the next line
872 $lastwasitem = $new_lastwasitem;
878 ##################################################
879 # First Pass - collect references #
880 ##################################################
885 open (IN, $source_file) || die "Can't open $source_file (first pass)\n";
888 # At the start of the specification text, there are some textual replacement
889 # definitions. They set values, but not cross-references. They may be preceded
892 $_ = <IN> while (/^\.(\s|$)/);
894 while (/^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/)
900 # Now skip on till we hit the start of the first chapter. It will be numbered
901 # 0 if we hit ".set chapter -1". There is only ever one unnumbered chapter.
903 while (!/^\.chapter/)
905 $thischapter = -1 if /^\.set\s+chapter\s+-1/;
909 # Loop for handling chapters
916 # Scan through chapter, setting up cross-references to the chapter
917 # and to the sections within it.
921 last if /^\.chapter/;
930 # Handle .(r)set directives.
932 if (/^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/ && $1 ne "runningfoot")
934 my($key,$value) = ($1,$2);
935 $value =~ s/~~chapter/$thischapter/e;
936 $value =~ s/~~section/$thissection/e;
938 # Only one of $chapsplit or $sectsplit can be set.
943 "<a href=\"${file_base}_$thischapter.html\">$value</a>"
945 "<a href=\"#CHAP$thischapter\">$value</a>";
948 elsif ($key =~ /^SECT/)
951 "<a href=\"${file_base}_$thischapter.html" .
952 "#SECT$thischapter.$thissection\">$value</a>"
954 $sectsplit? "<a href=\"${file_base}_$thissection.html\">$value</a>"
956 "<a href=\"#SECT$thischapter.$thissection\">$value</a>";
959 $var_value{$key} = $value;
971 ##################################################
972 # Second Pass - generate HTML #
973 ##################################################
984 # Open the source file and get the first line
986 open (IN, $source_file) || die "Can't open $source_file (2nd pass)\n";
989 # Skip on till we hit the start of the first chapter, but note if we
990 # pass ".set chapter -1", which is used to indicate no chapter numbering for
991 # the first chapter (we number it 0). Keep track of whether we are in macro
992 # definitions or not, and when not, notice occurrences of .index, because these
993 # are the "x see y" type entries.
995 while (!/^\.chapter/)
997 $thischapter = -1 if /^\.set\s+chapter\s+-1/;
998 $inmacro = 1 if /^\.macro/;
999 $inmacro = 0 if /^\.endm/;
1000 if (!$inmacro && /^\.index\s+(.*)/)
1004 $s = &handle_text($s, 0);
# NOTE(review): as it stands the pattern and the replacement both appear to
# be a single space, which would make this substitution a no-op. The comment
# says the spaces are meant to become unsplittable, so the replacement was
# presumably &nbsp; - confirm, and check how the index key is derived from
# $s before changing it.
1005 $s =~ s/ / /g; # All spaces unsplittable
1007 $key =~ s/<[^>]+>//g;
1008 $key =~ s/&#(\d+);/chr($1)/eg;
1016 open (TOC, ">$html/${file_base}_toc.html") ||
1017 die "Can't open $html/${file_base}_toc.html\n";
1019 print TOC "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";
1020 print TOC "<html>\n<head>\n<title>$doctitle Contents</title>\n</head>\n" .
1021 "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
1022 "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";
1023 print TOC "<h1>$doctitle</h1><hr>\n<ul>\n";
1025 # Open the data file if we are not splitting at chapters
1027 &openout("$html/${file_base}.html") if !$chapsplit;
1029 # Loop for handling chapters. At the start of this loop, $_ is either EOF,
1030 # or contains a .chapter line.
1032 $firstchapter = $thischapter + 1;
1036 print TOC "</ul>\n" if $insection;
1043 # Start a new file if required
1047 &closeout("CHAP") if $thischapter != $firstchapter;
1048 &openout("$html/${file_base}_$thischapter.html");
1051 # Set up the chapter title. Save it for the TOC. Set up the anchor and
1052 # link back to the TOC and show the title.
1054 $_ =~ /^\.chapter\s+(.*)/;
1056 my($title) = (($thischapter > 0)? "$thischapter. " : "") . &handle_text($1, 0);
1059 print TOC "<li><a " .
1060 "name=\"TOC$tocn\" " .
1061 "href=\"$current_file#CHAP$thischapter\">$title</a></li>\n";
1064 print OUT "<a name=\"CHAP$thischapter\" href=\"${file_base}_toc.html#TOC$tocn\">\n";
1065 print OUT "$title\n</a></h1>\n";
1067 # Scan the contents of the chapter
1072 last if /^\.chapter/;
1074 # Handle the start of a new section, starting a new file if required
1076 if (/^\.section\s+(.*)/)
1080 print TOC "<ul>\n" if !$insection;
1083 my($title) = (($thischapter > 0)? "$thischapter." : "") .
1084 "$thissection. " . &handle_text($1, 0);
1089 &openout("$html/${file_base}_$thissection.html");
1093 printf TOC ("<li><a " .
1094 "name=\"TOC$tocn\" " .
1095 "href=\"$current_file#SECT%s$thissection\">%s</a></li>\n",
1096 ($thischapter > 0)? "$thischapter." : "", $title);
1100 printf OUT ("<a name=\"SECT%s$thissection\" ",
1101 ($thischapter > 0)? "$thischapter." : "");
1102 print OUT "href=\"${file_base}_toc.html#TOC$tocn\">\n";
1103 print OUT "$title\n</a></h2>\n";
1108 # Blank lines at this level are ignored
1115 # Directive and non-directive lines are handled independently, though
1116 # in each case further lines may be read. Afterwards, the next line is
1117 # in $_. If .em is at the start of a paragraph, treat it with the
1118 # paragraph, because the matching .nem will be too. Messy!
1127 print OUT "<font color=green>" if ! $inem;
1129 # Used to handle it here - but that fails if it is .section.
1130 # Just let the next iteration of the loop handle it.
1131 # &handle_directive();
1137 &handle_paragraph();
1147 &handle_directive();
1155 &handle_paragraph();
1160 } # Loop for each line in a chapter
1161 } # Loop for each chapter
1163 # Close the last file, end off the TOC, and we are done.
1167 print TOC "</ul>\n" if $insection;
# Add TOC entries for the concept and option indexes, but only for an index
# that actually has entries. The emptiness test uses a key count, matching
# the tests made by the main program before it creates the index files;
# "defined %hash" must not be used, as it is a fatal compile-time error in
# Perl 5.22 and later. Each entry takes the next TOC anchor number, which is
# remembered in $cindex_tocn / $oindex_tocn.
1169 if (scalar(keys %cindex) > 0)
1171 $cindex_tocn = ++$tocn;
1172 print TOC "<li><a name=\"TOC$tocn\" ".
1173 "href=\"${file_base}_cindex.html\">Concept Index</a></li>\n";
1176 if (scalar(keys %oindex) > 0)
1178 $oindex_tocn = ++$tocn;
1179 print TOC "<li><a name=\"TOC$tocn\" ".
1180 "href=\"${file_base}_oindex.html\">Option Index</a></li>\n";
1183 print TOC "</ul>\n</body>\n</html>\n";
1191 ##################################################
1192 # Adjust index points #
1193 ##################################################
1195 # Because of the way the source is written, there are often index entries
1196 # that immediately follow the start of chapters and sections and the definition
1197 # of "items" like "helo = verify". This gets the correct page numbers for the
1198 # PostScript and PDF formats. However, for HTML we want the index anchor to be
1199 # before the section heading, because browsers tend to put the index point at
1200 # the top of the screen. So we re-read all the files we've just created, and
1201 # move some of the index points about. This is necessary only if indexes exist.
1202 # The files are small enough to be handled entirely in memory.
1204 sub adjust_index_points {
1205 print "Adjusting index points to precede headings\n";
1209 opendir(DIR, "$html") || die "Failed to opendir $html\n";
1210 while ($file = readdir(DIR))
1213 next unless $file =~ /^${file_base}_\d+\.html$/;
1215 open(IN, "<$html/$file") ||
1216 die "Failed to open $html/$file (read)\n";
1220 for ($i = 0; $i < @lines; $i++)
1222 if ($lines[$i] =~ /^<a name="IX\d+"><\/a>$/)
1224 # Handle an index line that follows a heading definition. Move it back
1225 # to just before the <h1> or whatever. This preserves the order of
1226 # multiple index lines, not that that matters.
1228 if ($lines[$i-1] =~ /^<\/a><\/h(\d)>/)
1232 for ($j = $i-2; $j > 0 && $j > $i - 10; $j--)
1234 if ($lines[$j] =~ /<h$1>/)
1242 splice(@lines, $j, 0, splice(@lines, $i, 1));
1246 # Handle an index line that follows an "item". Move it back one line.
1248 elsif ($lines[$i-1] =~ /^<b>.*<\/b>\s*$/)
1250 splice(@lines, $i-1, 0, splice(@lines, $i, 1));
1253 # Handle an index line that follows a "conf" definition
1255 elsif ($lines[$i-1] =~ /^<i>Type:<\/i>/ && $lines[$i-2] =~ /^<h3>/)
1257 splice(@lines, $i-2, 0, splice(@lines, $i, 1));
1260 # Handle an index line that follows an "option" definition
1262 elsif ($lines[$i-1] =~ /^<h3>/)
1264 splice(@lines, $i-1, 0, splice(@lines, $i, 1));
1269 open(OUT, ">$html/$file") ||
1270 die "Failed to open $html/$file (write)\n";
1281 ##################################################
1283 ##################################################
1287 my($ifname) = $_[1];
1288 my($ititle) = $_[2];
# Open the index output file. $ifname (a copy of the second argument) is
# used for both the path and the error message so that the two cannot
# disagree; the message names the full file, including the .html suffix,
# and includes the system error text.
1291 open(INDEX, ">$html/${file_base}_$ifname.html") ||
1292 die "Failed to open $html/${file_base}_$ifname.html: $!\n";
1294 print INDEX "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";
1295 print INDEX "<html>\n<head>\n<title>$doctitle $ititle</title>\n";
1296 print INDEX "<base target=\"body\">\n</head>\n";
1298 print INDEX "<body bgcolor=\"#FFFFDF\" text=\"#00005A\" " .
1299 "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";
1301 print INDEX "<h3>$ititle</h3>\n";
1303 # We have to scan the keys in the hash twice; first to build the list
1304 # of initial letters, and then to do the business. The first time we
1305 # do not need to sort them.
1307 foreach $key (keys %$hash)
1309 my($initial) = substr($key,0,1);
1310 $initial = "\U$initial";
1311 $indexindex{$initial} = 1 if $initial ge "A" && $initial le "Z";
1314 print INDEX "<p>\n";
1315 foreach $key (sort keys %indexindex)
1317 print INDEX " <a href=\"#$key\" target=\"index\">$key</a>\n";
1319 print INDEX "<hr></p>\n";
1322 print INDEX "<p>\n";
1329 $aa =~ s/^\x93//; # Seems like the actual char values are
1330 $bb =~ s/^\x93//; # set by this time, not "&#147;"
1332 return ("\L$aa" eq "\L$bb")? ("$aa" cmp "$bb") : ("\L$aa" cmp "\L$bb");
1336 my($initial) = substr($key,0,1);
1337 $initial = "\U$initial";
1338 if ($initial ne $letter && $initial ge "A" && $initial le "Z")
1340 print INDEX "<br>\n";
1341 print INDEX "<a name=\"$initial\"></a>\n";
1342 print INDEX "<font size=\"+1\">\U$initial\E</font><br>\n";
1345 print INDEX "$$hash{$key}<br>\n";
1348 print INDEX "</p>\n";
1350 print INDEX "</body>\n</html>\n";
1357 ##################################################
1358 # Show usage and die #
1359 ##################################################
1362 die "Usage: g2h [-split no|section|chapter] <source> <title>\n";
1367 ##################################################
1368 # Entry point and main program #
1369 ##################################################
1372 # Directory in which to put the new HTML files
1401 while (scalar @ARGV > 0 && $ARGV[0] =~ /^-/)
1403 if ($ARGV[0] eq "-split" && !$splitset)
1407 my($type) = shift @ARGV;
1408 if ($type eq "section") { $sectsplit = 1; }
1409 elsif ($type eq "chapter") { $chapsplit = 1; }
1410 elsif ($type eq "no" ) { $sectsplit = $chapsplit = 0; }
1416 # Get the source file and its base
1418 &usage() if scalar @ARGV <= 0;
1419 $source_file = shift @ARGV;
1420 ($file_base) = $source_file =~ /^(.*)\.src$/;
1422 &usage() if scalar @ARGV <= 0;
1423 $doctitle = shift @ARGV;
1425 print "\nCreate HTML for $doctitle from $source_file\n";
1427 # Remove the old HTML files
1429 print "Removing old HTML files\n";
1430 system("/bin/rm -rf $html/${file_base}_*.html");
1432 # First pass identifies all the chapters and sections, and collects the
1433 # values of the cross-referencing variables.
1435 print "Scanning for cross-references\n";
1438 $maxchapter = $thischapter; # Used if chapter splitting
1439 $maxsection = $thissection; # Used if section splitting
1441 # Second pass actually creates the HTML files.
1443 print "Creating the HTML files\n";
1446 # Reprocess for moving some of the index points, if indexes were created
1448 &adjust_index_points() if scalar(keys %cindex) > 0 || scalar(keys %oindex) > 0;
1450 # Finally, we must create the option and concept indexes if any data
1451 # has been collected for them.
1453 if (scalar(keys %cindex) > 0)
1455 print "Creating concept index\n";
1456 &create_index(\%cindex, "cindex", "Concepts");
1459 if (scalar(keys %oindex) > 0)
1461 print "Creating option index\n";
1462 &create_index(\%oindex, "oindex", "Options");