2 # $Cambridge: exim/doc/doc-scripts/DoIndex,v 1.1 2004/10/07 15:04:35 ph10 Exp $
4 # Script for producing the Index for the Exim manual from the output of the
5 # SGCAL run. This is copied from the script for the Exim book.
8 ##############################################################################
9 # Patterns for matching things to be removed from the sort keys
11 # This was copied from the Exim book processor, but we have now found a
12 # better way of doing this. Leave the code until I am quite sure...
14 # $pat[0] = qr/ \(\\\*see also\*\\[^)]+\)/;
15 # $pat[1] = qr/(?<!@)\/\//; # //
16 # $pat[2] = qr/(?<!@)\/\\/; # /\
17 # $pat[3] = qr/(?<!@)\\\//; # \/
18 # $pat[4] = qr/(?<!@) \\ # non-@ \, followed by one of
20 # [\.\/] | # dot or slash
25 # \([.\/]? | # ( and optional . or slash
26 # [[\$\\%?!-"] | # [ $ \ % ! " or -
28 # \^{1,2}\/? # ^ or ^^ and optional slash
30 # $pat[5] = qr/(?: []\$\\%)?!"] | # ] $ \ % ) ? " or ! )
31 # \*{1,2} | # * or ** ) optional
32 # \^{1,2})? # ^ or ^^ )
34 # $pat[6] = qr/(?<!@)::/;
35 # $pat[7] = qr/\sR[FS]\b/;
41 # $pat[13] = qr/\(e?s\)/;
46 # $keysplit = qr/^(.*?)(\|\|.*?)?\s(R[AZ])?\s?(\d+)$/;
48 $keysplit = qr/^(.*?)(\@\|\@\|.*?)?\s(R[AZ])?\s?(\d+)$/;
56 ############old#############
57 #foreach $pattern (@pat) # Remove strings by pattern
59 # $x =~ s/$pattern//g;
60 # $y =~ s/$pattern//g;
62 ##########################
70 # Remove all special characters, except those preceded by @
72 $x =~ s/(?<!\@)[^\w\@\s]//g;
73 $y =~ s/(?<!\@)[^\w\@\s]//g;
75 # Remove the escaping @s
82 ################old ########################
83 #$x =~ s/:(\w+):/$1/g; # :fail: etc => fail
84 #$y =~ s/:(\w+):/$1/g;
86 #$x =~ s/^\@[^a-z]+/\@/i; # Make keys starting with @
87 #$y =~ s/^\@[^a-z]+/\@/i; # sort on @ followed by the first letter
88 ##############################################3
91 $x =~ s/\@_/\x7f/g; # Make underscore sort late (option names)
94 # Split up to sort on individual parts
96 my($xp,$xs,$xr,$xn) = $x =~ /$keysplit/;
97 my($yp,$ys,$yr,$yn) = $y =~ /$keysplit/;
99 $xr = "" if !defined $xr;
100 $yr = "" if !defined $yr;
102 $xs = "" if !defined $xs;
103 $ys = "" if !defined $ys;
107 print "a=$a\n x=$x\n xp=$xp\n xs=$xs\n xr=$xr\n xn=$xn\n";
108 print "b=$b\n y=$y\n yp=$yp\n ys=$ys\n yr=$yr\n yn=$yn\n";
111 my ($c) = "\L$xp" cmp "\L$yp"; # Caseless, primary text only
112 $c = $xp cmp $yp if $c == 0; # Caseful, primary text only
113 $c = "\L$xs" cmp "\L$ys" if $c == 0; # Caseless, secondary text only
114 $c = $xs cmp $ys if $c == 0; # Caseful, secondary text only
115 $c = $xn <=> $yn if $c == 0; # Compare the numbers
116 $c = $xr cmp $yr if $c == 0; # Sort RA before RZ
122 ##############################################################################
123 # Function for getting the next line from the @lines vector, using the global
124 # index $1. If the next pair of lines specifies a range of pages, combine them.
125 # That's why $linenumber has to be global - so we can increment it. If there's
126 # a range error, return "".
129 my($line) = $lines[$linenumber];
130 my($aa,$zz,$tline,$nextline,$tnextline);
132 if ($line =~ / RA (\d+)/)
135 $nextline = $lines[++$linenumber];
136 if ($nextline =~ / RZ (\d+)/)
142 print STDERR "** Bad range data (1)\n";
143 print STDERR " $line\n";
144 print STDERR " $nextline\n";
149 $tnextline = $nextline;
151 $tline =~ s/ RA \d+//;
152 $tnextline =~ s/ RZ \d+//;
154 if ($tline ne $tnextline)
156 print STDERR "** Bad range data (2)\n";
157 print STDERR " $line\n";
158 print STDERR " $nextline\n";
162 $line = ($aa eq $zz)? "$tline $aa" : "$tline $aa--$zz";
165 elsif ($line =~ / RZ (\d+)/)
167 print STDERR "** Bad range data (RZ without RA)\n";
168 print STDERR " $line\n";
178 ##############################################################################
179 # Function for outputting a line, checking for the current primary
180 # and indenting a bit for secondaries. We also need a newpar
181 # before each item, because the main indent is set to a largish indent
182 # for long reference lists, but the parindent is set to counter this.
183 # This is where we handle the break between letters. We know that any non-
184 # alphamerics at the start of lines are markup, except for @. A reference
185 # value of 99999 is for the "see also" lines. Suppress it.
188 my($text,$ref) = ($_[0],$_[1]);
189 my ($letter) = $text =~ /^[^A-Za-z0-9\@]*(.)/;
191 return if $text =~ /^\s*$/;
193 if ($ref eq "99999") # dummy for see also
199 $ref = "#$ref"; # prepend space
202 if ($letter =~ /\d/) { $letter = "0"; } else { $letter = "\U$letter"; }
204 print OUT ".newpar\n";
206 if ($letter ne $currentletter && $letter ge "A")
208 print OUT ".newletter\n";
209 $currentletter = $letter;
212 $text =~ s/\@'/\$'/g; # Turns @' into $' so that it prints a non-curly quote
214 if ($text =~ /^(.+)\|\|(.*)$/)
216 my($primary,$secondary) = ($1,$2);
218 if ($primary ne $lastprimary)
220 print OUT ".primary $primary\n";
221 $lastprimary = $primary;
224 $primary =~ s/"/""/g;
225 $secondary =~ s/"/""/g;
227 my($contprim) = $primary;
228 $contprim =~ s/ \(\\\*see also\*\\[^)]+\)//;
230 print OUT ".secondary \"$primary\" \"$secondary$ref\" \"$contprim\"\n";
233 # Not a two-part item; insert @ if the first char is a dot
237 print OUT "@" if $text =~ /^\./;
238 print OUT "$text$ref\n";
239 $lastprimary = $text;
247 ##############################################################################
256 my($arg) = shift @ARGV;
257 if ($arg eq "-k") { $show_keys = 1; }
258 elsif ($arg eq "-s") { $save_sorted = 1; }
259 elsif ($arg eq "-t") { $test_index = $save_sorted = 1; }
260 else { die "Unknown option $arg\n"; }
265 open(IN, "z-testindex") || die "Can't open z-testindex\n";
269 open(IN, "z-rawindex") || die "Can't open z-rawindex\n";
272 open(OUT, ">z-index") || die "Can't open z-index\n";
274 # Extract index lines ($e lines are contents). Until we hit the first
275 # $e line, we are dealing with "see also" index lines, for which we want
276 # to turn the line number into 99999.
290 s/(\D)$/$1 99999/ if $prestuff; # No number in "see also"
293 $index_pagenumber = $1 if /^Index\$e(\d+)/;
297 # Sort, ignoring markup
299 print STDERR "Sorting ...\n";
300 @lines = sort cf @lines;
302 # Keep a copy of the sorted data, for reference
306 open(X, ">z-indexsorted") || die "Can't open z-indexsorted\n";
307 foreach $line (@lines)
314 # Heading for the index file
318 .linelength ~~sys.linelength + 16.0
328 \$c [~~sys.pagenumber]
335 .macro primary "text"
336 .if ~~sys.leftonpage < 2ld
343 .macro secondary "prim" "sec" "contprim"
344 .if ~~sys.leftonpage < 1ld
347 ~~3 \$it\{(continued)\}
354 .if ~~sys.leftonpage < 4ld
363 .page $index_pagenumber
370 # Process the lines and output the result.
371 # Note that $linenumber is global, and is changed by getnextentry() for
372 # pairs of lines that represent ranges.
376 $currenttext = $currentref = "";
380 print STDERR "Processing ...\n";
382 for ($linenumber = 0; $linenumber < @lines; $linenumber++)
384 $line = &getnextentry();
386 if ($line eq "") # Bad range data - but carry on to get all of it
392 # Split off the text and reference
394 ($text,$ref) = $line =~ /^(.*)\s+([\d-]+)$/;
396 # If same as current text, just add the new reference, unless its a duplicate
398 if ($text eq $currenttext)
400 if ($ref ne $lastref)
402 $currentref .= ", $ref";
408 # Not the same as the current text. Output the current text, then
409 # set up a new current.
411 &outline($currenttext, $currentref);
413 $currenttext = $text;
414 $currentref = $lastref = $ref;
417 # Output the final line and close the file
419 &outline($currenttext, $currentref);
422 die "** Aborted\n" if $badrange;
426 system("sgcal z-index -to zi-gcode -index /dev/null");
427 system("sgtops zi-gcode -to zi-ps");
428 print "PostScript in zi-ps\n";