doc/doc-scripts/DoIndex

   1 #! /usr/bin/perl -w
   2
   3 # Script for producing the Index for the Exim manual from the output of the
   4 # SGCAL run. This is copied from the script for the Exim book.
   5
   6
   7 ##############################################################################
   8 # Patterns for matching things to be removed from the sort keys
   9
  10 # This was copied from the Exim book processor, but we have now found a
  11 # better way of doing this. Leave the code until I am quite sure...
  12
  13 # $pat[0]  = qr/ \(\\\*see also\*\\[^)]+\)/;
  14 # $pat[1]  = qr/(?<!@)\/\//;                     # //
  15 # $pat[2]  = qr/(?<!@)\/\\/;                     # /\
  16 # $pat[3]  = qr/(?<!@)\\\//;                     # \/
  17 # $pat[4]  = qr/(?<!@) \\                        # non-@ \, followed by one of
  18 #                             (?:
  19 #                             [\.\/] |           # dot or slash
  20 #                             !- |               # !-
  21 #                             !\+ |              # !+
  22 #                             !\. |              # !.
  23 #                             "\+ |              # "+
  24 #                             \([.\/]? |         # ( and optional . or slash
  25 #                             [[\$\\%?!-"] |     # [ $ \ % ! " or -
  26 #                             \*{1,2} |          # * or **
  27 #                             \^{1,2}\/?         # ^ or ^^ and optional slash
  28 #                             )/x;
  29 # $pat[5]  = qr/(?: []\$\\%)?!"] |               # ] $ \ % ) ? " or ! )
  30 #                   \*{1,2}  |                   # * or **            ) optional
  31 #                   \^{1,2})?                    # ^ or ^^            )
  32 #                   \\/x;                        # then \
  33 # $pat[6]  = qr/(?<!@)::/;
  34 # $pat[7]  = qr/\sR[FS]\b/;
  35 # $pat[8]  = qr/``/;
  36 # $pat[9]  = qr/''/;
  37 # $pat[10] = qr/`/;
  38 # $pat[11] = qr/'/;
  39 # $pat[12] = qr/,/;
  40 # $pat[13] = qr/\(e?s\)/;
  41
  42
  43 # Other patterns
  44
  45 # $keysplit = qr/^(.*?)(\|\|.*?)?\s(R[AZ])?\s?(\d+)$/;
  46
  47 $keysplit = qr/^(.*?)(\@\|\@\|.*?)?\s(R[AZ])?\s?(\d+)$/;
  48
  49
  50 # The sort function
  51
  52 sub cf {
  53 my($x,$y) = ($a,$b);
  54
  55 ############old#############
  56 #foreach $pattern (@pat)    # Remove strings by pattern
  57 #  {
  58 #  $x =~ s/$pattern//g;
  59 #  $y =~ s/$pattern//g;
  60 #  }
  61 ##########################
  62
  63
  64 # Turn || into @|@|
  65
  66 $x =~ s/\|\|/@|@|/g;
  67 $y =~ s/\|\|/@|@|/g;
  68
  69 # Remove all special characters, except those preceded by @
  70
  71 $x =~ s/(?<!\@)[^\w\@\s]//g;
  72 $y =~ s/(?<!\@)[^\w\@\s]//g;
  73
  74 # Remove the escaping @s
  75
  76 #$x =~ s/\@(.)/$1/g;
  77 #$y =~ s/\@(.)/$1/g;
  78
  79
  80
  81 ################old ########################
  82 #$x =~ s/:(\w+):/$1/g;      # :fail: etc => fail
  83 #$y =~ s/:(\w+):/$1/g;
  84
  85 #$x =~ s/^\@[^a-z]+/\@/i;   # Make keys starting with @
  86 #$y =~ s/^\@[^a-z]+/\@/i;   # sort on @ followed by the first letter
  87 ##############################################3
  88
  89
  90 $x =~ s/\@_/\x7f/g;        # Make underscore sort late (option names)
  91 $y =~ s/\@_/\x7f/g;
  92
  93 # Split up to sort on individual parts
  94
  95 my($xp,$xs,$xr,$xn) = $x =~ /$keysplit/;
  96 my($yp,$ys,$yr,$yn) = $y =~ /$keysplit/;
  97
  98 $xr = "" if !defined $xr;
  99 $yr = "" if !defined $yr;
 100
 101 $xs = "" if !defined $xs;
 102 $ys = "" if !defined $ys;
 103
 104 if ($show_keys)
 105   {
 106   print "a=$a\n  x=$x\n  xp=$xp\n  xs=$xs\n  xr=$xr\n  xn=$xn\n";
 107   print "b=$b\n  y=$y\n  yp=$yp\n  ys=$ys\n  yr=$yr\n  yn=$yn\n";
 108   }
 109
 110 my ($c) = "\L$xp" cmp "\L$yp";        # Caseless, primary text only
 111 $c = $xp cmp $yp if $c == 0;          # Caseful, primary text only
 112 $c = "\L$xs" cmp "\L$ys" if $c == 0;  # Caseless, secondary text only
 113 $c = $xs cmp $ys if $c == 0;          # Caseful, secondary text only
 114 $c = $xn <=> $yn if $c == 0;          # Compare the numbers
 115 $c = $xr cmp $yr if $c == 0;          # Sort RA before RZ
 116 return $c;
 117 }
 118
 119
 120
 121 ##############################################################################
 122 # Function for getting the next line from the @lines vector, using the global
 123 # index $1. If the next pair of lines specifies a range of pages, combine them.
 124 # That's why $linenumber has to be global - so we can increment it. If there's
 125 # a range error, return "".
 126
 127 sub getnextentry {
 128 my($line) = $lines[$linenumber];
 129 my($aa,$zz,$tline,$nextline,$tnextline);
 130
 131 if ($line =~ / RA (\d+)/)
 132   {
 133   $aa = $1;
 134   $nextline = $lines[++$linenumber];
 135   if ($nextline =~ / RZ (\d+)/)
 136     {
 137     $zz = $1;
 138     }
 139   else
 140     {
 141     print STDERR "** Bad range data (1)\n";
 142     print STDERR "   $line\n";
 143     print STDERR "   $nextline\n";
 144     return "";
 145     }
 146
 147   $tline = $line;
 148   $tnextline = $nextline;
 149
 150   $tline =~ s/ RA \d+//;
 151   $tnextline =~ s/ RZ \d+//;
 152
 153   if ($tline ne $tnextline)
 154     {
 155     print STDERR "** Bad range data (2)\n";
 156     print STDERR "   $line\n";
 157     print STDERR "   $nextline\n";
 158     return "";
 159     }
 160
 161   $line = ($aa eq $zz)? "$tline $aa" : "$tline $aa--$zz";
 162   }
 163
 164 elsif ($line =~ / RZ (\d+)/)
 165   {
 166   print STDERR "** Bad range data (RZ without RA)\n";
 167   print STDERR "   $line\n";
 168   return "";
 169   }
 170
 171 return $line
 172 }
 173
 174
 175
 176
 177 ##############################################################################
 178 # Function for outputting a line, checking for the current primary
 179 # and indenting a bit for secondaries. We also need a newpar
 180 # before each item, because the main indent is set to a largish indent
 181 # for long reference lists, but the parindent is set to counter this.
 182 # This is where we handle the break between letters. We know that any non-
 183 # alphamerics at the start of lines are markup, except for @. A reference
 184 # value of 99999 is for the "see also" lines. Suppress it.
 185
 186 sub outline {
 187 my($text,$ref) = ($_[0],$_[1]);
 188 my ($letter) = $text =~ /^[^A-Za-z0-9\@]*(.)/;
 189
 190 return if $text =~ /^\s*$/;
 191
 192 if ($ref eq "99999")    # dummy for see also
 193   {
 194   $ref = ""
 195   }
 196 else
 197   {
 198   $ref = "#$ref";       # prepend space
 199   }
 200
 201 if ($letter =~ /\d/) { $letter = "0"; } else { $letter = "\U$letter"; }
 202
 203 print OUT ".newpar\n";
 204
 205 if ($letter ne $currentletter && $letter ge "A")
 206   {
 207   print OUT ".newletter\n";
 208   $currentletter = $letter;
 209   }
 210
 211 $text =~ s/\@'/\$'/g;   # Turns @' into $' so that it prints a non-curly quote
 212
 213 if ($text =~ /^(.+)\|\|(.*)$/)
 214   {
 215   my($primary,$secondary) = ($1,$2);
 216
 217   if ($primary ne $lastprimary)
 218     {
 219     print OUT ".primary $primary\n";
 220     $lastprimary = $primary;
 221     }
 222
 223   $primary =~ s/"/""/g;
 224   $secondary =~ s/"/""/g;
 225
 226   my($contprim) = $primary;
 227   $contprim =~ s/ \(\\\*see also\*\\[^)]+\)//;
 228
 229   print OUT ".secondary \"$primary\" \"$secondary$ref\" \"$contprim\"\n";
 230   }
 231
 232 # Not a two-part item; insert @ if the first char is a dot
 233
 234 else
 235   {
 236   print OUT "@" if $text =~ /^\./;
 237   print OUT "$text$ref\n";
 238   $lastprimary = $text;
 239   }
 240 }
 241
 242
 243
 244
 245
 246 ##############################################################################
 247 # The main script
 248
 249 $save_sorted = 0;
 250 $test_index = 0;
 251 $show_keys = 0;
 252
 253 while (@ARGV > 0)
 254   {
 255   my($arg) = shift @ARGV;
 256   if    ($arg eq "-k") { $show_keys = 1; }
 257   elsif ($arg eq "-s") { $save_sorted = 1; }
 258   elsif ($arg eq "-t") { $test_index = $save_sorted = 1; }
 259   else  { die "Unknown option $arg\n"; }
 260   }
 261
 262 if ($test_index)
 263   {
 264   open(IN, "z-testindex") || die "Can't open z-testindex\n";
 265   }
 266 else
 267   {
 268   open(IN, "z-rawindex") || die "Can't open z-rawindex\n";
 269   }
 270
 271 open(OUT, ">z-index")  || die "Can't open z-index\n";
 272
 273 # Extract index lines ($e lines are contents). Until we hit the first
 274 # $e line, we are dealing with "see also" index lines, for which we want
 275 # to turn the line number into 99999.
 276
 277 $#lines = -1;
 278 $prestuff = 1;
 279
 280 while (<IN>)
 281   {
 282   s/\n$//;
 283   if (/\$e/)
 284     {
 285     $prestuff = 0;
 286     }
 287   else
 288     {
 289     s/(\D)$/$1 99999/ if $prestuff;          # No number in "see also"
 290     push(@lines, $_);
 291     }
 292   $index_pagenumber = $1 if /^Index\$e(\d+)/;
 293   }
 294 close(IN);
 295
 296 # Sort, ignoring markup
 297
 298 print STDERR "Sorting ...\n";
 299 @lines = sort cf @lines;
 300
 301 # Keep a copy of the sorted data, for reference
 302
 303 if ($save_sorted)
 304   {
 305   open(X, ">z-indexsorted") || die "Can't open z-indexsorted\n";
 306   foreach $line (@lines)
 307    {
 308    print X "$line\n";
 309    }
 310   close(X);
 311   }
 312
 313 # Heading for the index file
 314
 315 print OUT <<"EOF";
 316 .library "a4ps"
 317 .linelength ~~sys.linelength + 16.0
 318
 319 .include "markup.sg"
 320
 321 .indent 3em
 322 .parspace 0
 323 .parindent -3em
 324 .justify left
 325 .
 326 .foot
 327 \$c [~~sys.pagenumber]
 328 .endfoot
 329 .
 330 .cancelflag #
 331 .flag # "\$S*1"
 332 .set INDEX true
 333 .
 334 .macro primary "text"
 335 .if ~~sys.leftonpage < 2ld
 336 .newcolumn
 337 .fi
 338 ~~1
 339 .newpar
 340 .endm
 341 .
 342 .macro secondary "prim" "sec" "contprim"
 343 .if ~~sys.leftonpage < 1ld
 344 .newcolumn
 345 .newpar
 346 ~~3 \$it\{(continued)\}
 347 .newpar
 348 .fi
 349 ##~~2
 350 .endm
 351 .
 352 .macro newletter
 353 .if ~~sys.leftonpage < 4ld
 354 .newcolumn
 355 .else
 356 .space 1ld
 357 .fi
 358 .newpar
 359 .endm
 360 .
 361 .set chapter -1
 362 .page $index_pagenumber
 363 .chapter Index
 364 .columns 2
 365 .newpar
 366 .
 367 EOF
 368
 369 # Process the lines and output the result.
 370 # Note that $linenumber is global, and is changed by getnextentry() for
 371 # pairs of lines that represent ranges.
 372
 373 $lastprimary = "";
 374 $lastref = "";
 375 $currenttext = $currentref = "";
 376 $currentletter = "";
 377 $badrange = 0;
 378
 379 print STDERR "Processing ...\n";
 380
 381 for ($linenumber = 0; $linenumber < @lines; $linenumber++)
 382   {
 383   $line = &getnextentry();
 384
 385   if ($line eq "")   # Bad range data - but carry on to get all of it
 386     {
 387     $badrange = 1;
 388     next;
 389     }
 390
 391   # Split off the text and reference
 392
 393   ($text,$ref) = $line =~ /^(.*)\s+([\d-]+)$/;
 394
 395   # If same as current text, just add the new reference, unless its a duplicate
 396
 397   if ($text eq $currenttext)
 398     {
 399     if ($ref ne $lastref)
 400       {
 401       $currentref .= ", $ref";
 402       $lastref = $ref;
 403       }
 404     next;
 405     }
 406
 407   # Not the same as the current text. Output the current text, then
 408   # set up a new current.
 409
 410   &outline($currenttext, $currentref);
 411
 412   $currenttext = $text;
 413   $currentref = $lastref = $ref;
 414   }
 415
 416 # Output the final line and close the file
 417
 418 &outline($currenttext, $currentref);
 419 close(OUT);
 420
 421 die "** Aborted\n" if $badrange;
 422
 423 # Format the index
 424
 425 system("sgcal z-index -to zi-gcode -index /dev/null");
 426 system("sgtops zi-gcode -to zi-ps");
 427 print "PostScript in zi-ps\n";
 428
 429 # End