3 # $Cambridge: exim/doc/doc-docbook/Pre-xml,v 1.1 2005/06/16 10:32:31 ph10 Exp $
5 # Script to pre-process XML input before processing it for various purposes.
6 # Options specify which transformations are to be done. Monospaced literal
7 # layout blocks are never touched.
11 # -abstract: Remove the <abstract> element
13 # -ascii: Replace &8230; (sic, no x) with ...
14 # Replace ’ by '
15 # Replace “ by "
16 # Replace ” by "
17 # Replace – by -
18 # Replace † by *
19 # Replace ‡ by **
20 # Replace a non-breaking space by a space
21 # Replace © by (c)
22 # Put quotes round <literal> text
23 # Put quotes round <quote> text
25 # -bookinfo: Remove the <bookinfo> element from the file
27 # -fi: Replace "fi" by the fi ligature except when it is in an XML element, or
30 # -noindex: Remove the XML to generate a Concept and an Options index.
31 # -oneindex: Ditto, but add XML to generate a single index.
35 # The function that processes non-literal monospaced text
41 $s =~ s/fi(?![^<>]*>)/fi/g if $ligatures;  # runs only with -fi; the lookahead rejects a match that is followed by a closing angle bracket with no other bracket in between, i.e. one inside a tag. NOTE(review): the replacement shown equals the matched text, while the header says it should be the fi ligature - confirm the file encoding
45 $s =~ s/…/.../g;  # every ellipsis character becomes three dots
51 $s =~ s/‡/**/g;  # every double dagger becomes two asterisks
53 $s =~ s/©/(c)/g;  # every copyright sign becomes (c)
55 $s =~ s/<\/quote>/"/g;
75 if ($arg eq "-fi") { $ligatures = 1; }  # option dispatch: each recognised argument sets one flag; this one gates the fi substitution in the text-processing function
76 elsif ($arg eq "-abstract") { $abstract = 1; }  # tested when a line starts with the abstract tag
77 elsif ($arg eq "-ascii") { $ascii = 1; }  # tested when printing the quote character around literal text
78 elsif ($arg eq "-bookinfo") { $bookinfo = 1; }  # tested when a line starts with the bookinfo tag
79 elsif ($arg eq "-noindex") { $noindex = 1; }  # tested when a line starts an index element
80 elsif ($arg eq "-oneindex") { $oneindex = 1; }  # as -noindex, and also gates printing of the single replacement index; any other argument reaches the die on the next line
81 else { die "** Pre-xml: Unknown option \"$arg\"\n"; }
86 # Remove <abstract> if required
88 next if ($abstract && /^\s*<abstract>/);  # with -abstract, skip the current line when it starts with the abstract tag (leading whitespace allowed)
90 # Remove <bookinfo> if required
92 if ($bookinfo && /^<bookinfo/)  # with -bookinfo; the tag must begin at column 0, unlike the abstract test above
94 while (<STDIN>) { last if /^<\/bookinfo/; }  # read and drop input lines until one starts with the closing bookinfo tag, or until end of input
98 # Copy monospaced literallayout blocks
100 if (/^<literallayout class="monospaced">/)  # matched regardless of options; the header states these blocks are never touched
106 last if /^<\/literallayout>/;  # leaves the enclosing loop (not visible here) at the closing tag
111 # Adjust index-generation code if required
113 if (($noindex || $oneindex) && /^<index[\s>]/)  # either option acts on a line starting an index element; the character class requires whitespace or a closing bracket right after the tag name
117 last if /^<\/index>/;  # leaves the enclosing loop (not visible here) at the closing index tag
120 if ($oneindex && !$madeindex)  # NOTE(review): presumably madeindex is set in lines not shown so the single index is emitted only once - confirm
123 print "<index><title>Index</title></index>\n";
129 # A line that is not in a monospaced literal block; keep track of which
130 # parts are in <literal> and which not. The latter get processed by the
137 if (/^(.*?)<\/literal>(.*)$/)  # presumably the in-literal branch (the state test is not visible); non-greedy, so capture 1 is the text before the first closing literal tag and capture 2 is the rest of the line
140 print "\"" if $ascii;
152 # Not in literal state
156 if (/^(.*?)<literal>(.*)$/)  # non-greedy, so capture 1 is the text before the first opening literal tag and capture 2 is the rest of the line
160 print "\"" if $ascii;
170 } # Loop for different parts of one line
171 } # Loop for multiple lines