3 # $Cambridge: exim/doc/doc-docbook/Pre-xml,v 1.2 2005/11/10 12:30:13 ph10 Exp $
5 # Script to pre-process XML input before processing it for various purposes.
6 # Options specify which transformations are to be done. Monospaced literal
7 # layout blocks are never touched.
11 # -abstract: Remove the <abstract> element
13 # -ascii: Replace &8230; (sic, no x) with ...
14 # Replace ’ by '
15 # Replace “ by "
16 # Replace ” by "
17 # Replace – by -
18 # Replace † by *
19 # Replace ‡ by **
20 # Replace a non-breaking space by a space
21 # Replace © by (c)
22 # Put quotes round <literal> text
23 # Put quotes round <quote> text
25 # -bookinfo: Remove the <bookinfo> element from the file
27 # -fi: Replace "fi" by the fi ligature except when it is in an XML element, or
30 # -html: Certain things are done only for HTML output:
32 # If <literallayout> is followed by optional space and then a
33 # newline, the space and newline are removed, because otherwise you
34 # get a blank line in the HTML output.
36 # -noindex: Remove the XML that generates a Concept and an Options index.
37 # -oneindex: Ditto, but add XML to generate a single index.
41 # The function that processes non-literal monospaced text
# Only part of this function is visible in this excerpt. The visible statements
# edit the string held in $s in place, each with a global (/g) substitution.
#
# The ligature step runs only when $ligatures is set. Its negative lookahead
# (?![^<>]*>) rejects any "fi" that is followed by a ">" with no "<" or ">" in
# between, i.e. an "fi" that sits inside an XML tag.
# NOTE(review): as written, the pattern and the replacement are both the two
# letters "fi", so this substitution changes nothing. The replacement was
# presumably meant to be the fi ligature - confirm against the upstream script.
47 $s =~ s/fi(?![^<>]*>)/fi/g if $ligatures;
# An ellipsis becomes three full stops.
# NOTE(review): the file header describes this step as replacing the text
# "&8230;", whereas the pattern here is a literal ellipsis character - confirm
# which form the input really contains.
51 $s =~ s/…/.../g;
# A double dagger becomes two asterisks.
57 $s =~ s/‡/**/g;
# A closing </quote> tag becomes a plain double-quote character.
61 $s =~ s/<\/quote>/"/g;
# Translate one command-line option, held in $arg, into the flag variable it
# switches on. Each recognised option sets its flag to 1. The code that assigns
# $arg is not visible in this excerpt.
83 if ($arg eq "-fi") { $ligatures = 1; }
84 elsif ($arg eq "-abstract") { $abstract = 1; }
85 elsif ($arg eq "-ascii") { $ascii = 1; }
86 elsif ($arg eq "-bookinfo") { $bookinfo = 1; }
87 elsif ($arg eq "-html") { $html = 1; }
88 elsif ($arg eq "-noindex") { $noindex = 1; }
89 elsif ($arg eq "-oneindex") { $oneindex = 1; }
# Any other option is fatal. The trailing "\n" in the message stops die from
# appending the script name and line number.
90 else { die "** Pre-xml: Unknown option \"$arg\"\n"; }
# NOTE(review): only fragments of the main processing loop are visible in this
# excerpt; the loop headers and most braces are missing, so the comments below
# describe individual statements rather than the full control flow.
95 # Remove <abstract> if required
# With -abstract, a line that starts with <abstract> (optionally preceded by
# whitespace) is skipped via "next" - presumably in the main input loop.
97 next if ($abstract && /^\s*<abstract>/);
99 # Remove <bookinfo> if required
101 if ($bookinfo && /^<bookinfo/)
# Read and discard input lines up to and including the one that starts with
# </bookinfo.
103 while (<STDIN>) { last if /^<\/bookinfo/; }
107 # Copy monospaced literallayout blocks
109 if (/^<literallayout class="monospaced">/)
# With -html, drop the final character of the current line (the header says the
# newline after <literallayout> is removed to avoid a blank line in HTML).
111 $_ = substr($_, 0, -1) if $html;
# The closing tag ends the enclosing loop.
116 last if /^<\/literallayout>/;
121 # Adjust index-generation code if required
# Applies when either -noindex or -oneindex was given and the line starts with
# an <index element (the tag name followed by whitespace or ">").
123 if (($noindex || $oneindex) && /^<index[\s>]/)
127 last if /^<\/index>/;
# With -oneindex, emit one combined index element. $madeindex guards this;
# presumably it is set after printing so the element appears only once -
# the assignment is not visible here.
130 if ($oneindex && !$madeindex)
133 print "<index><title>Index</title></index>\n";
139 # A line that is not in a monospaced literal block; keep track of which
140 # parts are in <literal> and which not. The latter get processed by the
141 # function above. Items in <literal> get quoted unless they are also in
142 # a <literallayout> block, or are already being quoted.
# With -html, drop the final character (the newline) when the line is an opening
# <literallayout ...> tag followed only by optional whitespace and a newline.
146 $_ = substr($_, 0, -1) if $html && /^<literallayout[^>]*>\s*\n$/;
# Track whether we are inside a <literallayout> block, based on lines that
# start with the opening or closing tag.
147 $inliterallayout = 1 if /^<literallayout/;
148 $inliterallayout = 0 if /^<\/literallayout/;
# Find the first </literal> that is not immediately followed by </quote>;
# $1 is the text before it and $2 the rest of the line.
152 if (/^(.*?)<\/literal>(?!<\/quote>)(.*)$/)
# In -ascii mode, emit a double quote unless inside a <literallayout> block.
155 print "\"" if $ascii && !$inliterallayout;
167 # Not in literal state
# Find the first <literal> that is not immediately preceded by <quote>;
# $1 is the text before it and $2 the rest of the line.
171 if (/^(.*?)(?<!<quote>)<literal>(.*)$/)
# In -ascii mode, emit a double quote unless inside a <literallayout> block.
175 print "\"" if $ascii && !$inliterallayout;
185 } # Loop for different parts of one line
186 } # Loop for multiple lines