git://git.exim.org
/
exim.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Use xmlto; add to 'clean' target.
[exim.git]
/
doc
/
doc-docbook
/
Pre-xml
diff --git a/doc/doc-docbook/Pre-xml b/doc/doc-docbook/Pre-xml
index 4e28ada091c9d85bef7b9c546bc2afbff9261cc3..22b343d8db8b234b95ed28bf35b952f478ff6916 100755
(executable)
--- a/doc/doc-docbook/Pre-xml
+++ b/doc/doc-docbook/Pre-xml
@@ -1,27 +1,22 @@
#! /usr/bin/perl
#! /usr/bin/perl
-# $Cambridge: exim/doc/doc-docbook/Pre-xml,v 1.2 2005/11/10 12:30:13 ph10 Exp $
-
# Script to pre-process XML input before processing it for various purposes.
# Options specify which transformations are to be done. Monospaced literal
# layout blocks are never touched.
# Changes:
# Script to pre-process XML input before processing it for various purposes.
# Options specify which transformations are to be done. Monospaced literal
# layout blocks are never touched.
# Changes:
-# -abstract: Remove the <abstract> element
-
-# -ascii: Replace &8230; (sic, no x) with ...
-# Replace ’ by '
-# Replace “ by "
-# Replace ” by "
-# Replace – by -
-# Replace † by *
-# Replace ‡ by **
-# Replace   by a space
-# Replace © by (c)
-# Put quotes round <literal> text
+# -ascii: Replace ’ by '
+# Replace © by (c)
+# Replace † by *
+# Replace ‡ by **
+# Replace &nbsp; by a space
+# Replace – by -
# Put quotes round <quote> text
#
# Put quotes round <quote> text
#
+# -quoteliteral:
+# Put quotes round <literal> text
+#
# -bookinfo: Remove the <bookinfo> element from the file
#
# -fi: Replace "fi" by fi except when it is in an XML element, or
# -bookinfo: Remove the <bookinfo> element from the file
#
# -fi: Replace "fi" by fi except when it is in an XML element, or
@@ -29,16 +24,24 @@
#
# -html: Certain things are done only for HTML output:
#
#
# -html: Certain things are done only for HTML output:
#
-# If <literallayout> is followed by optional # space and then a
+# If <literallayout> is followed by optional space and then a
# newline, the space and newline are removed, because otherwise you
# get a blank line in the HTML output.
#
# newline, the space and newline are removed, because otherwise you
# get a blank line in the HTML output.
#
-# -noindex Remove the XML to generate a Concept and an Options index.
+# -noindex Remove the XML that generates indexes.
# -oneindex Ditto, but add XML to generate a single index.
# -oneindex Ditto, but add XML to generate a single index.
+#
+# -optbreak Insert an optional line break (zero-width space, ​) after
+# every underscore in text within <option> and <variable> elements,
+# except when preceded by <entry> (i.e. not in tables). The same is
+# also done within a word of four or more upper-case letters (for
+# compile-time options).
+#
+# -epub Convert date formats to comply with epub specification
+#
-
-# The function that processes non-literal monospaced text
+# The function that processes non-literal, non-monospaced text
sub process()
{
sub process()
{
@@ -46,17 +49,23 @@
my($s) = $_[0];
$s =~ s/fi(?![^<>]*>)/fi/g if $ligatures;
$s =~ s/fi(?![^<>]*>)/fi/g if $ligatures;
+if ($optbreak)
+ {
+ $s =~ s%(?<!<entry>)(<option>|<varname>)([^<]+)%
+ my($x,$y) = ($1,$2); $y =~ s/_/_​/g; "$x"."$y"%gex;
+
+ $s =~ s?\b([A-Z_]{4,})\b?
+ my($x) = $1; $x =~ s/_/_​/g; "$x"?gex;
+ }
+
if ($ascii)
{
if ($ascii)
{
- $s =~ s/…/.../g;
$s =~ s/’/'/g;
$s =~ s/’/'/g;
- $s =~ s/“/"/g;
- $s =~ s/”/"/g;
- $s =~ s/–/-/g;
- $s =~ s/†/*/g;
- $s =~ s/‡/**/g;
- $s =~ s/ / /g;
- $s =~ s/©/(c)/g;
+ $s =~ s/©/(c)/g;
+ $s =~ s/†/*/g;
+ $s =~ s/‡/**/g;
+ $s =~ s/&nsbp;/ /g;
+ $s =~ s/–/-/g;
$s =~ s/<quote>/"/g;
$s =~ s/<\/quote>/"/g;
}
$s =~ s/<quote>/"/g;
$s =~ s/<\/quote>/"/g;
}
@@ -64,12 +73,18 @@
if ($ascii)
$s;
}
$s;
}
+# Mapping needed for epub
+
+my %months = (
+ jan => '01', feb => '02', mar => '03', apr => '04', may => '05', jun => '06',
+ jul => '07', aug => '08', sep => '09', oct => '10', nov => '11', dec => '12',
+);
# The main program
# The main program
-$abstract = 0;
$ascii = 0;
$bookinfo = 0;
$ascii = 0;
$bookinfo = 0;
+$epub = 0;
$html = 0;
$inliteral = 0;
$inliterallayout = 0;
$html = 0;
$inliteral = 0;
$inliterallayout = 0;
@@ -77,25 +92,25 @@
$ligatures = 0;
$madeindex = 0;
$noindex = 0;
$oneindex = 0;
$madeindex = 0;
$noindex = 0;
$oneindex = 0;
+$optbreak = 0;
+$quoteliteral = 0;
foreach $arg (@ARGV)
{
if ($arg eq "-fi") { $ligatures = 1; }
foreach $arg (@ARGV)
{
if ($arg eq "-fi") { $ligatures = 1; }
- elsif ($arg eq "-abstract") { $abstract = 1; }
elsif ($arg eq "-ascii") { $ascii = 1; }
elsif ($arg eq "-bookinfo") { $bookinfo = 1; }
elsif ($arg eq "-ascii") { $ascii = 1; }
elsif ($arg eq "-bookinfo") { $bookinfo = 1; }
+ elsif ($arg eq "-epub") { $epub = 1; }
elsif ($arg eq "-html") { $html = 1; }
elsif ($arg eq "-noindex") { $noindex = 1; }
elsif ($arg eq "-oneindex") { $oneindex = 1; }
elsif ($arg eq "-html") { $html = 1; }
elsif ($arg eq "-noindex") { $noindex = 1; }
elsif ($arg eq "-oneindex") { $oneindex = 1; }
+ elsif ($arg eq "-optbreak") { $optbreak = 1; }
+ elsif ($arg eq "-quoteliteral") { $quoteliteral = 1; }
else { die "** Pre-xml: Unknown option \"$arg\"\n"; }
}
while (<STDIN>)
{
else { die "** Pre-xml: Unknown option \"$arg\"\n"; }
}
while (<STDIN>)
{
- # Remove <abstract> if required
-
- next if ($abstract && /^\s*<abstract>/);
-
# Remove <bookinfo> if required
if ($bookinfo && /^<bookinfo/)
# Remove <bookinfo> if required
if ($bookinfo && /^<bookinfo/)
@@ -136,6 +151,25 @@
while (<STDIN>)
next;
}
next;
}
+ # Adjust dates to YYYY-MM-DD
+
+ if ($epub && /^\s*<date[\s>]/)
+ {
+ print;
+ while (<STDIN>)
+ {
+ last if /^\s*<\/date>/;
+
+ if (/^ \s* (\d{1,2}) \s+ ([a-zA-Z]{3}) \s+ (\d{4})/x)
+ {
+ die "Unknown month '$2'\n" unless exists $months{lc $2};
+ my $month = $months{lc $2};
+ s//$3-$month-$1/;
+ }
+ print;
+ }
+ }
+
# A line that is not in a monospaced literal block; keep track of which
# parts are in <literal> and which not. The latter get processed by the
# function above. Items in <literal> get quoted unless they are also in
# A line that is not in a monospaced literal block; keep track of which
# parts are in <literal> and which not. The latter get processed by the
# function above. Items in <literal> get quoted unless they are also in
@@ -152,7 +186,7 @@
while (<STDIN>)
if (/^(.*?)<\/literal>(?!<\/quote>)(.*)$/)
{
print $1;
if (/^(.*?)<\/literal>(?!<\/quote>)(.*)$/)
{
print $1;
- print "\"" if $ascii && !$inliterallayout;
+ print "\"" if $quoteliteral && !$inliterallayout;
print "</literal>";
$inliteral = 0;
$_ = "$2\n";
print "</literal>";
$inliteral = 0;
$_ = "$2\n";
@@ -172,7 +206,7 @@
while (<STDIN>)
{
print &process($1);
print "<literal>";
{
print &process($1);
print "<literal>";
- print "\"" if $ascii && !$inliterallayout;
+ print "\"" if $quoteliteral && !$inliterallayout;
$inliteral = 1;
$_ = "$2\n";
}
$inliteral = 1;
$_ = "$2\n";
}