I am unable to keep the build process from inserting spurious \x{c2}
characters into the created .txt files.
As a workaround, strip those characters in Tidytxt.
Add SanityTestText to do a final audit for non-ASCII characters in the
.txt files. Dependency: pcregrep if available; otherwise Perl is used.
/bin/rm -rf filter-txt.html
xmlto -x MyStyle-txt-html.xsl html-nochunks filter-txt.xml
w3m -dump filter-txt.html | ./Tidytxt >filter.txt
/bin/rm -rf filter-txt.html
xmlto -x MyStyle-txt-html.xsl html-nochunks filter-txt.xml
w3m -dump filter-txt.html | ./Tidytxt >filter.txt
+ ./SanityTestText filter.txt
# I have not found a way of making docbook2texi write its output anywhere
# other than the file name that it makes up. The --to-stdout option does not
# I have not found a way of making docbook2texi write its output anywhere
# other than the file name that it makes up. The --to-stdout option does not
/bin/rm -rf spec-txt.html
xmlto -x MyStyle-txt-html.xsl html-nochunks spec-txt.xml
w3m -dump spec-txt.html | ./Tidytxt >spec.txt
/bin/rm -rf spec-txt.html
xmlto -x MyStyle-txt-html.xsl html-nochunks spec-txt.xml
w3m -dump spec-txt.html | ./Tidytxt >spec.txt
+ ./SanityTestText spec.txt
# I have not found a way of making docbook2texi write its output anywhere
# other than the file name that it makes up. The --to-stdout option does not
# I have not found a way of making docbook2texi write its output anywhere
# other than the file name that it makes up. The --to-stdout option does not
--- /dev/null
+#!/bin/sh
+
+# Portability note:
+# This tool is only used in building spec.txt for a release, not used as
+# part of the normal build/install process, so only Maintainers are affected
+# by requirements here.
+
+filename="$1"
+
+if echo a | pcregrep -q a 2>/dev/null
+then
+ pcregrep -q '[^\x{20}-\x{7E}]' "$filename"
+ grepstatus=$?
+else
+ perl -ne 'BEGIN {$rv=1};END {exit $rv};
+ if (/[^\r\n\x{20}-\x{7E}]/) { $rv = 0; last }' < "$filename"
+ grepstatus=$?
+fi
+
+case $grepstatus in
+0)
+ echo >&2 "$0: found non-ASCII characters in $filename"
+ exit 1
+ ;;
+1)
+ exit 0
+ ;;
+2)
+ echo >&2 "$0: problem checking for non-ASCII characters in $filename"
+ exit 2
+ ;;
+*)
+ echo >&2 "$0: unhandled return value from pcregrep: $grepstatus"
+ exit 3
+ ;;
+esac
$line =~ s/\x{e2}\x{95}\x{b2}/\\/g;
$line =~ s/\x{e2}\x{95}\x{b3}/X/g;
$line =~ s/\x{e2}\x{95}\x{b2}/\\/g;
$line =~ s/\x{e2}\x{95}\x{b3}/X/g;
+ # w3m rendering issue apparently only seen by pdp
+ # affects section numbers after the ToC, some info on spool-file -lines, etc
+ # always appears to be a spurious extra character, safely just dropped.
+ $line =~ s/\x{c2}//g;
+
if ($line =~ /^\s*$/)
{
$line = "" if $lastwasblank;
if ($line =~ /^\s*$/)
{
$line = "" if $lastwasblank;