#!/usr/bin/perl
#
# eximhtml2txt.pl - convert HTML documentation chapters (ch*.html) to text.
#
# Provenance: script/eximhtml2txt.pl (new file, mode 100755)
#   From:      Nigel Metheringham
#   Date:      Fri, 24 Sep 2010 15:49:15 +0000 (+0100)
#   Subject:   Initial script to convert docs to text
#   X-Git-Tag: exim-4_89_1~88
#   X-Git-Url: https://git.exim.org/exim-website.git/commitdiff_plain/c6d3a19107d0dc596f04ba74ff18a6988b000441
#
# Usage: eximhtml2txt.pl <directory>
#
# Every file matching <directory>/ch*.html is rendered to plain text and
# written to STDOUT, framed by 72-column separator lines.
#
use strict;
use warnings;

use File::Glob qw(bsd_glob);
use HTML::FormatText;
use HTML::TreeBuilder;

# process_chapter($fn)
#
# Parse the HTML file $fn, take the first <div class="chapter"> element and
# return it formatted as plain text (left margin 0, right margin 72).
#
# Returns '' when the file has no such element.
# Dies when the file cannot be opened for parsing.
sub process_chapter {
    my ($fn) = @_;

    my $tree = HTML::TreeBuilder->new;

    # parse_file() yields undef (with $! set) if the file cannot be opened;
    # report that clearly instead of failing on a method call on undef.
    unless ( $tree->parse_file($fn) ) {
        my $err = $!;
        $tree->delete;
        die "Cannot read $fn: $err\n";
    }

    my $text = '';

    # look_down returns every match in list context; only the first is used.
    my ($chapter) = $tree->look_down( "_tag", "div", "class", "chapter", );
    if ($chapter) {
        my $formatter =
            HTML::FormatText->new( leftmargin => 0, rightmargin => 72 );
        $text = $formatter->format($chapter);
    }

    # Release the tree on every path, including the "no chapter found" one,
    # so that processing many files does not accumulate parsed trees.
    $tree->delete;

    return $text;
}

my $dir = shift;
die "Usage: $0 <directory>\n" unless defined $dir && length $dir;

# bsd_glob does not split its pattern on whitespace, so directory names
# containing spaces are matched as a single path.
foreach my $fn ( bsd_glob("$dir/ch*.html") ) {
    print "=" x 72, "\n";
    print $fn, "\n";
    print "=" x 72, "\n";
    print process_chapter($fn);
    print "-" x 72, "\n";
}