From c6d3a19107d0dc596f04ba74ff18a6988b000441 Mon Sep 17 00:00:00 2001
From: Nigel Metheringham
Date: Fri, 24 Sep 2010 16:49:15 +0100
Subject: [PATCH] Initial script to convert docs to text

---
 script/eximhtml2txt.pl | 55 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100755 script/eximhtml2txt.pl

diff --git a/script/eximhtml2txt.pl b/script/eximhtml2txt.pl
new file mode 100755
index 0000000..02a5b49
--- /dev/null
+++ b/script/eximhtml2txt.pl
@@ -0,0 +1,55 @@
+#!/usr/bin/perl
+#
+# Convert the chapter pages of an HTML documentation tree to plain text.
+#
+# Usage: eximhtml2txt.pl <directory>
+#
+# Each file matching ch*.html in <directory> is parsed, its first
+# <div class="chapter"> element is rendered as text with a right margin
+# of 72, and the result is printed to STDOUT between separator lines.
+#
+use strict;
+use warnings;
+
+use File::Glob qw(bsd_glob);
+use HTML::FormatText;
+use HTML::TreeBuilder;
+
+# Render the first <div class="chapter"> element of an HTML file as text.
+#
+# Takes the path of the HTML file.  Returns the formatted text, or the
+# empty string when the file cannot be opened or holds no chapter div.
+sub process_chapter {
+    my $fn = shift;
+
+    # parse_file() returns undef when it cannot open the file.
+    my $tree = HTML::TreeBuilder->new->parse_file($fn);
+    unless ($tree) {
+        warn "Cannot parse $fn: $!\n";
+        return '';
+    }
+
+    my $text = '';
+    my ($chapter) = $tree->look_down( "_tag", "div", "class", "chapter", );
+    if ($chapter) {
+        my $formatter = HTML::FormatText->new( leftmargin => 0, rightmargin => 72 );
+        $text = $formatter->format($chapter);
+    }
+
+    # Free the tree on every path, including when no chapter was found.
+    $tree->delete;
+    return $text;
+}
+
+my $dir = shift;
+die "Usage: $0 <directory>\n" unless defined $dir;
+
+# bsd_glob() does not split its pattern on whitespace, so a directory
+# name containing spaces is matched as a single path.
+foreach my $fn ( bsd_glob("$dir/ch*.html") ) {
+    print "=" x 72, "\n";
+    print $fn, "\n";
+    print "=" x 72, "\n";
+    print process_chapter($fn);
+    print "-" x 72, "\n";
+}
-- 
2.30.2