Initial script to convert docs to text redesign2010
author Nigel Metheringham <nigel.metheringham@dev.intechnology.co.uk>
Fri, 24 Sep 2010 15:49:15 +0000 (16:49 +0100)
committer Nigel Metheringham <nigel.metheringham@dev.intechnology.co.uk>
Fri, 24 Sep 2010 15:49:15 +0000 (16:49 +0100)
script/eximhtml2txt.pl [new file with mode: 0755]

diff --git a/script/eximhtml2txt.pl b/script/eximhtml2txt.pl
new file mode 100755 (executable)
index 0000000..02a5b49
--- /dev/null
+++ b/script/eximhtml2txt.pl
@@ -0,0 +1,29 @@
+#!/usr/bin/perl
+#
+use strict;
+use warnings;
+
+use HTML::FormatText;
+use HTML::TreeBuilder;
+
+# Render the <div class="chapter"> of HTML file $fn as plain text (72-column
+# right margin). Returns '' if there is no such div; dies if $fn is unreadable.
+sub process_chapter {
+    my ($fn) = @_;
+    my $tree = HTML::TreeBuilder->new->parse_file($fn)
+        or die "Cannot read $fn: $!";
+    my ($chapter) = $tree->look_down( _tag => 'div', class => 'chapter' );
+    my $formatter = HTML::FormatText->new( leftmargin => 0, rightmargin => 72 );
+    my $text      = $chapter ? $formatter->format($chapter) : '';
+    $tree->delete;    # release the tree on every path, including "no chapter"
+    return $text;
+}
+
+# Print each ch*.html chapter under the directory given as the first argument.
+die "Usage: $0 <html-dir>\n" unless @ARGV;
+my $dir = shift;
+foreach my $fn ( glob("$dir/ch*.html") ) {
+    print "=" x 72, "\n", $fn, "\n", "=" x 72, "\n";    # file-name banner
+    print process_chapter($fn);
+    print "-" x 72, "\n";
+}