#!/usr/bin/perl # Fix up html files after they've been extracted from .lit books or # .doc files. # Strip out all the formatting so that what's left is plain-jane html. $goout = ""; $lastline = ""; while ($line = <>) { chop $line; $line = $lastline . $line; # $a = chr(128); # $b = chr(255); # if ($line =~ /^.*([$a-$b]).*$/) { # print STDERR "here: $1, " . ord($1) . ", $line\n"; # } $line = replace_pattern ($line,"<[pP][^>]*>","
") if ($line =~ /<[pP]/); $line = replace_pattern ($line,"