#!/usr/bin/perl
use strict;
use warnings;
## See documentation below. Script will require customization
use File::Find;
use Date::Parse;
use HTML::TreeBuilder;
use Data::Dumper;
# Anti-spam filler text found inside archived email addresses.  output_file()
# substitutes it with a neutral marker before the address is written out.
my $dumb_spamblock = '(at)not-real.';

# The first command-line argument is the directory to search (or the literal
# word 'debug', see below).  Test for "defined and non-empty" rather than for
# truthiness so that a directory literally named "0" is not rejected.
my $dir = shift;
die "must specify directory to search"
    unless defined $dir && length $dir;

if ( $dir eq 'debug' ) {
    # The remaining arguments are handed to debug().
    # NOTE(review): debug() is defined outside this section; unless it exits,
    # execution continues below and searches a directory named 'debug/' --
    # confirm that this is intended.
    debug(@ARGV);
}

# Guarantee a trailing slash, so that stripping $dir from a found path (see
# the currently commented-out substitution in output_file) would leave a
# relative path without a leading slash.
$dir .= '/' unless $dir =~ /\/$/;

# Do all the work: wanted() is called for every entry found below $dir.
find( { wanted => \&wanted }, $dir );
# File::Find callback.  With the options passed to find() above, File::Find
# sets $_ to the entry's base name and $File::Find::name to its full path.
# Entries named <digits>.html that are not directories are parsed and the
# result is handed to output_file(); everything else is ignored.
sub wanted {
    # Nothing to do for directories or for names that are not <digits>.html.
    return if -d $_ or $_ !~ /^\d+\.html$/;

    # Alternative parser, left disabled:
    #output_file( $File::Find::name, parse_file($_) );
    output_file( $File::Find::name, fast_parse($_) );
}
sub output_file {
my ( $file, $data ) = @_;
#$file =~ s/$dir//; # make path relative to top level
local $SIG{__WARN__} = sub { "$file: @_" };
# Get last_mod date
my $date = str2time( $data->{comments}{received} );
unless ( $data ) {
warn "Failed to parse received date in $file\n";
$date = str2time( $data->{comments}{send} );
unless ( $date ) {
warn "Failed to parse any dates: skipping $file\n";
return;
}
}
$data->{received} = $date;
my $comments = $data->{comments};
$comments->{email} =~ s/\Q$dumb_spamblock/-blabla-/;
my $metas = join "\n", map { qq[] }
sort keys %{$data->{comments}};
my $title = $comments->{subject} || '';
my $html = <
$title
$metas
$data->{body}