Sunday, January 03, 2010

Make your mbox attachments searchable in Thunderbird.

#!/bin/perl
# filename : mboxtagattachments
# usaage : cat Inbox | mboxtagattachments > newInbox
# searcch tag "X-META-01" in thunderbird

$OUTPUTENCODING = "UTF-8";

use MIME::Base64 qw( encode_base64 decode_base64 );
use Text::Iconv;
use Data::Dumper;

$STATE = "INIT";
$LASTLINE = 1;
while (($_=) || $LASTLINE) {
if (!$_) {
$LASTLINE = 0;
$_ = "From -";
}
s/[\n\r]*$//g;
$_ .= "\r\n";
if (/^From -/) {
if ($RAW_PROLOG) {
## -------------------------------------------
## HANDLE MAIL THAT ENDED
## -------------------------------------------

%subvalues = getSubValues($MAIL_HEADER{"Content-Type"});
$CHARSET = $subvalues{"charset"};

# print STDERR "$CHARSET\n";
# if ($CHARSET) {
# $MAIL_BODY = Text::Iconv->new($CHARSET, $OUTPUTENCODING)->convert($MAIL_BODY);
#}
#print STDERR Dumper(\%MAIL_HEADER);

print $RAW_PROLOG;
for (keys %MAIL_ATTACHMENTS) {
$fn = $MAIL_ATTACHMENTS{$_};
if ($fn =~ /[^[:alnum:][:punct:][:space:]]/) {
if ($CHARSET) {
$fn = Text::Iconv->new($OUTPUTENCODING, $CHARSET)->convert($fn);
}
}
print "X-META-01: $fn\r\n";
}
print $RAW_HEADER;
print "\r\n";
print $RAW_BODY;
}
$RAW_PROLOG = $_;
$RAW_HEADER = "";
$RAW_BODY = "";
%MAIL_HEADER = ();
$MAIL_BODY = "";
%MAIL_ATTACHMENTS = ();
$STATE = "HEADER";
$KEY = "";
next;
}
if ($STATE eq "HEADER") {
if (/^\s*$/ && ($MAIL_HEADER{"From"} ne "")) {
$STATE = "BODY";
if ($MAIL_HEADER{"Content-Type"} =~ /multipart/) {

%subvalues = getSubValues($MAIL_HEADER{"Content-Type"});
$partbody_boundary = $subvalues{"boundary"};

%partbody_headers = ();
$partbody_count = 0;
$partbody_filename = "";
$KEY = "";
$STATE = "PART-HEADER";
}
} else {
if (/^X-META-01:/) {
next;
} else {
$RAW_HEADER .= $_;
}
}
if (/^\s/) {
chomp;
$KEY = $PREVKEY;
$VALUE = $_;
} else {
($KEY, $VALUE) = /^(\S[^:]*):(.*)/;
$PREVKEY = $KEY;
}
if ($KEY eq "Subject") {
$VALUE =~ s/^\s*//;
$VALUE = decode($VALUE);
}
if ($KEY) {
$MAIL_HEADER{$KEY} .= $VALUE;
}
next;
}
if ($STATE eq "PART-BODY") {
$RAW_BODY .= $_;
if (/$partbody_boundary/) {
{
## -----------------------------------
## HANDLE COMPLETED PART
## -----------------------------------

%subvalues = getSubValues($partbody_headers{"Content-Type"});
$partbody_filename = decode($subvalues{"name"});

if ($partbody_filename eq "") {
%subvalues = getSubValues($partbody_headers{"Content-Disposition"});
$partbody_filename = $subvalues{"filename"};
}

# if ($partbody_filename eq "") {
# for (my $c=0 ; $subvalues{"filename*$c*"}; $c++) {
# $partbody_filename .= $subvalues{"filename*$c*"};
# }
# $partbody_filename = decode($partbody_filename, "url");
# }

if ($partbody_filename) {
$MAIL_ATTACHMENTS{$partbody_count} = $partbody_filename;
}

# print "PART $partbody_count\n";
# print Dumper(\%partbody_headers);
# print "FILENAME $partbody_filename\n";
# print $partbody;
}
$STATE = "PART-HEADER";
$partbody = "";
$partbody_count++;
$partbody_filename = "";
} else {
$partbody .= $_;
}
next;
}
if ($STATE eq "PART-HEADER") {
$RAW_BODY .= $_;
if (/^\s*$/) {
$STATE = "PART-BODY";
}
if (/^\s/) {
chomp;
if ($_) {
$partbody_headers{$PREVKEY} .= "\n".$_;
}
next;
} else {
($KEY, $VALUE) = /^(\S[^:]*):(.*)/;
if ($KEY && $VALUE) {
$partbody_headers{$KEY} = $VALUE;
}
$PREVKEY = $KEY;
}
next;
}
if ($STATE eq "BODY") {
$RAW_BODY .= $_;
$MAIL_BODY .= $_;
next;
}
}

sub getSubValues {
my $value = shift @_;
my %subvalues = ();
my $initial;
my $oldvalue = "";

($initial, $value) = $value =~ m/^([^;\n\r]*)[;[:space:]\n\r]*(.*)/sg;
$subvalues{""} = $initial;
while ($value) {
($key, $value) = $value =~ /([^=]*)=(.*)/s;
if ($value =~ /^"/) {
($keyvalue, $value) = $value =~ /"([^"]*)"[[:space:]\n\r]*(.*)/s;
} else {
($keyvalue, $value) = $value =~ /\s*([^;[:space:]]*)[;[:space:]\n\r]*(.*)/s;
}
$subvalues{$key} = $keyvalue;
if ($oldvalue eq $value) {
break;
}
$oldvalue = $value;
}
return %subvalues;
}

sub encode {
my $fn = shift @_;
my $encoding = shift @_;
my $charset = shift @_;
if ($charset eq "") {
$charset = $OUTPUTENCODING;
}
if ($encoding eq "url") {
$fn = "$charset''".URLEncode(Text::Iconv->new($OUTPUTENCODING, $charset)->convert($fn));
} else {
$fn = encode_base64(Text::Iconv->new($OUTPUTENCODING, $charset)->convert($fn));
chomp($fn);
$fn = "=?$charset?$fn?=";
}
return $fn;
}
sub decode {
my $fn = shift @_;
my $encoding = shift;
if ($encoding eq "url") {
$fn =~ s/([^']*?)''([^;]*?);/Text::Iconv->new($1, $OUTPUTENCODING)->convert(URLDecode($2))/eg;
} else {
$fn =~ s/=\?([^\?]*?)\?([^\?]*?)\?([^\?]*?)\?=/Text::Iconv->new($1, $OUTPUTENCODING)->convert(decode_base64($3))/eg;
}
return $fn;
}
sub URLDecode {
my $theURL = $_[0];
$theURL =~ tr/+/ /;
$theURL =~ s/%([a-fA-F0-9]{2,2})/chr(hex($1))/eg;
$theURL =~ s///g;
return $theURL;
}
sub URLEncode {
my $theURL = $_[0];
$theURL =~ s/([\W])/%".uc(sprintf("%2.2x",ord($1)))/eg;
return $theURL;
}
exit;