#!/usr/local/bin/perl

############
# killtnef #
############

use strict;
use Convert::TNEF;
use MIME::Parser;

my $VERSION = 1.0;

my $message;
while (defined(my $line = <STDIN>)) {
  my $msg_bound;
  if ($line =~ /^(From\s+.+)\r?\n/ and length($message) or eof) {
    parse_message(\$message, $msg_bound); # All the action happens here...
    $message = "";
    $msg_bound=$line;
  } elsif ($line =~ /^(From\s+.+)\r?\n/) {
    $msg_bound=$line;  # The first 
  } 
  $message.=$line;
}
exit;


# Subroutines ###############################################################

sub parse_message {
  my $msg_body = shift @_;
  my $msg_bound = shift @_;
  my $mime_parser = new MIME::Parser;

  # This module likes to use tmp files, but I try to stop it here.
  $mime_parser->use_inner_files(1);
  $mime_parser->output_to_core(1);
  my $ent = $mime_parser->parse_data($$msg_body);
  my $num_parts=$ent->parts;

  # Determine if we have a MIME w/ms-tnef and act accordingly.
  if ( ($num_parts < 1) || ($$msg_body !~ /ms-tnef/i) ) {
    print "$$msg_body";
  } else {
    # Get the head info
    my $head = $ent->head;
    my $ReturnPath = $head->get('Return-Path');
    my @all_received = $head->get('Received');
    my $Date = $head->get('Date');
    my $From = $head->get('From');
    my $XSender = $head->get('X-Sender');
    my $To = $head->get('To');
    my $Subject = $head->get('Subject');
    my $MessageID = $head->get('Message-ID');

    # Build a new MIME message based on the one we are examining
    # - LHH: it would probably be better to build this $new_ent
    #        using $ent->head as the basis, thus getting *all* of
    #        the headers, instead of just these few.  We only needed
    #        these few headers for the project this script was
    #        originally written for, but if someone wants to change
    #        this and submit a patch, that would be great.
    my $new_ent = MIME::Entity->build(
				'Type'        => "multipart/mixed",
				'Return-Path' => $ReturnPath,
				'Date'        => $Date,
				'From'        => $From,
				'X-Sender'    => $XSender,
				'To'          => $To,
				'Subject'     => $Subject,
				'Message-ID'  => $MessageID
                                  );
    my $new_head=$new_ent->head;
    foreach my $received (@all_received) {
      $new_head->add('Received', $received);
    }

    # Loop over each MIME part adding each to the new message
    foreach my $mime_part_i (0 .. ($num_parts - 1)) {
      my $ent_part=$ent->parts($mime_part_i);
      if ($ent_part->mime_type =~ /ms-tnef/i )  {
        add_tnef_parts($ent_part, $new_ent);
      } else {
        $new_ent->add_part($ent_part);
      } 
    }

    # Set the preamble and epilogue equal to the original
    $new_ent->preamble($ent->preamble);
    $new_ent->epilogue($ent->epilogue);

    # Print the newly constructed MIME message
    print "$msg_bound"; 
    print STDOUT $new_ent->stringify;
  }
}

sub add_tnef_parts {
  my $ent = shift;
  my $new = shift;

  # LHH: This should probably be loaded from /etc/mime.types, or something.
  #      A command line argument to this script pointing it at a mime.types
  #      file is probably appropriate.
  my %content_type = (
                       txt  => "text/plain",
                       html => "text/html",
                       doc  => "application/msword",
                       xls  => "application/vnd.ms-excel",
                       ppt  => "application/vnd.ms-powerpoint",
                       pdf  => "application/pdf",
                       rtf  => "application/rtf",
                       zip  => "application/zip",
                       ps   => "application/ps",
                       wpd  => "application/wordperfect5.1",
                       wks  => "application/vnd.ms-works",
                       jpg  => "image/jpeg",
                       gif  => "image/gif",
                       tif  => "image/tif",
                       png  => "image/png"
                     );

  ## Create a tnef object
  my %TnefOpts=('output_to_core' => '4194304', 'output_dir' => '/tmp');
  my $tnef = Convert::TNEF->read_ent($ent, \%TnefOpts);
  my $head=$new->head;	# Get the header object from the new message
  if (! $tnef) {
    warn "TNEF CONVERT DID NOT WORK: " . $Convert::TNEF::errstr . "\n";
    warn "  - Failed on msg w/subj: " . $head->get('Subject') . "\n";
    return '';
  }

  #############################################################################
  # This section of code smokes lots of crack, and tries to dig the From:
  # header out of the $tnef->message if the new message we are appending
  # this attachment to does not already have a "From" header.  This is
  # required on most of the Outlook emails that never touch SMTP, only
  # Exchange servers, and never had valid SMTP From headers placed!
  #############################################################################
  my $msg=$tnef->message;
  my $mapi_props=$msg->data('MAPIProps');
  #warn join(", ", keys %{$msg->{MAPIProps}}) . "\n";
  #warn $msg->{MAPIProps}->{MBS_Data} . "\n\n----------------------------\n\n";
  #warn "$mapi_props\n\n---------------------------------\n\n";
  my $test=0x0024;
  #if ($mapi_props =~ m/(\0\0\0\xf8.{20})/) { warn "MATCHED a prop $1\n"; }
  #if (0) {
  if (! length($head->get('From')) ) {
    my $from='';
    my $cntrl_chars='[\c' . join('\c', ('A' .. 'Z')) . ']';
    if ($mapi_props =~ m/From:\s+([^\s\@]+\@[^\s]+)/) {
      $from=$1;
    } elsif ($mapi_props =~ m/\xf8\?\cA\0\0\0$cntrl_chars\0\0\0([^\0]+)\0+\cB\cA/) {
      $from=$1;
    } else {
      if ($mapi_props =~ m/(\xf8\?\cA.{30})/) { warn "MATCH: $1\n"; }
      #$from="Unknown Sender";
    }
    if( length($from)) { $head->replace('from', $from); }
  }
  #############################################################################

  for ($tnef->attachments) {
    $_->longname=~/^[\w\W]+\.([A-Za-z]{2,4})$/;
    my $ext = $1;
    if (!$content_type{lc($ext)}) {
      warn "No MIME type for (" . $_->longname . "/" . $_->name . ")\n";
    } 
    $new->attach( 
                   Type => $content_type{lc($ext)},
                   Encoding => 'base64',
                   Data => $_->data, 
                   Disposition => 'attachment',
                   Filename => $_->longname
                 );
  }
  # If you want to delete the working files
  $tnef->purge;
}


# POD documentation

=head1 SYNOPSIS


cat mbox_msg_w_tnef | killtnef > mbox_msg_mime_compliant

=head1 README

killtnef - Converts emails with MS-TNEF, Microsoft's proprietary Transport Neutral Encapsulated Format, attachments into standard MIME-compliant emails.

This script reads an mbox, or a single email message, from STDIN,
extracts data from any ms-tnef attachments that may exist, and writes
a new mbox (or a single email message) to STDOUT which has each of the
files that were encoded in any ms-tnef attachments attached separately,
as RFC-822/RFC-1521 compliant MIME emails/attachments.

Any email(s) containing no ms-tnef MIME attachments are passed through
this script untouched.

=head1 DESCRIPTION

This script was originally written to convert about 35,000 emails from
some Microsoft Outlook *.pst (post office) files, almost all of which
had ms-tnef encoded attachments, into MIME-compliant emails so that
they could be imported into an email-archiving system that 10East
supplies to some of its customers.  If anyone is curious, an imapd
was used to move the emails from the *.pst files to mbox format using
Outlook 2000 as an IMAP client.

This script can also be used as an incoming mail filter which will
automatically convert ms-tnef attachments into MIME-compliant
attachments.

=head1 AUTHORSHIP

Andrew Orr <aorr@10east.com>

Lester Hightower <hightowe@10east.com>

=head1 LICENSE

This software is licensed under the terms of the GNU Public License,
which is available for review at http://www.gnu.org/copyleft/gpl.html

=head1 CHANGE LOG

Feb-22-2002: Originally created by Andy Orr

Feb-26-2002: A few enhancements and bug-fixes by Lester Hightower

Mar-06-2002: Documentation and a few comments added by Lester Hightower
in preparation for submitting this script to CPAN.

Mar-07-2002: After realizing that a POD README section is needed for the
HTML pages generated for the script index in CPAN, LHH added one and
submitted this as killtnef-1.0.1.pl.

=head1 PREREQUISITES

This script requires the C<strict> module.
It also requires C<Convert::TNEF 0.16> and C<MIME::Parser 5.406>

=head1 COREQUISITES

None.

=pod OSNAMES

Any Unix-like.

=pod SCRIPT CATEGORIES

Mail
Mail/Converters
Mail/Filters

=cut

