Code:Extract Mail Body
From Exterior Memory
(Redirected from Extract Mail Body Script)
This script goes through all files in a folder, assuming each file is an (RFC 2822) e-mail message. For each such message, it checks whether the mail has another e-mail attached to it. If so, it extracts that e-mail, stores it in another folder, and deletes the original file.
This script is useful in conjunction with a cron job that learns spam e-mails. SpamAssassin can be configured to mark spam mails by generating a report e-mail with the offending (alleged spam) mail attached to it. This script extracts the original spam mail, so it can be processed by sa-learn.
For more information, see SpamAssassin_Rules.
#!/usr/bin/php4 -qC
<?php
/**
 * Extract the original (alleged spam) message from report mails.
 *
 * Every regular file in the source directory is treated as an e-mail
 * message. If the message carries the original mail as a
 * "message/rfc822; x-spam-type=original" attachment, that attachment is
 * written to a file with the same name in the destination directory and the
 * source file is deleted. Messages whose subject starts with "***SPAM*** "
 * are copied over unmodified.
 *
 * Usage: extract-spame.php [-s|--simulation] [-c|--count N] [[srcdir] dstdir]
 *
 *   -s, --simulation   report what would be done; never write or delete files
 *   -c, --count N      handle at most N directory entries (0 = no limit)
 */

$srcdir = "/home/freek/Maildir/.Spam.confirmed/cur";
$dstdir = "/home/freek/Maildir/.Spam.negatives/cur";

# Check if '/home/freek/pear' is in include_path
$extrapath = '/home/freek/pear';
$includepath = explode(':', ini_get('include_path'));
if (!in_array($extrapath, $includepath)) {
    # It's not. Add after '.' element, or at the start, if there is no '.' element.
    $pos = array_search('.', $includepath);
    if (is_int($pos)) {
        $pos++;
    } else {
        $pos = 0;
    }
    array_splice($includepath, $pos, 0, $extrapath);
    ini_set('include_path', implode(':', $includepath));
}

include_once 'Compat/Function/is_a.php';
include_once 'Console/Getopt.php';
include_once 'Mail/mimeDecode.php';

if (!empty($_ENV["PWD"])) {
    chdir($_ENV["PWD"]);
}

// "silent" is still accepted (and ignored) because earlier versions of this
// script listed it as a long option; "simulation" is the documented flag.
$parsed = Console_Getopt::getopt($argv, "sc:", array("simulation", "silent", "count="));
if (!is_array($parsed)) {
    // Console_Getopt hands back an error object instead of an array when the
    // command line cannot be parsed.
    if (is_object($parsed) && method_exists($parsed, 'getMessage')) {
        echo $parsed->getMessage(), "\n";
    }
    usage();
}
list($options, $arguments) = $parsed;

// correct for older (newer?) version of Getopt, which also included scriptname
if (isset($arguments[0]) && $argv[0] == $arguments[0]) {
    array_shift($arguments);
}
$options = flattenOptions($options);

if (count($arguments) == 1) {
    $dstdir = $arguments[0];
} elseif (count($arguments) == 2) {
    $srcdir = $arguments[0];
    $dstdir = $arguments[1];
} elseif (count($arguments) != 0) {
    usage();
}

// Console_Getopt reports long options with their leading "--".
$simulation = (isset($options['s']) || isset($options['--simulation']));
if ($simulation) {
    echo "Simulation mode\n";
}

$maxcount = 0;
if (isset($options['c']) && is_numeric($options['c'])) {
    $maxcount = (int) $options['c'];
} elseif (isset($options['--count']) && is_numeric($options['--count'])) {
    $maxcount = (int) $options['--count'];
}

/**
 * Turn the list of array(name, value) pairs returned by Console_Getopt into
 * a map of name => value.
 *
 * Options without a value (null or empty string) map to true. A value of
 * "0" is kept as-is, so "-c 0" is not mistaken for a bare flag.
 *
 * @param array $array list of array(name, value) pairs
 * @return array map of option name to its value, or true for bare flags
 */
function flattenOptions($array)
{
    $flatArray = array();
    if (!is_array($array)) {
        return $flatArray;
    }
    foreach ($array as $subElement) {
        if (!is_array($subElement) || !isset($subElement[0])) {
            continue;
        }
        $value = isset($subElement[1]) ? $subElement[1] : null;
        $flatArray[$subElement[0]] = ($value === null || $value === '') ? true : $value;
    }
    return $flatArray;
}

/**
 * Print a one-line usage summary and terminate the script.
 */
function usage()
{
    echo "extract-spame.php [-s|--simulation] [-c|--count N] [[srcdir] dstdir]\n";
    exit;
}

if (!is_dir($srcdir)) {
    die("$srcdir is not a directory");
}
if (!is_dir($dstdir)) {
    die("$dstdir is not a directory");
}

if ($handle = opendir($srcdir)) {
    $count = 0;
    /* This is the correct way to loop over the directory. */
    while (false !== ($file = readdir($handle))) {
        // The directory's own "." and ".." entries are never mail files and
        // must not use up the -c budget.
        if ($file === '.' || $file === '..') {
            continue;
        }
        if ($maxcount > 0 && $count >= $maxcount) {
            break;
        }
        process_file($file);
        $count++;
    }
    closedir($handle);
}

/**
 * Process one directory entry of the source directory.
 *
 * Reads $srcdir/$file, extracts the spam message with get_spam_body(),
 * writes it to $dstdir/$file and deletes the source file. Progress is echoed
 * as "<file> ... <result>". In simulation mode nothing is written or deleted.
 *
 * Uses the globals $srcdir, $dstdir and $simulation.
 *
 * @param string $file file name relative to the source directory
 * @return bool true when the message was written to the destination,
 *              false when the entry was skipped or an error occurred
 */
function process_file($file)
{
    global $srcdir, $dstdir;
    global $simulation;

    $srcfile = $srcdir . '/' . $file;
    $dstfile = $dstdir . '/' . $file;

    echo $file, " ... ";

    if (!is_file($srcfile)) {
        echo "skipped (not a regular file)\n";
        return false;
    }
    if (file_exists($dstfile)) {
        // Already extracted earlier: only the leftover source is removed,
        // and never during a simulation run.
        if (!$simulation && unlink($srcfile)) {
            echo "deleted (destination file already exists)\n";
            return false;
        }
        echo "skipped (destination file already exists)\n";
        return false;
    }
    if (!is_readable($srcfile)) {
        echo "skipped (source not readable)\n";
        return false;
    }
    // No is_writable() check on the destination file: it does not exist at
    // this point, so the check would always fail. fopen() below reports
    // real permission problems.

    $lines = file($srcfile);
    if ($lines === false) {
        echo "(cannot read source file) failed\n";
        return false;
    }
    $fullmessage = implode('', $lines);

    $spammessage = get_spam_body($fullmessage);
    if ($spammessage === false) {
        echo "failed\n";
        return false;
    }
    if (!is_string($spammessage) || empty($spammessage)) {
        echo "(unexpected spammessage) failed\n";
        return false;
    }
    if ($simulation) {
        echo "(simulation only) skipped\n";
        return false;
    }

    # Write new spammessage to file
    $handle = fopen($dstfile, 'w');
    if ($handle === false) {
        echo "(Cannot open file $dstfile for writing) failed\n";
        return false;
    }
    $written = fwrite($handle, $spammessage);
    if ($written === false || $written < strlen($spammessage)) {
        echo "(Cannot write to file $dstfile) failed\n";
        fclose($handle);
        // Do not leave a truncated destination behind: the next run would
        // take it for a finished extraction and delete the source.
        if (file_exists($dstfile)) {
            unlink($dstfile);
        }
        return false;
    }
    if (!fclose($handle)) {
        echo "(Could not close file $dstfile) failed?\n";
        // The data may not have been flushed; same reasoning as above.
        if (file_exists($dstfile)) {
            unlink($dstfile);
        }
        return false;
    }

    // done. delete original file.
    if (!unlink($srcfile)) {
        echo "(could not delete original file) done\n";
        return true;
    }
    echo "(original file deleted) done\n";
    return true;
}

/**
 * Locate the spam message inside a raw e-mail.
 *
 * First looks for a MIME part whose content-type header is exactly
 * "message/rfc822; x-spam-type=original" and returns its body. Failing that,
 * a message whose subject starts with "***SPAM*** " is returned unchanged.
 *
 * @param string $fullmessage the raw message, headers included
 * @return string|false the spam message, or false when none was found
 */
function get_spam_body($fullmessage)
{
    $mime = new Mail_mimeDecode($fullmessage);
    $message = $mime->decode(array(
        'include_bodies' => true,
        'decode_bodies'  => false,
        'decode_headers' => false,
    ));

    // First attempt: check for attachment with spam mail
    if (isset($message->parts) && is_array($message->parts)) {
        foreach ($message->parts as $part) {
            if (!isset($part->headers['content-type'])) {
                continue;
            }
            if ($part->headers['content-type'] == "message/rfc822; x-spam-type=original") {
                if (empty($part->body)) {
                    echo "(body of original mail empty; perhaps unmodified mimeDecode class?) \n";
                    return false;
                }
                return $part->body;
            }
        }
    }

    // Second attempt: check for "***SPAM*** " in subject
    if (isset($message->headers["subject"])) {
        $subject = $message->headers["subject"];
        // The is_string() guard keeps strpos() from being handed a
        // non-string header value.
        if (is_string($subject) && strpos($subject, "***SPAM*** ") === 0) {
            // too complex to simplify header.
            return $fullmessage;
        }
    }

    // nothing found
    return false;
}