Path: blob/master/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php
12256 views
<?php

/**
 * Cleans up the text body of an inbound mail: removes quoted replies and
 * signatures, and extracts "!command" lines found at the top or bottom of
 * the message.
 */
final class PhabricatorMetaMTAEmailBodyParser extends Phobject {

  /**
   * Mails can have bodies such as
   *
   *   !claim
   *
   *   taking this task
   *
   * Or
   *
   *   !assign alincoln
   *
   *   please, take this task I took; it's hard
   *
   * This function parses such an email body and returns a dictionary
   * containing a clean body text (e.g. "taking this task"), and a list of
   * commands. For example, this body above might parse as:
   *
   *   array(
   *     'body' => 'please, take this task I took; it's hard',
   *     'commands' => array(
   *       array('assign', 'alincoln'),
   *     ),
   *   )
   *
   * @param   string  Raw mail text body.
   * @return  dict    Parsed body, with keys 'body' and 'commands'.
   */
  public function parseBody($body) {
    $body = $this->stripTextBody($body);

    // Ask for line endings to be retained, so that implode('') below
    // reassembles the remaining text unchanged.
    $lines = phutil_split_lines($body, true);

    // We'll match commands at the beginning and end of the mail, but not
    // in the middle of the mail body.
    list($top_commands, $lines) = $this->stripCommands($lines);

    // Scan the remaining lines bottom-up by reversing them. Commands found
    // this way are collected in reverse order, so both the lines and the
    // commands are flipped back afterwards.
    list($end_commands, $lines) = $this->stripCommands(array_reverse($lines));
    $lines = array_reverse($lines);
    $commands = array_merge($top_commands, array_reverse($end_commands));

    $lines = rtrim(implode('', $lines));

    return array(
      'body' => $lines,
      'commands' => $commands,
    );
  }

  /**
   * Consume leading "!command" lines from a list of lines.
   *
   * Scanning stops at the first line which is not a command. Blank lines
   * are consumed only once at least one command has been seen, so a blank
   * line before any command ends the scan immediately.
   *
   * @param   list<string>  Lines to scan, in scan order.
   * @return  pair          List of commands (each one a list of the command
   *                        name followed by its arguments), and the input
   *                        lines with the consumed entries removed. The
   *                        surviving lines keep their original keys.
   */
  private function stripCommands(array $lines) {
    $saw_command = false;
    $commands = array();
    foreach ($lines as $key => $line) {
      // Drop blank lines which separate commands from each other or from
      // the body text.
      if (!strlen(trim($line)) && $saw_command) {
        unset($lines[$key]);
        continue;
      }

      $matches = null;
      if (!preg_match('/^\s*!(\w+.*$)/', $line, $matches)) {
        break;
      }

      // Everything after the "!" is the command name plus its arguments,
      // separated by commas and/or whitespace.
      $arg_str = $matches[1];
      $argv = preg_split('/[,\s]+/', trim($arg_str));
      $commands[] = $argv;
      unset($lines[$key]);

      $saw_command = true;
    }

    return array($commands, $lines);
  }

  /**
   * Strip quoted reply text and signatures from a mail body.
   *
   * @param   string  Raw mail text body.
   * @return  string  Body with quoted text and signature removed, trimmed
   *                  of surrounding whitespace.
   */
  public function stripTextBody($body) {
    return trim($this->stripSignature($this->stripQuotedText($body)));
  }

  /**
   * Remove quoted reply text (and everything after it) from a mail body.
   *
   * @param   string  Mail text body.
   * @return  string  Body without the quoted section, right-trimmed.
   */
  private function stripQuotedText($body) {
    $body = phutil_string_cast($body);

    // Look for "On <date>, <user> wrote:". This may be split across multiple
    // lines. We need to be careful not to remove all of a message like this:
    //
    //   On which day do you want to meet?
    //
    //   On <date>, <user> wrote:
    //   > Let's set up a meeting.

    $start = null;
    // NOTE: implode('') below relies on the split lines still carrying
    // their line endings (presumably the helper's default).
    $lines = phutil_split_lines($body);
    foreach ($lines as $key => $line) {
      // Remember the most recent line which starts with "On"; a later "On"
      // line replaces an earlier candidate.
      if (preg_match('/^\s*>?\s*On\b/', $line)) {
        $start = $key;
      }
      if ($start !== null) {
        if (preg_match('/\bwrote:/', $line)) {
          // Keep only the text before the "On ..." line.
          $lines = array_slice($lines, 0, $start);
          $body = implode('', $lines);
          break;
        }
      }
    }

    // In the patterns below, the "U" modifier inverts greediness, so the
    // trailing ".*?" is greedy and (with "s") consumes the rest of the body.

    // Outlook english
    $body = $this->removePattern(
      '/^\s*(> )?-----Original Message-----.*?/imsU',
      $body);

    // Outlook danish
    $body = $this->removePattern(
      '/^\s*(> )?-----Oprindelig Meddelelse-----.*?/imsU',
      $body);

    // See example in T3217.
    $body = $this->removePattern(
      '/^________________________________________\s+From:.*?/imsU',
      $body);

    // French GMail quoted text. See T8199.
    $body = $this->removePattern(
      '/^\s*\d{4}-\d{2}-\d{2} \d+:\d+ GMT.*:.*?/imsU',
      $body);

    return rtrim($body);
  }

  /**
   * Remove common mail client signatures from a mail body.
   *
   * @param   string  Mail text body.
   * @return  string  Body without a recognized signature, right-trimmed.
   */
  private function stripSignature($body) {
    // Quasi-"standard" delimiter, for lols see:
    //   https://bugzilla.mozilla.org/show_bug.cgi?id=58406
    $body = $this->removePattern(
      '/^-- +$.*/sm',
      $body);

    // Mailbox seems to make an attempt to comply with the "standard" but
    // omits the leading newline and uses an em dash. This may or may not have
    // the trailing space, but it's unique enough that there's no real
    // ambiguity in detecting it.
    $body = $this->removePattern(
      "/\s*\xE2\x80\x94\s*\nSent from Mailbox\s*\z/su",
      $body);

    // HTC Mail application (mobile)
    $body = $this->removePattern(
      '/^\s*^Sent from my HTC smartphone.*/sm',
      $body);

    // Apple iPhone
    $body = $this->removePattern(
      '/^\s*^Sent from my iPhone\s*$.*/sm',
      $body);

    return rtrim($body);
  }

  /**
   * Delete every match of a pattern from a body, tolerating PCRE failures.
   *
   * preg_replace() returns null when the regex engine fails, for example on
   * a subject which is not valid UTF-8 under the "u" modifier, or when a
   * backtrack limit is hit. Using that null as the new body would silently
   * discard the entire mail, so on failure the body is returned unchanged.
   *
   * @param   string  PCRE pattern to remove.
   * @param   string  Mail text body.
   * @return  string  Body with matches removed, or the original body if the
   *                  replacement failed.
   */
  private function removePattern($pattern, $body) {
    $result = preg_replace($pattern, '', $body);
    if ($result === null) {
      return $body;
    }
    return $result;
  }

}