Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/phabricator
Path: blob/master/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php
12256 views
1
<?php
2
3
final class PhabricatorMetaMTAEmailBodyParser extends Phobject {

  /**
   * Mails can have bodies such as
   *
   *   !claim
   *
   *   taking this task
   *
   * Or
   *
   *   !assign alincoln
   *
   *   please, take this task I took; it's hard
   *
   * This function parses such an email body and returns a dictionary
   * containing a clean body text (e.g. "taking this task"), and a list of
   * commands. For example, the second body above parses as:
   *
   *   array(
   *     'body' => "please, take this task I took; it's hard",
   *     'commands' => array(
   *       array('assign', 'alincoln'),
   *     ),
   *   )
   *
   * Commands are only recognized in a run of "!command" lines at the very
   * top or the very bottom of the (already stripped) body. Top commands are
   * listed first, followed by bottom commands in the order they were written.
   *
   * @param   string  Raw mail text body.
   * @return  dict    Parsed body with keys "body" and "commands".
   */
  public function parseBody($body) {
    $body = $this->stripTextBody($body);

    $lines = phutil_split_lines($body, $retain_endings = true);

    // We'll match commands at the beginning and end of the mail, but not
    // in the middle of the mail body. The trailing commands are found by
    // scanning the reversed line list, so both the remaining lines and the
    // collected commands must be flipped back afterwards.
    list($top_commands, $lines) = $this->stripCommands($lines);
    list($end_commands, $lines) = $this->stripCommands(array_reverse($lines));
    $lines = array_reverse($lines);
    $commands = array_merge($top_commands, array_reverse($end_commands));

    $lines = rtrim(implode('', $lines));

    return array(
      'body' => $lines,
      'commands' => $commands,
    );
  }

  /**
   * Remove a leading run of "!command" lines from a list of lines.
   *
   * Scanning stops at the first line which is not a command. Blank lines are
   * skipped (and removed) only once at least one command has been seen, so a
   * body which does not start with a command is returned untouched.
   *
   * @param   list<string>  Lines to scan, in scan order.
   * @return  pair          Pair of (list of argv lists, remaining lines). The
   *                        remaining lines keep their original keys.
   */
  private function stripCommands(array $lines) {
    $saw_command = false;
    $commands = array();
    foreach ($lines as $key => $line) {
      if (!strlen(trim($line)) && $saw_command) {
        unset($lines[$key]);
        continue;
      }

      $matches = null;
      if (!preg_match('/^\s*!(\w+.*$)/', $line, $matches)) {
        break;
      }

      // Split "assign alice, bob" into array('assign', 'alice', 'bob').
      // PREG_SPLIT_NO_EMPTY keeps a trailing separator ("!cc alice,") from
      // producing an empty-string argument.
      $arg_str = $matches[1];
      $argv = preg_split(
        '/[,\s]+/',
        trim($arg_str),
        -1,
        PREG_SPLIT_NO_EMPTY);
      $commands[] = $argv;
      unset($lines[$key]);

      $saw_command = true;
    }

    return array($commands, $lines);
  }

  /**
   * Strip quoted reply text and a trailing signature from a mail body, then
   * trim surrounding whitespace.
   *
   * @param   string  Raw mail text body.
   * @return  string  Body without quoted text or signature.
   */
  public function stripTextBody($body) {
    return trim($this->stripSignature($this->stripQuotedText($body)));
  }

  /**
   * Remove quoted text which mail clients append below a reply.
   *
   * @param   string  Raw mail text body.
   * @return  string  Body with recognized quoted sections removed and
   *                  trailing whitespace trimmed.
   */
  private function stripQuotedText($body) {
    $body = phutil_string_cast($body);

    // Look for "On <date>, <user> wrote:". This may be split across multiple
    // lines. We need to be careful not to remove all of a message like this:
    //
    //   On which day do you want to meet?
    //
    //   On <date>, <user> wrote:
    //   > Let's set up a meeting.
    //
    // To do that, remember the most recent line which starts with "On" and
    // only cut (from that line onward) once a line containing "wrote:" is
    // seen at or after it.

    $start = null;
    $lines = phutil_split_lines($body);
    foreach ($lines as $key => $line) {
      if (preg_match('/^\s*>?\s*On\b/', $line)) {
        $start = $key;
      }
      if ($start !== null) {
        if (preg_match('/\bwrote:/', $line)) {
          $lines = array_slice($lines, 0, $start);
          $body = implode('', $lines);
          break;
        }
      }
    }

    // In the patterns below the "U" modifier inverts greediness, so the
    // trailing ".*?" is effectively greedy and, together with "s", consumes
    // everything from the marker to the end of the body.
    $patterns = array(
      // Outlook english
      '/^\s*(> )?-----Original Message-----.*?/imsU',

      // Outlook danish
      '/^\s*(> )?-----Oprindelig Meddelelse-----.*?/imsU',

      // See example in T3217.
      '/^________________________________________\s+From:.*?/imsU',

      // French GMail quoted text. See T8199.
      '/^\s*\d{4}-\d{2}-\d{2} \d+:\d+ GMT.*:.*?/imsU',
    );

    foreach ($patterns as $pattern) {
      $body = $this->removePattern($pattern, $body);
    }

    return rtrim($body);
  }

  /**
   * Remove common mail signatures from the end of a body.
   *
   * @param   string  Mail text body.
   * @return  string  Body with recognized signatures removed and trailing
   *                  whitespace trimmed.
   */
  private function stripSignature($body) {
    $patterns = array(
      // Quasi-"standard" delimiter, for lols see:
      // https://bugzilla.mozilla.org/show_bug.cgi?id=58406
      '/^-- +$.*/sm',

      // Mailbox seems to make an attempt to comply with the "standard" but
      // omits the leading newline and uses an em dash. This may or may not
      // have the trailing space, but it's unique enough that there's no real
      // ambiguity in detecting it.
      "/\s*\xE2\x80\x94\s*\nSent from Mailbox\s*\z/su",

      // HTC Mail application (mobile)
      '/^\s*^Sent from my HTC smartphone.*/sm',

      // Apple iPhone
      '/^\s*^Sent from my iPhone\s*$.*/sm',
    );

    foreach ($patterns as $pattern) {
      $body = $this->removePattern($pattern, $body);
    }

    return rtrim($body);
  }

  /**
   * Delete every match of a pattern from a body, tolerating regex failures.
   *
   * preg_replace() returns null on error, for example when the subject is
   * not valid UTF-8 and the pattern uses the "u" modifier, or when a PCRE
   * limit is hit. Mail bodies are untrusted input, so in that case the text
   * is returned unchanged instead of the whole body being discarded.
   *
   * @param   string  PCRE pattern to remove.
   * @param   string  Text to remove the pattern from.
   * @return  string  Text with matches removed, or the original text if the
   *                  pattern could not be applied.
   */
  private function removePattern($pattern, $body) {
    $result = preg_replace($pattern, '', $body);
    if ($result === null) {
      return $body;
    }
    return $result;
  }

}
169
170