Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/phabricator
Path: blob/master/src/infrastructure/markup/markuprule/PhutilRemarkupHyperlinkRule.php
12241 views
1
<?php
2
3
/**
 * Remarkup rule which recognizes URIs in text and renders them as hyperlinks.
 *
 * Three spellings are recognized by @{method:apply}:
 *
 *   - `<uri>`: mode "<", rendered as a literal link in
 *     @{method:didMarkupText};
 *   - `{uri}`: mode "{", treated as an embed; if nothing else handles it, it
 *     is rendered back as the plain text `{uri}`;
 *   - a bare `uri`: mode `null`, with trailing punctuation and an unbalanced
 *     closing parenthesis left outside the link.
 *
 * While text is being marked up, each matched URI is swapped for a token
 * obtained from the engine's `storeText()` and recorded in the engine's text
 * metadata under @{const:KEY_HYPERLINKS}. The final output for every token is
 * written later, in @{method:didMarkupText}.
 */
final class PhutilRemarkupHyperlinkRule extends PhutilRemarkupRule {

  /**
   * Text metadata key under which the list of discovered links is stored.
   */
  const KEY_HYPERLINKS = 'hyperlinks';

  /**
   * Priority value reported for this rule.
   *
   * @return float Always `400.0`.
   */
  public function getPriority() {
    return 400.0;
  }

  /**
   * Replace every recognized URI in a block of text with a stored token.
   *
   * The angle pattern is applied first, then the curly pattern, then the bare
   * pattern, so explicitly delimited URIs are consumed before the bare
   * matcher runs.
   *
   * @param string $text Text to scan for URIs.
   * @return string Result of the three `preg_replace_callback()` passes.
   */
  public function apply($text) {
    // The patterns are built once and kept in function-level statics.
    static $angle_pattern;
    static $curly_pattern;
    static $bare_pattern;

    if ($angle_pattern === null) {
      // See T13608. A previous version of this code matched bare URIs
      // starting with "\w{3,}", which can take a very long time to match
      // against long inputs.
      //
      // Use a protocol length limit in all patterns for general sanity,
      // and a negative lookbehind in the bare pattern to avoid explosive
      // complexity during expression evaluation.

      $protocol_fragment = '\w{3,32}';
      $uri_fragment = '[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+';

      $angle_pattern = sprintf(
        '(<(%s://%s?)>)',
        $protocol_fragment,
        $uri_fragment);

      $curly_pattern = sprintf(
        '({(%s://%s?)})',
        $protocol_fragment,
        $uri_fragment);

      $bare_pattern = sprintf(
        '((?<!\w)%s://%s)',
        $protocol_fragment,
        $uri_fragment);
    }

    // Hyperlinks with explicit "<>" around them get linked exactly, without
    // the "<>". Angle brackets are basically special and mean "this is a URL
    // with weird characters". This is assumed to be reasonable because they
    // don't appear in most normal text or most normal URLs.
    $text = preg_replace_callback(
      $angle_pattern,
      array($this, 'markupHyperlinkAngle'),
      $text);

    // We match "{uri}", but do not link it by default.
    $text = preg_replace_callback(
      $curly_pattern,
      array($this, 'markupHyperlinkCurly'),
      $text);

    // Anything else we match "ungreedily", which means we'll look for
    // stuff that's probably punctuation or otherwise not part of the URL and
    // not link it. This lets someone write "Quick! Go to
    // http://www.example.com/!". We also apply some paren balancing rules.

    // NOTE: We're explicitly avoiding capturing stored blocks, so text like
    // `http://www.example.com/[[x | y]]` doesn't get aggressively captured.

    $text = preg_replace_callback(
      $bare_pattern,
      array($this, 'markupHyperlinkUngreedy'),
      $text);

    return $text;
  }

  /**
   * Callback for `<uri>` matches; records the link with mode "<".
   *
   * @param array $matches Match list from `preg_replace_callback()`.
   * @return string Result of @{method:markupHyperlink}.
   */
  public function markupHyperlinkAngle(array $matches) {
    return $this->markupHyperlink('<', $matches);
  }

  /**
   * Callback for `{uri}` matches; records the link with mode "{".
   *
   * @param array $matches Match list from `preg_replace_callback()`.
   * @return string Result of @{method:markupHyperlink}.
   */
  public function markupHyperlinkCurly(array $matches) {
    return $this->markupHyperlink('{', $matches);
  }

  /**
   * Store a matched URI in the engine and remember it for later rendering.
   *
   * @param string|null $mode "<", "{", or `null` for a bare URI.
   * @param array $matches Match list; index 1 is the raw URI and index 0 is
   *   the full matched text, which is returned when the URI is rejected.
   * @return string Token returned by the engine's `storeText()`, or
   *   `$matches[0]` if constructing a `PhutilURI` from the raw URI throws.
   */
  protected function markupHyperlink($mode, array $matches) {
    $raw_uri = $matches[1];

    // The object is constructed purely for validation: if the constructor
    // throws, the original text is left alone.
    try {
      new PhutilURI($raw_uri);
    } catch (Exception $ex) {
      return $matches[0];
    }

    $engine = $this->getEngine();

    $token = $engine->storeText($raw_uri);

    $list_key = self::KEY_HYPERLINKS;
    $link_list = $engine->getTextMetadata($list_key, array());

    $link_list[] = array(
      'token' => $token,
      'uri' => $raw_uri,
      'mode' => $mode,
    );

    $engine->setTextMetadata($list_key, $link_list);

    return $token;
  }

  /**
   * Render a link in the default way.
   *
   * Embeds are rendered as raw `{uri}` text. Everything else becomes an `<a>`
   * tag whose `target` is `_blank` unless the engine configuration
   * `uri.same-window` is truthy.
   *
   * @param string $link URI to render; used as both `href` and link text.
   * @param bool $is_embed Whether the link was written as `{uri}`.
   * @return mixed Raw text for embeds, otherwise the result of
   *   `phutil_tag()`.
   */
  protected function renderHyperlink($link, $is_embed) {
    // If the URI is "{uri}" and no handler picked it up, we just render it
    // as plain text.
    if ($is_embed) {
      return $this->renderRawLink($link, $is_embed);
    }

    $engine = $this->getEngine();

    $same_window = $engine->getConfig('uri.same-window', false);
    if ($same_window) {
      $target = null;
    } else {
      $target = '_blank';
    }

    return phutil_tag(
      'a',
      array(
        'href' => $link,
        'class' => 'remarkup-link',
        'target' => $target,
        'rel' => 'noreferrer',
      ),
      $link);
  }

  /**
   * Render a link as unlinked text, restoring the braces of an embed.
   *
   * @param string $link URI text.
   * @param bool $is_embed Whether the link was written as `{uri}`.
   * @return string `{uri}` for embeds, otherwise the URI unchanged.
   */
  private function renderRawLink($link, $is_embed) {
    if ($is_embed) {
      return '{'.$link.'}';
    } else {
      return $link;
    }
  }

  /**
   * Callback for bare URI matches.
   *
   * Trailing characters from the set `;,.:!?`, and a closing parenthesis with
   * no opening parenthesis anywhere in the URI, are split off and emitted
   * after the link instead of being part of it.
   *
   * @param array $matches Match list; index 0 is the matched URI text.
   * @return mixed The original match if the trimmed URI is rejected,
   *   otherwise the result of `hsprintf()` on the link token and the tail.
   */
  protected function markupHyperlinkUngreedy($matches) {
    $match = $matches[0];
    $tail = null;
    $trailing = null;
    if (preg_match('/[;,.:!?]+$/', $match, $trailing)) {
      $tail = $trailing[0];
      $match = substr($match, 0, -strlen($tail));
    }

    // If there's a closing paren at the end but no balancing open paren in
    // the URL, don't link the close paren. This is an attempt to gracefully
    // handle the two common paren cases, Wikipedia links and English language
    // parentheticals, e.g.:
    //
    //   http://en.wikipedia.org/wiki/Noun_(disambiguation)
    //   (see also http://www.example.com)
    //
    // We could apply a craftier heuristic here which tries to actually balance
    // the parens, but this is probably sufficient.
    if (preg_match('/\\)$/', $match) && !preg_match('/\\(/', $match)) {
      $tail = ')'.$tail;
      $match = substr($match, 0, -1);
    }

    // Validate the trimmed URI here, before delegating. The match list passed
    // to markupHyperlink() below has no full-match entry, so a rejection
    // there would return null rather than the original text.
    try {
      new PhutilURI($match);
    } catch (Exception $ex) {
      return $matches[0];
    }

    $link = $this->markupHyperlink(null, array(null, $match));

    return hsprintf('%s%s', $link, $tail);
  }

  /**
   * Write the final rendering for every link recorded during markup.
   *
   * Each recorded link is resolved by the first rule below that applies:
   *
   *   - table-of-contents state or text mode: raw URI text;
   *   - protocol not present in `uri.allowed-protocols`: raw URI text;
   *   - written as `<uri>`, or rendering HTML mail: default rendering;
   *   - otherwise: offered to each hyperlink engine extension in turn, and
   *     rendered in the default way if no extension produces a result.
   *
   * @return void
   */
  public function didMarkupText() {
    $engine = $this->getEngine();

    $protocols = $engine->getConfig('uri.allowed-protocols', array());
    $is_toc = $engine->getState('toc');
    $is_text = $engine->isTextMode();
    $is_mail = $engine->isHTMLMailMode();

    $list_key = self::KEY_HYPERLINKS;
    $raw_list = $engine->getTextMetadata($list_key, array());

    $links = array();
    foreach ($raw_list as $key => $link) {
      $token = $link['token'];
      $raw_uri = $link['uri'];
      $mode = $link['mode'];

      $is_embed = ($mode === '{');
      $is_literal = ($mode === '<');

      // If we're rendering in a "Table of Contents" or a plain text mode,
      // we're going to render the raw URI without modifications.
      if ($is_toc || $is_text) {
        $result = $this->renderRawLink($raw_uri, $is_embed);
        $engine->overwriteStoredText($token, $result);
        continue;
      }

      // If this URI doesn't use a whitelisted protocol, don't link it. This
      // is primarily intended to prevent "javascript://" silliness.
      $uri = new PhutilURI($raw_uri);
      $protocol = $uri->getProtocol();
      $valid_protocol = idx($protocols, $protocol);
      if (!$valid_protocol) {
        $result = $this->renderRawLink($raw_uri, $is_embed);
        $engine->overwriteStoredText($token, $result);
        continue;
      }

      // If the URI is written as "<uri>", we'll render it literally even if
      // some handler would otherwise deal with it.
      // If we're rendering for HTML mail, we also render literally.
      if ($is_literal || $is_mail) {
        $result = $this->renderHyperlink($raw_uri, $is_embed);
        $engine->overwriteStoredText($token, $result);
        continue;
      }

      // Otherwise, this link is a valid resource which extensions are allowed
      // to handle.
      $links[$key] = $link;
    }

    if (!$links) {
      return;
    }

    foreach ($links as $key => $link) {
      $links[$key] = new PhutilRemarkupHyperlinkRef($link);
    }

    $extensions = PhutilRemarkupHyperlinkEngineExtension::getAllLinkEngines();
    foreach ($extensions as $extension) {
      // Work on a clone so the shared extension object is not bound to this
      // engine.
      id(clone $extension)
        ->setEngine($engine)
        ->processHyperlinks($links);

      // Any ref which now has a result is finished; drop it so later
      // extensions are not offered it.
      foreach ($links as $key => $link) {
        $result = $link->getResult();
        if ($result !== null) {
          $engine->overwriteStoredText($link->getToken(), $result);
          unset($links[$key]);
        }
      }

      if (!$links) {
        break;
      }
    }

    // Render any remaining links in a normal way.
    foreach ($links as $link) {
      $result = $this->renderHyperlink($link->getURI(), $link->isEmbed());
      $engine->overwriteStoredText($link->getToken(), $result);
    }
  }

}
268
269