Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/phabricator
Path: blob/master/src/infrastructure/markup/PhutilRemarkupBlockStorage.php
12241 views
1
<?php
2
3
/**
4
* Remarkup prevents several classes of text-processing problems by replacing
5
* tokens in the text as they are marked up. For example, if you write something
6
* like this:
7
*
8
* //D12//
9
*
10
* It is processed in several stages. First the "D12" matches and is replaced
11
* with a token, in the form of "<0x01><ID number><literal "Z">". The first
12
* byte, "<0x01>" is a single byte with value 1 that marks a token. If this is
13
* token ID "444", the text may now look like this:
14
*
15
* //<0x01>444Z//
16
*
17
* Now the italics match and are replaced, using the next token ID:
18
*
19
* <0x01>445Z
20
*
21
* When processing completes, all the tokens are replaced with their final
22
* equivalents. For example, token 444 is evaluated to:
23
*
24
* <a href="http://...">...</a>
25
*
26
* Then token 445 is evaluated:
27
*
28
* <em><0x01>444Z</em>
29
*
30
* ...and all tokens it contains are replaced:
31
*
32
* <em><a href="http://...">...</a></em>
33
*
34
* If we didn't do this, the italics rule could match the "//" in "http://",
35
* or any number of other processing mistakes could occur, some of which create
36
* security risks.
37
*
38
* This class generates keys, and stores the map of keys to replacement text.
39
*/
40
final class PhutilRemarkupBlockStorage extends Phobject {

  const MAGIC_BYTE = "\1";

  private $map = array();
  private $index = 0;

  /**
   * Remember a piece of replacement text and hand back the token which
   * stands in for it.
   *
   * Tokens have the form "<0x01><ID>Z", where the ID is a counter which
   * increases by one with each call on this object.
   *
   * @param string $text Replacement text; it may already contain tokens
   *   returned by earlier calls.
   * @return string Newly allocated token.
   */
  public function store($text) {
    $this->index++;

    $token = self::MAGIC_BYTE.$this->index.'Z';
    $this->map[$token] = $text;

    return $token;
  }

  /**
   * Expand every token in a corpus (and, recursively, in the stored
   * replacement text) back into its final content.
   *
   * Unless text mode is requested, each stored value and the corpus are
   * passed through phutil_escape_html() before expansion and the result is
   * wrapped with phutil_safe_html().
   *
   * @param string $corpus Text containing tokens produced by store().
   * @param bool $text_mode Pass true to skip the HTML escaping/wrapping.
   * @return mixed Expanded corpus; the phutil_safe_html() result unless
   *   text mode is on.
   * @throws Exception If the text contains a token-shaped sequence which
   *   is not in the map, or which was stored after the entry currently
   *   being expanded.
   */
  public function restore($corpus, $text_mode = false) {
    $html_mode = !$text_mode;

    // Work on a copy so the stored values themselves are never modified.
    $expanded = $this->map;
    if ($html_mode) {
      foreach ($expanded as $token => $value) {
        $expanded[$token] = phutil_escape_html($value);
      }
      $corpus = phutil_escape_html($corpus);
    }

    // A parent token can only be stored after its children exist, because
    // the text handed to store() must already contain the child tokens. The
    // map is therefore ordered children-first, and walking it front to back
    // guarantees that every child is fully expanded by the time a parent
    // refers to it. Appending the corpus makes it the final "parent".
    $expanded[] = $corpus;

    $pattern = '/'.self::MAGIC_BYTE.'\d+Z/';

    $visited = array();
    foreach ($expanded as $token => $value) {
      $visited[$token] = true;

      // No magic byte means no tokens, so this entry is already final.
      if (strpos($value, self::MAGIC_BYTE) === false) {
        continue;
      }

      $found = null;
      preg_match_all($pattern, $value, $found, PREG_OFFSET_CAPTURE);

      // See PHI1114. All matches are substituted in one pass by collecting
      // the literal text between tokens and the token replacements into a
      // list and joining it once at the end. For a large corpus with many
      // tokens this is much faster than repeated "substr_replace()" calls.
      $pieces = array();
      $cursor = 0;
      foreach ($found[0] as $hit) {
        list($child, $offset) = $hit;

        // Only tokens which have already been expanded may be referenced.
        // Anything else is either unknown or stored out of order, and both
        // cases are treated as hard errors.
        if (!isset($visited[$child])) {
          if (isset($expanded[$child])) {
            throw new Exception(
              pht(
                'Matched token key "%s" while processing remarkup block, but '.
                'this token appears later in the list than the key being '.
                'processed ("%s").',
                $child,
                $token));
          }

          throw new Exception(
            pht(
              'Matched token key "%s" while processing remarkup block, but '.
              'this token does not exist in the token map.',
              $child));
        }

        // Literal bytes between the previous token and this one.
        if ($offset > $cursor) {
          $pieces[] = substr($value, $cursor, $offset - $cursor);
        }

        // The child's replacement, read from the working map so that it is
        // the already-expanded version.
        $pieces[] = $expanded[$child];

        // Skip the cursor past the token itself.
        $cursor = $offset + strlen($child);
      }

      // Literal bytes trailing the final token.
      $pieces[] = substr($value, $cursor);

      $expanded[$token] = implode('', $pieces);
    }

    // The corpus was appended last, so it is the final entry.
    $result = last($expanded);

    return $text_mode ? $result : phutil_safe_html($result);
  }

  /**
   * Replace the text stored for a token.
   *
   * The key is written to the map as given; no check is made that it was
   * previously returned by store().
   *
   * @param string $key Token key.
   * @param string $new_text Replacement text to store under the key.
   * @return this
   */
  public function overwrite($key, $new_text) {
    $this->map[$key] = $new_text;

    return $this;
  }

  /**
   * Get the map of token keys to their stored replacement text.
   *
   * @return array Map from token key to replacement text.
   */
  public function getMap() {
    return $this->map;
  }

  /**
   * Replace the whole token map.
   *
   * The counter used to allocate new token IDs is left untouched.
   *
   * @param array $map Map from token key to replacement text.
   * @return this
   */
  public function setMap(array $map) {
    $this->map = $map;

    return $this;
  }

}
177
178