Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/phabricator
Path: blob/master/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php
12242 views
1
<?php
2
3
final class PhutilProseDifferenceEngine extends Phobject {
4
5
public function getDiff($u, $v) {
6
return $this->buildDiff($u, $v, 0);
7
}
8
9
private function buildDiff($u, $v, $level) {
10
$u_parts = $this->splitCorpus($u, $level);
11
$v_parts = $this->splitCorpus($v, $level);
12
13
if ($level === 0) {
14
$diff = $this->newHashDiff($u_parts, $v_parts);
15
$too_large = false;
16
} else {
17
list($diff, $too_large) = $this->newEditDistanceMatrixDiff(
18
$u_parts,
19
$v_parts,
20
$level);
21
}
22
23
$diff->reorderParts();
24
25
// If we just built a character-level diff, we're all done and do not
26
// need to go any deeper.
27
if ($level == 3) {
28
return $diff;
29
}
30
31
$blocks = array();
32
$block = null;
33
foreach ($diff->getParts() as $part) {
34
$type = $part['type'];
35
$text = $part['text'];
36
switch ($type) {
37
case '=':
38
if ($block) {
39
$blocks[] = $block;
40
$block = null;
41
}
42
$blocks[] = array(
43
'type' => $type,
44
'text' => $text,
45
);
46
break;
47
case '-':
48
if (!$block) {
49
$block = array(
50
'type' => '!',
51
'old' => '',
52
'new' => '',
53
);
54
}
55
$block['old'] .= $text;
56
break;
57
case '+':
58
if (!$block) {
59
$block = array(
60
'type' => '!',
61
'old' => '',
62
'new' => '',
63
);
64
}
65
$block['new'] .= $text;
66
break;
67
}
68
}
69
70
if ($block) {
71
$blocks[] = $block;
72
}
73
74
$result = new PhutilProseDiff();
75
foreach ($blocks as $block) {
76
$type = $block['type'];
77
if ($type == '=') {
78
$result->addPart('=', $block['text']);
79
} else {
80
$old = $block['old'];
81
$new = $block['new'];
82
if (!strlen($old) && !strlen($new)) {
83
// Nothing to do.
84
} else if (!strlen($old)) {
85
$result->addPart('+', $new);
86
} else if (!strlen($new)) {
87
$result->addPart('-', $old);
88
} else {
89
if ($too_large) {
90
// If this text was too big to diff, don't try to subdivide it.
91
$result->addPart('-', $old);
92
$result->addPart('+', $new);
93
} else {
94
$subdiff = $this->buildDiff(
95
$old,
96
$new,
97
$level + 1);
98
99
foreach ($subdiff->getParts() as $part) {
100
$result->addPart($part['type'], $part['text']);
101
}
102
}
103
}
104
}
105
}
106
107
$result->reorderParts();
108
109
return $result;
110
}
111
112
private function splitCorpus($corpus, $level) {
113
switch ($level) {
114
case 0:
115
// Level 0: Split into paragraphs.
116
$expr = '/([\n]+)/';
117
break;
118
case 1:
119
// Level 1: Split into sentences.
120
$expr = '/([\n,!;?\.]+)/';
121
break;
122
case 2:
123
// Level 2: Split into words.
124
$expr = '/(\s+)/';
125
break;
126
case 3:
127
// Level 3: Split into characters.
128
return phutil_utf8v_combined($corpus);
129
}
130
131
$pieces = preg_split($expr, $corpus, -1, PREG_SPLIT_DELIM_CAPTURE);
132
return $this->stitchPieces($pieces, $level);
133
}
134
135
private function stitchPieces(array $pieces, $level) {
136
$results = array();
137
$count = count($pieces);
138
for ($ii = 0; $ii < $count; $ii += 2) {
139
$result = $pieces[$ii];
140
if ($ii + 1 < $count) {
141
$result .= $pieces[$ii + 1];
142
}
143
144
if ($level < 2) {
145
$trimmed_pieces = $this->trimApart($result);
146
foreach ($trimmed_pieces as $trimmed_piece) {
147
$results[] = $trimmed_piece;
148
}
149
} else {
150
$results[] = $result;
151
}
152
}
153
154
// If the input ended with a delimiter, we can get an empty final piece.
155
// Just discard it.
156
if (last($results) == '') {
157
array_pop($results);
158
}
159
160
return $results;
161
}
162
163
private function newEditDistanceMatrixDiff(
164
array $u_parts,
165
array $v_parts,
166
$level) {
167
168
$matrix = id(new PhutilEditDistanceMatrix())
169
->setMaximumLength(128)
170
->setSequences($u_parts, $v_parts)
171
->setComputeString(true);
172
173
// For word-level and character-level changes, smooth the output string
174
// to reduce the choppiness of the diff.
175
if ($level > 1) {
176
$matrix->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_FULL);
177
}
178
179
$u_pos = 0;
180
$v_pos = 0;
181
182
$edits = $matrix->getEditString();
183
$edits_length = strlen($edits);
184
185
$diff = new PhutilProseDiff();
186
for ($ii = 0; $ii < $edits_length; $ii++) {
187
$c = $edits[$ii];
188
if ($c == 's') {
189
$diff->addPart('=', $u_parts[$u_pos]);
190
$u_pos++;
191
$v_pos++;
192
} else if ($c == 'd') {
193
$diff->addPart('-', $u_parts[$u_pos]);
194
$u_pos++;
195
} else if ($c == 'i') {
196
$diff->addPart('+', $v_parts[$v_pos]);
197
$v_pos++;
198
} else if ($c == 'x') {
199
$diff->addPart('-', $u_parts[$u_pos]);
200
$diff->addPart('+', $v_parts[$v_pos]);
201
$u_pos++;
202
$v_pos++;
203
} else {
204
throw new Exception(
205
pht(
206
'Unexpected character ("%s") in edit string.',
207
$c));
208
}
209
}
210
211
return array($diff, $matrix->didReachMaximumLength());
212
}
213
214
private function newHashDiff(array $u_parts, array $v_parts) {
215
216
$u_ref = new PhabricatorDocumentRef();
217
$v_ref = new PhabricatorDocumentRef();
218
219
$u_blocks = $this->newDocumentEngineBlocks($u_parts);
220
$v_blocks = $this->newDocumentEngineBlocks($v_parts);
221
222
$rows = id(new PhabricatorDocumentEngineBlocks())
223
->addBlockList($u_ref, $u_blocks)
224
->addBlockList($v_ref, $v_blocks)
225
->newTwoUpLayout();
226
227
$diff = new PhutilProseDiff();
228
foreach ($rows as $row) {
229
list($u_block, $v_block) = $row;
230
231
if ($u_block && $v_block) {
232
if ($u_block->getDifferenceType() === '-') {
233
$diff->addPart('-', $u_block->getContent());
234
$diff->addPart('+', $v_block->getContent());
235
} else {
236
$diff->addPart('=', $u_block->getContent());
237
}
238
} else if ($u_block) {
239
$diff->addPart('-', $u_block->getContent());
240
} else {
241
$diff->addPart('+', $v_block->getContent());
242
}
243
}
244
245
return $diff;
246
}
247
248
private function newDocumentEngineBlocks(array $parts) {
249
$blocks = array();
250
251
foreach ($parts as $part) {
252
$hash = PhabricatorHash::digestForIndex($part);
253
254
$blocks[] = id(new PhabricatorDocumentEngineBlock())
255
->setContent($part)
256
->setDifferenceHash($hash);
257
}
258
259
return $blocks;
260
}
261
262
public static function trimApart($input) {
263
// Split pieces into separate text and whitespace sections: make one
264
// piece out of all the whitespace at the beginning, one piece out of
265
// all the actual text in the middle, and one piece out of all the
266
// whitespace at the end.
267
268
$parts = array();
269
270
$length = strlen($input);
271
272
$corpus = ltrim($input);
273
$l_length = strlen($corpus);
274
if ($l_length !== $length) {
275
$parts[] = substr($input, 0, $length - $l_length);
276
}
277
278
$corpus = rtrim($corpus);
279
$lr_length = strlen($corpus);
280
281
if ($lr_length) {
282
$parts[] = $corpus;
283
}
284
285
if ($lr_length !== $l_length) {
286
// NOTE: This will be a negative value; we're slicing from the end of
287
// the input string.
288
$parts[] = substr($input, $lr_length - $l_length);
289
}
290
291
return $parts;
292
}
293
294
}
295
296