Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/phabricator
Path: blob/master/src/applications/differential/engine/DifferentialChangesetEngine.php
12256 views
1
<?php
2
3
/**
 * Rebuilds derived data on a list of DifferentialChangeset objects:
 * "generated code" attributes, the "effect hash" metadata entry, and the
 * "copy:lines" metadata entry describing copied or moved lines.
 *
 * A viewer must be provided with setViewer() before calling
 * rebuildChangesets(), because file objects are loaded on the viewer's
 * behalf.
 */
final class DifferentialChangesetEngine extends Phobject {

  private $viewer;

  /**
   * Set the user on whose behalf file objects are queried.
   *
   * @param PhabricatorUser $viewer Viewing user.
   * @return $this
   */
  public function setViewer(PhabricatorUser $viewer) {
    $this->viewer = $viewer;
    return $this;
  }

  /**
   * Get the viewer previously provided with setViewer().
   *
   * @return PhabricatorUser|null Viewer, or null if none has been set.
   */
  public function getViewer() {
    return $this->viewer;
  }

  /**
   * Recompute all derived data for a list of changesets.
   *
   * The changesets are modified in place; nothing is returned and nothing
   * is saved by this method. Changesets which reference a new file object
   * that can not be loaded are skipped entirely (see loadChangesetFiles()).
   *
   * @param list<DifferentialChangeset> $changesets Changesets to rebuild.
   * @return void
   */
  public function rebuildChangesets(array $changesets) {
    assert_instances_of($changesets, 'DifferentialChangeset');

    $changesets = $this->loadChangesetFiles($changesets);

    foreach ($changesets as $changeset) {
      $this->detectGeneratedCode($changeset);
      $this->computeHashes($changeset);
    }

    // Copy detection compares every changeset against every other one, so
    // it runs once over the whole list rather than per changeset.
    $this->detectCopiedCode($changesets);
  }

  /**
   * Load and attach the "new file" object for each changeset which has one.
   *
   * Changesets with no new file object PHID are passed through untouched.
   * Changesets with a PHID which the file query does not return
   * (presumably because the file is missing or not visible to the viewer)
   * are removed from the result.
   *
   * @param list<DifferentialChangeset> $changesets Changesets to load files
   *   for.
   * @return array Remaining changesets, with their original keys preserved.
   */
  private function loadChangesetFiles(array $changesets) {
    $viewer = $this->getViewer();

    $file_phids = array();
    foreach ($changesets as $changeset) {
      $file_phid = $changeset->getNewFileObjectPHID();
      if ($file_phid !== null) {
        $file_phids[] = $file_phid;
      }
    }

    // Only issue a query if at least one changeset references a file.
    if ($file_phids) {
      $files = id(new PhabricatorFileQuery())
        ->setViewer($viewer)
        ->withPHIDs($file_phids)
        ->execute();
      $files = mpull($files, null, 'getPHID');
    } else {
      $files = array();
    }

    foreach ($changesets as $changeset_key => $changeset) {
      $file_phid = $changeset->getNewFileObjectPHID();
      if ($file_phid === null) {
        continue;
      }

      $file = idx($files, $file_phid);
      if (!$file) {
        // The file could not be loaded, so later steps which read the file
        // object can not run for this changeset. Drop it.
        unset($changesets[$changeset_key]);
        continue;
      }

      $changeset->attachNewFileObject($file);
    }

    return $changesets;
  }


/* -( Generated Code )----------------------------------------------------- */


  /**
   * Mark a changeset as generated code if either detector matches.
   *
   * The path-based result is stored as a trusted attribute and the
   * content-based result as an untrusted attribute. An attribute is only
   * written when the corresponding detector returns true.
   *
   * @param DifferentialChangeset $changeset Changeset to inspect and update.
   * @return void
   */
  private function detectGeneratedCode(DifferentialChangeset $changeset) {
    $is_generated_trusted = $this->isTrustedGeneratedCode($changeset);
    if ($is_generated_trusted) {
      $changeset->setTrustedChangesetAttribute(
        DifferentialChangeset::ATTRIBUTE_GENERATED,
        $is_generated_trusted);
    }

    $is_generated_untrusted = $this->isUntrustedGeneratedCode($changeset);
    if ($is_generated_untrusted) {
      $changeset->setUntrustedChangesetAttribute(
        DifferentialChangeset::ATTRIBUTE_GENERATED,
        $is_generated_untrusted);
    }
  }

  /**
   * Test whether the changeset's filename matches any regular expression in
   * the "differential.generated-paths" configuration option.
   *
   * @param DifferentialChangeset $changeset Changeset to test.
   * @return bool True if any configured pattern matches the filename.
   */
  private function isTrustedGeneratedCode(DifferentialChangeset $changeset) {
    $filename = $changeset->getFilename();

    $paths = PhabricatorEnv::getEnvConfig('differential.generated-paths');
    foreach ($paths as $regexp) {
      if (preg_match($regexp, $filename)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Test whether the new content of the changeset carries an in-file
   * "generated code" marker.
   *
   * Only changesets which have hunks are examined. Two markers are
   * recognized: the at-sign immediately followed by the word "generated",
   * and the Go convention of a "Code generated ... DO NOT EDIT." line
   * comment.
   *
   * @param DifferentialChangeset $changeset Changeset to test.
   * @return bool True if a marker is present in the new file content.
   */
  private function isUntrustedGeneratedCode(DifferentialChangeset $changeset) {
    if ($changeset->getHunks()) {
      $new_data = $changeset->makeNewFile();

      // NOTE: The marker is assembled by concatenation, presumably so that
      // this source file does not itself contain the literal marker.
      if (strpos($new_data, '@'.'generated') !== false) {
        return true;
      }

      // See PHI1112. This is the official pattern for marking Go code as
      // generated.
      if (preg_match('(^// Code generated .* DO NOT EDIT\.$)m', $new_data)) {
        return true;
      }
    }

    return false;
  }


/* -( Content Hashes )----------------------------------------------------- */


  /**
   * Compute the effect hash for a changeset and store it in the changeset
   * metadata. Nothing is written if no hash can be computed.
   *
   * @param DifferentialChangeset $changeset Changeset to update.
   * @return void
   */
  private function computeHashes(DifferentialChangeset $changeset) {
    $effect_key = DifferentialChangeset::METADATA_EFFECT_HASH;

    $effect_hash = $this->newEffectHash($changeset);
    if ($effect_hash !== null) {
      $changeset->setChangesetMetadata($effect_key, $effect_hash);
    }
  }

  /**
   * Build a digest which identifies the new state of a changeset.
   *
   * If the changeset has hunks, the digest is computed from the new file
   * text. Otherwise, if it references a new file object with a content
   * hash, the digest is computed from that content hash.
   *
   * @param DifferentialChangeset $changeset Changeset to hash.
   * @return string|null Digest, or null if neither source is available.
   */
  private function newEffectHash(DifferentialChangeset $changeset) {
    if ($changeset->getHunks()) {
      $new_data = $changeset->makeNewFile();
      return PhabricatorHash::digestForIndex($new_data);
    }

    if ($changeset->getNewFileObjectPHID()) {
      $file = $changeset->getNewFileObject();

      // See T13522. For now, the "contentHash" is not really a content hash
      // for files >4MB. This is okay: we will just always detect them as
      // changed, which is the safer behavior.

      $hash = $file->getContentHash();
      if ($hash !== null) {
        // Prefix the value so a digest derived from a file hash is computed
        // from a different input than a digest derived from raw text.
        $hash = sprintf('file-hash:%s', $hash);
        return PhabricatorHash::digestForIndex($hash);
      }
    }

    return null;
  }


/* -( Copied Code )-------------------------------------------------------- */


  /**
   * Detect added lines which were copied or moved from elsewhere in the
   * same set of changesets, and record them as "copy:lines" metadata.
   *
   * For each changeset with at least one detected block, the metadata maps
   * a new-file line number to a list of: source filename (the empty string
   * when the source is the same file), source line number, and the type of
   * the source line in the old file.
   *
   * @param list<DifferentialChangeset> $changesets Changesets to analyze.
   * @return void
   */
  private function detectCopiedCode(array $changesets) {
    // See PHI944. If the total number of changed lines is excessively large,
    // don't bother with copied code detection. This can take a lot of time and
    // memory and it's not generally of any use for very large changes.
    $max_size = 65535;

    $total_size = 0;
    foreach ($changesets as $changeset) {
      $total_size += ($changeset->getAddLines() + $changeset->getDelLines());
    }

    if ($total_size > $max_size) {
      return;
    }

    // Minimum trimmed line length for a line to seed a match, minimum block
    // length for a match to be reported, and the maximum number of identical
    // source lines we are willing to explore for a single added line.
    $min_width = 30;
    $min_lines = 3;
    $max_candidates = 16;

    // Index the old side of every hunk:
    //   $files: filename -> line number -> trimmed text
    //   $types: filename -> line number -> line type
    //   $map:   trimmed text -> list of (filename, line number)
    $map = array();
    $files = array();
    $types = array();
    foreach ($changesets as $changeset) {
      $file = $changeset->getFilename();
      foreach ($changeset->getHunks() as $hunk) {
        $lines = $hunk->getStructuredOldFile();
        foreach ($lines as $line => $info) {
          $type = $info['type'];
          if ($type == '\\') {
            // Lines of this type are not indexed at all.
            continue;
          }
          $types[$file][$line] = $type;

          $text = $info['text'];
          $text = trim($text);
          $files[$file][$line] = $text;

          if (strlen($text) >= $min_width) {
            $map[$text][] = array($file, $line);
          }
        }
      }
    }

    foreach ($changesets as $changeset) {
      $copies = array();
      foreach ($changeset->getHunks() as $hunk) {
        $added = $hunk->getStructuredNewFile();
        $atype = array();

        // Split the structured lines into parallel maps of type and
        // trimmed text, both keyed by new-file line number.
        foreach ($added as $line => $info) {
          $atype[$line] = $info['type'];
          $added[$line] = trim($info['text']);
        }

        $skip_lines = 0;
        foreach ($added as $line => $code) {
          if ($skip_lines) {
            // We're skipping lines that we already processed because we
            // extended a block above them downward to include them.
            $skip_lines--;
            continue;
          }

          if ($atype[$line] !== '+') {
            // This line hasn't been changed in the new file, so don't try
            // to figure out where it came from.
            continue;
          }

          if (empty($map[$code])) {
            // This line has no entry in the index: it was either too short
            // to trigger copy/move detection or does not appear on the old
            // side of any hunk.
            continue;
          }

          if (count($map[$code]) > $max_candidates) {
            // If there are a large number of identical lines in this diff,
            // don't try to figure out where this block came from: the analysis
            // is O(N^2), since we need to compare every line against every
            // other line. Even if we arrive at a result, it is unlikely to be
            // meaningful. See T5041.
            continue;
          }

          // The index entry is nonempty, so the first candidate below always
          // replaces these initial values.
          $best_length = 0;
          $best_offset = 0;
          $best_file = null;
          $best_line = null;

          // Explore all candidates.
          foreach ($map[$code] as $val) {
            list($file, $orig_line) = $val;
            $length = 1;

            // Search backward and forward to find all of the adjacent lines
            // which match.
            foreach (array(-1, 1) as $direction) {
              $offset = $direction;
              while (true) {
                if (isset($copies[$line + $offset])) {
                  // If we run into a block above us which we've already
                  // attributed to a move or copy from elsewhere, stop
                  // looking.
                  break;
                }

                if (!isset($added[$line + $offset])) {
                  // If we've run off the beginning or end of the new file,
                  // stop looking.
                  break;
                }

                if (!isset($files[$file][$orig_line + $offset])) {
                  // If we've run off the beginning or end of the original
                  // file, we also stop looking.
                  break;
                }

                $old = $files[$file][$orig_line + $offset];
                $new = $added[$line + $offset];
                if ($old !== $new) {
                  // If the old line doesn't match the new line, stop
                  // looking.
                  break;
                }

                $length++;
                $offset += $direction;
              }
            }

            if ($length < $best_length) {
              // If we already know of a better source (more matching lines)
              // for this move/copy, stick with that one. We prefer long
              // copies/moves which match a lot of context over short ones.
              continue;
            }

            if ($length == $best_length) {
              if (idx($types[$file], $orig_line) != '-') {
                // If we already know of an equally good source (same number
                // of matching lines) and this isn't a move, stick with the
                // other one. We prefer moves over copies.
                continue;
              }
            }

            $best_length = $length;
            // The forward search ran last, so $offset is one past the final
            // matching line: ($offset - 1) is the number of forward matches.
            $best_offset = $offset - 1;
            $best_file = $file;
            $best_line = $orig_line;
          }

          // Use strict comparison here. Filenames are strings, and a loose
          // comparison treats distinct numeric-looking names (for example,
          // "1.0" and "1.00") as equal, which would misreport a copy from
          // another file as a copy from within this file.
          $is_same_file = ($best_file === $changeset->getFilename());
          $file = ($is_same_file ? '' : $best_file);

          // Record every line of the block, from its last line back to its
          // first. Short blocks are recorded as empty entries so that later
          // searches still stop at them; they are filtered out below.
          for ($i = $best_length; $i--; ) {
            $type = idx($types[$best_file], $best_line + $best_offset - $i);
            $copies[$line + $best_offset - $i] = ($best_length < $min_lines
              ? array() // Ignore short blocks.
              : array($file, $best_line + $best_offset - $i, $type));
          }

          $skip_lines = $best_offset;
        }
      }

      // Drop the empty placeholder entries left behind by short blocks.
      $copies = array_filter($copies);
      if ($copies) {
        $metadata = $changeset->getMetadata();
        $metadata['copy:lines'] = $copies;
        $changeset->setMetadata($metadata);
      }
    }

  }

}
335
336