Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/diff/defaultLinesDiffComputer/heuristicSequenceOptimizations.ts
3296 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { forEachWithNeighbors } from '../../../../base/common/arrays.js';
7
import { OffsetRange } from '../../core/ranges/offsetRange.js';
8
import { ISequence, OffsetPair, SequenceDiff } from './algorithms/diffAlgorithm.js';
9
import { LineSequence } from './lineSequence.js';
10
import { LinesSliceCharSequence } from './linesSliceCharSequence.js';
11
12
export function optimizeSequenceDiffs(sequence1: ISequence, sequence2: ISequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {
13
let result = sequenceDiffs;
14
result = joinSequenceDiffsByShifting(sequence1, sequence2, result);
15
// Sometimes, calling this function twice improves the result.
16
// Uncomment the second invocation and run the tests to see the difference.
17
result = joinSequenceDiffsByShifting(sequence1, sequence2, result);
18
result = shiftSequenceDiffs(sequence1, sequence2, result);
19
return result;
20
}
21
22
/**
23
* This function fixes issues like this:
24
* ```
25
* import { Baz, Bar } from "foo";
26
* ```
27
* <->
28
* ```
29
* import { Baz, Bar, Foo } from "foo";
30
* ```
31
* Computed diff: [ {Add "," after Bar}, {Add "Foo " after space} }
32
* Improved diff: [{Add ", Foo" after Bar}]
33
*/
34
function joinSequenceDiffsByShifting(sequence1: ISequence, sequence2: ISequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {
35
if (sequenceDiffs.length === 0) {
36
return sequenceDiffs;
37
}
38
39
const result: SequenceDiff[] = [];
40
result.push(sequenceDiffs[0]);
41
42
// First move them all to the left as much as possible and join them if possible
43
for (let i = 1; i < sequenceDiffs.length; i++) {
44
const prevResult = result[result.length - 1];
45
let cur = sequenceDiffs[i];
46
47
if (cur.seq1Range.isEmpty || cur.seq2Range.isEmpty) {
48
const length = cur.seq1Range.start - prevResult.seq1Range.endExclusive;
49
let d;
50
for (d = 1; d <= length; d++) {
51
if (
52
sequence1.getElement(cur.seq1Range.start - d) !== sequence1.getElement(cur.seq1Range.endExclusive - d) ||
53
sequence2.getElement(cur.seq2Range.start - d) !== sequence2.getElement(cur.seq2Range.endExclusive - d)) {
54
break;
55
}
56
}
57
d--;
58
59
if (d === length) {
60
// Merge previous and current diff
61
result[result.length - 1] = new SequenceDiff(
62
new OffsetRange(prevResult.seq1Range.start, cur.seq1Range.endExclusive - length),
63
new OffsetRange(prevResult.seq2Range.start, cur.seq2Range.endExclusive - length),
64
);
65
continue;
66
}
67
68
cur = cur.delta(-d);
69
}
70
71
result.push(cur);
72
}
73
74
const result2: SequenceDiff[] = [];
75
// Then move them all to the right and join them again if possible
76
for (let i = 0; i < result.length - 1; i++) {
77
const nextResult = result[i + 1];
78
let cur = result[i];
79
80
if (cur.seq1Range.isEmpty || cur.seq2Range.isEmpty) {
81
const length = nextResult.seq1Range.start - cur.seq1Range.endExclusive;
82
let d;
83
for (d = 0; d < length; d++) {
84
if (
85
!sequence1.isStronglyEqual(cur.seq1Range.start + d, cur.seq1Range.endExclusive + d) ||
86
!sequence2.isStronglyEqual(cur.seq2Range.start + d, cur.seq2Range.endExclusive + d)
87
) {
88
break;
89
}
90
}
91
92
if (d === length) {
93
// Merge previous and current diff, write to result!
94
result[i + 1] = new SequenceDiff(
95
new OffsetRange(cur.seq1Range.start + length, nextResult.seq1Range.endExclusive),
96
new OffsetRange(cur.seq2Range.start + length, nextResult.seq2Range.endExclusive),
97
);
98
continue;
99
}
100
101
if (d > 0) {
102
cur = cur.delta(d);
103
}
104
}
105
106
result2.push(cur);
107
}
108
109
if (result.length > 0) {
110
result2.push(result[result.length - 1]);
111
}
112
113
return result2;
114
}
115
116
// align character level diffs at whitespace characters
117
// import { IBar } from "foo";
118
// import { I[Arr, I]Bar } from "foo";
119
// ->
120
// import { [IArr, ]IBar } from "foo";
121
122
// import { ITransaction, observableValue, transaction } from 'vs/base/common/observable';
123
// import { ITransaction, observable[FromEvent, observable]Value, transaction } from 'vs/base/common/observable';
124
// ->
125
// import { ITransaction, [observableFromEvent, ]observableValue, transaction } from 'vs/base/common/observable';
126
127
// collectBrackets(level + 1, levelPerBracketType);
128
// collectBrackets(level + 1, levelPerBracket[ + 1, levelPerBracket]Type);
129
// ->
130
// collectBrackets(level + 1, [levelPerBracket + 1, ]levelPerBracketType);
131
132
function shiftSequenceDiffs(sequence1: ISequence, sequence2: ISequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {
133
if (!sequence1.getBoundaryScore || !sequence2.getBoundaryScore) {
134
return sequenceDiffs;
135
}
136
137
for (let i = 0; i < sequenceDiffs.length; i++) {
138
const prevDiff = (i > 0 ? sequenceDiffs[i - 1] : undefined);
139
const diff = sequenceDiffs[i];
140
const nextDiff = (i + 1 < sequenceDiffs.length ? sequenceDiffs[i + 1] : undefined);
141
142
const seq1ValidRange = new OffsetRange(prevDiff ? prevDiff.seq1Range.endExclusive + 1 : 0, nextDiff ? nextDiff.seq1Range.start - 1 : sequence1.length);
143
const seq2ValidRange = new OffsetRange(prevDiff ? prevDiff.seq2Range.endExclusive + 1 : 0, nextDiff ? nextDiff.seq2Range.start - 1 : sequence2.length);
144
145
if (diff.seq1Range.isEmpty) {
146
sequenceDiffs[i] = shiftDiffToBetterPosition(diff, sequence1, sequence2, seq1ValidRange, seq2ValidRange);
147
} else if (diff.seq2Range.isEmpty) {
148
sequenceDiffs[i] = shiftDiffToBetterPosition(diff.swap(), sequence2, sequence1, seq2ValidRange, seq1ValidRange).swap();
149
}
150
}
151
152
return sequenceDiffs;
153
}
154
155
function shiftDiffToBetterPosition(diff: SequenceDiff, sequence1: ISequence, sequence2: ISequence, seq1ValidRange: OffsetRange, seq2ValidRange: OffsetRange,) {
156
const maxShiftLimit = 100; // To prevent performance issues
157
158
// don't touch previous or next!
159
let deltaBefore = 1;
160
while (
161
diff.seq1Range.start - deltaBefore >= seq1ValidRange.start &&
162
diff.seq2Range.start - deltaBefore >= seq2ValidRange.start &&
163
sequence2.isStronglyEqual(diff.seq2Range.start - deltaBefore, diff.seq2Range.endExclusive - deltaBefore) && deltaBefore < maxShiftLimit
164
) {
165
deltaBefore++;
166
}
167
deltaBefore--;
168
169
let deltaAfter = 0;
170
while (
171
diff.seq1Range.start + deltaAfter < seq1ValidRange.endExclusive &&
172
diff.seq2Range.endExclusive + deltaAfter < seq2ValidRange.endExclusive &&
173
sequence2.isStronglyEqual(diff.seq2Range.start + deltaAfter, diff.seq2Range.endExclusive + deltaAfter) && deltaAfter < maxShiftLimit
174
) {
175
deltaAfter++;
176
}
177
178
if (deltaBefore === 0 && deltaAfter === 0) {
179
return diff;
180
}
181
182
// Visualize `[sequence1.text, diff.seq1Range.start + deltaAfter]`
183
// and `[sequence2.text, diff.seq2Range.start + deltaAfter, diff.seq2Range.endExclusive + deltaAfter]`
184
185
let bestDelta = 0;
186
let bestScore = -1;
187
// find best scored delta
188
for (let delta = -deltaBefore; delta <= deltaAfter; delta++) {
189
const seq2OffsetStart = diff.seq2Range.start + delta;
190
const seq2OffsetEndExclusive = diff.seq2Range.endExclusive + delta;
191
const seq1Offset = diff.seq1Range.start + delta;
192
193
const score = sequence1.getBoundaryScore!(seq1Offset) + sequence2.getBoundaryScore!(seq2OffsetStart) + sequence2.getBoundaryScore!(seq2OffsetEndExclusive);
194
if (score > bestScore) {
195
bestScore = score;
196
bestDelta = delta;
197
}
198
}
199
200
return diff.delta(bestDelta);
201
}
202
203
export function removeShortMatches(sequence1: ISequence, sequence2: ISequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {
204
const result: SequenceDiff[] = [];
205
for (const s of sequenceDiffs) {
206
const last = result[result.length - 1];
207
if (!last) {
208
result.push(s);
209
continue;
210
}
211
212
if (s.seq1Range.start - last.seq1Range.endExclusive <= 2 || s.seq2Range.start - last.seq2Range.endExclusive <= 2) {
213
result[result.length - 1] = new SequenceDiff(last.seq1Range.join(s.seq1Range), last.seq2Range.join(s.seq2Range));
214
} else {
215
result.push(s);
216
}
217
}
218
219
return result;
220
}
221
222
export function extendDiffsToEntireWordIfAppropriate(
223
sequence1: LinesSliceCharSequence,
224
sequence2: LinesSliceCharSequence,
225
sequenceDiffs: SequenceDiff[],
226
findParent: (seq: LinesSliceCharSequence, idx: number) => OffsetRange | undefined,
227
force: boolean = false,
228
): SequenceDiff[] {
229
const equalMappings = SequenceDiff.invert(sequenceDiffs, sequence1.length);
230
231
const additional: SequenceDiff[] = [];
232
233
let lastPoint = new OffsetPair(0, 0);
234
235
function scanWord(pair: OffsetPair, equalMapping: SequenceDiff) {
236
if (pair.offset1 < lastPoint.offset1 || pair.offset2 < lastPoint.offset2) {
237
return;
238
}
239
240
const w1 = findParent(sequence1, pair.offset1);
241
const w2 = findParent(sequence2, pair.offset2);
242
if (!w1 || !w2) {
243
return;
244
}
245
let w = new SequenceDiff(w1, w2);
246
const equalPart = w.intersect(equalMapping)!;
247
248
let equalChars1 = equalPart.seq1Range.length;
249
let equalChars2 = equalPart.seq2Range.length;
250
251
// The words do not touch previous equals mappings, as we would have processed them already.
252
// But they might touch the next ones.
253
254
while (equalMappings.length > 0) {
255
const next = equalMappings[0];
256
const intersects = next.seq1Range.intersects(w.seq1Range) || next.seq2Range.intersects(w.seq2Range);
257
if (!intersects) {
258
break;
259
}
260
261
const v1 = findParent(sequence1, next.seq1Range.start);
262
const v2 = findParent(sequence2, next.seq2Range.start);
263
// Because there is an intersection, we know that the words are not empty.
264
const v = new SequenceDiff(v1!, v2!);
265
const equalPart = v.intersect(next)!;
266
267
equalChars1 += equalPart.seq1Range.length;
268
equalChars2 += equalPart.seq2Range.length;
269
270
w = w.join(v);
271
272
if (w.seq1Range.endExclusive >= next.seq1Range.endExclusive) {
273
// The word extends beyond the next equal mapping.
274
equalMappings.shift();
275
} else {
276
break;
277
}
278
}
279
280
if ((force && equalChars1 + equalChars2 < w.seq1Range.length + w.seq2Range.length) || equalChars1 + equalChars2 < (w.seq1Range.length + w.seq2Range.length) * 2 / 3) {
281
additional.push(w);
282
}
283
284
lastPoint = w.getEndExclusives();
285
}
286
287
while (equalMappings.length > 0) {
288
const next = equalMappings.shift()!;
289
if (next.seq1Range.isEmpty) {
290
continue;
291
}
292
scanWord(next.getStarts(), next);
293
// The equal parts are not empty, so -1 gives us a character that is equal in both parts.
294
scanWord(next.getEndExclusives().delta(-1), next);
295
}
296
297
const merged = mergeSequenceDiffs(sequenceDiffs, additional);
298
return merged;
299
}
300
301
function mergeSequenceDiffs(sequenceDiffs1: SequenceDiff[], sequenceDiffs2: SequenceDiff[]): SequenceDiff[] {
302
const result: SequenceDiff[] = [];
303
304
while (sequenceDiffs1.length > 0 || sequenceDiffs2.length > 0) {
305
const sd1 = sequenceDiffs1[0];
306
const sd2 = sequenceDiffs2[0];
307
308
let next: SequenceDiff;
309
if (sd1 && (!sd2 || sd1.seq1Range.start < sd2.seq1Range.start)) {
310
next = sequenceDiffs1.shift()!;
311
} else {
312
next = sequenceDiffs2.shift()!;
313
}
314
315
if (result.length > 0 && result[result.length - 1].seq1Range.endExclusive >= next.seq1Range.start) {
316
result[result.length - 1] = result[result.length - 1].join(next);
317
} else {
318
result.push(next);
319
}
320
}
321
322
return result;
323
}
324
325
export function removeVeryShortMatchingLinesBetweenDiffs(sequence1: LineSequence, _sequence2: LineSequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {
326
let diffs = sequenceDiffs;
327
if (diffs.length === 0) {
328
return diffs;
329
}
330
331
let counter = 0;
332
let shouldRepeat: boolean;
333
do {
334
shouldRepeat = false;
335
336
const result: SequenceDiff[] = [
337
diffs[0]
338
];
339
340
for (let i = 1; i < diffs.length; i++) {
341
const cur = diffs[i];
342
const lastResult = result[result.length - 1];
343
344
function shouldJoinDiffs(before: SequenceDiff, after: SequenceDiff): boolean {
345
const unchangedRange = new OffsetRange(lastResult.seq1Range.endExclusive, cur.seq1Range.start);
346
347
const unchangedText = sequence1.getText(unchangedRange);
348
const unchangedTextWithoutWs = unchangedText.replace(/\s/g, '');
349
if (unchangedTextWithoutWs.length <= 4
350
&& (before.seq1Range.length + before.seq2Range.length > 5 || after.seq1Range.length + after.seq2Range.length > 5)) {
351
return true;
352
}
353
354
return false;
355
}
356
357
const shouldJoin = shouldJoinDiffs(lastResult, cur);
358
if (shouldJoin) {
359
shouldRepeat = true;
360
result[result.length - 1] = result[result.length - 1].join(cur);
361
} else {
362
result.push(cur);
363
}
364
}
365
366
diffs = result;
367
} while (counter++ < 10 && shouldRepeat);
368
369
return diffs;
370
}
371
372
export function removeVeryShortMatchingTextBetweenLongDiffs(sequence1: LinesSliceCharSequence, sequence2: LinesSliceCharSequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {
373
let diffs = sequenceDiffs;
374
if (diffs.length === 0) {
375
return diffs;
376
}
377
378
let counter = 0;
379
let shouldRepeat: boolean;
380
do {
381
shouldRepeat = false;
382
383
const result: SequenceDiff[] = [
384
diffs[0]
385
];
386
387
for (let i = 1; i < diffs.length; i++) {
388
const cur = diffs[i];
389
const lastResult = result[result.length - 1];
390
391
function shouldJoinDiffs(before: SequenceDiff, after: SequenceDiff): boolean {
392
const unchangedRange = new OffsetRange(lastResult.seq1Range.endExclusive, cur.seq1Range.start);
393
394
const unchangedLineCount = sequence1.countLinesIn(unchangedRange);
395
if (unchangedLineCount > 5 || unchangedRange.length > 500) {
396
return false;
397
}
398
399
const unchangedText = sequence1.getText(unchangedRange).trim();
400
if (unchangedText.length > 20 || unchangedText.split(/\r\n|\r|\n/).length > 1) {
401
return false;
402
}
403
404
const beforeLineCount1 = sequence1.countLinesIn(before.seq1Range);
405
const beforeSeq1Length = before.seq1Range.length;
406
const beforeLineCount2 = sequence2.countLinesIn(before.seq2Range);
407
const beforeSeq2Length = before.seq2Range.length;
408
409
const afterLineCount1 = sequence1.countLinesIn(after.seq1Range);
410
const afterSeq1Length = after.seq1Range.length;
411
const afterLineCount2 = sequence2.countLinesIn(after.seq2Range);
412
const afterSeq2Length = after.seq2Range.length;
413
414
// TODO: Maybe a neural net can be used to derive the result from these numbers
415
416
const max = 2 * 40 + 50;
417
function cap(v: number): number {
418
return Math.min(v, max);
419
}
420
421
if (Math.pow(Math.pow(cap(beforeLineCount1 * 40 + beforeSeq1Length), 1.5) + Math.pow(cap(beforeLineCount2 * 40 + beforeSeq2Length), 1.5), 1.5)
422
+ Math.pow(Math.pow(cap(afterLineCount1 * 40 + afterSeq1Length), 1.5) + Math.pow(cap(afterLineCount2 * 40 + afterSeq2Length), 1.5), 1.5) > ((max ** 1.5) ** 1.5) * 1.3) {
423
return true;
424
}
425
return false;
426
}
427
428
const shouldJoin = shouldJoinDiffs(lastResult, cur);
429
if (shouldJoin) {
430
shouldRepeat = true;
431
result[result.length - 1] = result[result.length - 1].join(cur);
432
} else {
433
result.push(cur);
434
}
435
}
436
437
diffs = result;
438
} while (counter++ < 10 && shouldRepeat);
439
440
const newDiffs: SequenceDiff[] = [];
441
442
// Remove short suffixes/prefixes
443
forEachWithNeighbors(diffs, (prev, cur, next) => {
444
let newDiff = cur;
445
446
function shouldMarkAsChanged(text: string): boolean {
447
return text.length > 0 && text.trim().length <= 3 && cur.seq1Range.length + cur.seq2Range.length > 100;
448
}
449
450
const fullRange1 = sequence1.extendToFullLines(cur.seq1Range);
451
const prefix = sequence1.getText(new OffsetRange(fullRange1.start, cur.seq1Range.start));
452
if (shouldMarkAsChanged(prefix)) {
453
newDiff = newDiff.deltaStart(-prefix.length);
454
}
455
const suffix = sequence1.getText(new OffsetRange(cur.seq1Range.endExclusive, fullRange1.endExclusive));
456
if (shouldMarkAsChanged(suffix)) {
457
newDiff = newDiff.deltaEnd(suffix.length);
458
}
459
460
const availableSpace = SequenceDiff.fromOffsetPairs(
461
prev ? prev.getEndExclusives() : OffsetPair.zero,
462
next ? next.getStarts() : OffsetPair.max,
463
);
464
const result = newDiff.intersect(availableSpace)!;
465
if (newDiffs.length > 0 && result.getStarts().equals(newDiffs[newDiffs.length - 1].getEndExclusives())) {
466
newDiffs[newDiffs.length - 1] = newDiffs[newDiffs.length - 1].join(result);
467
} else {
468
newDiffs.push(result);
469
}
470
});
471
472
return newDiffs;
473
}
474
475