Path: blob/main/src/vs/editor/common/diff/defaultLinesDiffComputer/heuristicSequenceOptimizations.ts
3296 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { forEachWithNeighbors } from '../../../../base/common/arrays.js';6import { OffsetRange } from '../../core/ranges/offsetRange.js';7import { ISequence, OffsetPair, SequenceDiff } from './algorithms/diffAlgorithm.js';8import { LineSequence } from './lineSequence.js';9import { LinesSliceCharSequence } from './linesSliceCharSequence.js';1011export function optimizeSequenceDiffs(sequence1: ISequence, sequence2: ISequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {12let result = sequenceDiffs;13result = joinSequenceDiffsByShifting(sequence1, sequence2, result);14// Sometimes, calling this function twice improves the result.15// Uncomment the second invocation and run the tests to see the difference.16result = joinSequenceDiffsByShifting(sequence1, sequence2, result);17result = shiftSequenceDiffs(sequence1, sequence2, result);18return result;19}2021/**22* This function fixes issues like this:23* ```24* import { Baz, Bar } from "foo";25* ```26* <->27* ```28* import { Baz, Bar, Foo } from "foo";29* ```30* Computed diff: [ {Add "," after Bar}, {Add "Foo " after space} }31* Improved diff: [{Add ", Foo" after Bar}]32*/33function joinSequenceDiffsByShifting(sequence1: ISequence, sequence2: ISequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {34if (sequenceDiffs.length === 0) {35return sequenceDiffs;36}3738const result: SequenceDiff[] = [];39result.push(sequenceDiffs[0]);4041// First move them all to the left as much as possible and join them if possible42for (let i = 1; i < sequenceDiffs.length; i++) {43const prevResult = result[result.length - 1];44let cur = sequenceDiffs[i];4546if (cur.seq1Range.isEmpty || cur.seq2Range.isEmpty) {47const length = cur.seq1Range.start - prevResult.seq1Range.endExclusive;48let d;49for (d = 1; d <= length; d++) {50if (51sequence1.getElement(cur.seq1Range.start - d) !== sequence1.getElement(cur.seq1Range.endExclusive - d) ||52sequence2.getElement(cur.seq2Range.start - d) !== sequence2.getElement(cur.seq2Range.endExclusive - d)) {53break;54}55}56d--;5758if (d === length) {59// Merge previous and current diff60result[result.length - 1] = new SequenceDiff(61new OffsetRange(prevResult.seq1Range.start, cur.seq1Range.endExclusive - length),62new OffsetRange(prevResult.seq2Range.start, cur.seq2Range.endExclusive - length),63);64continue;65}6667cur = cur.delta(-d);68}6970result.push(cur);71}7273const result2: SequenceDiff[] = [];74// Then move them all to the right and join them again if possible75for (let i = 0; i < result.length - 1; i++) {76const nextResult = result[i + 1];77let cur = result[i];7879if (cur.seq1Range.isEmpty || cur.seq2Range.isEmpty) {80const length = nextResult.seq1Range.start - cur.seq1Range.endExclusive;81let d;82for (d = 0; d < length; d++) {83if (84!sequence1.isStronglyEqual(cur.seq1Range.start + d, cur.seq1Range.endExclusive + d) ||85!sequence2.isStronglyEqual(cur.seq2Range.start + d, cur.seq2Range.endExclusive + d)86) {87break;88}89}9091if (d === length) {92// Merge previous and current diff, write to result!93result[i + 1] = new SequenceDiff(94new OffsetRange(cur.seq1Range.start + length, nextResult.seq1Range.endExclusive),95new OffsetRange(cur.seq2Range.start + length, nextResult.seq2Range.endExclusive),96);97continue;98}99100if (d > 0) {101cur = cur.delta(d);102}103}104105result2.push(cur);106}107108if (result.length > 0) {109result2.push(result[result.length - 1]);110}111112return result2;113}114115// align character level diffs at whitespace characters116// import { IBar } from "foo";117// import { I[Arr, I]Bar } from "foo";118// ->119// import { [IArr, ]IBar } from "foo";120121// import { ITransaction, observableValue, transaction } from 'vs/base/common/observable';122// import { ITransaction, observable[FromEvent, observable]Value, transaction } from 'vs/base/common/observable';123// ->124// import { ITransaction, [observableFromEvent, ]observableValue, transaction } from 'vs/base/common/observable';125126// collectBrackets(level + 1, levelPerBracketType);127// collectBrackets(level + 1, levelPerBracket[ + 1, levelPerBracket]Type);128// ->129// collectBrackets(level + 1, [levelPerBracket + 1, ]levelPerBracketType);130131function shiftSequenceDiffs(sequence1: ISequence, sequence2: ISequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {132if (!sequence1.getBoundaryScore || !sequence2.getBoundaryScore) {133return sequenceDiffs;134}135136for (let i = 0; i < sequenceDiffs.length; i++) {137const prevDiff = (i > 0 ? sequenceDiffs[i - 1] : undefined);138const diff = sequenceDiffs[i];139const nextDiff = (i + 1 < sequenceDiffs.length ? sequenceDiffs[i + 1] : undefined);140141const seq1ValidRange = new OffsetRange(prevDiff ? prevDiff.seq1Range.endExclusive + 1 : 0, nextDiff ? nextDiff.seq1Range.start - 1 : sequence1.length);142const seq2ValidRange = new OffsetRange(prevDiff ? prevDiff.seq2Range.endExclusive + 1 : 0, nextDiff ? nextDiff.seq2Range.start - 1 : sequence2.length);143144if (diff.seq1Range.isEmpty) {145sequenceDiffs[i] = shiftDiffToBetterPosition(diff, sequence1, sequence2, seq1ValidRange, seq2ValidRange);146} else if (diff.seq2Range.isEmpty) {147sequenceDiffs[i] = shiftDiffToBetterPosition(diff.swap(), sequence2, sequence1, seq2ValidRange, seq1ValidRange).swap();148}149}150151return sequenceDiffs;152}153154function shiftDiffToBetterPosition(diff: SequenceDiff, sequence1: ISequence, sequence2: ISequence, seq1ValidRange: OffsetRange, seq2ValidRange: OffsetRange,) {155const maxShiftLimit = 100; // To prevent performance issues156157// don't touch previous or next!158let deltaBefore = 1;159while (160diff.seq1Range.start - deltaBefore >= seq1ValidRange.start &&161diff.seq2Range.start - deltaBefore >= seq2ValidRange.start &&162sequence2.isStronglyEqual(diff.seq2Range.start - deltaBefore, diff.seq2Range.endExclusive - deltaBefore) && deltaBefore < maxShiftLimit163) {164deltaBefore++;165}166deltaBefore--;167168let deltaAfter = 0;169while (170diff.seq1Range.start + deltaAfter < seq1ValidRange.endExclusive &&171diff.seq2Range.endExclusive + deltaAfter < seq2ValidRange.endExclusive &&172sequence2.isStronglyEqual(diff.seq2Range.start + deltaAfter, diff.seq2Range.endExclusive + deltaAfter) && deltaAfter < maxShiftLimit173) {174deltaAfter++;175}176177if (deltaBefore === 0 && deltaAfter === 0) {178return diff;179}180181// Visualize `[sequence1.text, diff.seq1Range.start + deltaAfter]`182// and `[sequence2.text, diff.seq2Range.start + deltaAfter, diff.seq2Range.endExclusive + deltaAfter]`183184let bestDelta = 0;185let bestScore = -1;186// find best scored delta187for (let delta = -deltaBefore; delta <= deltaAfter; delta++) {188const seq2OffsetStart = diff.seq2Range.start + delta;189const seq2OffsetEndExclusive = diff.seq2Range.endExclusive + delta;190const seq1Offset = diff.seq1Range.start + delta;191192const score = sequence1.getBoundaryScore!(seq1Offset) + sequence2.getBoundaryScore!(seq2OffsetStart) + sequence2.getBoundaryScore!(seq2OffsetEndExclusive);193if (score > bestScore) {194bestScore = score;195bestDelta = delta;196}197}198199return diff.delta(bestDelta);200}201202export function removeShortMatches(sequence1: ISequence, sequence2: ISequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {203const result: SequenceDiff[] = [];204for (const s of sequenceDiffs) {205const last = result[result.length - 1];206if (!last) {207result.push(s);208continue;209}210211if (s.seq1Range.start - last.seq1Range.endExclusive <= 2 || s.seq2Range.start - last.seq2Range.endExclusive <= 2) {212result[result.length - 1] = new SequenceDiff(last.seq1Range.join(s.seq1Range), last.seq2Range.join(s.seq2Range));213} else {214result.push(s);215}216}217218return result;219}220221export function extendDiffsToEntireWordIfAppropriate(222sequence1: LinesSliceCharSequence,223sequence2: LinesSliceCharSequence,224sequenceDiffs: SequenceDiff[],225findParent: (seq: LinesSliceCharSequence, idx: number) => OffsetRange | undefined,226force: boolean = false,227): SequenceDiff[] {228const equalMappings = SequenceDiff.invert(sequenceDiffs, sequence1.length);229230const additional: SequenceDiff[] = [];231232let lastPoint = new OffsetPair(0, 0);233234function scanWord(pair: OffsetPair, equalMapping: SequenceDiff) {235if (pair.offset1 < lastPoint.offset1 || pair.offset2 < lastPoint.offset2) {236return;237}238239const w1 = findParent(sequence1, pair.offset1);240const w2 = findParent(sequence2, pair.offset2);241if (!w1 || !w2) {242return;243}244let w = new SequenceDiff(w1, w2);245const equalPart = w.intersect(equalMapping)!;246247let equalChars1 = equalPart.seq1Range.length;248let equalChars2 = equalPart.seq2Range.length;249250// The words do not touch previous equals mappings, as we would have processed them already.251// But they might touch the next ones.252253while (equalMappings.length > 0) {254const next = equalMappings[0];255const intersects = next.seq1Range.intersects(w.seq1Range) || next.seq2Range.intersects(w.seq2Range);256if (!intersects) {257break;258}259260const v1 = findParent(sequence1, next.seq1Range.start);261const v2 = findParent(sequence2, next.seq2Range.start);262// Because there is an intersection, we know that the words are not empty.263const v = new SequenceDiff(v1!, v2!);264const equalPart = v.intersect(next)!;265266equalChars1 += equalPart.seq1Range.length;267equalChars2 += equalPart.seq2Range.length;268269w = w.join(v);270271if (w.seq1Range.endExclusive >= next.seq1Range.endExclusive) {272// The word extends beyond the next equal mapping.273equalMappings.shift();274} else {275break;276}277}278279if ((force && equalChars1 + equalChars2 < w.seq1Range.length + w.seq2Range.length) || equalChars1 + equalChars2 < (w.seq1Range.length + w.seq2Range.length) * 2 / 3) {280additional.push(w);281}282283lastPoint = w.getEndExclusives();284}285286while (equalMappings.length > 0) {287const next = equalMappings.shift()!;288if (next.seq1Range.isEmpty) {289continue;290}291scanWord(next.getStarts(), next);292// The equal parts are not empty, so -1 gives us a character that is equal in both parts.293scanWord(next.getEndExclusives().delta(-1), next);294}295296const merged = mergeSequenceDiffs(sequenceDiffs, additional);297return merged;298}299300function mergeSequenceDiffs(sequenceDiffs1: SequenceDiff[], sequenceDiffs2: SequenceDiff[]): SequenceDiff[] {301const result: SequenceDiff[] = [];302303while (sequenceDiffs1.length > 0 || sequenceDiffs2.length > 0) {304const sd1 = sequenceDiffs1[0];305const sd2 = sequenceDiffs2[0];306307let next: SequenceDiff;308if (sd1 && (!sd2 || sd1.seq1Range.start < sd2.seq1Range.start)) {309next = sequenceDiffs1.shift()!;310} else {311next = sequenceDiffs2.shift()!;312}313314if (result.length > 0 && result[result.length - 1].seq1Range.endExclusive >= next.seq1Range.start) {315result[result.length - 1] = result[result.length - 1].join(next);316} else {317result.push(next);318}319}320321return result;322}323324export function removeVeryShortMatchingLinesBetweenDiffs(sequence1: LineSequence, _sequence2: LineSequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {325let diffs = sequenceDiffs;326if (diffs.length === 0) {327return diffs;328}329330let counter = 0;331let shouldRepeat: boolean;332do {333shouldRepeat = false;334335const result: SequenceDiff[] = [336diffs[0]337];338339for (let i = 1; i < diffs.length; i++) {340const cur = diffs[i];341const lastResult = result[result.length - 1];342343function shouldJoinDiffs(before: SequenceDiff, after: SequenceDiff): boolean {344const unchangedRange = new OffsetRange(lastResult.seq1Range.endExclusive, cur.seq1Range.start);345346const unchangedText = sequence1.getText(unchangedRange);347const unchangedTextWithoutWs = unchangedText.replace(/\s/g, '');348if (unchangedTextWithoutWs.length <= 4349&& (before.seq1Range.length + before.seq2Range.length > 5 || after.seq1Range.length + after.seq2Range.length > 5)) {350return true;351}352353return false;354}355356const shouldJoin = shouldJoinDiffs(lastResult, cur);357if (shouldJoin) {358shouldRepeat = true;359result[result.length - 1] = result[result.length - 1].join(cur);360} else {361result.push(cur);362}363}364365diffs = result;366} while (counter++ < 10 && shouldRepeat);367368return diffs;369}370371export function removeVeryShortMatchingTextBetweenLongDiffs(sequence1: LinesSliceCharSequence, sequence2: LinesSliceCharSequence, sequenceDiffs: SequenceDiff[]): SequenceDiff[] {372let diffs = sequenceDiffs;373if (diffs.length === 0) {374return diffs;375}376377let counter = 0;378let shouldRepeat: boolean;379do {380shouldRepeat = false;381382const result: SequenceDiff[] = [383diffs[0]384];385386for (let i = 1; i < diffs.length; i++) {387const cur = diffs[i];388const lastResult = result[result.length - 1];389390function shouldJoinDiffs(before: SequenceDiff, after: SequenceDiff): boolean {391const unchangedRange = new OffsetRange(lastResult.seq1Range.endExclusive, cur.seq1Range.start);392393const unchangedLineCount = sequence1.countLinesIn(unchangedRange);394if (unchangedLineCount > 5 || unchangedRange.length > 500) {395return false;396}397398const unchangedText = sequence1.getText(unchangedRange).trim();399if (unchangedText.length > 20 || unchangedText.split(/\r\n|\r|\n/).length > 1) {400return false;401}402403const beforeLineCount1 = sequence1.countLinesIn(before.seq1Range);404const beforeSeq1Length = before.seq1Range.length;405const beforeLineCount2 = sequence2.countLinesIn(before.seq2Range);406const beforeSeq2Length = before.seq2Range.length;407408const afterLineCount1 = sequence1.countLinesIn(after.seq1Range);409const afterSeq1Length = after.seq1Range.length;410const afterLineCount2 = sequence2.countLinesIn(after.seq2Range);411const afterSeq2Length = after.seq2Range.length;412413// TODO: Maybe a neural net can be used to derive the result from these numbers414415const max = 2 * 40 + 50;416function cap(v: number): number {417return Math.min(v, max);418}419420if (Math.pow(Math.pow(cap(beforeLineCount1 * 40 + beforeSeq1Length), 1.5) + Math.pow(cap(beforeLineCount2 * 40 + beforeSeq2Length), 1.5), 1.5)421+ Math.pow(Math.pow(cap(afterLineCount1 * 40 + afterSeq1Length), 1.5) + Math.pow(cap(afterLineCount2 * 40 + afterSeq2Length), 1.5), 1.5) > ((max ** 1.5) ** 1.5) * 1.3) {422return true;423}424return false;425}426427const shouldJoin = shouldJoinDiffs(lastResult, cur);428if (shouldJoin) {429shouldRepeat = true;430result[result.length - 1] = result[result.length - 1].join(cur);431} else {432result.push(cur);433}434}435436diffs = result;437} while (counter++ < 10 && shouldRepeat);438439const newDiffs: SequenceDiff[] = [];440441// Remove short suffixes/prefixes442forEachWithNeighbors(diffs, (prev, cur, next) => {443let newDiff = cur;444445function shouldMarkAsChanged(text: string): boolean {446return text.length > 0 && text.trim().length <= 3 && cur.seq1Range.length + cur.seq2Range.length > 100;447}448449const fullRange1 = sequence1.extendToFullLines(cur.seq1Range);450const prefix = sequence1.getText(new OffsetRange(fullRange1.start, cur.seq1Range.start));451if (shouldMarkAsChanged(prefix)) {452newDiff = newDiff.deltaStart(-prefix.length);453}454const suffix = sequence1.getText(new OffsetRange(cur.seq1Range.endExclusive, fullRange1.endExclusive));455if (shouldMarkAsChanged(suffix)) {456newDiff = newDiff.deltaEnd(suffix.length);457}458459const availableSpace = SequenceDiff.fromOffsetPairs(460prev ? prev.getEndExclusives() : OffsetPair.zero,461next ? next.getStarts() : OffsetPair.max,462);463const result = newDiff.intersect(availableSpace)!;464if (newDiffs.length > 0 && result.getStarts().equals(newDiffs[newDiffs.length - 1].getEndExclusives())) {465newDiffs[newDiffs.length - 1] = newDiffs[newDiffs.length - 1].join(result);466} else {467newDiffs.push(result);468}469});470471return newDiffs;472}473474475