Path: blob/main/extensions/copilot/src/platform/editSurvivalTracking/common/editSurvivalTracker.ts
13401 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { StringEdit } from '../../../util/vs/editor/common/core/edits/stringEdit';6import { OffsetRange } from '../../../util/vs/editor/common/core/ranges/offsetRange';78/**9* Tracks how much given edits surive after applying other edits through `handleEdits`.10*/11export class EditSurvivalTracker {12private _text: string;13private readonly _originalEdits: StringEdit;14private _combinedEditsSinceStart = StringEdit.empty;15private readonly _textAfterTrackedEdits: string;1617constructor(18private readonly originalText: string,19trackedEdits: StringEdit,20) {21this._text = trackedEdits.apply(this.originalText);22this._textAfterTrackedEdits = this._text;23this._originalEdits = trackedEdits;24}2526handleEdits(edit: StringEdit): void {27const newText = edit.apply(this._text);28let newEdits = this._combinedEditsSinceStart.compose(edit);29newEdits = newEdits.removeCommonSuffixPrefix(this._textAfterTrackedEdits);30this._combinedEditsSinceStart = newEdits;31this._text = newText;32}3334/**35* fourGram: Number between 0 (no edits survived) and 1 (all edits survived).36* noRevert: Number between 0 (the text after user edits equals the text before the AI edits) and 1 (the text after user edits does not revert any text to the initial state)37*/38computeTrackedEditsSurvivalScore(): { fourGram: number; noRevert: number; textBeforeAiEdits: string[]; textAfterAiEdits: string[]; textAfterUserEdits: string[] } {39let similarityScoreSumFourGram = 0;40let similarityScoreSumMax = 0;4142let noRevertSum = 0;43let noRevertSumMax = 0;4445const allTextBefore: string[] = [];46const allTextAfter: string[] = [];47const allTextCurrent: string[] = [];4849const ranges = this._originalEdits.getNewRanges();50const updatedRanges = applyEditsToRanges(ranges, this._combinedEditsSinceStart);5152for (let i = 0; i < ranges.length; i++) {53const originalEdit = this._originalEdits.replacements[i];5455const textBeforeAiEdits = this.originalText.substring(originalEdit.replaceRange.start, originalEdit.replaceRange.endExclusive);56const textAfterAiEdits = originalEdit.newText;57const newRange = updatedRanges[i];58const textAfterUserEdits = this._text.substring(newRange.start, newRange.endExclusive);5960allTextBefore.push(textBeforeAiEdits);61allTextAfter.push(textAfterAiEdits);62allTextCurrent.push(textAfterUserEdits);6364const similarity = compute4GramTextSimilarity(textAfterUserEdits, textAfterAiEdits);6566const aiEditSimilarity = compute4GramTextSimilarity(textAfterAiEdits, textBeforeAiEdits);67const userEditSimilarity = compute4GramTextSimilarity(textAfterUserEdits, textBeforeAiEdits);68if (aiEditSimilarity !== 1) {69// Should not happen, as the ai edit does not do no-ops70const v = 1 - Math.max(userEditSimilarity - aiEditSimilarity, 0) / (1 - aiEditSimilarity);71noRevertSum += originalEdit.replaceRange.length * v;72noRevertSumMax += originalEdit.replaceRange.length;73}7475const similarityScoreFourGram = originalEdit.newText.length * similarity;76const similarityScoreMax = originalEdit.newText.length;7778similarityScoreSumFourGram += similarityScoreFourGram;79similarityScoreSumMax += similarityScoreMax;80}8182return {83fourGram: similarityScoreSumMax === 0 ? 1 : (similarityScoreSumFourGram / similarityScoreSumMax),84noRevert: noRevertSumMax === 0 ? 1 : (noRevertSum / noRevertSumMax),85textBeforeAiEdits: allTextBefore,86textAfterAiEdits: allTextAfter,87textAfterUserEdits: allTextCurrent,88};89}90}9192/**93* Computes a number between 0 and 1 that reflects how similar the two texts are.94* Counts how many 4-grams are shared between the two texts.95*/96export function compute4GramTextSimilarity(text1: string, text2: string): number {97const n = 4;9899if (text1.length < n || text2.length < n) {100return text1 === text2 ? 1 : 0;101}102103const nGramIdx = new Map<string, number>();104105for (let i = 0; i <= text1.length - n; i++) {106const nGram = text1.substring(i, i + n);107const count = nGramIdx.get(nGram) || 0;108nGramIdx.set(nGram, count + 1);109}110111for (let i = 0; i <= text2.length - n; i++) {112const nGram = text2.substring(i, i + n);113const count = nGramIdx.get(nGram) || 0;114nGramIdx.set(nGram, count - 1);115}116117const totalNGramCount = text1.length - n + 1 + text2.length - n + 1;118119let differentNGramCount = 0;120for (const count of nGramIdx.values()) {121differentNGramCount += Math.abs(count);122}123124const equalNGramCount = totalNGramCount - differentNGramCount;125126return equalNGramCount / totalNGramCount;127}128129export function applyEditsToRanges(sortedRanges: OffsetRange[], edits: StringEdit): OffsetRange[] {130sortedRanges = sortedRanges.slice();131132// treat edits as deletion of the replace range and then as insertion that extends the first range133const result: OffsetRange[] = [];134135let offset = 0;136137for (const e of edits.replacements) {138while (true) {139// ranges before the current edit140const r = sortedRanges[0];141if (!r || r.endExclusive >= e.replaceRange.start) {142break;143}144sortedRanges.shift();145result.push(r.delta(offset));146}147148const intersecting: OffsetRange[] = [];149while (true) {150const r = sortedRanges[0];151if (!r || !r.intersectsOrTouches(e.replaceRange)) {152break;153}154sortedRanges.shift();155intersecting.push(r);156}157158for (let i = intersecting.length - 1; i >= 0; i--) {159let r = intersecting[i];160161const overlap = r.intersect(e.replaceRange)!.length;162r = r.deltaEnd(-overlap + (i === 0 ? e.newText.length : 0));163164const rangeAheadOfReplaceRange = r.start - e.replaceRange.start;165if (rangeAheadOfReplaceRange > 0) {166r = r.delta(-rangeAheadOfReplaceRange);167}168169if (i !== 0) {170r = r.delta(e.newText.length);171}172173// We already took our offset into account.174// Because we add r back to the queue (which then adds offset again),175// we have to remove it here.176r = r.delta(-(e.newText.length - e.replaceRange.length));177178sortedRanges.unshift(r);179}180181offset += e.newText.length - e.replaceRange.length;182}183184while (true) {185const r = sortedRanges[0];186if (!r) {187break;188}189sortedRanges.shift();190result.push(r.delta(offset));191}192193return result;194}195196197