Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/editSurvivalTracking/common/editSurvivalTracker.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { StringEdit } from '../../../util/vs/editor/common/core/edits/stringEdit';
7
import { OffsetRange } from '../../../util/vs/editor/common/core/ranges/offsetRange';
8
9
/**
10
* Tracks how much given edits surive after applying other edits through `handleEdits`.
11
*/
12
export class EditSurvivalTracker {
13
private _text: string;
14
private readonly _originalEdits: StringEdit;
15
private _combinedEditsSinceStart = StringEdit.empty;
16
private readonly _textAfterTrackedEdits: string;
17
18
constructor(
19
private readonly originalText: string,
20
trackedEdits: StringEdit,
21
) {
22
this._text = trackedEdits.apply(this.originalText);
23
this._textAfterTrackedEdits = this._text;
24
this._originalEdits = trackedEdits;
25
}
26
27
handleEdits(edit: StringEdit): void {
28
const newText = edit.apply(this._text);
29
let newEdits = this._combinedEditsSinceStart.compose(edit);
30
newEdits = newEdits.removeCommonSuffixPrefix(this._textAfterTrackedEdits);
31
this._combinedEditsSinceStart = newEdits;
32
this._text = newText;
33
}
34
35
/**
36
* fourGram: Number between 0 (no edits survived) and 1 (all edits survived).
37
* noRevert: Number between 0 (the text after user edits equals the text before the AI edits) and 1 (the text after user edits does not revert any text to the initial state)
38
*/
39
computeTrackedEditsSurvivalScore(): { fourGram: number; noRevert: number; textBeforeAiEdits: string[]; textAfterAiEdits: string[]; textAfterUserEdits: string[] } {
40
let similarityScoreSumFourGram = 0;
41
let similarityScoreSumMax = 0;
42
43
let noRevertSum = 0;
44
let noRevertSumMax = 0;
45
46
const allTextBefore: string[] = [];
47
const allTextAfter: string[] = [];
48
const allTextCurrent: string[] = [];
49
50
const ranges = this._originalEdits.getNewRanges();
51
const updatedRanges = applyEditsToRanges(ranges, this._combinedEditsSinceStart);
52
53
for (let i = 0; i < ranges.length; i++) {
54
const originalEdit = this._originalEdits.replacements[i];
55
56
const textBeforeAiEdits = this.originalText.substring(originalEdit.replaceRange.start, originalEdit.replaceRange.endExclusive);
57
const textAfterAiEdits = originalEdit.newText;
58
const newRange = updatedRanges[i];
59
const textAfterUserEdits = this._text.substring(newRange.start, newRange.endExclusive);
60
61
allTextBefore.push(textBeforeAiEdits);
62
allTextAfter.push(textAfterAiEdits);
63
allTextCurrent.push(textAfterUserEdits);
64
65
const similarity = compute4GramTextSimilarity(textAfterUserEdits, textAfterAiEdits);
66
67
const aiEditSimilarity = compute4GramTextSimilarity(textAfterAiEdits, textBeforeAiEdits);
68
const userEditSimilarity = compute4GramTextSimilarity(textAfterUserEdits, textBeforeAiEdits);
69
if (aiEditSimilarity !== 1) {
70
// Should not happen, as the ai edit does not do no-ops
71
const v = 1 - Math.max(userEditSimilarity - aiEditSimilarity, 0) / (1 - aiEditSimilarity);
72
noRevertSum += originalEdit.replaceRange.length * v;
73
noRevertSumMax += originalEdit.replaceRange.length;
74
}
75
76
const similarityScoreFourGram = originalEdit.newText.length * similarity;
77
const similarityScoreMax = originalEdit.newText.length;
78
79
similarityScoreSumFourGram += similarityScoreFourGram;
80
similarityScoreSumMax += similarityScoreMax;
81
}
82
83
return {
84
fourGram: similarityScoreSumMax === 0 ? 1 : (similarityScoreSumFourGram / similarityScoreSumMax),
85
noRevert: noRevertSumMax === 0 ? 1 : (noRevertSum / noRevertSumMax),
86
textBeforeAiEdits: allTextBefore,
87
textAfterAiEdits: allTextAfter,
88
textAfterUserEdits: allTextCurrent,
89
};
90
}
91
}
92
93
/**
94
* Computes a number between 0 and 1 that reflects how similar the two texts are.
95
* Counts how many 4-grams are shared between the two texts.
96
*/
97
export function compute4GramTextSimilarity(text1: string, text2: string): number {
98
const n = 4;
99
100
if (text1.length < n || text2.length < n) {
101
return text1 === text2 ? 1 : 0;
102
}
103
104
const nGramIdx = new Map<string, number>();
105
106
for (let i = 0; i <= text1.length - n; i++) {
107
const nGram = text1.substring(i, i + n);
108
const count = nGramIdx.get(nGram) || 0;
109
nGramIdx.set(nGram, count + 1);
110
}
111
112
for (let i = 0; i <= text2.length - n; i++) {
113
const nGram = text2.substring(i, i + n);
114
const count = nGramIdx.get(nGram) || 0;
115
nGramIdx.set(nGram, count - 1);
116
}
117
118
const totalNGramCount = text1.length - n + 1 + text2.length - n + 1;
119
120
let differentNGramCount = 0;
121
for (const count of nGramIdx.values()) {
122
differentNGramCount += Math.abs(count);
123
}
124
125
const equalNGramCount = totalNGramCount - differentNGramCount;
126
127
return equalNGramCount / totalNGramCount;
128
}
129
130
export function applyEditsToRanges(sortedRanges: OffsetRange[], edits: StringEdit): OffsetRange[] {
131
sortedRanges = sortedRanges.slice();
132
133
// treat edits as deletion of the replace range and then as insertion that extends the first range
134
const result: OffsetRange[] = [];
135
136
let offset = 0;
137
138
for (const e of edits.replacements) {
139
while (true) {
140
// ranges before the current edit
141
const r = sortedRanges[0];
142
if (!r || r.endExclusive >= e.replaceRange.start) {
143
break;
144
}
145
sortedRanges.shift();
146
result.push(r.delta(offset));
147
}
148
149
const intersecting: OffsetRange[] = [];
150
while (true) {
151
const r = sortedRanges[0];
152
if (!r || !r.intersectsOrTouches(e.replaceRange)) {
153
break;
154
}
155
sortedRanges.shift();
156
intersecting.push(r);
157
}
158
159
for (let i = intersecting.length - 1; i >= 0; i--) {
160
let r = intersecting[i];
161
162
const overlap = r.intersect(e.replaceRange)!.length;
163
r = r.deltaEnd(-overlap + (i === 0 ? e.newText.length : 0));
164
165
const rangeAheadOfReplaceRange = r.start - e.replaceRange.start;
166
if (rangeAheadOfReplaceRange > 0) {
167
r = r.delta(-rangeAheadOfReplaceRange);
168
}
169
170
if (i !== 0) {
171
r = r.delta(e.newText.length);
172
}
173
174
// We already took our offset into account.
175
// Because we add r back to the queue (which then adds offset again),
176
// we have to remove it here.
177
r = r.delta(-(e.newText.length - e.replaceRange.length));
178
179
sortedRanges.unshift(r);
180
}
181
182
offset += e.newText.length - e.replaceRange.length;
183
}
184
185
while (true) {
186
const r = sortedRanges[0];
187
if (!r) {
188
break;
189
}
190
sortedRanges.shift();
191
result.push(r.delta(offset));
192
}
193
194
return result;
195
}
196
197