Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/prompts/node/inline/summarizedDocument/implementation.ts
13406 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
import { AbstractDocument } from '../../../../../platform/editing/common/abstractText';
6
import { OverlayNode } from '../../../../../platform/parser/node/nodes';
7
import { min } from '../../../../../util/common/arrays';
8
import { compareBy, groupAdjacentBy, numberComparator, sumBy } from '../../../../../util/vs/base/common/arrays';
9
import { CachedFunction } from '../../../../../util/vs/base/common/cache';
10
import { StringEdit, StringReplacement } from '../../../../../util/vs/editor/common/core/edits/stringEdit';
11
import { Position } from '../../../../../util/vs/editor/common/core/position';
12
import { OffsetRange } from '../../../../../util/vs/editor/common/core/ranges/offsetRange';
13
import { PositionOffsetTransformer } from '../../../../../util/vs/editor/common/core/text/positionToOffset';
14
import { TextLength } from '../../../../../util/vs/editor/common/core/text/textLength';
15
import { Range } from '../../../../../vscodeTypes';
16
import { IAstVisualization, subtractRange, toAstNode } from '../visualization';
17
import { ConcatenatedStringFragment, LiteralStringFragment, OriginalStringFragment, pushFragment, StringFragment } from './fragments';
18
import { ProjectedText } from './projectedText';
19
20
// Please avoid using vscode API types in this file!
21
// This makes testing and reusing much easier (e.g. for inline edits, where the original document is before the edits, which means vscode.TextDocument is not sufficient).
22
23
export interface IDocumentToSummarize<TDocument extends AbstractDocument> {
24
document: TDocument;
25
selection: Range | undefined; // TODO set this through a scoring function
26
overlayNodeRoot: OverlayNode;
27
}
28
29
export interface ISummarizedDocumentSettings<TDocument extends AbstractDocument> {
30
31
/**
32
* Custom cost function. Return `false` to make sure a node is removed
33
*/
34
costFnOverride?: ((node: RemovableNode, currentCost: number, document: TDocument) => number | false) | ICostFnFactory<TDocument>;
35
36
/**
37
* If set to true, summarization tries to preserve type checking, e.g. by not removing important imports or removing used method signatures.
38
*/
39
tryPreserveTypeChecking?: boolean;
40
41
/**
42
* If set to true, summarization always uses ellipsis for elisions, like in between sibling AST nodes.
43
*/
44
alwaysUseEllipsisForElisions?: boolean;
45
46
/**
47
* The style for line numbers in the summarized document.
48
*/
49
lineNumberStyle?: SummarizedDocumentLineNumberStyle;
50
}
51
52
export interface ICostFnFactory<TDocument extends AbstractDocument> {
53
createCostFn(doc: TDocument): (node: RemovableNode, currentCost: number) => number | false;
54
}
55
56
export class RemovableNode {
57
constructor(
58
public readonly parent: RemovableNode | undefined,
59
private readonly overlayNode: OverlayNode,
60
public readonly range: OffsetRange,
61
public readonly children: readonly RemovableNode[],
62
private readonly _document: AbstractDocument,
63
) { }
64
65
public get kind(): string {
66
return this.overlayNode.kind;
67
}
68
69
public get text(): string {
70
return this._document.getTextInOffsetRange(this.range);
71
}
72
}
73
74
export class ProjectedDocument<TDocument extends AbstractDocument = AbstractDocument> extends ProjectedText {
75
constructor(
76
public readonly baseDocument: TDocument,
77
edits: StringEdit,
78
) {
79
super(baseDocument.getText(), edits);
80
}
81
82
getLanguageId(this: ProjectedDocument<AbstractDocument & { languageId: string }>): string {
83
return this.baseDocument.languageId;
84
}
85
}
86
87
export interface IProjectedDocumentDebugInfo extends ProjectedText {
88
getVisualization?(): IAstVisualization;
89
}
90
91
export function summarizeDocumentsSyncImpl<TDocument extends AbstractDocument>(
92
charLimit: number,
93
settings: ISummarizedDocumentSettings<TDocument>,
94
docs: IDocumentToSummarize<TDocument>[],
95
): ProjectedDocument<TDocument>[] {
96
97
const rootMarkedNodes: SurvivingTextNode[] = [];
98
const bestSummarizationResultsPerDocIdx: StringFragment[] = [];
99
100
const allNodesWithScores: { docIdx: number; node: SurvivingTextNode; cost: number }[] = [];
101
102
for (let i = 0; i < docs.length; i++) {
103
const { document, overlayNodeRoot, selection } = docs[i];
104
105
const text = document.getText();
106
const offsetSelection = selection ? document.rangeToOffsetRange(selection) : undefined;
107
const removableNodeRoot = createRemovableNodeFromOverlayNode(overlayNodeRoot, document);
108
const rootTextNode = TextNode.fromRootNode(removableNodeRoot, document);
109
const rootMarkedNode = SurvivingTextNode.fromNode(rootTextNode, !!settings.tryPreserveTypeChecking, !!settings.alwaysUseEllipsisForElisions, settings.lineNumberStyle ?? SummarizedDocumentLineNumberStyle.None);
110
111
if (offsetSelection) {
112
// mark all leaf nodes that intersect userSelection
113
rootMarkedNode.visitAll(node => {
114
if (!node.node.range.intersectsOrTouches(offsetSelection)) {
115
return false;
116
}
117
if (node.node.children.length === 0) {
118
node.markAsSurviving();
119
}
120
return true;
121
});
122
}
123
124
rootMarkedNodes.push(rootMarkedNode);
125
bestSummarizationResultsPerDocIdx.push(rootMarkedNode.getTextFragment());
126
127
const distanceScoreToSelection = (node: TextNode) => {
128
if (!offsetSelection) {
129
return 0;
130
}
131
if (node.range.endExclusive < offsetSelection.start) {
132
// this node is above
133
return offsetSelection.start - node.range.endExclusive;
134
}
135
if (node.range.start > offsetSelection.endExclusive) {
136
// this node is below
137
return 3 * (node.range.start - offsetSelection.endExclusive); // we will punish code that is below
138
}
139
// this node is intersecting
140
return 0;
141
};
142
143
const scopeDistanceDown: CachedFunction<TextNode, number> = new CachedFunction(node => {
144
if (!offsetSelection) {
145
return 0;
146
}
147
if (node.children.length === 0) {
148
return node.range.intersectsOrTouches(offsetSelection) ? 0 : Number.MAX_SAFE_INTEGER;
149
} else {
150
return min(node.children.map(n => scopeDistanceDown.get(n))) + 1;
151
}
152
});
153
const scopeDistance: CachedFunction<TextNode, number> = new CachedFunction(node => {
154
const parentScopeDistance = node.parent ? scopeDistance.get(node.parent) : Number.MAX_SAFE_INTEGER;
155
const nodeScopeDistanceDown = scopeDistanceDown.get(node);
156
return Math.min(parentScopeDistance, nodeScopeDistanceDown);
157
});
158
159
const tryPreserveTypeChecking = !!settings.tryPreserveTypeChecking;
160
let costFn: (node: TextNode) => number | false = node => {
161
if (tryPreserveTypeChecking && node.node?.kind === 'import_statement') {
162
return 0;
163
}
164
return 100 * scopeDistance.get(node)
165
+ node.depth
166
+ 10 * (distanceScoreToSelection(node) / text.length);
167
};
168
169
const costFnOverride = typeof settings.costFnOverride === 'object' ? settings.costFnOverride.createCostFn(document) : settings.costFnOverride;
170
if (costFnOverride !== undefined) {
171
const oldCostFn = costFn;
172
173
costFn = (n: TextNode) => {
174
const currentScore = oldCostFn(n);
175
if (currentScore === false) {
176
return false;
177
}
178
if (!n.node) {
179
return currentScore;
180
}
181
return costFnOverride(n.node, currentScore, document);
182
};
183
}
184
185
const allNodes = rootMarkedNode.getDescendantsAndSelf();
186
187
for (const node of allNodes) {
188
if (!node.node.node) {
189
continue;
190
}
191
const cost = costFn(node.node);
192
if (cost === false) {
193
continue;
194
}
195
allNodesWithScores.push({
196
docIdx: i,
197
node,
198
cost
199
});
200
}
201
}
202
203
allNodesWithScores.sort(compareBy(n => n.cost, numberComparator));
204
205
206
const getLineNumberText = (lineNumber: number) => {
207
return `${lineNumber}: `;
208
};
209
210
for (const { node, docIdx } of allNodesWithScores) {
211
node.markAsSurviving();
212
213
// total length of all nodes
214
let totalLength = sumBy(rootMarkedNodes, c => c.getTextFragment().length);
215
if (settings.lineNumberStyle === SummarizedDocumentLineNumberStyle.Full) {
216
const textLen = TextLength.sum(rootMarkedNodes, c => c.getTextFragment().textLength);
217
const maxLineNumber = docs[docIdx].document.getLineCount();
218
const totalLineNumberChars = textLen.lineCount * getLineNumberText(maxLineNumber).length; // This is an upper bound approximation.
219
totalLength += totalLineNumberChars;
220
}
221
if (totalLength > charLimit) {
222
break;
223
}
224
225
bestSummarizationResultsPerDocIdx[docIdx] = rootMarkedNodes[docIdx].getTextFragment();
226
}
227
228
const result: ProjectedDocument<TDocument>[] = [];
229
230
for (let i = 0; i < bestSummarizationResultsPerDocIdx.length; i++) {
231
const bestSummarizationResult = bestSummarizationResultsPerDocIdx[i];
232
const { document } = docs[i];
233
let e = bestSummarizationResult.toEditFromOriginal(document.getText().length);
234
235
if (settings.lineNumberStyle === SummarizedDocumentLineNumberStyle.Full) {
236
const summarizedDoc = e.apply(document.getText());
237
const t = new PositionOffsetTransformer(summarizedDoc);
238
const lineNumberReplacements: StringReplacement[] = [];
239
const lineCount = t.textLength.lineCount;
240
for (let curLine = 1; curLine <= lineCount; curLine++) {
241
const offset = t.getOffset(new Position(curLine, 1));
242
const offsetBefore = e.applyInverseToOffset(offset);
243
const pos = document.getPositionAtOffset(offsetBefore);
244
lineNumberReplacements.push(new StringReplacement(OffsetRange.emptyAt(offset), getLineNumberText(pos.line + 1)));
245
}
246
e = e.compose(new StringEdit(lineNumberReplacements));
247
}
248
249
const projectedDoc = new ProjectedDocument(document, e);
250
const r = projectedDoc as IProjectedDocumentDebugInfo;
251
252
const rootMarkedNode = rootMarkedNodes[i];
253
254
r.getVisualization = () => ({
255
...{ $fileExtension: 'ast.w' },
256
source: {
257
value: projectedDoc.originalText,
258
decorations: subtractRange(
259
OffsetRange.ofLength(projectedDoc.originalText.length),
260
projectedDoc.edits.replacements.map(e => e.replaceRange),
261
).map(r => ({
262
range: [r.start, r.endExclusive],
263
color: 'lime',
264
}))
265
},
266
root: toAstNode(rootMarkedNode, n => ({
267
label: (n.node.node?.kind || 'unknown') + ` (${allNodesWithScores.find(nws => nws.node === n)?.cost})`,
268
range: n.node.range,
269
children: n.childNodes,
270
isMarked: n['_surviving'],
271
}))
272
});
273
274
result.push(projectedDoc);
275
}
276
277
return result;
278
}
279
280
function createRemovableNodeFromOverlayNode(node: OverlayNode, document: AbstractDocument, parent: RemovableNode | undefined = undefined): RemovableNode {
281
const range = new OffsetRange(node.startIndex, node.endIndex);
282
const children: RemovableNode[] = [];
283
const result = new RemovableNode(parent, node, range, children, document);
284
for (const n of node.children) {
285
children.push(createRemovableNodeFromOverlayNode(n, document, result));
286
}
287
return result;
288
}
289
290
/**
291
* A dense representation of the text in a document.
292
* Based on overlay nodes.
293
*
294
* **Dense**: every character is contained by some leaf node.
295
*/
296
class TextNode {
297
public static fromRootNode(node: RemovableNode, document: AbstractDocument): TextNode {
298
const fullRange = new OffsetRange(0, document.length);
299
if (node.range.equals(fullRange)) {
300
return TextNode.fromNode(node, document);
301
}
302
303
// The root node does not cover the entire document!
304
// So we have to create a virtual root that actually does cover the entire document.
305
306
const startGap = new OffsetRange(0, node.range.start);
307
const endGap = new OffsetRange(node.range.endExclusive, document.length);
308
309
const children: TextNode[] = [];
310
const rootNode = new TextNode(undefined, fullRange, children, 0, null, document);
311
312
if (!startGap.isEmpty) {
313
children.push(new TextNode(undefined, startGap, [], 0, rootNode, document));
314
}
315
children.push(TextNode.fromNode(node, document, 1, null));
316
if (!endGap.isEmpty) {
317
children.push(new TextNode(undefined, endGap, [], 0, rootNode, document));
318
}
319
return rootNode;
320
}
321
322
private static fromNode(node: RemovableNode, document: AbstractDocument, depth = 0, parent: TextNode | null = null): TextNode {
323
const children: TextNode[] = [];
324
const result = new TextNode(node, node.range, children, depth, parent, document);
325
if (node.children.length > 0) {
326
let lastEnd = node.range.start;
327
for (const n of node.children) {
328
const gap = new OffsetRange(lastEnd, n.range.start);
329
if (!gap.isEmpty) {
330
children.push(new TextNode(undefined, gap, [], depth, result, document));
331
}
332
children.push(TextNode.fromNode(n, document, depth + 1, result));
333
lastEnd = n.range.endExclusive;
334
}
335
const gap = new OffsetRange(lastEnd, node.range.endExclusive);
336
if (!gap.isEmpty) {
337
children.push(new TextNode(undefined, gap, [], depth, result, document));
338
}
339
}
340
return result;
341
}
342
343
constructor(
344
public readonly node: RemovableNode | undefined,
345
public readonly range: OffsetRange,
346
public readonly children: readonly TextNode[],
347
public readonly depth: number,
348
public readonly parent: TextNode | null,
349
public readonly document: AbstractDocument,
350
) { }
351
352
getLeadingWs(): string {
353
return getLeadingWs(this.document.getText(), this.range);
354
}
355
356
getIndentation(): string {
357
let indentation = this.getLeadingWs();
358
const lastNewLineIdx = indentation.lastIndexOf('\n');
359
if (lastNewLineIdx !== -1) {
360
indentation = indentation.substring(lastNewLineIdx + 1);
361
}
362
return indentation;
363
}
364
365
getTrailingWs(): string {
366
return getTrailingWs(this.document.getText(), this.range);
367
}
368
}
369
370
function getLeadingWs(str: string, range: OffsetRange): string {
371
const val = range.substring(str);
372
const trimmed = val.length - val.trimStart().length;
373
const ws = val.substring(0, trimmed);
374
return ws;
375
}
376
377
function getTrailingWs(str: string, range: OffsetRange): string {
378
const val = range.substring(str);
379
const trimmed = val.length - val.trimEnd().length;
380
const ws = val.substring(val.length - trimmed);
381
return ws;
382
}
383
384
export enum SummarizedDocumentLineNumberStyle {
385
None,
386
OmittedRanges,
387
Full,
388
}
389
390
class SurvivingTextNode {
391
public static fromNode(node: TextNode, tryPreserveTypeChecking: boolean, alwaysUseEllipsisForElisions: boolean, lineNumberStyle: SummarizedDocumentLineNumberStyle): SurvivingTextNode {
392
return SurvivingTextNode.fromNodeParent(node, null, tryPreserveTypeChecking, alwaysUseEllipsisForElisions, lineNumberStyle);
393
}
394
395
private static fromNodeParent(node: TextNode, parent: SurvivingTextNode | null, tryPreserveTypeChecking: boolean, alwaysUseEllipsisForElisions: boolean, lineNumberStyle: SummarizedDocumentLineNumberStyle): SurvivingTextNode {
396
const children: SurvivingTextNode[] = [];
397
const result = new SurvivingTextNode(node, parent, children, tryPreserveTypeChecking, alwaysUseEllipsisForElisions, lineNumberStyle);
398
for (const child of node.children) {
399
const childNode = SurvivingTextNode.fromNodeParent(child, result, tryPreserveTypeChecking, alwaysUseEllipsisForElisions, lineNumberStyle);
400
children.push(childNode);
401
}
402
return result;
403
}
404
405
private _surviving = false;
406
407
constructor(
408
public readonly node: TextNode,
409
public readonly parent: SurvivingTextNode | null,
410
public readonly childNodes: readonly SurvivingTextNode[],
411
private readonly _tryPreserveTypeChecking: boolean,
412
private readonly _alwaysUseEllipsisForElisions: boolean,
413
private readonly _lineNumberStyle: SummarizedDocumentLineNumberStyle,
414
) {
415
}
416
417
visitAll(fn: (node: SurvivingTextNode) => boolean): void {
418
if (!fn(this)) { return; }
419
for (const child of this.childNodes) {
420
child.visitAll(fn);
421
}
422
}
423
424
markAsSurviving(): void {
425
if (this._surviving) { return; }
426
this._surviving = true;
427
if (this.parent) {
428
this.parent.markAsSurviving();
429
}
430
this.invalidate();
431
}
432
433
private _textFragment: StringFragment | null = null;
434
435
private invalidate(): void {
436
if (!this._textFragment) { return; }
437
this._textFragment = null;
438
if (this.parent) {
439
this.parent.invalidate();
440
}
441
}
442
443
getTextFragment(): StringFragment {
444
if (!this._textFragment) {
445
this._textFragment = this._computeSummarization();
446
}
447
return this._textFragment;
448
}
449
450
private _computeSummarization(): StringFragment {
451
if (this.childNodes.length === 0 && (this._surviving || !this.node.node)) {
452
return new OriginalStringFragment(this.node.range, this.node.document.getText());
453
}
454
if (!this._surviving) {
455
return new LiteralStringFragment('');
456
}
457
458
const groups = Array.from(groupAdjacentBy(this.childNodes.map(n => ({ node: n, fragment: n.getTextFragment() })), (f1, f2) => (f1.fragment.length === 0) === (f2.fragment.length === 0)));
459
460
const getOmittedMessage = (omittedRange: OffsetRange): string => {
461
if (this._lineNumberStyle === SummarizedDocumentLineNumberStyle.OmittedRanges) {
462
const range = this.node.document.getPositionOffsetTransformer().getRange(omittedRange);
463
if (range.startLineNumber !== range.endLineNumber) {
464
return `/* Lines ${range.startLineNumber}-${range.endLineNumber} omitted */`;
465
}
466
return `/* Line ${range.startLineNumber} omitted */`;
467
}
468
return this._tryPreserveTypeChecking ? '/* ... */' : '…';
469
};
470
471
for (let i = 0; i < groups.length; i++) {
472
// in one group, all elements either are all empty or non-empty. Also, no group is empty.
473
const g = groups[i];
474
const isEmpty = g[0].fragment.length === 0;
475
476
if (isEmpty && i > 0 && i < groups.length - 1) {
477
// All our fragments are empty.
478
const prev = groups[i - 1].at(-1)!; // Non-empty before us
479
const next = groups[i + 1].at(0)!; // Non-empty after us
480
481
const fullRange = g.at(0)!.node.node.range.join(g.at(-1)!.node.node.range);
482
483
if (prev.fragment instanceof OriginalStringFragment && next.fragment instanceof OriginalStringFragment) {
484
const startTrimmed = prev.fragment.trimEnd();
485
const endTrimmed = next.fragment.trimStart();
486
if (startTrimmed.endsWith('{') && endTrimmed.startsWith('}')) {
487
groups[i - 1][groups[i - 1].length - 1].fragment = startTrimmed;
488
g.length = 1;
489
490
g[0].fragment = new LiteralStringFragment(getOmittedMessage(fullRange));
491
groups[i + 1][0].fragment = endTrimmed;
492
continue;
493
}
494
}
495
496
if (this._alwaysUseEllipsisForElisions || this._lineNumberStyle === SummarizedDocumentLineNumberStyle.OmittedRanges) {
497
const indentation = g.at(0)!.node.node.getIndentation();
498
const end = g.at(-1)!.node.node.getTrailingWs();
499
g.length = 1;
500
g[0].fragment = new LiteralStringFragment(indentation + getOmittedMessage(fullRange) + end);
501
}
502
}
503
}
504
505
const result: StringFragment[] = [];
506
for (const group of groups) {
507
for (const g of group) {
508
pushFragment(result, g.fragment);
509
}
510
}
511
512
return ConcatenatedStringFragment.from(result);
513
}
514
515
getDescendantsAndSelf(): SurvivingTextNode[] {
516
const result: SurvivingTextNode[] = [];
517
this._getDescendantsAndSelf(result);
518
return result;
519
}
520
521
private _getDescendantsAndSelf(result: SurvivingTextNode[]): void {
522
result.push(this);
523
for (const child of this.childNodes) {
524
child._getDescendantsAndSelf(result);
525
}
526
}
527
}
528
529