Path: blob/main/extensions/copilot/src/extension/prompts/node/inline/summarizedDocument/implementation.ts
13406 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/4import { AbstractDocument } from '../../../../../platform/editing/common/abstractText';5import { OverlayNode } from '../../../../../platform/parser/node/nodes';6import { min } from '../../../../../util/common/arrays';7import { compareBy, groupAdjacentBy, numberComparator, sumBy } from '../../../../../util/vs/base/common/arrays';8import { CachedFunction } from '../../../../../util/vs/base/common/cache';9import { StringEdit, StringReplacement } from '../../../../../util/vs/editor/common/core/edits/stringEdit';10import { Position } from '../../../../../util/vs/editor/common/core/position';11import { OffsetRange } from '../../../../../util/vs/editor/common/core/ranges/offsetRange';12import { PositionOffsetTransformer } from '../../../../../util/vs/editor/common/core/text/positionToOffset';13import { TextLength } from '../../../../../util/vs/editor/common/core/text/textLength';14import { Range } from '../../../../../vscodeTypes';15import { IAstVisualization, subtractRange, toAstNode } from '../visualization';16import { ConcatenatedStringFragment, LiteralStringFragment, OriginalStringFragment, pushFragment, StringFragment } from './fragments';17import { ProjectedText } from './projectedText';1819// Please avoid using vscode API types in this file!20// This makes testing and reusing much easier (e.g. for inline edits, where the original document is before the edits, which means vscode.TextDocument is not sufficient).2122export interface IDocumentToSummarize<TDocument extends AbstractDocument> {23document: TDocument;24selection: Range | undefined; // TODO set this through a scoring function25overlayNodeRoot: OverlayNode;26}2728export interface ISummarizedDocumentSettings<TDocument extends AbstractDocument> {2930/**31* Custom cost function. Return `false` to make sure a node is removed32*/33costFnOverride?: ((node: RemovableNode, currentCost: number, document: TDocument) => number | false) | ICostFnFactory<TDocument>;3435/**36* If set to true, summarization tries to preserve type checking, e.g. by not removing important imports or removing used method signatures.37*/38tryPreserveTypeChecking?: boolean;3940/**41* If set to true, summarization always uses ellipsis for elisions, like in between sibling AST nodes.42*/43alwaysUseEllipsisForElisions?: boolean;4445/**46* The style for line numbers in the summarized document.47*/48lineNumberStyle?: SummarizedDocumentLineNumberStyle;49}5051export interface ICostFnFactory<TDocument extends AbstractDocument> {52createCostFn(doc: TDocument): (node: RemovableNode, currentCost: number) => number | false;53}5455export class RemovableNode {56constructor(57public readonly parent: RemovableNode | undefined,58private readonly overlayNode: OverlayNode,59public readonly range: OffsetRange,60public readonly children: readonly RemovableNode[],61private readonly _document: AbstractDocument,62) { }6364public get kind(): string {65return this.overlayNode.kind;66}6768public get text(): string {69return this._document.getTextInOffsetRange(this.range);70}71}7273export class ProjectedDocument<TDocument extends AbstractDocument = AbstractDocument> extends ProjectedText {74constructor(75public readonly baseDocument: TDocument,76edits: StringEdit,77) {78super(baseDocument.getText(), edits);79}8081getLanguageId(this: ProjectedDocument<AbstractDocument & { languageId: string }>): string {82return this.baseDocument.languageId;83}84}8586export interface IProjectedDocumentDebugInfo extends ProjectedText {87getVisualization?(): IAstVisualization;88}8990export function summarizeDocumentsSyncImpl<TDocument extends AbstractDocument>(91charLimit: number,92settings: ISummarizedDocumentSettings<TDocument>,93docs: IDocumentToSummarize<TDocument>[],94): ProjectedDocument<TDocument>[] {9596const rootMarkedNodes: SurvivingTextNode[] = [];97const bestSummarizationResultsPerDocIdx: StringFragment[] = [];9899const allNodesWithScores: { docIdx: number; node: SurvivingTextNode; cost: number }[] = [];100101for (let i = 0; i < docs.length; i++) {102const { document, overlayNodeRoot, selection } = docs[i];103104const text = document.getText();105const offsetSelection = selection ? document.rangeToOffsetRange(selection) : undefined;106const removableNodeRoot = createRemovableNodeFromOverlayNode(overlayNodeRoot, document);107const rootTextNode = TextNode.fromRootNode(removableNodeRoot, document);108const rootMarkedNode = SurvivingTextNode.fromNode(rootTextNode, !!settings.tryPreserveTypeChecking, !!settings.alwaysUseEllipsisForElisions, settings.lineNumberStyle ?? SummarizedDocumentLineNumberStyle.None);109110if (offsetSelection) {111// mark all leaf nodes that intersect userSelection112rootMarkedNode.visitAll(node => {113if (!node.node.range.intersectsOrTouches(offsetSelection)) {114return false;115}116if (node.node.children.length === 0) {117node.markAsSurviving();118}119return true;120});121}122123rootMarkedNodes.push(rootMarkedNode);124bestSummarizationResultsPerDocIdx.push(rootMarkedNode.getTextFragment());125126const distanceScoreToSelection = (node: TextNode) => {127if (!offsetSelection) {128return 0;129}130if (node.range.endExclusive < offsetSelection.start) {131// this node is above132return offsetSelection.start - node.range.endExclusive;133}134if (node.range.start > offsetSelection.endExclusive) {135// this node is below136return 3 * (node.range.start - offsetSelection.endExclusive); // we will punish code that is below137}138// this node is intersecting139return 0;140};141142const scopeDistanceDown: CachedFunction<TextNode, number> = new CachedFunction(node => {143if (!offsetSelection) {144return 0;145}146if (node.children.length === 0) {147return node.range.intersectsOrTouches(offsetSelection) ? 0 : Number.MAX_SAFE_INTEGER;148} else {149return min(node.children.map(n => scopeDistanceDown.get(n))) + 1;150}151});152const scopeDistance: CachedFunction<TextNode, number> = new CachedFunction(node => {153const parentScopeDistance = node.parent ? scopeDistance.get(node.parent) : Number.MAX_SAFE_INTEGER;154const nodeScopeDistanceDown = scopeDistanceDown.get(node);155return Math.min(parentScopeDistance, nodeScopeDistanceDown);156});157158const tryPreserveTypeChecking = !!settings.tryPreserveTypeChecking;159let costFn: (node: TextNode) => number | false = node => {160if (tryPreserveTypeChecking && node.node?.kind === 'import_statement') {161return 0;162}163return 100 * scopeDistance.get(node)164+ node.depth165+ 10 * (distanceScoreToSelection(node) / text.length);166};167168const costFnOverride = typeof settings.costFnOverride === 'object' ? settings.costFnOverride.createCostFn(document) : settings.costFnOverride;169if (costFnOverride !== undefined) {170const oldCostFn = costFn;171172costFn = (n: TextNode) => {173const currentScore = oldCostFn(n);174if (currentScore === false) {175return false;176}177if (!n.node) {178return currentScore;179}180return costFnOverride(n.node, currentScore, document);181};182}183184const allNodes = rootMarkedNode.getDescendantsAndSelf();185186for (const node of allNodes) {187if (!node.node.node) {188continue;189}190const cost = costFn(node.node);191if (cost === false) {192continue;193}194allNodesWithScores.push({195docIdx: i,196node,197cost198});199}200}201202allNodesWithScores.sort(compareBy(n => n.cost, numberComparator));203204205const getLineNumberText = (lineNumber: number) => {206return `${lineNumber}: `;207};208209for (const { node, docIdx } of allNodesWithScores) {210node.markAsSurviving();211212// total length of all nodes213let totalLength = sumBy(rootMarkedNodes, c => c.getTextFragment().length);214if (settings.lineNumberStyle === SummarizedDocumentLineNumberStyle.Full) {215const textLen = TextLength.sum(rootMarkedNodes, c => c.getTextFragment().textLength);216const maxLineNumber = docs[docIdx].document.getLineCount();217const totalLineNumberChars = textLen.lineCount * getLineNumberText(maxLineNumber).length; // This is an upper bound approximation.218totalLength += totalLineNumberChars;219}220if (totalLength > charLimit) {221break;222}223224bestSummarizationResultsPerDocIdx[docIdx] = rootMarkedNodes[docIdx].getTextFragment();225}226227const result: ProjectedDocument<TDocument>[] = [];228229for (let i = 0; i < bestSummarizationResultsPerDocIdx.length; i++) {230const bestSummarizationResult = bestSummarizationResultsPerDocIdx[i];231const { document } = docs[i];232let e = bestSummarizationResult.toEditFromOriginal(document.getText().length);233234if (settings.lineNumberStyle === SummarizedDocumentLineNumberStyle.Full) {235const summarizedDoc = e.apply(document.getText());236const t = new PositionOffsetTransformer(summarizedDoc);237const lineNumberReplacements: StringReplacement[] = [];238const lineCount = t.textLength.lineCount;239for (let curLine = 1; curLine <= lineCount; curLine++) {240const offset = t.getOffset(new Position(curLine, 1));241const offsetBefore = e.applyInverseToOffset(offset);242const pos = document.getPositionAtOffset(offsetBefore);243lineNumberReplacements.push(new StringReplacement(OffsetRange.emptyAt(offset), getLineNumberText(pos.line + 1)));244}245e = e.compose(new StringEdit(lineNumberReplacements));246}247248const projectedDoc = new ProjectedDocument(document, e);249const r = projectedDoc as IProjectedDocumentDebugInfo;250251const rootMarkedNode = rootMarkedNodes[i];252253r.getVisualization = () => ({254...{ $fileExtension: 'ast.w' },255source: {256value: projectedDoc.originalText,257decorations: subtractRange(258OffsetRange.ofLength(projectedDoc.originalText.length),259projectedDoc.edits.replacements.map(e => e.replaceRange),260).map(r => ({261range: [r.start, r.endExclusive],262color: 'lime',263}))264},265root: toAstNode(rootMarkedNode, n => ({266label: (n.node.node?.kind || 'unknown') + ` (${allNodesWithScores.find(nws => nws.node === n)?.cost})`,267range: n.node.range,268children: n.childNodes,269isMarked: n['_surviving'],270}))271});272273result.push(projectedDoc);274}275276return result;277}278279function createRemovableNodeFromOverlayNode(node: OverlayNode, document: AbstractDocument, parent: RemovableNode | undefined = undefined): RemovableNode {280const range = new OffsetRange(node.startIndex, node.endIndex);281const children: RemovableNode[] = [];282const result = new RemovableNode(parent, node, range, children, document);283for (const n of node.children) {284children.push(createRemovableNodeFromOverlayNode(n, document, result));285}286return result;287}288289/**290* A dense representation of the text in a document.291* Based on overlay nodes.292*293* **Dense**: every character is contained by some leaf node.294*/295class TextNode {296public static fromRootNode(node: RemovableNode, document: AbstractDocument): TextNode {297const fullRange = new OffsetRange(0, document.length);298if (node.range.equals(fullRange)) {299return TextNode.fromNode(node, document);300}301302// The root node does not cover the entire document!303// So we have to create a virtual root that actually does cover the entire document.304305const startGap = new OffsetRange(0, node.range.start);306const endGap = new OffsetRange(node.range.endExclusive, document.length);307308const children: TextNode[] = [];309const rootNode = new TextNode(undefined, fullRange, children, 0, null, document);310311if (!startGap.isEmpty) {312children.push(new TextNode(undefined, startGap, [], 0, rootNode, document));313}314children.push(TextNode.fromNode(node, document, 1, null));315if (!endGap.isEmpty) {316children.push(new TextNode(undefined, endGap, [], 0, rootNode, document));317}318return rootNode;319}320321private static fromNode(node: RemovableNode, document: AbstractDocument, depth = 0, parent: TextNode | null = null): TextNode {322const children: TextNode[] = [];323const result = new TextNode(node, node.range, children, depth, parent, document);324if (node.children.length > 0) {325let lastEnd = node.range.start;326for (const n of node.children) {327const gap = new OffsetRange(lastEnd, n.range.start);328if (!gap.isEmpty) {329children.push(new TextNode(undefined, gap, [], depth, result, document));330}331children.push(TextNode.fromNode(n, document, depth + 1, result));332lastEnd = n.range.endExclusive;333}334const gap = new OffsetRange(lastEnd, node.range.endExclusive);335if (!gap.isEmpty) {336children.push(new TextNode(undefined, gap, [], depth, result, document));337}338}339return result;340}341342constructor(343public readonly node: RemovableNode | undefined,344public readonly range: OffsetRange,345public readonly children: readonly TextNode[],346public readonly depth: number,347public readonly parent: TextNode | null,348public readonly document: AbstractDocument,349) { }350351getLeadingWs(): string {352return getLeadingWs(this.document.getText(), this.range);353}354355getIndentation(): string {356let indentation = this.getLeadingWs();357const lastNewLineIdx = indentation.lastIndexOf('\n');358if (lastNewLineIdx !== -1) {359indentation = indentation.substring(lastNewLineIdx + 1);360}361return indentation;362}363364getTrailingWs(): string {365return getTrailingWs(this.document.getText(), this.range);366}367}368369function getLeadingWs(str: string, range: OffsetRange): string {370const val = range.substring(str);371const trimmed = val.length - val.trimStart().length;372const ws = val.substring(0, trimmed);373return ws;374}375376function getTrailingWs(str: string, range: OffsetRange): string {377const val = range.substring(str);378const trimmed = val.length - val.trimEnd().length;379const ws = val.substring(val.length - trimmed);380return ws;381}382383export enum SummarizedDocumentLineNumberStyle {384None,385OmittedRanges,386Full,387}388389class SurvivingTextNode {390public static fromNode(node: TextNode, tryPreserveTypeChecking: boolean, alwaysUseEllipsisForElisions: boolean, lineNumberStyle: SummarizedDocumentLineNumberStyle): SurvivingTextNode {391return SurvivingTextNode.fromNodeParent(node, null, tryPreserveTypeChecking, alwaysUseEllipsisForElisions, lineNumberStyle);392}393394private static fromNodeParent(node: TextNode, parent: SurvivingTextNode | null, tryPreserveTypeChecking: boolean, alwaysUseEllipsisForElisions: boolean, lineNumberStyle: SummarizedDocumentLineNumberStyle): SurvivingTextNode {395const children: SurvivingTextNode[] = [];396const result = new SurvivingTextNode(node, parent, children, tryPreserveTypeChecking, alwaysUseEllipsisForElisions, lineNumberStyle);397for (const child of node.children) {398const childNode = SurvivingTextNode.fromNodeParent(child, result, tryPreserveTypeChecking, alwaysUseEllipsisForElisions, lineNumberStyle);399children.push(childNode);400}401return result;402}403404private _surviving = false;405406constructor(407public readonly node: TextNode,408public readonly parent: SurvivingTextNode | null,409public readonly childNodes: readonly SurvivingTextNode[],410private readonly _tryPreserveTypeChecking: boolean,411private readonly _alwaysUseEllipsisForElisions: boolean,412private readonly _lineNumberStyle: SummarizedDocumentLineNumberStyle,413) {414}415416visitAll(fn: (node: SurvivingTextNode) => boolean): void {417if (!fn(this)) { return; }418for (const child of this.childNodes) {419child.visitAll(fn);420}421}422423markAsSurviving(): void {424if (this._surviving) { return; }425this._surviving = true;426if (this.parent) {427this.parent.markAsSurviving();428}429this.invalidate();430}431432private _textFragment: StringFragment | null = null;433434private invalidate(): void {435if (!this._textFragment) { return; }436this._textFragment = null;437if (this.parent) {438this.parent.invalidate();439}440}441442getTextFragment(): StringFragment {443if (!this._textFragment) {444this._textFragment = this._computeSummarization();445}446return this._textFragment;447}448449private _computeSummarization(): StringFragment {450if (this.childNodes.length === 0 && (this._surviving || !this.node.node)) {451return new OriginalStringFragment(this.node.range, this.node.document.getText());452}453if (!this._surviving) {454return new LiteralStringFragment('');455}456457const groups = Array.from(groupAdjacentBy(this.childNodes.map(n => ({ node: n, fragment: n.getTextFragment() })), (f1, f2) => (f1.fragment.length === 0) === (f2.fragment.length === 0)));458459const getOmittedMessage = (omittedRange: OffsetRange): string => {460if (this._lineNumberStyle === SummarizedDocumentLineNumberStyle.OmittedRanges) {461const range = this.node.document.getPositionOffsetTransformer().getRange(omittedRange);462if (range.startLineNumber !== range.endLineNumber) {463return `/* Lines ${range.startLineNumber}-${range.endLineNumber} omitted */`;464}465return `/* Line ${range.startLineNumber} omitted */`;466}467return this._tryPreserveTypeChecking ? '/* ... */' : '…';468};469470for (let i = 0; i < groups.length; i++) {471// in one group, all elements either are all empty or non-empty. Also, no group is empty.472const g = groups[i];473const isEmpty = g[0].fragment.length === 0;474475if (isEmpty && i > 0 && i < groups.length - 1) {476// All our fragments are empty.477const prev = groups[i - 1].at(-1)!; // Non-empty before us478const next = groups[i + 1].at(0)!; // Non-empty after us479480const fullRange = g.at(0)!.node.node.range.join(g.at(-1)!.node.node.range);481482if (prev.fragment instanceof OriginalStringFragment && next.fragment instanceof OriginalStringFragment) {483const startTrimmed = prev.fragment.trimEnd();484const endTrimmed = next.fragment.trimStart();485if (startTrimmed.endsWith('{') && endTrimmed.startsWith('}')) {486groups[i - 1][groups[i - 1].length - 1].fragment = startTrimmed;487g.length = 1;488489g[0].fragment = new LiteralStringFragment(getOmittedMessage(fullRange));490groups[i + 1][0].fragment = endTrimmed;491continue;492}493}494495if (this._alwaysUseEllipsisForElisions || this._lineNumberStyle === SummarizedDocumentLineNumberStyle.OmittedRanges) {496const indentation = g.at(0)!.node.node.getIndentation();497const end = g.at(-1)!.node.node.getTrailingWs();498g.length = 1;499g[0].fragment = new LiteralStringFragment(indentation + getOmittedMessage(fullRange) + end);500}501}502}503504const result: StringFragment[] = [];505for (const group of groups) {506for (const g of group) {507pushFragment(result, g.fragment);508}509}510511return ConcatenatedStringFragment.from(result);512}513514getDescendantsAndSelf(): SurvivingTextNode[] {515const result: SurvivingTextNode[] = [];516this._getDescendantsAndSelf(result);517return result;518}519520private _getDescendantsAndSelf(result: SurvivingTextNode[]): void {521result.push(this);522for (const child of this.childNodes) {523child._getDescendantsAndSelf(result);524}525}526}527528529