Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/codeBlocks/node/codeBlockProcessor.ts
13399 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import type { ChatQuestion, ChatResponseClearToPreviousToolInvocationReason, ChatResponsePart, ChatResponseStream, ChatVulnerability, ThinkingDelta, Uri } from 'vscode';
7
8
import { createFilepathRegexp, mdCodeBlockLangToLanguageId } from '../../../util/common/markdown';
9
import { CharCode } from '../../../util/vs/base/common/charCode';
10
import { isFalsyOrWhitespace, splitLinesIncludeSeparators } from '../../../util/vs/base/common/strings';
11
12
import { IPromptPathRepresentationService } from '../../../platform/prompts/common/promptPathRepresentationService';
13
import { ChatResponseCodeblockUriPart, ChatResponseMarkdownPart, ChatResponseMarkdownWithVulnerabilitiesPart, MarkdownString } from '../../../vscodeTypes';
14
import { CodeBlock } from '../../prompt/common/conversation';
15
16
/** Shape of a code block whose `resource` is required rather than optional; {@linkcode isCodeBlockWithResource} narrows a `CodeBlock` to this type. */
export type CodeBlockWithResource = { readonly code: string; readonly language?: string; readonly resource: Uri; readonly markdownBeforeBlock?: string };
17
18
/**
 * Immutable holder for the list of code blocks collected from a response.
 */
export class CodeBlocksMetadata {
	/** The collected code blocks, in the order they were handed to the constructor. */
	readonly codeBlocks: readonly CodeBlock[];

	constructor(codeBlocks: readonly CodeBlock[]) {
		this.codeBlocks = codeBlocks;
	}
}
23
24
/**
 * Type guard telling whether a code block carries a resource.
 * @param codeBlock The code block to inspect.
 * @returns `true` when `codeBlock.resource` is anything other than `undefined`.
 */
export function isCodeBlockWithResource(codeBlock: CodeBlock): codeBlock is CodeBlockWithResource {
	const { resource } = codeBlock;
	return resource !== undefined;
}
27
28
/**
 * A {@linkcode ChatResponseStream} wrapper that sends every markdown part through a
 * {@linkcode CodeBlockProcessor} before it reaches the wrapped stream.
 * File paths found in code blocks are stripped from the markdown, resolved, and
 * reported to the wrapped stream via `codeblockUri`. Completed code blocks are
 * collected and returned by {@linkcode finish}.
 */
export class CodeBlockTrackingChatResponseStream implements ChatResponseStream {

	private readonly _codeBlockProcessor: CodeBlockProcessor;
	private readonly _codeBlocks: CodeBlock[] = [];

	/**
	 * @param _wrapped The stream that ultimately receives all parts.
	 * @param codeblocksRepresentEdits Passed through as the second argument of every `codeblockUri` call on the wrapped stream.
	 * @param _promptPathRepresentationService Used to resolve file paths found in code blocks.
	 */
	constructor(
		private readonly _wrapped: ChatResponseStream,
		codeblocksRepresentEdits: boolean | undefined,
		@IPromptPathRepresentationService _promptPathRepresentationService: IPromptPathRepresentationService,
	) {
		// Index of the code block whose URI was reported last, so that consecutive
		// markdown emissions for the same block report the URI only once.
		let lastReportedBlockIndex = -1;

		const resolvePath = (path: string) => _promptPathRepresentationService.resolveFilePath(path);

		const forwardMarkdown = (text: MarkdownString, codeBlockInfo: CodeBlockInfo | undefined, vulnerabilities: ChatVulnerability[] | undefined): void => {
			if (vulnerabilities) {
				this._wrapped.markdownWithVulnerabilities(text, vulnerabilities);
			} else {
				this._wrapped.markdown(text);
			}

			// The markdown goes out first; the URI follows once it is known for a new block.
			if (!codeBlockInfo || !codeBlockInfo.resource || codeBlockInfo.index === lastReportedBlockIndex) {
				return;
			}
			this._wrapped.codeblockUri(codeBlockInfo.resource, codeblocksRepresentEdits);
			lastReportedBlockIndex = codeBlockInfo.index;
		};

		const rememberCodeBlock = (codeblock: CodeBlock): void => {
			this._codeBlocks.push(codeblock);
		};

		this._codeBlockProcessor = new CodeBlockProcessor(resolvePath, forwardMarkdown, rememberCodeBlock);
	}

	/** Flushes pending markdown, forwards the call, then drops all code blocks collected so far. */
	clearToPreviousToolInvocation(reason: ChatResponseClearToPreviousToolInvocationReason): void {
		this._codeBlockProcessor.flush();
		this._wrapped.clearToPreviousToolInvocation(reason);
		this._codeBlocks.length = 0;
	}

	/** Routes the markdown through the code block processor instead of forwarding it directly. */
	markdown(value: string | MarkdownString): void {
		this._codeBlockProcessor.processMarkdown(value);
	}

	/** Same as {@linkcode markdown}, with the vulnerabilities attached to the processed text. */
	markdownWithVulnerabilities(value: string | MarkdownString, vulnerabilities: ChatVulnerability[]): void {
		this._codeBlockProcessor.processMarkdown(value, vulnerabilities);
	}

	/** Flushes pending markdown, then forwards the thinking delta. */
	thinkingProgress(thinkingDelta: ThinkingDelta): void {
		this._codeBlockProcessor.flush();
		this._wrapped.thinkingProgress(thinkingDelta);
	}

	/** Hands an already known code block URI to the processor rather than to the wrapped stream. */
	codeblockUri(uri: Uri): void {
		this._codeBlockProcessor.processCodeblockUri(uri);
	}

	/**
	 * Dispatches a response part: markdown and code block URI parts go to the processor,
	 * every other part is forwarded to the wrapped stream after a flush.
	 */
	push(part: ChatResponsePart): void {
		if (part instanceof ChatResponseMarkdownPart) {
			this._codeBlockProcessor.processMarkdown(part.value, undefined);
			return;
		}
		if (part instanceof ChatResponseMarkdownWithVulnerabilitiesPart) {
			this._codeBlockProcessor.processMarkdown(part.value, part.vulnerabilities);
			return;
		}
		if (part instanceof ChatResponseCodeblockUriPart) {
			this._codeBlockProcessor.processCodeblockUri(part.value);
			return;
		}
		this._codeBlockProcessor.flush();
		this._wrapped.push(part);
	}

	/**
	 * Flushes pending markdown and returns the code blocks collected so far.
	 */
	finish(): CodeBlocksMetadata {
		this._codeBlockProcessor.flush();
		return new CodeBlocksMetadata(this._codeBlocks);
	}

	/**
	 * Wraps `fc` so that pending markdown is flushed before every call.
	 */
	private forward(fc: CallableFunction) {
		// The processor is looked up at call time, not here: `forward` is used by the
		// field initializers below, which run before the constructor body assigns
		// `_codeBlockProcessor`.
		return (...args: any[]) => {
			this._codeBlockProcessor.flush();
			return fc(...args);
		};
	}

	/**
	 * If you are adding a new ChatResponseStream type, please make sure to either:
	 * - Update the date on the vscode engine version in package.json to a date when the API will be available in VS Code (sufficient if it's a purely additive/backwards-compatible change)
	 * - Or bump the proposed API version (required if the change is not backwards compatible (changes the shape of an existing API))
	 * to ensure that this extension version only runs in versions of VS Code that contain the necessary API support.
	 */

	button = this.forward(this._wrapped.button.bind(this._wrapped));
	filetree = this.forward(this._wrapped.filetree.bind(this._wrapped));
	// `progress` is bound directly, without the flush that `forward` adds.
	progress = this._wrapped.progress.bind(this._wrapped);
	reference = this.forward(this._wrapped.reference.bind(this._wrapped));
	textEdit = this.forward(this._wrapped.textEdit.bind(this._wrapped));
	notebookEdit = this.forward(this._wrapped.notebookEdit.bind(this._wrapped));
	// Falls back to a no-op when the wrapped stream has no `workspaceEdit`.
	workspaceEdit = this.forward(this._wrapped.workspaceEdit?.bind(this._wrapped) || (() => { }));
	confirmation = this.forward(this._wrapped.confirmation.bind(this._wrapped));
	warning = this.forward(this._wrapped.warning.bind(this._wrapped));
	info = this.forward(this._wrapped.info.bind(this._wrapped));
	hookProgress = this.forward(this._wrapped.hookProgress.bind(this._wrapped));
	reference2 = this.forward(this._wrapped.reference2.bind(this._wrapped));
	codeCitation = this.forward(this._wrapped.codeCitation.bind(this._wrapped));
	anchor = this.forward(this._wrapped.anchor.bind(this._wrapped));
	externalEdit = this.forward(this._wrapped.externalEdit.bind(this._wrapped));
	beginToolInvocation = this.forward(this._wrapped.beginToolInvocation.bind(this._wrapped));
	updateToolInvocation = this.forward(this._wrapped.updateToolInvocation.bind(this._wrapped));
	usage = this.forward(this._wrapped.usage.bind(this._wrapped));

	/** Flushes pending markdown, then forwards the question carousel and returns its result. */
	questionCarousel(questions: ChatQuestion[], allowSkip?: boolean): Thenable<Record<string, unknown> | undefined> {
		this._codeBlockProcessor.flush();
		return this._wrapped.questionCarousel(questions, allowSkip);
	}
}
143
144
145
// Matches a code fence at the very start of a line. Group 1 is the fence itself
// (three or more backticks, or three or more tildes); group 2 is the run of word
// characters directly after it (the language id, possibly empty).
const fenceLanguageRegex = /^(`{3,}|~{3,})(\w*)/;
146
147
/** Position of the line-by-line parser relative to fenced code blocks. */
enum State {
	/** No code block is open. */
	OutsideCodeBlock = 0,
	/** The opening fence was just read; the next line is checked for a file path. */
	LineAfterFence = 1,
	/** A file path line was just consumed; a blank line directly after it is dropped. */
	LineAfterFilePath = 2,
	/** Inside the body of a code block. */
	InCodeBlock = 3,
}
153
154
/** Describes the code block that a piece of emitted markdown belongs to. */
export interface CodeBlockInfo {
	/** The word characters that directly follow the opening fence (may be an empty string). */
	readonly language?: string;
	/** The resource associated with the block, once one is known. */
	readonly resource?: Uri;
	/** Zero-based running number of the block within the processed markdown. */
	readonly index: number;
}
159
160
/** Optional hook that can replace the content of a line before it is emitted. */
export interface LineProcessor {
	/**
	 * Tells whether this processor wants to handle a line.
	 * @param linePart The line, or the beginning of a line that is still incomplete.
	 * @param inCodeBlock Whether the line is inside the body of a code block.
	 */
	matchesLineStart(linePart: string, inCodeBlock: boolean): boolean;

	/**
	 * Produces the replacement for a complete line that {@linkcode matchesLineStart} accepted.
	 * @param line The complete line.
	 * @param inCodeBlock Whether the line is inside the body of a code block.
	 */
	process(line: MarkdownString, inCodeBlock: boolean): MarkdownString;
}
164
165
/**
 * The CodeBlockProcessor processes a sequence of markdown text parts and looks for code blocks that it contains.
 * - Code block filepaths are removed from the Markdown, and reported as codeblockUri
 * - All complete code blocks are also reported as {@linkcode CodeBlock} objects
 * - An optional line processor can be used to replace the content of a full line
 *
 * Text can arrive in arbitrary chunks. Complete lines are processed as soon as they are available.
 * A trailing partial line is emitted right away when it can neither be the start of a fence nor
 * is claimed by the line processor; otherwise it is buffered until the rest of the line arrives
 * or {@linkcode flush} is called.
 */
export class CodeBlockProcessor {

	/** Trailing text of the last chunk that did not end with a line delimiter and has not been emitted yet. */
	private _lastIncompleteLine: MarkdownString | undefined;
	/** True when the beginning of the current line was already emitted as a line part. */
	private _canEmitIncompleteLine: boolean = false;
	/** The code block that is currently open, if any. */
	private _currentBlock: {
		readonly info: {
			readonly language?: string;
			resource?: Uri;
			readonly index: number;
		};
		readonly fence: string;
		readonly vulnerabilities?: ChatVulnerability[];
		readonly firstLine: MarkdownString;
		/** Whether `firstLine` (the opening fence line) has already been passed to `_emitMarkdown`. */
		firstLineEmitted: boolean;
	} | undefined;
	/** Lines (and line parts) of the body of the current code block. */
	private readonly _code: string[] = [];
	/** Markdown seen outside of code blocks since the last completed code block. */
	private readonly _markdownBeforeBlock: string[] = [];
	private _nestingLevel: number = 0;
	/** Index that the next opened code block receives. */
	private _index = 0;
	private _state: State = State.OutsideCodeBlock;

	/**
	 * @param _resolveCodeblockPath Resolves a file path found on the line after an opening fence to a URI.
	 * @param _emitMarkdown Receives the markdown to output, together with the info of the code block it belongs to (if any).
	 * @param _emitCodeblock Receives every code block once its closing fence has been processed.
	 * @param _lineProcessor Optional hook that can replace the content of a full line.
	 */
	constructor(
		private readonly _resolveCodeblockPath: (path: string) => Uri | undefined,
		private readonly _emitMarkdown: (markdown: MarkdownString, codeBlockInfo: CodeBlockInfo | undefined, vulnerabilities?: ChatVulnerability[]) => void,
		private readonly _emitCodeblock: (codeblock: CodeBlock) => void,
		private readonly _lineProcessor?: LineProcessor,
	) {
	}

	/**
	 * Processes the next chunk of markdown.
	 * @param markdown The chunk. It does not have to start or end at a line boundary.
	 * @param vulnerabilities Optional set of vulnerabilities to associate with the chunk.
	 */
	processMarkdown(markdown: string | MarkdownString, vulnerabilities?: ChatVulnerability[]): void {
		const text = typeof markdown === 'string' ? markdown : markdown.value;
		if (text.length === 0) {
			return;
		}

		const lines = splitLinesIncludeSeparators(text).map(line => toMarkdownString(line, markdown));
		if (lines.length > 0) {
			if (this._lastIncompleteLine) {
				// complete the buffered partial line with the beginning of this chunk
				lines[0] = appendMarkdownString(this._lastIncompleteLine, lines[0]);
			}
			// a last line without a line delimiter is incomplete: take it out of the list of full lines
			this._lastIncompleteLine = !endsWithLineDelimiter(lines[lines.length - 1].value) ? lines.pop() : undefined;
			if (this._lastIncompleteLine?.value === '') {
				this._lastIncompleteLine = undefined;
			}
		}

		let i = 0;
		if (i < lines.length && this._canEmitIncompleteLine) {
			// the beginning of this line was already emitted as a part: the rest is just another part of it
			this._processLinePart(lines[0], vulnerabilities);
			i++;
		}
		for (; i < lines.length; i++) {
			this._processLine(lines[i], vulnerabilities);
		}

		if (this._lastIncompleteLine && !this._requiresFullLine(this._lastIncompleteLine)) {
			// the partial line can be emitted without waiting for the rest of the line
			this._processLinePart(this._lastIncompleteLine, vulnerabilities);
			this._lastIncompleteLine = undefined;
			this._canEmitIncompleteLine = true;
		} else {
			this._canEmitIncompleteLine = false;
		}
	}

	/**
	 * Tells whether a partial line must be held back until the full line is known: either because
	 * the state requires looking at a complete line, because the text could still turn into a fence,
	 * or because the line processor wants to handle the line.
	 */
	private _requiresFullLine(markdown: MarkdownString) {
		if (this._state === State.OutsideCodeBlock || this._state === State.InCodeBlock) {
			return mightBeFence(markdown.value) || this._lineProcessor?.matchesLineStart(markdown.value, this._state === State.InCodeBlock);
		}
		return true;
	}

	/**
	 * Records and emits a part of a line as is, without looking for fences or file paths.
	 */
	private _processLinePart(incompleteLine: MarkdownString, vulnerabilities?: ChatVulnerability[]) {
		if (this._currentBlock) {
			this._code.push(incompleteLine.value);
			this._emitMarkdown(incompleteLine, this._currentBlock.info, vulnerabilities);
		} else {
			this._markdownBeforeBlock.push(incompleteLine.value);
			this._emitMarkdown(incompleteLine, undefined, vulnerabilities);
		}
	}

	/**
	 * Called when there is already a known code block URI for the currently processed code block.
	 * The URI is ignored when no code block is open or the open block already has a resource.
	 * @param uri
	 */
	processCodeblockUri(uri: Uri): void {
		if (this._currentBlock && !this._currentBlock.info.resource) {
			this._currentBlock.info.resource = uri;
		}
	}

	/**
	 * Emits the opening fence line of the current code block unless that already happened.
	 * The fence line is held back until the line after it was inspected, and both
	 * {@linkcode _processLine} and {@linkcode flush} can be the one to release it; the
	 * `firstLineEmitted` flag makes sure it goes out exactly once.
	 */
	private _emitFirstLineOnce(): void {
		const block = this._currentBlock;
		if (block && !block.firstLineEmitted) {
			block.firstLineEmitted = true;
			this._emitMarkdown(block.firstLine, block.info, block.vulnerabilities);
		}
	}

	/**
	 * Processes a line of markdown.
	 * @param line The line to process. The line includes the line delimiters, unless it is the last line of the document.
	 * @param vulnerabilities Optional set of vulnerabilities to associate with the line.
	 */
	private _processLine(line: MarkdownString, vulnerabilities?: ChatVulnerability[]): void {
		if (this._state === State.LineAfterFence) {
			const codeBlock = this._currentBlock!; // must be set in that state
			const filePath = getFilePath(line.value, codeBlock.info.language);
			if (filePath) {
				if (!codeBlock.info.resource) {
					codeBlock.info.resource = this._resolveCodeblockPath(filePath);
				}
				this._state = State.LineAfterFilePath;
				this._emitFirstLineOnce();
				// the file path line itself is neither emitted nor added to the code
				return;
			} else {
				this._state = State.InCodeBlock;
				this._emitFirstLineOnce();
				// this was a normal line, not a file path. Continue handling the line
			}
		} else if (this._state === State.LineAfterFilePath) {
			this._state = State.InCodeBlock;
			if (isFalsyOrWhitespace(line.value)) {
				return; // filter the empty line after the file path
			}
		}

		const fenceLanguageIdMatch = line.value.match(fenceLanguageRegex);
		if (fenceLanguageIdMatch) {
			if (!this._currentBlock) {
				// we are not in a code block. Open the block
				this._nestingLevel = 1;
				this._currentBlock = {
					info: {
						index: this._index++,
						language: fenceLanguageIdMatch[2],
						resource: undefined,
					},
					fence: fenceLanguageIdMatch[1],
					firstLine: line,
					firstLineEmitted: false,
					vulnerabilities,
				};
				this._state = State.LineAfterFence;
				// wait emitting markdown before we have seen the next line
				return;
			}
			if (fenceLanguageIdMatch[1] === this._currentBlock.fence) {
				if (fenceLanguageIdMatch[2]) {
					// same fence with a language id: a nested code block opens
					this._nestingLevel++;
				} else if (this._nestingLevel > 1) {
					// same fence without a language id: a nested code block closes
					this._nestingLevel--;
				} else {
					// the fence matches the opening fence. It does not have a language id, and the nesting level is 1. -> Close the code block
					this._emitMarkdown(line, this._currentBlock.info, vulnerabilities);
					this._emitCodeblock({ code: this._code.join(''), resource: this._currentBlock.info.resource, language: this._currentBlock.info.language, markdownBeforeBlock: this._markdownBeforeBlock.join('') });
					this._code.length = 0;
					this._markdownBeforeBlock.length = 0;
					this._currentBlock = undefined;
					this._nestingLevel = 0;
					this._state = State.OutsideCodeBlock;
					return;
				}
			}
		}

		if (this._lineProcessor?.matchesLineStart(line.value, this._state === State.InCodeBlock)) {
			line = this._lineProcessor.process(line, this._state === State.InCodeBlock);
		}

		// the current line is not opening or closing a code block
		if (this._currentBlock) {
			this._code.push(line.value);
			this._emitMarkdown(line, this._currentBlock.info, vulnerabilities);
		} else {
			this._markdownBeforeBlock.push(line.value);
			this._emitMarkdown(line, undefined, vulnerabilities);
		}

	}

	/**
	 * Forces out everything that is being held back: a buffered partial line is processed as if
	 * it was a complete line, and an opening fence line that is still waiting for the line after
	 * it is emitted. Can be called repeatedly and in the middle of a response; a fence line
	 * released here is not emitted again when the next line arrives.
	 */
	flush(): void {
		if (this._lastIncompleteLine) {
			this._processLine(this._lastIncompleteLine);
			this._lastIncompleteLine = undefined;
		}
		if (this._state === State.LineAfterFence) {
			this._emitFirstLineOnce();
		}
	}
}
353
354
/**
 * Extracts the file path from a line, using the file path pattern for the language of the code block.
 * @param line The line to inspect.
 * @param mdLanguage The language as written after the opening fence; when empty or undefined it is
 * passed on unchanged instead of being mapped to a language id.
 * @returns The first capture group of the file path pattern, or `undefined` when the line does not match.
 */
function getFilePath(line: string, mdLanguage: string | undefined) {
	const languageId = !mdLanguage ? mdLanguage : mdCodeBlockLangToLanguageId(mdLanguage);
	const filePathMatch = createFilepathRegexp(languageId).exec(line);
	return filePathMatch?.[1];
}
358
359
/**
 * Tells whether the last character of `line` is a line feed or a carriage return.
 * An empty string does not end with a line delimiter.
 */
function endsWithLineDelimiter(line: string) {
	const lastCharCode = line.charCodeAt(line.length - 1);
	return lastCharCode === CharCode.LineFeed || lastCharCode === CharCode.CarriageReturn;
}
362
363
/**
 * Creates a markdown string for `text`.
 * @param text The content of the new markdown string.
 * @param template When this is an object, its `isTrusted`, `supportThemeIcons`, `baseUri`
 * and `supportHtml` settings are copied to the result; a plain string contributes nothing.
 */
function toMarkdownString(text: string, template: MarkdownString | string): MarkdownString {
	const result = new MarkdownString(text);
	if (typeof template !== 'object') {
		return result;
	}

	const { isTrusted, supportThemeIcons, baseUri, supportHtml } = template;
	result.isTrusted = isTrusted;
	result.supportThemeIcons = supportThemeIcons;
	result.baseUri = baseUri;
	result.supportHtml = supportHtml;
	return result;
}
373
374
/**
 * Concatenates two markdown strings into a new one.
 * The text is `target` followed by `value`. For each of `isTrusted`, `supportThemeIcons`,
 * `supportHtml` and `baseUri`, the setting of `target` is used when it is truthy and the
 * setting of `value` otherwise.
 */
function appendMarkdownString(target: MarkdownString, value: MarkdownString): MarkdownString {
	const combinedText = target.value + value.value;
	return Object.assign(new MarkdownString(combinedText), {
		isTrusted: target.isTrusted || value.isTrusted,
		supportThemeIcons: target.supportThemeIcons || value.supportThemeIcons,
		supportHtml: target.supportHtml || value.supportHtml,
		baseUri: target.baseUri || value.baseUri,
	});
}
382
383
/**
 * Tells whether `line` could still turn into a code fence once more text arrives.
 * That is the case when the line is empty, or when its first (up to three) characters
 * are all backticks or all tildes.
 */
function mightBeFence(line: string) {
	if (line.length === 0) {
		return true;
	}

	const fenceChar = line.charCodeAt(0);
	if (fenceChar !== CharCode.BackTick && fenceChar !== CharCode.Tilde) {
		return false;
	}

	// only the first three characters decide; anything after them is not looked at
	const checkedLength = Math.min(line.length, 3);
	for (let i = 1; i < checkedLength; i++) {
		if (line.charCodeAt(i) !== fenceChar) {
			return false;
		}
	}
	return true;
}
396
397