// Path: blob/main/extensions/copilot/src/extension/codeBlocks/node/codeBlockProcessor.ts
// 13399 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import type { ChatQuestion, ChatResponseClearToPreviousToolInvocationReason, ChatResponsePart, ChatResponseStream, ChatVulnerability, ThinkingDelta, Uri } from 'vscode';

import { createFilepathRegexp, mdCodeBlockLangToLanguageId } from '../../../util/common/markdown';
import { CharCode } from '../../../util/vs/base/common/charCode';
import { isFalsyOrWhitespace, splitLinesIncludeSeparators } from '../../../util/vs/base/common/strings';

import { IPromptPathRepresentationService } from '../../../platform/prompts/common/promptPathRepresentationService';
import { ChatResponseCodeblockUriPart, ChatResponseMarkdownPart, ChatResponseMarkdownWithVulnerabilitiesPart, MarkdownString } from '../../../vscodeTypes';
import { CodeBlock } from '../../prompt/common/conversation';

/** A code block whose `resource` is known. */
export type CodeBlockWithResource = { readonly code: string; readonly language?: string; readonly resource: Uri; readonly markdownBeforeBlock?: string };

/** Holds the code blocks collected while a response was streamed. */
export class CodeBlocksMetadata {
	constructor(
		readonly codeBlocks: readonly CodeBlock[]
	) { }
}

/** Type guard: `true` when the code block's `resource` is not `undefined`. */
export function isCodeBlockWithResource(codeBlock: CodeBlock): codeBlock is CodeBlockWithResource {
	return codeBlock.resource !== undefined;
}

/**
 * Proxy of a {@linkcode ChatResponseStream} that processes all code blocks in the markdown.
 * Filepaths are removed from the Markdown, resolved and reported as codeblockUri
 */
export class CodeBlockTrackingChatResponseStream implements ChatResponseStream {

	private readonly _codeBlockProcessor: CodeBlockProcessor;
	private readonly _codeBlocks: CodeBlock[] = [];

	/**
	 * @param _wrapped The stream that receives the processed output.
	 * @param codeblocksRepresentEdits Passed through as the second argument of `codeblockUri` on the wrapped stream.
	 * @param _promptPathRepresentationService Used to resolve file paths found in code blocks.
	 */
	constructor(
		private readonly _wrapped: ChatResponseStream,
		codeblocksRepresentEdits: boolean | undefined,
		@IPromptPathRepresentationService _promptPathRepresentationService: IPromptPathRepresentationService,
	) {
		// Index of the last code block whose URI was forwarded, so each block's URI is reported once.
		let uriReportedForIndex = -1;
		this._codeBlockProcessor = new CodeBlockProcessor(
			path => {
				return _promptPathRepresentationService.resolveFilePath(path);
			},
			(text: MarkdownString, codeBlockInfo: CodeBlockInfo | undefined, vulnerabilities: ChatVulnerability[] | undefined) => {
				if (vulnerabilities) {
					this._wrapped.markdownWithVulnerabilities(text, vulnerabilities);
				} else {
					this._wrapped.markdown(text);
				}
				if (codeBlockInfo && codeBlockInfo.resource && codeBlockInfo.index !== uriReportedForIndex) {
					this._wrapped.codeblockUri(codeBlockInfo.resource, codeblocksRepresentEdits);
					uriReportedForIndex = codeBlockInfo.index;
				}
			},
			codeblock => {
				this._codeBlocks.push(codeblock);
			}
		);
	}

	/** Flushes pending markdown, forwards the call and discards the code blocks collected so far. */
	clearToPreviousToolInvocation(reason: ChatResponseClearToPreviousToolInvocationReason): void {
		this._codeBlockProcessor.flush();
		this._wrapped.clearToPreviousToolInvocation(reason);
		this._codeBlocks.length = 0;
	}

	markdown(value: string | MarkdownString): void {
		this._codeBlockProcessor.processMarkdown(value);
	}

	markdownWithVulnerabilities(value: string | MarkdownString, vulnerabilities: ChatVulnerability[]): void {
		this._codeBlockProcessor.processMarkdown(value, vulnerabilities);
	}

	thinkingProgress(thinkingDelta: ThinkingDelta): void {
		this._codeBlockProcessor.flush();
		this._wrapped.thinkingProgress(thinkingDelta);
	}

	/** Assigns the URI to the code block currently being processed instead of forwarding it directly. */
	codeblockUri(uri: Uri): void {
		this._codeBlockProcessor.processCodeblockUri(uri);
	}

	/**
	 * Routes markdown and code block URI parts through the code block processor.
	 * Any other part is forwarded to the wrapped stream after flushing pending markdown.
	 */
	push(part: ChatResponsePart): void {
		if (part instanceof ChatResponseMarkdownPart) {
			this._codeBlockProcessor.processMarkdown(part.value, undefined);
		} else if (part instanceof ChatResponseMarkdownWithVulnerabilitiesPart) {
			this._codeBlockProcessor.processMarkdown(part.value, part.vulnerabilities);
		} else if (part instanceof ChatResponseCodeblockUriPart) {
			this._codeBlockProcessor.processCodeblockUri(part.value);
		} else {
			this._codeBlockProcessor.flush();
			this._wrapped.push(part);
		}
	}

	/** Flushes pending markdown and returns the code blocks collected so far. */
	finish(): CodeBlocksMetadata {
		this._codeBlockProcessor.flush();
		return new CodeBlocksMetadata(this._codeBlocks);
	}

	/** Wraps `fc` so that pending markdown is flushed before every call. */
	private forward(fc: CallableFunction) {
		return (...args: any[]) => {
			this._codeBlockProcessor.flush();
			return fc(...args);
		};
	}

	/**
	 * If you are adding a new ChatResponseStream type, please make sure to either:
	 * - Update the date on the vscode engine version in package.json to a date when the API will be available in VS Code (sufficient if it's a purely additive/backwards-compatible change)
	 * - Or bump the proposed API version (required if the change is not backwards compatible (changes the shape of an existing API))
	 * to ensure that this extension version only runs in versions of VS Code that contain the necessary API support.
	 */

	button = this.forward(this._wrapped.button.bind(this._wrapped));
	filetree = this.forward(this._wrapped.filetree.bind(this._wrapped));
	// NOTE: unlike its siblings, progress is bound directly and does not flush pending markdown.
	progress = this._wrapped.progress.bind(this._wrapped);
	reference = this.forward(this._wrapped.reference.bind(this._wrapped));
	textEdit = this.forward(this._wrapped.textEdit.bind(this._wrapped));
	notebookEdit = this.forward(this._wrapped.notebookEdit.bind(this._wrapped));
	workspaceEdit = this.forward(this._wrapped.workspaceEdit?.bind(this._wrapped) || (() => { }));
	confirmation = this.forward(this._wrapped.confirmation.bind(this._wrapped));
	warning = this.forward(this._wrapped.warning.bind(this._wrapped));
	info = this.forward(this._wrapped.info.bind(this._wrapped));
	hookProgress = this.forward(this._wrapped.hookProgress.bind(this._wrapped));
	reference2 = this.forward(this._wrapped.reference2.bind(this._wrapped));
	codeCitation = this.forward(this._wrapped.codeCitation.bind(this._wrapped));
	anchor = this.forward(this._wrapped.anchor.bind(this._wrapped));
	externalEdit = this.forward(this._wrapped.externalEdit.bind(this._wrapped));
	beginToolInvocation = this.forward(this._wrapped.beginToolInvocation.bind(this._wrapped));
	updateToolInvocation = this.forward(this._wrapped.updateToolInvocation.bind(this._wrapped));
	usage = this.forward(this._wrapped.usage.bind(this._wrapped));

	questionCarousel(questions: ChatQuestion[], allowSkip?: boolean): Thenable<Record<string, unknown> | undefined> {
		this._codeBlockProcessor.flush();
		return this._wrapped.questionCarousel(questions, allowSkip);
	}
}


/**
 * Matches a line that starts with a fence of three or more backticks or tildes.
 * Group 1 is the fence itself, group 2 the (possibly empty) run of word characters following it.
 */
const fenceLanguageRegex = /^(`{3,}|~{3,})(\w*)/;

enum State {
	/** No code block is open. */
	OutsideCodeBlock,
	/** An opening fence was seen; it is held back until the next line shows whether it is a file path. */
	LineAfterFence,
	/** A file path line was consumed; a directly following whitespace-only line is dropped. */
	LineAfterFilePath,
	/** Inside the body of a code block. */
	InCodeBlock,
}

export interface CodeBlockInfo {
	readonly language?: string;
	readonly resource?: Uri;
	readonly index: number;
}

export interface LineProcessor {
	matchesLineStart(linePart: string, inCodeBlock: boolean): boolean;
	process(line: MarkdownString, inCodeBlock: boolean): MarkdownString;
}

/**
 * The CodeBlockProcessor processes a sequence of markdown text parts and looks for code blocks that it contains.
 * - Code block filepaths are removed from the Markdown, and reported as codeblockUri
 * - All complete code blocks are also reported as {@linkcode CodeBlock} objects
 * - An optional line processor can be used to replace the content of a full line
 */
export class CodeBlockProcessor {

	/** Trailing text of the last chunk that did not end in a line delimiter and has not been emitted yet. */
	private _lastIncompleteLine: MarkdownString | undefined;
	/** `true` when the start of the current line was already emitted, so its continuation can be emitted as a part. */
	private _canEmitIncompleteLine: boolean = false;
	private _currentBlock: {
		readonly info: {
			readonly language?: string;
			resource?: Uri;
			readonly index: number;
		};
		readonly fence: string;
		readonly vulnerabilities?: ChatVulnerability[];
		readonly firstLine: MarkdownString;
		/** Set once the held-back fence line was emitted, so that it is never emitted twice. */
		firstLineEmitted: boolean;
	} | undefined;
	private readonly _code: string[] = [];
	private readonly _markdownBeforeBlock: string[] = [];
	private _nestingLevel: number = 0;
	private _index = 0;
	private _state: State = State.OutsideCodeBlock;

	/**
	 * @param _resolveCodeblockPath Resolves a file path found on the first line of a code block.
	 * @param _emitMarkdown Receives every piece of markdown that is passed on, with the info of the code block it belongs to (if any).
	 * @param _emitCodeblock Receives every code block once its closing fence was seen.
	 * @param _lineProcessor Optional processor that can replace the content of full lines.
	 */
	constructor(
		private readonly _resolveCodeblockPath: (path: string) => Uri | undefined,
		private readonly _emitMarkdown: (markdown: MarkdownString, codeBlockInfo: CodeBlockInfo | undefined, vulnerabilities?: ChatVulnerability[]) => void,
		private readonly _emitCodeblock: (codeblock: CodeBlock) => void,
		private readonly _lineProcessor?: LineProcessor,
	) {
	}

	/**
	 * Processes the next chunk of markdown. Complete lines are processed immediately.
	 * A trailing incomplete line is either emitted as a line part or buffered until the rest of the line arrives.
	 * @param markdown The markdown chunk. Empty chunks are ignored.
	 * @param vulnerabilities Optional set of vulnerabilities to associate with the chunk.
	 */
	processMarkdown(markdown: string | MarkdownString, vulnerabilities?: ChatVulnerability[]): void {
		const text = typeof markdown === 'string' ? markdown : markdown.value;
		if (text.length === 0) {
			return;
		}

		const lines = splitLinesIncludeSeparators(text).map(line => toMarkdownString(line, markdown));
		if (lines.length > 0) {
			if (this._lastIncompleteLine) {
				// complete the buffered line with the start of this chunk
				lines[0] = appendMarkdownString(this._lastIncompleteLine, lines[0]);
			}
			this._lastIncompleteLine = !endsWithLineDelimiter(lines[lines.length - 1].value) ? lines.pop() : undefined;
			if (this._lastIncompleteLine?.value === '') {
				this._lastIncompleteLine = undefined;
			}
		}

		let i = 0;
		if (i < lines.length && this._canEmitIncompleteLine) {
			// the start of this line was already emitted as a part: emit the remainder as a part as well
			this._processLinePart(lines[0], vulnerabilities);
			i++;
		}
		for (; i < lines.length; i++) {
			this._processLine(lines[i], vulnerabilities);
		}

		if (this._lastIncompleteLine && !this._requiresFullLine(this._lastIncompleteLine)) {
			this._processLinePart(this._lastIncompleteLine, vulnerabilities);
			this._lastIncompleteLine = undefined;
			this._canEmitIncompleteLine = true;
		} else {
			this._canEmitIncompleteLine = false;
		}
	}

	/**
	 * Returns whether the given line start must be buffered until the full line is known.
	 * That is the case when it might still become a fence, when the line processor claims it,
	 * or when the state is neither `OutsideCodeBlock` nor `InCodeBlock`.
	 */
	private _requiresFullLine(markdown: MarkdownString): boolean {
		if (this._state === State.OutsideCodeBlock || this._state === State.InCodeBlock) {
			const inCodeBlock = this._state === State.InCodeBlock;
			return mightBeFence(markdown.value) || (this._lineProcessor?.matchesLineStart(markdown.value, inCodeBlock) ?? false);
		}
		return true;
	}

	/** Records and emits a part of a line without looking for fences or file paths. */
	private _processLinePart(incompleteLine: MarkdownString, vulnerabilities?: ChatVulnerability[]) {
		if (this._currentBlock) {
			this._code.push(incompleteLine.value);
			this._emitMarkdown(incompleteLine, this._currentBlock.info, vulnerabilities);
		} else {
			this._markdownBeforeBlock.push(incompleteLine.value);
			this._emitMarkdown(incompleteLine, undefined, vulnerabilities);
		}
	}

	/**
	 * Called when there is already a known code block URI for the currently processed code block
	 * @param uri The URI. Ignored when no code block is open or the open block already has a resource.
	 */
	processCodeblockUri(uri: Uri): void {
		if (this._currentBlock && !this._currentBlock.info.resource) {
			this._currentBlock.info.resource = uri;
		}
	}

	/**
	 * Emits the held-back opening fence line of the given block unless that already happened.
	 * Both `flush` and the processing of the line after the fence go through here, so the fence
	 * is never emitted twice for the same block.
	 */
	private _emitFirstLineOnce(codeBlock: NonNullable<CodeBlockProcessor['_currentBlock']>): void {
		if (codeBlock.firstLineEmitted) {
			return;
		}
		codeBlock.firstLineEmitted = true;
		this._emitMarkdown(codeBlock.firstLine, codeBlock.info, codeBlock.vulnerabilities);
	}

	/**
	 * Processes a line of markdown.
	 * @param line The line to process. The line includes the line delimiters, unless it is the last line of the document.
	 * @param vulnerabilities Optional set of vulnerabilities to associate with the line.
	 */
	private _processLine(line: MarkdownString, vulnerabilities?: ChatVulnerability[]): void {
		if (this._state === State.LineAfterFence) {
			const codeBlock = this._currentBlock!; // must be set in that state
			const filePath = getFilePath(line.value, codeBlock.info.language);
			if (filePath) {
				if (!codeBlock.info.resource) {
					codeBlock.info.resource = this._resolveCodeblockPath(filePath);
				}
				this._state = State.LineAfterFilePath;
				this._emitFirstLineOnce(codeBlock);
				return; // the file path line itself is not emitted
			} else {
				this._state = State.InCodeBlock;
				this._emitFirstLineOnce(codeBlock);
				// this was a normal line, not a file path. Continue handling the line
			}
		} else if (this._state === State.LineAfterFilePath) {
			this._state = State.InCodeBlock;
			if (isFalsyOrWhitespace(line.value)) {
				return; // filter the empty line after the file path
			}
		}

		const fenceLanguageIdMatch = line.value.match(fenceLanguageRegex);
		if (fenceLanguageIdMatch) {
			if (!this._currentBlock) {
				// we are not in a code block. Open the block
				this._nestingLevel = 1;
				this._currentBlock = {
					info: {
						index: this._index++,
						language: fenceLanguageIdMatch[2],
						resource: undefined,
					},
					fence: fenceLanguageIdMatch[1],
					firstLine: line,
					firstLineEmitted: false,
					vulnerabilities,
				};
				this._state = State.LineAfterFence;
				// wait emitting markdown before we have seen the next line
				return;
			}
			if (fenceLanguageIdMatch[1] === this._currentBlock.fence) {
				if (fenceLanguageIdMatch[2]) {
					// same fence with a language id: a nested block opens
					this._nestingLevel++;
				} else if (this._nestingLevel > 1) {
					// same fence without a language id: a nested block closes
					this._nestingLevel--;
				} else {
					// the fence matches the opening fence. It does not have a language id, and the nesting level is 1. -> Close the code block
					this._emitMarkdown(line, this._currentBlock.info, vulnerabilities);
					this._emitCodeblock({ code: this._code.join(''), resource: this._currentBlock.info.resource, language: this._currentBlock.info.language, markdownBeforeBlock: this._markdownBeforeBlock.join('') });
					this._code.length = 0;
					this._markdownBeforeBlock.length = 0;
					this._currentBlock = undefined;
					this._nestingLevel = 0;
					this._state = State.OutsideCodeBlock;
					return;
				}
			}
		}

		if (this._lineProcessor?.matchesLineStart(line.value, this._state === State.InCodeBlock)) {
			line = this._lineProcessor.process(line, this._state === State.InCodeBlock);
		}

		// the current line is not opening or closing a code block
		if (this._currentBlock) {
			this._code.push(line.value);
			this._emitMarkdown(line, this._currentBlock.info, vulnerabilities);
		} else {
			this._markdownBeforeBlock.push(line.value);
			this._emitMarkdown(line, undefined, vulnerabilities);
		}

	}

	/**
	 * Emits everything that is currently held back: a buffered incomplete line is processed as if it
	 * were complete, and a held-back opening fence line is emitted.
	 * Can be called repeatedly; the fence line of a block is emitted at most once.
	 */
	flush(): void {
		if (this._lastIncompleteLine) {
			// the vulnerabilities of the chunk that contained the buffered line are not retained, so none are passed
			this._processLine(this._lastIncompleteLine);
			this._lastIncompleteLine = undefined;
		}
		if (this._state === State.LineAfterFence && this._currentBlock) {
			// The state stays LineAfterFence so that a file path on the next line is still detected.
			this._emitFirstLineOnce(this._currentBlock);
		}
	}
}

/**
 * Extracts the file path from the given line using the file path regexp created for the
 * language id of the code block. Returns `undefined` when the line does not match.
 */
function getFilePath(line: string, mdLanguage: string | undefined) {
	const languageId = mdLanguage ? mdCodeBlockLangToLanguageId(mdLanguage) : mdLanguage;
	return createFilepathRegexp(languageId).exec(line)?.[1];
}

/** Returns whether the last character of the line is a line feed or a carriage return. */
function endsWithLineDelimiter(line: string) {
	return [CharCode.LineFeed, CharCode.CarriageReturn].includes(line.charCodeAt(line.length - 1));
}

/**
 * Creates a markdown string for `text`. When `template` is a markdown string, its
 * `isTrusted`, `supportThemeIcons`, `baseUri` and `supportHtml` settings are copied.
 */
function toMarkdownString(text: string, template: MarkdownString | string): MarkdownString {
	const markdownString = new MarkdownString(text);
	if (typeof template === 'object') {
		markdownString.isTrusted = template.isTrusted;
		markdownString.supportThemeIcons = template.supportThemeIcons;
		markdownString.baseUri = template.baseUri;
		markdownString.supportHtml = template.supportHtml;
	}
	return markdownString;
}

/**
 * Creates a new markdown string with the concatenated values of `target` and `value`.
 * Each setting is taken from `target` when truthy, otherwise from `value`.
 */
function appendMarkdownString(target: MarkdownString, value: MarkdownString): MarkdownString {
	const markdownString = new MarkdownString(target.value + value.value);
	markdownString.isTrusted = target.isTrusted || value.isTrusted;
	markdownString.supportThemeIcons = target.supportThemeIcons || value.supportThemeIcons;
	markdownString.supportHtml = target.supportHtml || value.supportHtml;
	markdownString.baseUri = target.baseUri || value.baseUri;
	return markdownString;
}

/**
 * Returns whether the given start of a line can still turn into a fence: its first (up to three)
 * characters must all be the same backtick or tilde. The empty string returns `true`.
 */
function mightBeFence(line: string) {
	const len = line.length;
	if (len > 0) {
		const ch1 = line.charCodeAt(0);
		if (ch1 !== CharCode.BackTick && ch1 !== CharCode.Tilde) {
			return false;
		}
		if ((len > 1 && line.charCodeAt(1) !== ch1) || (len > 2 && line.charCodeAt(2) !== ch1)) {
			return false;
		}
	}
	return true;
}