Path: blob/main/extensions/copilot/src/extension/chatSessions/copilotcli/common/copilotCLIPrompt.ts
13405 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import type { ChatPromptReference } from 'vscode';
import { createFilepathRegexp } from '../../../../util/common/markdown';
import { Schemas } from '../../../../util/vs/base/common/network';
import * as path from '../../../../util/vs/base/common/path';
import { isEqual } from '../../../../util/vs/base/common/resources';
import { URI } from '../../../../util/vs/base/common/uri';
import { Range as EditorRange } from '../../../../util/vs/editor/common/core/range';
import { ChatReferenceDiagnostic, Diagnostic, DiagnosticSeverity, Location, Range } from '../../../../vscodeTypes';
import { PromptFileIdPrefix } from '../../../prompt/common/chatVariablesCollection';

/**
 * Parse the raw user prompt and extract diagnostics and file/line location references
 * contained inside a single <attachments>...</attachments> block.
 *
 * Recognized elements:
 * - <error path="/abs/path.py" line=13 code="E001" severity="error">Message</error>
 *   -> Aggregated into ChatReferenceDiagnostic (maps uri -> Diagnostic[])
 * - <attachment>Excerpt from /abs/path.py, lines X to Y: ...</attachment>
 *   or attachment blocks containing a `# filepath: /abs/path.py` comment
 *   -> Converted into vscode.Location objects.
 *
 * @param prompt The full prompt text; only the first <attachments> block is inspected.
 * @returns The references in the order their tags appear; an empty array when the prompt
 * has no <attachments> block.
 */
export function extractChatPromptReferences(prompt: string): ChatPromptReference[] {
	// Preserve order of items as they appear inside <attachments>...
	const attachmentsBlockMatch = prompt.match(/<attachments>([\s\S]*?)<\/attachments>/i);
	if (!attachmentsBlockMatch) {
		return [];
	}
	const block = attachmentsBlockMatch[1];

	// Helper: collect ordered tag texts (<attachment ...>...</attachment> or self-closing; <error ...>...</error>)
	function collectOrderedTags(text: string): string[] {
		const results: string[] = [];
		let i = 0;
		const len = text.length;
		while (i < len) {
			// Find next tag start
			const nextAttachment = text.indexOf('<attachment', i);
			const nextError = text.indexOf('<error', i);
			let next = -1;
			let tagType: 'attachment' | 'error' | undefined;
			if (nextAttachment !== -1 && (nextError === -1 || nextAttachment < nextError)) {
				next = nextAttachment;
				tagType = 'attachment';
			} else if (nextError !== -1) {
				next = nextError;
				tagType = 'error';
			}
			if (next === -1 || !tagType) { break; }
			// Move to end of opening tag
			const openEnd = text.indexOf('>', next);
			if (openEnd === -1) { break; }
			const openingTagText = text.slice(next, openEnd + 1);
			// Self-closing?
			const isSelfClosing = /<attachment\b[\s\S]*?\/>\s*$/i.test(openingTagText);
			if (isSelfClosing) {
				results.push(openingTagText);
				i = openEnd + 1;
				continue;
			}
			// Otherwise, find the matching closing tag, skipping fenced code blocks
			const closing = tagType === 'attachment' ? '</attachment>' : '</error>';
			let j = openEnd + 1;
			let inFence = false;
			while (j < len) {
				// Toggle on triple backticks
				if (text.startsWith('```', j)) {
					inFence = !inFence;
					j += 3;
					continue;
				}
				if (!inFence && text.startsWith(closing, j)) {
					const tagText = text.slice(next, j + closing.length);
					results.push(tagText);
					i = j + closing.length;
					break;
				}
				j++;
			}
			if (j >= len) {
				// No closing found; bail out to avoid infinite loop
				break;
			}
		}
		return results;
	}

	// Walk the tags in document order and delegate to the specific extractor per tag
	const ordered: ChatPromptReference[] = [];
	for (const tagText of collectOrderedTags(block)) {
		if (/^<attachment\b/i.test(tagText)) {
			// Distinguish prompt attachments vs resource attachments
			const promptIdMatch = tagText.match(/<attachment\s+id="(prompt:[^"]+)"[\s\S]*?>/i);
			const ref = promptIdMatch ? extractPromptReferencesFromTag(prompt, tagText) : extractResourcesFromTag(prompt, tagText);
			if (ref) {
				ordered.push(ref);
			}
		} else if (/^<error\b/i.test(tagText)) {
			const ref = extractDiagnosticsFromTag(tagText);
			if (!ref) {
				continue;
			}
			// A diagnostic can only be merged into the immediately preceding entry, and only
			// when that entry is a diagnostic reference for the same resource.
			const previousRef = ordered.length > 0 ? ordered[ordered.length - 1] : undefined;
			if (!previousRef || !(previousRef.value instanceof ChatReferenceDiagnostic) || !(ref.value instanceof ChatReferenceDiagnostic) || !isEqual(previousRef.value.diagnostics[0][0], ref.value.diagnostics[0][0])) {
				ordered.push(ref);
				continue;
			}

			// Check if the diagnostics are in intersecting ranges.
			const currentDiagnosticRange = toEditorRange(ref.value.diagnostics[0][1][0].range);
			const previousDiagnosticRange = toEditorRange(previousRef.value.diagnostics[0][1][0].range);
			if (EditorRange.areIntersectingOrTouching(previousDiagnosticRange, currentDiagnosticRange)) {
				// Merge diagnostics into previous entry
				previousRef.value.diagnostics[0][1].push(...ref.value.diagnostics[0][1]);
			} else {
				ordered.push(ref);
			}
		}
	}
	return ordered;
}

/**
 * Map a diagnostic severity to the lowercase label used inside reference ids.
 * Returns an empty string for any value that is not one of the four known severities.
 */
function severityToString(severity: DiagnosticSeverity): string {
	switch (severity) {
		case DiagnosticSeverity.Error: return 'error';
		case DiagnosticSeverity.Warning: return 'warning';
		case DiagnosticSeverity.Information: return 'info';
		case DiagnosticSeverity.Hint: return 'hint';
		default: return '';
	}
}

/**
 * Collect every `name="value"` pair found in `text` (whitespace around `=` is allowed).
 * When a name occurs more than once, the last occurrence wins.
 */
function parseQuotedAttributes(text: string): Record<string, string> {
	const attrs: Record<string, string> = {};
	for (const attrMatch of text.matchAll(/(\w+)\s*=\s*"([^"]*)"/g)) {
		attrs[attrMatch[1]] = attrMatch[2];
	}
	return attrs;
}

// Single-tag extractors used by ordered parsing

/**
 * Turn one <attachment> tag (self-closing or with content) into a reference to a file,
 * folder, line range, SCM history item or GitHub issue/PR.
 *
 * @param prompt The full prompt, searched for `#<name>` to compute the reference range.
 * @param tagText The complete text of the tag, including its opening and closing markup.
 * @returns The reference, or `undefined` when no target can be determined from the tag
 * or the tag is a `prompt:` attachment.
 */
function extractResourcesFromTag(prompt: string, tagText: string): ChatPromptReference | undefined {
	// Self-closing attachment
	if (/^<attachment\s+[^>]*\/>$/i.test(tagText.trim())) {
		const attrs = parseQuotedAttributes(tagText);
		const isFolder = attrs['folderPath'] !== undefined && attrs['folderPath'] !== '' && attrs['filePath'] === undefined;
		const fileOrFolderpath = attrs['filePath'] || attrs['folderPath'];
		if (!fileOrFolderpath) {
			return undefined;
		}
		const uri = pathToUri(isFolder ? getFolderAttachmentPath(fileOrFolderpath) : fileOrFolderpath);
		const providedId = attrs['id'];
		const locName = providedId ?? uri.toString();
		let id = providedId ?? uri.toString();
		let range: [number, number] | undefined = undefined;
		if (providedId && prompt.includes(`#${providedId}`)) {
			const startIdx = prompt.indexOf(`#${providedId}`);
			// NOTE(review): the end offset uses the id length without the leading '#'; confirm this is what consumers expect.
			range = [startIdx, startIdx + providedId.length];
		}
		if (providedId && providedId.startsWith('sym:')) {
			// Use the string form of the URI: interpolating `uri.toJSON()` (an object) would
			// produce the same "[object Object]" suffix for every symbol.
			id = `vscode.symbol/${uri.toString()}`;
		}
		return { id, name: locName, range, value: uri };
	}

	// Normal attachment with content
	const content = tagText;
	let filePath: string | undefined;
	let providedId: string | undefined;

	const githubPRIssue = extractGitHubIssueOrPRChatReference(content);
	if (githubPRIssue) {
		return githubPRIssue;
	}

	const openingTagMatch = content.match(/<attachment\s+([^>]*)>/i);
	if (openingTagMatch) {
		const attrsStr = openingTagMatch[1];
		const idAttrMatch = attrsStr.match(/\bid\s*=\s*"([^"]+)"/);
		if (idAttrMatch) {
			providedId = idAttrMatch[1];
		}
	}
	if (providedId && providedId.startsWith('prompt:')) {
		return undefined; // prompt attachments handled elsewhere
	}
	const isUntitledFile = providedId?.startsWith('file:untitled-') || false;

	// 1st choice: a filepath comment inside the first fenced code block that declares a language.
	const fenceMatch = content.match(/```([^\n`]+)\n([\s\S]*?)```/);
	const fencedLanguage = fenceMatch ? fenceMatch[1].trim() : undefined;
	const codeBlockBody = fenceMatch ? fenceMatch[2] : undefined;
	if (codeBlockBody) {
		const re = createFilepathRegexp(fencedLanguage);
		for (const line of codeBlockBody.split(/\r?\n/)) {
			const lineMatch = re.exec(line);
			if (lineMatch && lineMatch[1]) { filePath = lineMatch[1].trim(); break; }
		}
	}
	// 2nd choice: a `# filepath:` / `// filepath:` marker anywhere in the tag.
	if (!filePath) {
		const simpleMatch = content.match(/[#\/]\s*filepath:\s*(\S+)/);
		if (simpleMatch) { filePath = simpleMatch[1]; }
	}
	// 3rd choice: the "Excerpt from <path>, lines X to Y" header, which also supplies the line range.
	const excerptMatch = content.match(/Excerpt from ([^,]+),\s*lines\s+(\d+)\s+to\s+(\d+)/i);
	if (!filePath && excerptMatch) {
		filePath = excerptMatch[1].trim();
	}
	if (!filePath) {
		// Possible this is an SCM item
		try {
			const attrs = parseQuotedAttributes(tagText);
			if (typeof attrs['filePath'] === 'string') {
				filePath = attrs['filePath'];
			}
			if (filePath?.startsWith('scm-history-item:') && typeof attrs['id'] === 'string') {
				let id = attrs['id'];
				const value = URI.parse(filePath);
				try {
					// Extract id from query.
					const query: unknown = JSON.parse(value.query);
					const historyItemId = (query as { historyItemId?: unknown } | null)?.historyItemId;
					if (typeof historyItemId === 'string' && historyItemId.length > 0) {
						id = historyItemId;
					}
				} catch {
					// Query is not JSON; keep the id taken from the attribute.
				}
				return {
					id,
					name: attrs['id'],
					value
				} satisfies ChatPromptReference;
			}
		} catch {
			// Best effort: a tag that cannot be interpreted as an SCM item yields no reference.
		}

		return undefined;
	}
	const startLine = excerptMatch ? parseInt(excerptMatch[2], 10) : undefined;
	const endLine = excerptMatch ? parseInt(excerptMatch[3], 10) : undefined;
	const uri = isUntitledFile && filePath.startsWith('untitled:') ? URI.from({ scheme: Schemas.untitled, path: filePath.substring('untitled:'.length) }) : pathToUri(filePath);
	// The excerpt header's line numbers are decremented by one to build the Range.
	const location = (typeof startLine === 'undefined' || typeof endLine === 'undefined' || isNaN(startLine) || isNaN(endLine)) ? undefined : new Location(uri, new Range(startLine - 1, 0, endLine - 1, 0));
	const serializedTarget = location ? JSON.stringify(location) : uri.toString();
	const locName = providedId ?? serializedTarget;
	let range: [number, number] | undefined = undefined;
	let id = serializedTarget;
	if (prompt.includes(`#${locName}`)) {
		const idx = prompt.indexOf(`#${locName}`);
		// NOTE(review): the end offset uses the name length without the leading '#'; confirm this is what consumers expect.
		range = [idx, idx + locName.length];
	}
	if (locName.startsWith('sym:')) { id = `vscode.symbol/${serializedTarget}`; }
	return { id, name: locName, range, value: location ?? uri };
}

/**
 * Turn an <attachment id="prompt:..."> tag into a reference to the prompt file named by the
 * `// filepath:` (preferred) or `# filepath:` line inside the tag's content.
 *
 * @param prompt The full prompt (not used by this extractor).
 * @param tagText The complete text of the tag.
 * @returns The reference, or `undefined` when the tag has no `prompt:` id or no filepath line.
 */
function extractPromptReferencesFromTag(prompt: string, tagText: string): ChatPromptReference | undefined {
	const idAttrMatch = tagText.match(/<attachment\s+id="(prompt:[^"]+)"[\s\S]*?>/i);
	if (!idAttrMatch) { return undefined; }
	const idAttr = idAttrMatch[1];
	const contentMatch = tagText.match(/<attachment[\s\S]*?>([\s\S]*?)<\/attachment>/i);
	const content = contentMatch ? contentMatch[1] : '';

	let filePath: string | undefined;
	const filepathMatch = content.match(/^\s*\/\/+\s*filepath:\s*(.+?)(?:\r?\n|$)/im);
	if (filepathMatch) { filePath = filepathMatch[1].trim(); }
	if (!filePath) {
		const hashMatch = content.match(/^\s*#\s*filepath:\s*(.+?)(?:\r?\n|$)/im);
		if (hashMatch) { filePath = hashMatch[1].trim(); }
	}
	if (!filePath) { return undefined; }
	let uri: URI;
	if (filePath.startsWith('untitled:')) { uri = URI.parse(filePath); } else { uri = pathToUri(filePath); }
	const id = `${PromptFileIdPrefix}__${uri.toString()}`;
	const name = idAttr;
	return { id, name, value: uri, modelDescription: 'Prompt instruction file' };
}

/**
 * Turn one <error ...>message</error> tag into a reference wrapping a single diagnostic.
 *
 * Attributes may be quoted (`code="E001"`) or bare integers (`line=13`); a quoted value
 * takes precedence over a bare one with the same name. `path` and a `line` of at least 1
 * are required. `severity` is matched case-insensitively and defaults to `error` when it
 * is missing or not one of error/warning/info/hint.
 *
 * @returns The reference, or `undefined` when the tag is malformed or lacks a usable path/line.
 */
function extractDiagnosticsFromTag(tagText: string): ChatPromptReference | undefined {
	const m = tagText.match(/<error\s+([^>]+)>([\s\S]*?)<\/error>/i);
	if (!m) { return undefined; }
	const attrText = m[1];
	const message = m[2].trim();
	const attrs: Record<string, string> = {};
	for (const attrMatch of attrText.matchAll(/(\w+)="([^"]*)"/g)) { attrs[attrMatch[1]] = attrMatch[2]; }
	for (const attrMatch of attrText.matchAll(/(\w+)=([0-9]+)/g)) { if (!attrs[attrMatch[1]]) { attrs[attrMatch[1]] = attrMatch[2]; } }
	const filePath = attrs['path'];
	const lineStr = attrs['line'];
	if (!filePath || !lineStr) { return undefined; }
	const lineNum = parseInt(lineStr, 10);
	if (isNaN(lineNum) || lineNum < 1) { return undefined; }
	const code = attrs['code'] && attrs['code'] !== 'undefined' ? attrs['code'] : undefined;
	const severityStr = (attrs['severity'] || 'error').toLowerCase();
	// A Map (rather than an object literal) so that values such as "constructor" cannot
	// resolve to inherited Object.prototype members; unrecognised values fall back to Error.
	const severityByName = new Map<string, DiagnosticSeverity>([
		['error', DiagnosticSeverity.Error],
		['warning', DiagnosticSeverity.Warning],
		['info', DiagnosticSeverity.Information],
		['hint', DiagnosticSeverity.Hint],
	]);
	const severity = severityByName.get(severityStr) ?? DiagnosticSeverity.Error;
	const uri = pathToUri(filePath);
	const range = new Range(lineNum - 1, 0, lineNum - 1, 0);
	const diagnostic = new Diagnostic(range, message, severity);
	diagnostic.code = code;
	return {
		id: `${uri.toString()}:${severityToString(diagnostic.severity)}:${diagnostic.range.start.line + 1}:${diagnostic.range.start.character + 1}`,
		name: diagnostic.message,
		range: undefined,
		value: new ChatReferenceDiagnostic([[uri, [diagnostic]]])
	} as ChatPromptReference;
}

/**
 * Recognise an attachment whose content is a JSON object describing a GitHub issue or
 * pull request (it has a numeric `issueNumber` or `prNumber`).
 *
 * @param content The complete text of the <attachment> tag.
 * @returns A reference whose value is the raw JSON text, or `undefined` when the tag has
 * no id, the content is not a JSON object, or the object is not an issue/PR.
 */
function extractGitHubIssueOrPRChatReference(content: string): ChatPromptReference | undefined {
	const openingTagMatch = content.match(/<attachment\s+([^>]*)>/i);
	if (!openingTagMatch) {
		return undefined;
	}
	const attrsStr = openingTagMatch[1];
	const idAttrMatch = attrsStr.match(/\bid\s*=\s*"([^"]+)"/);
	if (!idAttrMatch) {
		return undefined;
	}
	let providedId = idAttrMatch[1];
	// If only id attribute is present and inner content is pure JSON, treat as JSON reference
	const innerMatch = content.match(/<attachment[\s\S]*?>([\s\S]*?)<\/attachment>/i);
	const innerText = innerMatch ? innerMatch[1].trim() : '';
	if (!providedId || !innerText.startsWith('{') || !innerText.endsWith('}')) {
		return undefined;
	}

	let body: unknown;
	try {
		body = JSON.parse(innerText);
	} catch {
		// Brace-delimited but not valid JSON (e.g. a code snippet): not an issue/PR reference.
		return undefined;
	}
	if (typeof body !== 'object' || body === null) {
		return undefined;
	}
	const { issueNumber, prNumber } = body as { issueNumber?: unknown; prNumber?: unknown };
	if (typeof issueNumber !== 'number' && typeof prNumber !== 'number') {
		// Not GitHub issue or PR reference
		return undefined;
	}

	// Possible that id is JSON encoded & contains special characters that fail parsing using the regex above.
	// We could improve the regex, but that's risky as we don't know all possible id formats & different attributes.
	// In case of JSON content (PRs & issues) we know there's just an id attribute.
	// Sample = 'id="#17143 Kernel interrupt_mode \\"message\\" sends interrupt_request on shell channel instead of control channel"'
	try {
		const id: unknown = JSON.parse(attrsStr.substring('id='.length));
		if (typeof id === 'string' && id.length > 0) {
			providedId = id;
		}
	} catch {
		// Attribute text is not a JSON string literal; keep the id extracted by the regex.
	}
	return {
		id: providedId,
		name: providedId,
		range: undefined,
		value: innerText
	};
}

/**
 * Convert a `Range` from vscodeTypes into an `EditorRange` by adding one to every line and
 * character value.
 */
function toEditorRange(range: Range): EditorRange {
	return new EditorRange(range.start.line + 1, range.start.character + 1, range.end.line + 1, range.end.character + 1);
}

/**
 * Ensure a folder path ends with a separator.
 *
 * @returns `folderPath` unchanged when it already ends with `/` or `\`, otherwise
 * `folderPath` followed by `path.sep`.
 */
export function getFolderAttachmentPath(folderPath: string): string {
	if (folderPath.endsWith('/') || folderPath.endsWith('\\')) {
		return folderPath;
	}
	return folderPath + path.sep;
}

/**
 * Build a file URI from a path string. On Windows only, doubled backslashes are collapsed
 * to single ones first, except for paths that start with exactly two backslashes.
 */
function pathToUri(pathStr: string): URI {
	if (process.platform === 'win32') {
		// Don't normalize valid UNC paths (starting with \\ but not with \\\\)
		if (pathStr.startsWith('\\\\') && !pathStr.startsWith('\\\\\\\\')) {
			return URI.file(pathStr);
		}
		// Normalize over-escaped paths
		if (pathStr.includes('\\\\')) {
			return URI.file(pathStr.replaceAll('\\\\', '\\'));
		}
	}
	return URI.file(pathStr);
}