Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/chatSessions/copilotcli/common/copilotCLIPrompt.ts
13405 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import type { ChatPromptReference } from 'vscode';
7
import { createFilepathRegexp } from '../../../../util/common/markdown';
8
import { Schemas } from '../../../../util/vs/base/common/network';
9
import * as path from '../../../../util/vs/base/common/path';
10
import { isEqual } from '../../../../util/vs/base/common/resources';
11
import { URI } from '../../../../util/vs/base/common/uri';
12
import { Range as EditorRange } from '../../../../util/vs/editor/common/core/range';
13
import { ChatReferenceDiagnostic, Diagnostic, DiagnosticSeverity, Location, Range } from '../../../../vscodeTypes';
14
import { PromptFileIdPrefix } from '../../../prompt/common/chatVariablesCollection';
15
16
/**
 * Extract chat prompt references from the first `<attachments>...</attachments>`
 * block of a raw user prompt, preserving the order in which the tags appear.
 *
 * Tags handled inside the block:
 * - `<error path="/abs/path.py" line=13 code="E001" severity="error">Message</error>`
 *   -> a reference whose value is a ChatReferenceDiagnostic (uri -> Diagnostic[]).
 *   An error that directly follows another error reference for the same uri, and
 *   whose range intersects or touches it, is merged into that previous reference.
 * - `<attachment ...>...</attachment>` or self-closing `<attachment ... />`
 *   -> delegated to the prompt-file extractor when the tag carries an
 *   `id="prompt:..."` attribute, otherwise to the resource extractor.
 *
 * @param prompt The raw prompt text.
 * @returns The extracted references; empty when there is no `<attachments>` block.
 */
export function extractChatPromptReferences(prompt: string): ChatPromptReference[] {
	const blockMatch = /<attachments>([\s\S]*?)<\/attachments>/i.exec(prompt);
	if (blockMatch === null) {
		return [];
	}

	// Locate `closing` at or after `from`, ignoring occurrences inside ``` fences.
	// Returns -1 when the text ends before an unfenced closing tag is seen.
	const findClosingTag = (text: string, from: number, closing: string): number => {
		let fenced = false;
		let pos = from;
		while (pos < text.length) {
			if (text.startsWith('```', pos)) {
				fenced = !fenced;
				pos += 3;
			} else if (!fenced && text.startsWith(closing, pos)) {
				return pos;
			} else {
				pos++;
			}
		}
		return -1;
	};

	// Split the block into the raw text of each <attachment>/<error> tag, in document order.
	const splitIntoTags = (text: string): string[] => {
		const tags: string[] = [];
		let cursor = 0;
		while (cursor < text.length) {
			const attachmentAt = text.indexOf('<attachment', cursor);
			const errorAt = text.indexOf('<error', cursor);

			// Whichever tag starts first wins.
			let start: number;
			let closing: string;
			if (attachmentAt !== -1 && (errorAt === -1 || attachmentAt < errorAt)) {
				start = attachmentAt;
				closing = '</attachment>';
			} else if (errorAt !== -1) {
				start = errorAt;
				closing = '</error>';
			} else {
				break;
			}

			const openEnd = text.indexOf('>', start);
			if (openEnd === -1) {
				break;
			}
			const openingTag = text.slice(start, openEnd + 1);

			// A self-closing attachment is complete as soon as its opening tag ends.
			if (/<attachment\b[\s\S]*?\/>\s*$/i.test(openingTag)) {
				tags.push(openingTag);
				cursor = openEnd + 1;
				continue;
			}

			const closeAt = findClosingTag(text, openEnd + 1, closing);
			if (closeAt === -1) {
				// Unterminated tag: stop scanning altogether.
				break;
			}
			cursor = closeAt + closing.length;
			tags.push(text.slice(start, cursor));
		}
		return tags;
	};

	const references: ChatPromptReference[] = [];
	for (const tag of splitIntoTags(blockMatch[1])) {
		if (/^<attachment\b/i.test(tag)) {
			// Prompt-file attachments and resource attachments use different extractors.
			const isPromptAttachment = /<attachment\s+id="(prompt:[^"]+)"[\s\S]*?>/i.test(tag);
			const attachmentRef = isPromptAttachment
				? extractPromptReferencesFromTag(prompt, tag)
				: extractResourcesFromTag(prompt, tag);
			if (attachmentRef) {
				references.push(attachmentRef);
			}
			continue;
		}
		if (!/^<error\b/i.test(tag)) {
			continue;
		}

		const diagnosticRef = extractDiagnosticsFromTag(tag);
		if (!diagnosticRef) {
			continue;
		}

		const previous: ChatPromptReference | undefined = references[references.length - 1];
		if (
			previous
			&& previous.value instanceof ChatReferenceDiagnostic
			&& diagnosticRef.value instanceof ChatReferenceDiagnostic
			&& isEqual(previous.value.diagnostics[0][0], diagnosticRef.value.diagnostics[0][0])
		) {
			// Same file as the preceding diagnostic entry: merge when the ranges intersect or touch.
			const incomingRange = toEditorRange(diagnosticRef.value.diagnostics[0][1][0].range);
			const existingRange = toEditorRange(previous.value.diagnostics[0][1][0].range);
			if (EditorRange.areIntersectingOrTouching(existingRange, incomingRange)) {
				previous.value.diagnostics[0][1].push(...diagnosticRef.value.diagnostics[0][1]);
				continue;
			}
		}
		references.push(diagnosticRef);
	}
	return references;
}
126
127
/**
 * Map a diagnostic severity to the lowercase label used in diagnostic reference ids.
 *
 * @returns `'error'`, `'warning'`, `'info'` or `'hint'`; an empty string for any other value.
 */
function severityToString(severity: DiagnosticSeverity): string {
	if (severity === DiagnosticSeverity.Error) {
		return 'error';
	}
	if (severity === DiagnosticSeverity.Warning) {
		return 'warning';
	}
	if (severity === DiagnosticSeverity.Information) {
		return 'info';
	}
	if (severity === DiagnosticSeverity.Hint) {
		return 'hint';
	}
	return '';
}
136
// Single-tag extractors used by ordered parsing
137
/**
 * Convert one non-prompt `<attachment>` tag into a chat prompt reference.
 *
 * Handled in this order:
 * 1. Self-closing tags (`filePath="..."` or `folderPath="..."` attributes) -> a URI reference.
 * 2. Tags whose body is a JSON GitHub issue/PR payload -> delegated to
 *    `extractGitHubIssueOrPRChatReference`.
 * 3. Tags with content: the path is taken from a `filepath:` comment (inside the first
 *    fenced code block, or anywhere in the tag) or from an `Excerpt from <path>, lines X to Y`
 *    header. A `Location` is produced when a line span is present, a URI otherwise.
 * 4. Tags with no discoverable path but an `scm-history-item:` `filePath` attribute.
 *
 * @param prompt Full prompt text; searched for `#<name>` to compute the reference's `range`.
 * @param tagText Raw text of a single `<attachment>` tag.
 * @returns The reference, or `undefined` when no path can be determined or the tag is a
 *          `prompt:` attachment (those are handled by the prompt-file extractor).
 */
function extractResourcesFromTag(prompt: string, tagText: string): ChatPromptReference | undefined {
	// Collect every `name="value"` pair that occurs anywhere in the tag text.
	const readAttributes = (): Record<string, string> => {
		const attrs: Record<string, string> = {};
		for (const attrMatch of tagText.matchAll(/(\w+)\s*=\s*"([^"]*)"/g)) {
			attrs[attrMatch[1]] = attrMatch[2];
		}
		return attrs;
	};

	// Self-closing attachment
	if (/^<attachment\s+[^>]*\/>$/i.test(tagText.trim())) {
		const attrs = readAttributes();
		const isFolder = attrs['folderPath'] !== undefined && attrs['folderPath'] !== '' && attrs['filePath'] === undefined;
		const fileOrFolderpath = attrs['filePath'] || attrs['folderPath'];
		if (!fileOrFolderpath) {
			return undefined;
		}
		const uri = pathToUri(isFolder ? getFolderAttachmentPath(fileOrFolderpath) : fileOrFolderpath);
		const providedId: string | undefined = attrs['id'];
		const locName = providedId ?? uri.toString();
		let id = locName;
		let range: [number, number] | undefined = undefined;
		if (providedId) {
			const startIdx = prompt.indexOf(`#${providedId}`);
			if (startIdx !== -1) {
				// NOTE(review): the end offset does not count the leading `#`; confirm this is what consumers of `range` expect.
				range = [startIdx, startIdx + providedId.length];
			}
		}
		if (providedId && providedId.startsWith('sym:')) {
			// Serialize explicitly: interpolating the object returned by toJSON() yields "[object Object]".
			id = `vscode.symbol/${JSON.stringify(uri.toJSON())}`;
		}
		return { id, name: locName, range, value: uri };
	}

	// Normal attachment with content
	const content = tagText;

	const githubPRIssue = extractGitHubIssueOrPRChatReference(content);
	if (githubPRIssue) {
		return githubPRIssue;
	}

	let providedId: string | undefined;
	const openingTagMatch = content.match(/<attachment\s+([^>]*)>/i);
	if (openingTagMatch) {
		const idAttrMatch = openingTagMatch[1].match(/\bid\s*=\s*"([^"]+)"/);
		if (idAttrMatch) {
			providedId = idAttrMatch[1];
		}
	}
	if (providedId && providedId.startsWith('prompt:')) {
		return undefined; // prompt attachments handled elsewhere
	}
	const isUntitledFile = providedId?.startsWith('file:untitled-') || false;

	// 1st choice: a filepath comment inside the first fenced code block, matched with the language-aware pattern.
	let filePath: string | undefined;
	const fenceMatch = content.match(/```([^\n`]+)\n([\s\S]*?)```/);
	const fencedLanguage = fenceMatch ? fenceMatch[1].trim() : undefined;
	const codeBlockBody = fenceMatch ? fenceMatch[2] : undefined;
	if (codeBlockBody) {
		const re = createFilepathRegexp(fencedLanguage);
		for (const line of codeBlockBody.split(/\r?\n/)) {
			const lineMatch = re.exec(line);
			if (lineMatch && lineMatch[1]) {
				filePath = lineMatch[1].trim();
				break;
			}
		}
	}
	// 2nd choice: any `# filepath:` / `/ filepath:` marker anywhere in the tag.
	if (!filePath) {
		const simpleMatch = content.match(/[#\/]\s*filepath:\s*(\S+)/);
		if (simpleMatch) {
			filePath = simpleMatch[1];
		}
	}
	// 3rd choice: the path named in the excerpt header, which also carries the line span.
	const excerptMatch = content.match(/Excerpt from ([^,]+),\s*lines\s+(\d+)\s+to\s+(\d+)/i);
	if (!filePath && excerptMatch) {
		filePath = excerptMatch[1].trim();
	}

	if (!filePath) {
		// Possible this is an SCM item
		try {
			const attrs = readAttributes();
			if (typeof attrs['filePath'] === 'string') {
				filePath = attrs['filePath'];
			}
			if (filePath?.startsWith('scm-history-item:') && typeof attrs['id'] === 'string') {
				let id = attrs['id'];
				const value = URI.parse(filePath);
				try {
					// Extract id from query.
					const historyItemId = JSON.parse(value.query).historyItemId;
					if (typeof historyItemId === 'string' && historyItemId.length > 0) {
						id = historyItemId;
					}
				} catch {
					// Query is not JSON: keep the id attribute as the reference id.
				}
				return {
					id,
					name: attrs['id'],
					value
				} satisfies ChatPromptReference;
			}
		} catch {
			// Best effort: an unparsable SCM URI means this tag yields no reference.
		}

		return undefined;
	}

	const startLine = excerptMatch ? parseInt(excerptMatch[2], 10) : undefined;
	const endLine = excerptMatch ? parseInt(excerptMatch[3], 10) : undefined;
	const uri = isUntitledFile && filePath.startsWith('untitled:')
		? URI.from({ scheme: Schemas.untitled, path: filePath.substring('untitled:'.length) })
		: pathToUri(filePath);
	// Excerpt line numbers are 1-based; Range is 0-based.
	const location = (typeof startLine === 'undefined' || typeof endLine === 'undefined' || isNaN(startLine) || isNaN(endLine))
		? undefined
		: new Location(uri, new Range(startLine - 1, 0, endLine - 1, 0));
	const target = location ? JSON.stringify(location) : uri.toString();
	const locName = providedId ?? target;
	let range: [number, number] | undefined = undefined;
	const idx = prompt.indexOf(`#${locName}`);
	if (idx !== -1) {
		// NOTE(review): the end offset does not count the leading `#`; confirm this is what consumers of `range` expect.
		range = [idx, idx + locName.length];
	}
	const id = locName.startsWith('sym:') ? `vscode.symbol/${target}` : target;
	return { id, name: locName, range, value: location ?? uri };
}
249
250
/**
 * Build a prompt-instruction-file reference from an `<attachment id="prompt:...">` tag.
 *
 * The file path is read from the first `// filepath: ...` line of the tag body, falling
 * back to the first `# filepath: ...` line.
 *
 * @param prompt Full prompt text (not used by this extractor).
 * @param tagText Raw text of a single `<attachment>` tag.
 * @returns The reference, or `undefined` when the tag has no `prompt:` id or no file path.
 */
function extractPromptReferencesFromTag(prompt: string, tagText: string): ChatPromptReference | undefined {
	const promptId = /<attachment\s+id="(prompt:[^"]+)"[\s\S]*?>/i.exec(tagText)?.[1];
	if (promptId === undefined) {
		return undefined;
	}
	const body = /<attachment[\s\S]*?>([\s\S]*?)<\/attachment>/i.exec(tagText)?.[1] ?? '';

	// Tried in order: slash-style marker first, hash-style marker second.
	const filepathPatterns = [
		/^\s*\/\/+\s*filepath:\s*(.+?)(?:\r?\n|$)/im,
		/^\s*#\s*filepath:\s*(.+?)(?:\r?\n|$)/im,
	];
	let filePath: string | undefined;
	for (const pattern of filepathPatterns) {
		const found = body.match(pattern);
		if (found) {
			filePath = found[1].trim();
		}
		if (filePath) {
			break;
		}
	}
	if (!filePath) {
		return undefined;
	}

	const uri: URI = filePath.startsWith('untitled:') ? URI.parse(filePath) : pathToUri(filePath);
	return {
		id: `${PromptFileIdPrefix}__${uri.toString()}`,
		name: promptId,
		value: uri,
		modelDescription: 'Prompt instruction file',
	};
}
271
272
/**
 * Convert one `<error path="..." line=N code="..." severity="...">message</error>` tag
 * into a reference whose value is a ChatReferenceDiagnostic with a single diagnostic.
 *
 * Attribute values may be double-quoted or bare integers; a non-empty quoted value wins
 * when both forms are present for the same name. `severity` is matched case-insensitively
 * and falls back to Error when missing or not one of error/warning/info/hint.
 *
 * @param tagText Raw text of a single `<error>` tag.
 * @returns The reference, or `undefined` when the tag is malformed, lacks `path`/`line`,
 *          or `line` is not a positive integer.
 */
function extractDiagnosticsFromTag(tagText: string): ChatPromptReference | undefined {
	const m = tagText.match(/<error\s+([^>]+)>([\s\S]*?)<\/error>/i);
	if (!m) {
		return undefined;
	}
	const attrText = m[1];
	const message = m[2].trim();

	// Attribute names come from prompt text, so use a Map: a plain-object lookup would also
	// see Object.prototype members such as `constructor` or `toString`.
	const attrs = new Map<string, string>();
	for (const [, key, value] of attrText.matchAll(/(\w+)="([^"]*)"/g)) {
		attrs.set(key, value);
	}
	for (const [, key, value] of attrText.matchAll(/(\w+)=([0-9]+)/g)) {
		if (!attrs.get(key)) {
			attrs.set(key, value);
		}
	}

	const filePath = attrs.get('path');
	const lineStr = attrs.get('line');
	if (!filePath || !lineStr) {
		return undefined;
	}
	const lineNum = parseInt(lineStr, 10);
	if (isNaN(lineNum) || lineNum < 1) {
		return undefined;
	}

	// The literal text "undefined" is treated as "no code".
	const rawCode = attrs.get('code');
	const code = rawCode && rawCode !== 'undefined' ? rawCode : undefined;

	const severityByName = new Map<string, DiagnosticSeverity>([
		['error', DiagnosticSeverity.Error],
		['warning', DiagnosticSeverity.Warning],
		['info', DiagnosticSeverity.Information],
		['hint', DiagnosticSeverity.Hint],
	]);
	const severityStr = (attrs.get('severity') || 'error').toLowerCase();
	const severity = severityByName.get(severityStr) ?? DiagnosticSeverity.Error;

	const uri = pathToUri(filePath);
	// `line` is 1-based in the tag; Range is 0-based.
	const range = new Range(lineNum - 1, 0, lineNum - 1, 0);
	const diagnostic = new Diagnostic(range, message, severity);
	diagnostic.code = code;
	return {
		id: `${uri.toString()}:${severityToString(diagnostic.severity)}:${diagnostic.range.start.line + 1}:${diagnostic.range.start.character + 1}`,
		name: diagnostic.message,
		range: undefined,
		value: new ChatReferenceDiagnostic([[uri, [diagnostic]]])
	} as ChatPromptReference;
}
299
300
/**
 * Recognize an `<attachment id="...">{ ...json... }</attachment>` tag whose body is a JSON
 * object describing a GitHub issue (`issueNumber`) or pull request (`prNumber`).
 *
 * @param content Raw text of a single `<attachment>` tag.
 * @returns A reference whose `id`/`name` are the attachment id and whose `value` is the raw
 *          JSON text; `undefined` when the tag has no id, or its body is not a valid JSON
 *          object carrying a numeric `issueNumber` or `prNumber`.
 */
function extractGitHubIssueOrPRChatReference(content: string): ChatPromptReference | undefined {
	const openingTagMatch = content.match(/<attachment\s+([^>]*)>/i);
	if (!openingTagMatch) {
		return undefined;
	}
	const attrsStr = openingTagMatch[1];
	const idAttrMatch = attrsStr.match(/\bid\s*=\s*"([^"]+)"/);
	if (!idAttrMatch) {
		return undefined;
	}
	let providedId = idAttrMatch[1];

	// Only a body that is a JSON object can be an issue/PR payload.
	const innerMatch = content.match(/<attachment[\s\S]*?>([\s\S]*?)<\/attachment>/i);
	const innerText = innerMatch ? innerMatch[1].trim() : '';
	if (!innerText.startsWith('{') || !innerText.endsWith('}')) {
		return undefined;
	}

	let body: unknown;
	try {
		body = JSON.parse(innerText);
	} catch {
		// Brace-wrapped text that is not JSON (e.g. a code snippet) is not a GitHub reference.
		return undefined;
	}
	if (typeof body !== 'object' || body === null) {
		return undefined;
	}
	const { issueNumber, prNumber } = body as { issueNumber?: unknown; prNumber?: unknown };
	if (typeof issueNumber !== 'number' && typeof prNumber !== 'number') {
		// Not GitHub issue or PR reference
		return undefined;
	}

	// The id may be JSON-encoded and contain escaped quotes, which the attribute regex above
	// truncates at the first `\"`. For issue/PR attachments the id is expected to be the only
	// attribute, so decode everything after `id=` as a JSON string.
	// Sample: 'id="#17143 Kernel interrupt_mode \\"message\\" sends interrupt_request on shell channel instead of control channel"'
	try {
		const decodedId: unknown = JSON.parse(attrsStr.substring('id='.length));
		if (typeof decodedId === 'string' && decodedId.length > 0) {
			providedId = decodedId;
		}
	} catch {
		// Not decodable as a JSON string: keep the id captured by the regex.
	}

	return {
		id: providedId,
		name: providedId,
		range: undefined,
		value: innerText
	};
}
339
340
/**
 * Convert a `Range` into an editor range by adding 1 to every line and character value.
 */
function toEditorRange(range: Range): EditorRange {
	const { start, end } = range;
	return new EditorRange(
		start.line + 1,
		start.character + 1,
		end.line + 1,
		end.character + 1,
	);
}
343
344
/**
 * Ensure a folder path ends with a path separator.
 *
 * @param folderPath Folder path, with or without a trailing `/` or `\`.
 * @returns `folderPath` unchanged when it already ends with `/` or `\`; otherwise
 *          `folderPath` followed by `path.sep`.
 */
export function getFolderAttachmentPath(folderPath: string): string {
	const hasTrailingSeparator = /[\\/]$/.test(folderPath);
	return hasTrailingSeparator ? folderPath : `${folderPath}${path.sep}`;
}
350
351
/**
 * Turn a file-system path string into a file URI.
 *
 * On Windows, a path containing doubled backslashes has each pair collapsed to a single
 * backslash first, unless it is a UNC path (starts with exactly two backslashes rather
 * than four), which is passed through untouched.
 */
function pathToUri(pathStr: string): URI {
	const onWindows = process.platform === 'win32';
	if (onWindows) {
		// Valid UNC prefix: `\\` but not the over-escaped `\\\\`.
		const isUncPath = pathStr.startsWith('\\\\') && !pathStr.startsWith('\\\\\\\\');
		if (!isUncPath && pathStr.includes('\\\\')) {
			// Normalize over-escaped paths
			return URI.file(pathStr.replaceAll('\\\\', '\\'));
		}
	}
	return URI.file(pathStr);
}
364
365