CoCalc -- copilotCLIPrompt.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/chatSessions/copilotcli/common/copilotCLIPrompt.ts
¹³⁴⁰⁵ views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import type { ChatPromptReference } from 'vscode';
7
import { createFilepathRegexp } from '../../../../util/common/markdown';
8
import { Schemas } from '../../../../util/vs/base/common/network';
9
import * as path from '../../../../util/vs/base/common/path';
10
import { isEqual } from '../../../../util/vs/base/common/resources';
11
import { URI } from '../../../../util/vs/base/common/uri';
12
import { Range as EditorRange } from '../../../../util/vs/editor/common/core/range';
13
import { ChatReferenceDiagnostic, Diagnostic, DiagnosticSeverity, Location, Range } from '../../../../vscodeTypes';
14
import { PromptFileIdPrefix } from '../../../prompt/common/chatVariablesCollection';
15

16
/**
 * Extracts the references embedded in the first `<attachments>...</attachments>` block of a raw
 * user prompt, keeping them in the order in which they appear.
 *
 * Recognized elements:
 *  - `<error path="/abs/path.py" line=13 code="E001" severity="error">Message</error>`
 *    -> parsed by `extractDiagnosticsFromTag`. When the previously collected entry is also a
 *       ChatReferenceDiagnostic for an equal resource (per `isEqual`) and the first diagnostic
 *       ranges intersect or touch, the new diagnostics are appended to that previous entry
 *       instead of producing a new reference.
 *  - `<attachment ...>...</attachment>` or self-closing `<attachment ... />`
 *    -> parsed by `extractPromptReferencesFromTag` when the tag opens with an `id="prompt:..."`
 *       attribute, otherwise by `extractResourcesFromTag`.
 *
 * @param prompt Raw prompt text.
 * @returns The parsed references; empty when the prompt contains no `<attachments>` block.
 */
export function extractChatPromptReferences(prompt: string): ChatPromptReference[] {
	const attachmentsBlockMatch = prompt.match(/<attachments>([\s\S]*?)<\/attachments>/i);
	if (!attachmentsBlockMatch) {
		return [];
	}
	const block = attachmentsBlockMatch[1];

	// Returns the raw text of each <attachment>/<error> element of `text`, in document order.
	function collectOrderedTags(text: string): string[] {
		const tags: string[] = [];
		const total = text.length;
		let cursor = 0;
		while (cursor < total) {
			// Whichever of the two tag kinds starts first wins.
			const attachmentAt = text.indexOf('<attachment', cursor);
			const errorAt = text.indexOf('<error', cursor);
			const attachmentFirst = attachmentAt !== -1 && (errorAt === -1 || attachmentAt < errorAt);
			const tagStart = attachmentFirst ? attachmentAt : errorAt;
			if (tagStart === -1) {
				break;
			}

			const openEnd = text.indexOf('>', tagStart);
			if (openEnd === -1) {
				break;
			}
			const openingTag = text.slice(tagStart, openEnd + 1);

			// A self-closing attachment is complete as soon as its opening tag ends.
			if (/<attachment\b[\s\S]*?\/>\s*$/i.test(openingTag)) {
				tags.push(openingTag);
				cursor = openEnd + 1;
				continue;
			}

			// Look for the closing tag, ignoring anything inside ``` fences.
			const closeTag = attachmentFirst ? '</attachment>' : '</error>';
			let closeAt = -1;
			let insideFence = false;
			for (let scan = openEnd + 1; scan < total;) {
				if (text.startsWith('```', scan)) {
					insideFence = !insideFence;
					scan += 3;
				} else if (!insideFence && text.startsWith(closeTag, scan)) {
					closeAt = scan;
					break;
				} else {
					scan++;
				}
			}
			if (closeAt === -1) {
				// Unterminated element: stop scanning altogether.
				break;
			}
			cursor = closeAt + closeTag.length;
			tags.push(text.slice(tagStart, cursor));
		}
		return tags;
	}

	const ordered: ChatPromptReference[] = [];
	for (const tagText of collectOrderedTags(block)) {
		if (/^<attachment\b/i.test(tagText)) {
			// Prompt-file attachments and resource attachments have dedicated extractors.
			const isPromptAttachment = /<attachment\s+id="(prompt:[^"]+)"[\s\S]*?>/i.test(tagText);
			const attachmentRef = isPromptAttachment
				? extractPromptReferencesFromTag(prompt, tagText)
				: extractResourcesFromTag(prompt, tagText);
			if (attachmentRef) {
				ordered.push(attachmentRef);
			}
			continue;
		}
		if (!/^<error\b/i.test(tagText)) {
			continue;
		}

		const ref = extractDiagnosticsFromTag(tagText);
		if (!ref) {
			continue;
		}
		const previousRef = ordered.length > 0 ? ordered[ordered.length - 1] : undefined;
		const previous = previousRef?.value;
		const current = ref.value;
		if (
			!(previous instanceof ChatReferenceDiagnostic) ||
			!(current instanceof ChatReferenceDiagnostic) ||
			!isEqual(previous.diagnostics[0][0], current.diagnostics[0][0])
		) {
			ordered.push(ref);
			continue;
		}

		// Same resource as the previous entry: merge only when the ranges intersect or touch.
		const previousDiagnostics = previous.diagnostics[0][1];
		const currentDiagnostics = current.diagnostics[0][1];
		const rangesMeet = EditorRange.areIntersectingOrTouching(
			toEditorRange(previousDiagnostics[0].range),
			toEditorRange(currentDiagnostics[0].range)
		);
		if (rangesMeet) {
			previousDiagnostics.push(...currentDiagnostics);
		} else {
			ordered.push(ref);
		}
	}
	return ordered;
}
126

127
/**
 * Maps a diagnostic severity to the lowercase label used in reference ids.
 *
 * @param severity Severity to convert.
 * @returns `'error'`, `'warning'`, `'info'` or `'hint'`; an empty string for any other value.
 */
function severityToString(severity: DiagnosticSeverity): string {
	switch (severity) {
		case DiagnosticSeverity.Error: return 'error';
		case DiagnosticSeverity.Warning: return 'warning';
		case DiagnosticSeverity.Information: return 'info';
		case DiagnosticSeverity.Hint: return 'hint';
		default: return '';
	}
}
136
// Single-tag extractors used by ordered parsing
137
/**
 * Collects every `name="value"` pair found anywhere in `tagText`.
 * When a name occurs more than once, the last occurrence wins.
 */
function parseQuotedAttributes(tagText: string): Record<string, string> {
	const attrs: Record<string, string> = {};
	for (const [, key, value] of tagText.matchAll(/(\w+)\s*=\s*"([^"]*)"/g)) {
		attrs[key] = value;
	}
	return attrs;
}

/**
 * Converts a single non-prompt `<attachment>` element into a chat prompt reference.
 *
 * Handles, in this order:
 *  - self-closing tags, whose `filePath` / `folderPath` attribute names the resource;
 *  - GitHub issue / PR attachments (delegated to `extractGitHubIssueOrPRChatReference`);
 *  - attachments with content, where the file path comes from a filepath comment inside the
 *    fenced code block, a `# filepath:` / `// filepath:` marker, or an
 *    "Excerpt from <path>, lines X to Y" header;
 *  - SCM history items, identified by a `filePath` attribute starting with `scm-history-item:`.
 *
 * @param prompt Full prompt text; used to locate `#<name>` so the reference range can be set.
 * @param tagText Raw text of the attachment element.
 * @returns The reference, or `undefined` when no resource can be derived from the tag or when
 *          the tag is a `prompt:` attachment.
 */
function extractResourcesFromTag(prompt: string, tagText: string): ChatPromptReference | undefined {
	// Self-closing attachment: everything is carried by the attributes.
	if (/^<attachment\s+[^>]*\/>$/i.test(tagText.trim())) {
		const attrs = parseQuotedAttributes(tagText);
		const isFolder = attrs['folderPath'] !== undefined && attrs['folderPath'] !== '' && attrs['filePath'] === undefined;
		const fileOrFolderpath = attrs['filePath'] || attrs['folderPath'];
		if (!fileOrFolderpath) {
			return undefined;
		}
		const uri = pathToUri(isFolder ? getFolderAttachmentPath(fileOrFolderpath) : fileOrFolderpath);
		const providedId: string | undefined = attrs['id'];
		const locName = providedId ?? uri.toString();
		let id = providedId ?? uri.toString();
		let range: [number, number] | undefined = undefined;
		if (providedId && prompt.includes(`#${providedId}`)) {
			const startIdx = prompt.indexOf(`#${providedId}`);
			range = [startIdx, startIdx + providedId.length];
		}
		if (providedId && providedId.startsWith('sym:')) {
			// Serialise explicitly: interpolating the object returned by toJSON() directly would
			// always produce "vscode.symbol/[object Object]".
			id = `vscode.symbol/${JSON.stringify(uri.toJSON())}`;
		}
		return { id, name: locName, range, value: uri };
	}

	// Normal attachment with content
	const content = tagText;

	const githubPRIssue = extractGitHubIssueOrPRChatReference(content);
	if (githubPRIssue) {
		return githubPRIssue;
	}

	let providedId: string | undefined;
	const openingTagMatch = content.match(/<attachment\s+([^>]*)>/i);
	if (openingTagMatch) {
		const idAttrMatch = openingTagMatch[1].match(/\bid\s*=\s*"([^"]+)"/);
		if (idAttrMatch) {
			providedId = idAttrMatch[1];
		}
	}
	if (providedId && providedId.startsWith('prompt:')) {
		return undefined; // prompt attachments handled elsewhere
	}
	const isUntitledFile = providedId?.startsWith('file:untitled-') ?? false;

	// 1. A filepath comment inside the fenced code block.
	let filePath: string | undefined;
	const fenceMatch = content.match(/```([^\n`]+)\n([\s\S]*?)```/);
	const fencedLanguage = fenceMatch ? fenceMatch[1].trim() : undefined;
	const codeBlockBody = fenceMatch ? fenceMatch[2] : undefined;
	if (codeBlockBody) {
		const re = createFilepathRegexp(fencedLanguage);
		for (const line of codeBlockBody.split(/\r?\n/)) {
			const lineMatch = re.exec(line);
			if (lineMatch && lineMatch[1]) {
				filePath = lineMatch[1].trim();
				break;
			}
		}
	}
	// 2. A filepath marker anywhere in the element.
	if (!filePath) {
		const simpleMatch = content.match(/[#\/]\s*filepath:\s*(\S+)/);
		if (simpleMatch) {
			filePath = simpleMatch[1];
		}
	}
	// 3. The "Excerpt from <path>, lines X to Y" header, which also supplies the line range.
	const excerptMatch = content.match(/Excerpt from ([^,]+),\s*lines\s+(\d+)\s+to\s+(\d+)/i);
	if (!filePath && excerptMatch) {
		filePath = excerptMatch[1].trim();
	}

	if (!filePath) {
		// Possible this is an SCM item
		try {
			const attrs = parseQuotedAttributes(tagText);
			if (typeof attrs['filePath'] === 'string') {
				filePath = attrs['filePath'];
			}
			if (filePath?.startsWith('scm-history-item:') && typeof attrs['id'] === 'string') {
				let id = attrs['id'];
				const value = URI.parse(filePath);
				try {
					// Prefer the history item id carried in the URI query.
					const historyItemId: unknown = JSON.parse(value.query).historyItemId;
					if (typeof historyItemId === 'string' && historyItemId.length > 0) {
						id = historyItemId;
					}
				} catch {
					// The query is not JSON: keep the id attribute.
				}
				return {
					id,
					name: attrs['id'],
					value
				} satisfies ChatPromptReference;
			}
		} catch {
			// Best effort only: an unparsable SCM item yields no reference.
		}

		return undefined;
	}

	const startLine = excerptMatch ? parseInt(excerptMatch[2], 10) : undefined;
	const endLine = excerptMatch ? parseInt(excerptMatch[3], 10) : undefined;
	const uri = isUntitledFile && filePath.startsWith('untitled:')
		? URI.from({ scheme: Schemas.untitled, path: filePath.substring('untitled:'.length) })
		: pathToUri(filePath);
	// Excerpt line numbers are converted to zero-based positions for the Range.
	const location = (typeof startLine === 'undefined' || typeof endLine === 'undefined' || isNaN(startLine) || isNaN(endLine))
		? undefined
		: new Location(uri, new Range(startLine - 1, 0, endLine - 1, 0));
	const resourceKey = location ? JSON.stringify(location) : uri.toString();
	const locName = providedId ?? resourceKey;
	let range: [number, number] | undefined = undefined;
	if (prompt.includes(`#${locName}`)) {
		const idx = prompt.indexOf(`#${locName}`);
		range = [idx, idx + locName.length];
	}
	const id = locName.startsWith('sym:') ? `vscode.symbol/${resourceKey}` : resourceKey;
	return { id, name: locName, range, value: location ?? uri };
}
249

250
/**
 * Converts an `<attachment id="prompt:...">` element into a prompt-file reference.
 *
 * The file path is read from a `// filepath:` comment line in the element content, or failing
 * that from a `# filepath:` line.
 *
 * @param prompt Full prompt text (not used by this extractor).
 * @param tagText Raw text of the attachment element.
 * @returns The reference, or `undefined` when the tag has no `prompt:` id or no file path.
 */
function extractPromptReferencesFromTag(prompt: string, tagText: string): ChatPromptReference | undefined {
	const idAttr = tagText.match(/<attachment\s+id="(prompt:[^"]+)"[\s\S]*?>/i)?.[1];
	if (!idAttr) {
		return undefined;
	}
	const content = tagText.match(/<attachment[\s\S]*?>([\s\S]*?)<\/attachment>/i)?.[1] ?? '';

	// The slash-comment form is tried before the hash-comment form.
	const filepathMarkers = [
		/^\s*\/\/+\s*filepath:\s*(.+?)(?:\r?\n|$)/im,
		/^\s*#\s*filepath:\s*(.+?)(?:\r?\n|$)/im,
	];
	let filePath: string | undefined;
	for (const marker of filepathMarkers) {
		const candidate = content.match(marker)?.[1].trim();
		if (candidate) {
			filePath = candidate;
			break;
		}
	}
	if (!filePath) {
		return undefined;
	}

	const uri: URI = filePath.startsWith('untitled:') ? URI.parse(filePath) : pathToUri(filePath);
	return {
		id: `${PromptFileIdPrefix}__${uri.toString()}`,
		name: idAttr,
		value: uri,
		modelDescription: 'Prompt instruction file',
	};
}
271

272
/**
 * Converts a single `<error ...>message</error>` element into a diagnostic reference.
 *
 * Attributes may be quoted (`path="/abs/file.py"`) or bare numbers (`line=13`). `path` and a
 * `line` of at least 1 are required. `severity` defaults to `error`, and an unrecognised
 * severity is also treated as an error. A `code` of `"undefined"` is treated as absent.
 *
 * @param tagText Raw text of the error element.
 * @returns A reference whose value is a ChatReferenceDiagnostic holding one diagnostic placed
 *          at column 0 of the given line, or `undefined` when the tag cannot be parsed.
 */
function extractDiagnosticsFromTag(tagText: string): ChatPromptReference | undefined {
	const m = tagText.match(/<error\s+([^>]+)>([\s\S]*?)<\/error>/i);
	if (!m) {
		return undefined;
	}
	const attrText = m[1];
	const message = m[2].trim();

	// Single left-to-right pass: a quoted value is consumed as a whole, so text such as
	// `line=7` inside a quoted path is never mistaken for an attribute of its own.
	const attrs: Record<string, string> = {};
	const unquotedAttrs: Record<string, string> = {};
	for (const [, key, quotedValue, numericValue] of attrText.matchAll(/(\w+)=(?:"([^"]*)"|([0-9]+))/g)) {
		if (quotedValue !== undefined) {
			attrs[key] = quotedValue;
		} else if (unquotedAttrs[key] === undefined) {
			unquotedAttrs[key] = numericValue;
		}
	}
	// A bare numeric value only fills in an attribute that has no non-empty quoted value.
	for (const [key, value] of Object.entries(unquotedAttrs)) {
		if (!attrs[key]) {
			attrs[key] = value;
		}
	}

	const filePath = attrs['path'];
	const lineStr = attrs['line'];
	if (!filePath || !lineStr) {
		return undefined;
	}
	const lineNum = parseInt(lineStr, 10);
	if (isNaN(lineNum) || lineNum < 1) {
		return undefined;
	}

	const code = attrs['code'] && attrs['code'] !== 'undefined' ? attrs['code'] : undefined;
	const severityByName = new Map<string, DiagnosticSeverity>([
		['error', DiagnosticSeverity.Error],
		['warning', DiagnosticSeverity.Warning],
		['info', DiagnosticSeverity.Information],
		['hint', DiagnosticSeverity.Hint],
	]);
	const severity = severityByName.get((attrs['severity'] || 'error').toLowerCase()) ?? DiagnosticSeverity.Error;

	const uri = pathToUri(filePath);
	// The tag carries a 1-based line; the Range is zero-based.
	const range = new Range(lineNum - 1, 0, lineNum - 1, 0);
	const diagnostic = new Diagnostic(range, message, severity);
	diagnostic.code = code;
	return {
		id: `${uri.toString()}:${severityToString(diagnostic.severity)}:${diagnostic.range.start.line + 1}:${diagnostic.range.start.character + 1}`,
		name: diagnostic.message,
		range: undefined,
		value: new ChatReferenceDiagnostic([[uri, [diagnostic]]])
	} as ChatPromptReference;
}
299

300
/**
 * Recognises an attachment that describes a GitHub issue or pull request: an `<attachment>`
 * with an `id` attribute whose inner content is a JSON object containing a numeric
 * `issueNumber` or `prNumber`.
 *
 * @param content Raw text of the attachment element.
 * @returns A reference whose value is the trimmed JSON text, or `undefined` when the element
 *          is not an issue / PR attachment (no id, content that is not a JSON object, or
 *          neither number present).
 */
function extractGitHubIssueOrPRChatReference(content: string): ChatPromptReference | undefined {
	const openingTagMatch = content.match(/<attachment\s+([^>]*)>/i);
	if (!openingTagMatch) {
		return undefined;
	}
	const attrsStr = openingTagMatch[1];
	const idAttrMatch = attrsStr.match(/\bid\s*=\s*"([^"]+)"/);
	if (!idAttrMatch) {
		return undefined;
	}
	let providedId = idAttrMatch[1];

	const innerMatch = content.match(/<attachment[\s\S]*?>([\s\S]*?)<\/attachment>/i);
	const innerText = innerMatch ? innerMatch[1].trim() : '';
	if (!innerText.startsWith('{') || !innerText.endsWith('}')) {
		return undefined;
	}

	// Content that merely looks like JSON but does not parse is not an issue / PR attachment.
	let body: unknown;
	try {
		body = JSON.parse(innerText);
	} catch {
		return undefined;
	}
	if (typeof body !== 'object' || body === null) {
		return undefined;
	}
	const { issueNumber, prNumber } = body as Record<string, unknown>;
	if (typeof issueNumber !== 'number' && typeof prNumber !== 'number') {
		return undefined;
	}

	// The id may be JSON encoded and contain escaped quotes that the attribute regex above cuts
	// short. For issue / PR attachments `id` is the only attribute, so the text after `id=` can
	// be parsed as a JSON string. Sample:
	// 'id="#17143 Kernel interrupt_mode \\"message\\" sends interrupt_request on shell channel instead of control channel"'
	try {
		const id: unknown = JSON.parse(attrsStr.substring('id='.length));
		if (typeof id === 'string' && id.length > 0) {
			providedId = id;
		}
	} catch {
		// Not a JSON string: keep the id captured by the regex.
	}

	return {
		id: providedId,
		name: providedId,
		range: undefined,
		value: innerText
	};
}
339

340
/**
 * Builds an EditorRange from a Range by adding 1 to each line and character value
 * of its start and end positions.
 */
function toEditorRange(range: Range): EditorRange {
	const { start, end } = range;
	return new EditorRange(start.line + 1, start.character + 1, end.line + 1, end.character + 1);
}
343

344
/**
 * Returns `folderPath` with a trailing path separator.
 *
 * A path that already ends with `/` or `\` is returned unchanged; otherwise `path.sep` is appended.
 */
export function getFolderAttachmentPath(folderPath: string): string {
	const lastChar = folderPath.charAt(folderPath.length - 1);
	const hasTrailingSeparator = lastChar === '/' || lastChar === '\\';
	return hasTrailingSeparator ? folderPath : folderPath + path.sep;
}
350

351
/**
 * Converts a file system path into a file URI.
 *
 * On Windows, a path that starts with exactly two backslashes (a UNC path) is passed through
 * untouched. Any other path has its doubled backslashes collapsed to single ones first, to undo
 * over-escaping. On other platforms the path is used as is.
 */
function pathToUri(pathStr: string): URI {
	if (process.platform !== 'win32') {
		return URI.file(pathStr);
	}
	// Two leading backslashes mark a UNC path; four mean the whole path was over-escaped.
	const isUncPath = pathStr.startsWith('\\\\') && !pathStr.startsWith('\\\\\\\\');
	const normalized = isUncPath ? pathStr : pathStr.replaceAll('\\\\', '\\');
	return URI.file(normalized);
}
364

365
Product

Resources

Company