CoCalc -- computeAutomaticInstructions.ts

GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/workbench/contrib/chat/common/promptSyntax/computeAutomaticInstructions.ts
3297 views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import { CancellationToken } from '../../../../../base/common/cancellation.js';
7
import { match, splitGlobAware } from '../../../../../base/common/glob.js';
8
import { ResourceMap, ResourceSet } from '../../../../../base/common/map.js';
9
import { Schemas } from '../../../../../base/common/network.js';
10
import { basename, joinPath } from '../../../../../base/common/resources.js';
11
import { URI } from '../../../../../base/common/uri.js';
12
import { localize } from '../../../../../nls.js';
13
import { IConfigurationService } from '../../../../../platform/configuration/common/configuration.js';
14
import { IFileService } from '../../../../../platform/files/common/files.js';
15
import { ILabelService } from '../../../../../platform/label/common/label.js';
16
import { ILogService } from '../../../../../platform/log/common/log.js';
17
import { ITelemetryService } from '../../../../../platform/telemetry/common/telemetry.js';
18
import { IWorkspaceContextService } from '../../../../../platform/workspace/common/workspace.js';
19
import { ChatRequestVariableSet, IChatRequestVariableEntry, isPromptFileVariableEntry, toPromptFileVariableEntry, toPromptTextVariableEntry, PromptFileVariableKind } from '../chatVariableEntries.js';
20
import { IToolData } from '../languageModelToolsService.js';
21
import { PromptsConfig } from './config/config.js';
22
import { COPILOT_CUSTOM_INSTRUCTIONS_FILENAME, isPromptOrInstructionsFile } from './config/promptFileLocations.js';
23
import { PromptsType } from './promptTypes.js';
24
import { IPromptParserResult, IPromptPath, IPromptsService } from './service/promptsService.js';
25

26
/**
 * Payload of the `instructionsCollected` telemetry event: one counter per
 * way an instruction entry can end up in the chat request variables.
 */
export type InstructionsCollectionEvent = {
	/** Number of instructions added via pattern matching. */
	applyingInstructionsCount: number;
	/** Number of instructions added via references from other instruction files. */
	referencedInstructionsCount: number;
	/** Number of agent instructions added (copilot-instructions.md and agents.md). */
	agentInstructionsCount: number;
	/** Number of instruction patterns added. */
	listedInstructionsCount: number;
	/** Total number of instruction entries added to variables. */
	totalInstructionsCount: number;
};
33
/**
 * Creates a fresh telemetry event in which every counter starts at zero.
 *
 * @returns a new object on every call, so callers may mutate it freely.
 */
export function newInstructionsCollectionEvent(): InstructionsCollectionEvent {
	const zeroed: InstructionsCollectionEvent = {
		applyingInstructionsCount: 0,
		referencedInstructionsCount: 0,
		agentInstructionsCount: 0,
		listedInstructionsCount: 0,
		totalInstructionsCount: 0,
	};
	return zeroed;
}
36

37
/**
 * Telemetry classification for the `instructionsCollected` event. It carries
 * one entry per counter of `InstructionsCollectionEvent` plus the owner and
 * the overall comment of the event.
 */
type InstructionsCollectionClassification = {
	applyingInstructionsCount: {
		classification: 'SystemMetaData';
		purpose: 'FeatureInsight';
		isMeasurement: true;
		comment: 'Number of instructions added via pattern matching.';
	};
	referencedInstructionsCount: {
		classification: 'SystemMetaData';
		purpose: 'FeatureInsight';
		isMeasurement: true;
		comment: 'Number of instructions added via references from other instruction files.';
	};
	agentInstructionsCount: {
		classification: 'SystemMetaData';
		purpose: 'FeatureInsight';
		isMeasurement: true;
		comment: 'Number of agent instructions added (copilot-instructions.md and agents.md).';
	};
	listedInstructionsCount: {
		classification: 'SystemMetaData';
		purpose: 'FeatureInsight';
		isMeasurement: true;
		comment: 'Number of instruction patterns added.';
	};
	totalInstructionsCount: {
		classification: 'SystemMetaData';
		purpose: 'FeatureInsight';
		isMeasurement: true;
		comment: 'Total number of instruction entries added to variables.';
	};
	owner: 'digitarald';
	comment: 'Tracks automatic instruction collection usage in chat prompt system.';
};
46

47
/**
 * Collects the instruction files that are attached automatically to a chat
 * request and adds them to the request's variable set:
 *  - instruction files whose `applyTo` pattern matches an attached file,
 *  - files referenced by instruction files that are already in the set,
 *  - agent instructions (`.github/` + `COPILOT_CUSTOM_INSTRUCTIONS_FILENAME`
 *    and a root-level `AGENTS.md`), depending on the settings,
 *  - a text entry listing all instruction files, when a read-file tool was
 *    handed to the constructor.
 *
 * Every collection run reports its counters through the
 * `instructionsCollected` telemetry event.
 */
export class ComputeAutomaticInstructions {

	/** Successfully parsed instruction files by URI. Failed parses are not cached. */
	private readonly _parseResults: ResourceMap<IPromptParserResult> = new ResourceMap();

	constructor(
		private readonly _readFileTool: IToolData | undefined,
		@IPromptsService private readonly _promptsService: IPromptsService,
		@ILogService public readonly _logService: ILogService,
		@ILabelService private readonly _labelService: ILabelService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IWorkspaceContextService private readonly _workspaceService: IWorkspaceContextService,
		@IFileService private readonly _fileService: IFileService,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
	) {
	}

	/**
	 * Parses the given instruction file, reusing the result of an earlier
	 * successful parse of the same URI.
	 *
	 * @returns the parse result, or `undefined` if parsing threw (the error is logged).
	 */
	private async _parseInstructionsFile(uri: URI, token: CancellationToken): Promise<IPromptParserResult | undefined> {
		const cached = this._parseResults.get(uri);
		if (cached) {
			return cached;
		}
		try {
			const result = await this._promptsService.parse(uri, PromptsType.instructions, token);
			this._parseResults.set(uri, result);
			return result;
		} catch (error) {
			this._logService.error(`[InstructionsContextComputer] Failed to parse instruction file: ${uri}`, error);
			return undefined;
		}
	}

	/**
	 * Runs the full collection and adds all automatically attached
	 * instructions to `variables`, then sends the telemetry event.
	 *
	 * @param variables the variables of the chat request; read as context and extended in place.
	 * @param token cancellation token forwarded to the prompts service.
	 */
	public async collect(variables: ChatRequestVariableSet, token: CancellationToken): Promise<void> {

		const instructionFiles = await this._promptsService.listPromptFiles(PromptsType.instructions, token);

		this._logService.trace(`[InstructionsContextComputer] ${instructionFiles.length} instruction files available.`);

		const telemetryEvent: InstructionsCollectionEvent = newInstructionsCollectionEvent();
		const context = this._getContext(variables);

		// find instructions where the `applyTo` matches the attached context
		await this.addApplyingInstructions(instructionFiles, context, variables, telemetryEvent, token);

		// add all instructions referenced by all instruction files that are in the context
		await this._addReferencedInstructions(variables, telemetryEvent, token);

		// get copilot instructions
		await this._addAgentInstructions(variables, telemetryEvent, token);

		// add a single text entry that lists all instruction files with their patterns
		const instructionsWithPatternsList = await this._getInstructionsWithPatternsList(instructionFiles, variables, token);
		if (instructionsWithPatternsList.length > 0) {
			const text = instructionsWithPatternsList.join('\n');
			variables.add(toPromptTextVariableEntry(text, true));
			telemetryEvent.listedInstructionsCount++;
		}

		this.sendTelemetry(telemetryEvent);
	}

	/**
	 * Adds only the agent instructions (and the files referenced by the
	 * copilot instructions) to `variables`, then sends the telemetry event.
	 */
	public async collectAgentInstructionsOnly(variables: ChatRequestVariableSet, token: CancellationToken): Promise<void> {
		const telemetryEvent: InstructionsCollectionEvent = newInstructionsCollectionEvent();
		await this._addAgentInstructions(variables, telemetryEvent, token);
		this.sendTelemetry(telemetryEvent);
	}

	/**
	 * Fills in `totalInstructionsCount` as the sum of the other counters and
	 * publishes the event as `instructionsCollected`.
	 */
	private sendTelemetry(telemetryEvent: InstructionsCollectionEvent): void {
		telemetryEvent.totalInstructionsCount = telemetryEvent.agentInstructionsCount + telemetryEvent.referencedInstructionsCount + telemetryEvent.applyingInstructionsCount + telemetryEvent.listedInstructionsCount;
		this._telemetryService.publicLog2<InstructionsCollectionEvent, InstructionsCollectionClassification>('instructionsCollected', telemetryEvent);
	}

	/**
	 * Adds every instruction file whose `applyTo` pattern matches the attached
	 * context to `variables`. Files that cannot be parsed, are not instruction
	 * files, have no `applyTo`, or are already part of `context.instructions`
	 * are skipped.
	 *
	 * public for testing
	 *
	 * @param instructionFiles the instruction files to consider.
	 * @param context the attached files to match against and the instruction files to skip.
	 * @param variables the variable set that receives the matching instruction files.
	 * @param telemetryEvent `applyingInstructionsCount` is incremented for each added file.
	 * @param token cancellation token forwarded to the parser.
	 */
	public async addApplyingInstructions(instructionFiles: readonly IPromptPath[], context: { files: ResourceSet; instructions: ResourceSet }, variables: ChatRequestVariableSet, telemetryEvent: InstructionsCollectionEvent, token: CancellationToken): Promise<void> {

		for (const { uri } of instructionFiles) {
			const parsedFile = await this._parseInstructionsFile(uri, token);
			if (!parsedFile) {
				this._logService.trace(`[InstructionsContextComputer] Unable to read: ${uri}`);
				continue;
			}

			if (parsedFile.metadata?.promptType !== PromptsType.instructions) {
				this._logService.trace(`[InstructionsContextComputer] Not an instruction file: ${uri}`);
				continue;
			}

			const applyTo = parsedFile.metadata.applyTo;
			if (!applyTo) {
				this._logService.trace(`[InstructionsContextComputer] No 'applyTo' found: ${uri}`);
				continue;
			}

			if (context.instructions.has(uri)) {
				// the instruction file is already part of the input or has already been processed
				this._logService.trace(`[InstructionsContextComputer] Skipping already processed instruction file: ${uri}`);
				continue;
			}

			// named `matched` so that the imported glob `match` function is not shadowed
			const matched = this._matches(context.files, applyTo);
			if (!matched) {
				this._logService.trace(`[InstructionsContextComputer] No match for ${uri} with ${applyTo}`);
				continue;
			}

			this._logService.trace(`[InstructionsContextComputer] Match for ${uri} with ${matched.pattern}${matched.file ? ` for file ${matched.file}` : ''}`);

			// a match without a file comes from one of the match-everything patterns
			const reason = !matched.file ?
				localize('instruction.file.reason.allFiles', 'Automatically attached as pattern is **') :
				localize('instruction.file.reason.specificFile', 'Automatically attached as pattern {0} matches {1}', applyTo, this._labelService.getUriLabel(matched.file, { relative: true }));

			variables.add(toPromptFileVariableEntry(uri, PromptFileVariableKind.Instruction, reason, true));
			telemetryEvent.applyingInstructionsCount++;
		}
	}

	/**
	 * Splits the attached variables into the prompt file entries
	 * (`instructions`) and the URIs of all other entries that resolve to a
	 * URI (`files`).
	 */
	private _getContext(attachedContext: ChatRequestVariableSet): { files: ResourceSet; instructions: ResourceSet } {
		const files = new ResourceSet();
		const instructions = new ResourceSet();
		for (const variable of attachedContext.asArray()) {
			if (isPromptFileVariableEntry(variable)) {
				instructions.add(variable.value);
				continue;
			}
			const uri = IChatRequestVariableEntry.toUri(variable);
			if (uri) {
				files.add(uri);
			}
		}

		return { files, instructions };
	}

	/**
	 * Adds the agent instruction files of all workspace folders to `variables`:
	 *  - `.github/` + `COPILOT_CUSTOM_INSTRUCTIONS_FILENAME`, if
	 *    `PromptsConfig.USE_COPILOT_INSTRUCTION_FILES` is enabled, together
	 *    with the files they reference,
	 *  - a file named `agents.md` (compared case-insensitively) directly under
	 *    a folder root, if `PromptsConfig.USE_AGENT_MD` is enabled.
	 *
	 * @param telemetryEvent `agentInstructionsCount` is incremented for each added file.
	 */
	private async _addAgentInstructions(variables: ChatRequestVariableSet, telemetryEvent: InstructionsCollectionEvent, token: CancellationToken): Promise<void> {
		const useCopilotInstructionsFiles = this._configurationService.getValue(PromptsConfig.USE_COPILOT_INSTRUCTION_FILES);
		const useAgentMd = this._configurationService.getValue(PromptsConfig.USE_AGENT_MD);
		if (!useCopilotInstructionsFiles && !useAgentMd) {
			this._logService.trace(`[InstructionsContextComputer] No agent instructions files added (settings disabled).`);
			return;
		}

		const { folders } = this._workspaceService.getWorkspace();

		// collect into a separate set first, so that the reference lookup below only
		// looks at the copilot instruction files and not at everything in `variables`
		const entries: ChatRequestVariableSet = new ChatRequestVariableSet();
		if (useCopilotInstructionsFiles) {
			const copilotInstructionsPath = `.github/` + COPILOT_CUSTOM_INSTRUCTIONS_FILENAME;
			for (const folder of folders) {
				const file = joinPath(folder.uri, copilotInstructionsPath);
				if (await this._fileService.exists(file)) {
					entries.add(toPromptFileVariableEntry(file, PromptFileVariableKind.Instruction, localize('instruction.file.reason.copilot', 'Automatically attached as setting {0} is enabled', PromptsConfig.USE_COPILOT_INSTRUCTION_FILES), true));
					telemetryEvent.agentInstructionsCount++;
					this._logService.trace(`[InstructionsContextComputer] copilot-instruction.md files added: ${file.toString()}`);
				}
			}
			await this._addReferencedInstructions(entries, telemetryEvent, token);
		}
		if (useAgentMd) {
			const resolvedRoots = await this._fileService.resolveAll(folders.map(f => ({ resource: f.uri })));
			for (const root of resolvedRoots) {
				if (!root.success || !root.stat?.children) {
					continue;
				}
				const agentMd = root.stat.children.find(c => c.isFile && c.name.toLowerCase() === 'agents.md');
				if (agentMd) {
					entries.add(toPromptFileVariableEntry(agentMd.resource, PromptFileVariableKind.Instruction, localize('instruction.file.reason.agentsmd', 'Automatically attached as setting {0} is enabled', PromptsConfig.USE_AGENT_MD), true));
					telemetryEvent.agentInstructionsCount++;
					this._logService.trace(`[InstructionsContextComputer] AGENTS.md files added: ${agentMd.resource.toString()}`);
				}
			}
		}
		for (const entry of entries.asArray()) {
			variables.add(entry);
		}
	}

	/**
	 * Checks a comma separated list of glob patterns against the attached
	 * files and returns the first hit.
	 *
	 * @param files the URIs of the attached files.
	 * @param applyToPattern the value of the `applyTo` metadata.
	 * @returns the pattern that matched (as it was used for matching) and the
	 * file it matched. `file` is missing when the pattern is `**`, `**​/*` or
	 * `*`. Returns `undefined` if no pattern matches.
	 */
	private _matches(files: ResourceSet, applyToPattern: string): { pattern: string; file?: URI } | undefined {
		for (const rawPattern of splitGlobAware(applyToPattern, ',')) {
			const trimmed = rawPattern.trim();
			if (trimmed.length === 0) {
				// if glob pattern is empty, skip it
				continue;
			}
			if (trimmed === '**' || trimmed === '**/*' || trimmed === '*') {
				// if glob pattern is one of the special wildcard values,
				// add the instructions file even if no files are attached
				return { pattern: trimmed };
			}

			// support relative glob patterns, e.g. `src/**/*.js`
			const isAnchored = trimmed.startsWith('/') || trimmed.startsWith('**/');
			const pattern = isAnchored ? trimmed : '**/' + trimmed;

			// match each attached file against the glob pattern and
			// report the first file that the pattern matches
			for (const file of files) {
				if (match(pattern, file.path)) {
					return { pattern, file };
				}
			}
		}
		return undefined;
	}

	/**
	 * Builds the lines of a text block that lists all instruction files with
	 * their `applyTo` pattern and description and tells the reader to fetch
	 * them with the `read_file` tool.
	 *
	 * @param instructionFiles the instruction files to list.
	 * @param _existingVariables not used.
	 * @returns the lines of the text block, or an empty array when no read-file
	 * tool was handed to the constructor or there is no instruction file to list.
	 */
	private async _getInstructionsWithPatternsList(instructionFiles: readonly IPromptPath[], _existingVariables: ChatRequestVariableSet, token: CancellationToken): Promise<string[]> {
		if (!this._readFileTool) {
			this._logService.trace('[InstructionsContextComputer] No readFile tool available, skipping instructions with patterns list.');
			return [];
		}

		const entries: string[] = [];
		for (const { uri } of instructionFiles) {
			const parsedFile = await this._parseInstructionsFile(uri, token);
			if (parsedFile?.metadata?.promptType !== PromptsType.instructions) {
				continue;
			}
			// files without `applyTo` are listed with `**/*`
			const applyTo = parsedFile.metadata.applyTo ?? '**/*';
			const description = parsedFile.metadata.description ?? '';
			entries.push(`| '${getFilePath(uri)}' | ${applyTo} | ${description} |`);
		}
		if (entries.length === 0) {
			return entries;
		}

		const toolName = 'read_file'; // workaround https://github.com/microsoft/vscode/issues/252167
		return [
			'Here is a list of instruction files that contain rules for modifying or creating new code.',
			'These files are important for ensuring that the code is modified or created correctly.',
			'Please make sure to follow the rules specified in these files when working with the codebase.',
			`If the file is not already available as attachment, use the \`${toolName}\` tool to acquire it.`,
			'Make sure to acquire the instructions before making any changes to the code.',
			'| File | Applies To | Description |',
			'| ------- | --------- | ----------- |',
		].concat(entries);
	}

	/**
	 * Follows the file references of all prompt file entries in
	 * `attachedContext` and adds every referenced file that exists to
	 * `attachedContext`. References found in prompt or instruction files are
	 * followed recursively; each URI is visited at most once per call.
	 *
	 * @param attachedContext the variable set to scan and to extend in place.
	 * @param telemetryEvent `referencedInstructionsCount` is incremented for each added file.
	 * @param token cancellation token forwarded to the parser.
	 */
	private async _addReferencedInstructions(attachedContext: ChatRequestVariableSet, telemetryEvent: InstructionsCollectionEvent, token: CancellationToken): Promise<void> {
		const seen = new ResourceSet();
		const todo: URI[] = [];
		for (const variable of attachedContext.asArray()) {
			if (isPromptFileVariableEntry(variable) && !seen.has(variable.value)) {
				todo.push(variable.value);
				seen.add(variable.value);
			}
		}

		// work through the stack until no unvisited instruction file is left
		for (let next = todo.pop(); next; next = todo.pop()) {
			const result = await this._parseInstructionsFile(next, token);
			if (!result) {
				continue;
			}

			const refsToCheck: { resource: URI }[] = [];
			for (const ref of result.fileReferences) {
				if (!seen.has(ref) && (isPromptOrInstructionsFile(ref) || this._workspaceService.getWorkspaceFolder(ref) !== undefined)) {
					// only add references that are either prompt or instruction files or are part of the workspace
					refsToCheck.push({ resource: ref });
					seen.add(ref);
				}
			}
			if (refsToCheck.length === 0) {
				continue;
			}

			// `stats[i]` is read as the result for `refsToCheck[i]`
			const stats = await this._fileService.resolveAll(refsToCheck);
			for (let i = 0; i < stats.length; i++) {
				const stat = stats[i];
				const uri = refsToCheck[i].resource;
				if (!stat.success || !stat.stat?.isFile) {
					continue;
				}
				if (isPromptOrInstructionsFile(uri)) {
					// only recursively parse instruction files
					todo.push(uri);
				}
				const reason = localize('instruction.file.reason.referenced', 'Referenced by {0}', basename(next));
				attachedContext.add(toPromptFileVariableEntry(uri, PromptFileVariableKind.InstructionReference, reason, true));
				telemetryEvent.referencedInstructionsCount++;
				this._logService.trace(`[InstructionsContextComputer] ${uri.toString()} added, referenced by ${next.toString()}`);
			}
		}
	}
}
333

334

335
/**
 * Returns the location of an instruction file as text: the file system path
 * for URIs with the `Schemas.file` or `Schemas.vscodeRemote` scheme, the
 * full URI string for every other scheme.
 */
function getFilePath(uri: URI): string {
	switch (uri.scheme) {
		case Schemas.file:
		case Schemas.vscodeRemote:
			return uri.fsPath;
		default:
			return uri.toString();
	}
}
341

342
Product

Resources

Company