Path: blob/main/src/vs/workbench/contrib/chat/common/promptSyntax/computeAutomaticInstructions.ts
3297 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { CancellationToken } from '../../../../../base/common/cancellation.js';
import { match, splitGlobAware } from '../../../../../base/common/glob.js';
import { ResourceMap, ResourceSet } from '../../../../../base/common/map.js';
import { Schemas } from '../../../../../base/common/network.js';
import { basename, joinPath } from '../../../../../base/common/resources.js';
import { URI } from '../../../../../base/common/uri.js';
import { localize } from '../../../../../nls.js';
import { IConfigurationService } from '../../../../../platform/configuration/common/configuration.js';
import { IFileService } from '../../../../../platform/files/common/files.js';
import { ILabelService } from '../../../../../platform/label/common/label.js';
import { ILogService } from '../../../../../platform/log/common/log.js';
import { ITelemetryService } from '../../../../../platform/telemetry/common/telemetry.js';
import { IWorkspaceContextService } from '../../../../../platform/workspace/common/workspace.js';
import { ChatRequestVariableSet, IChatRequestVariableEntry, isPromptFileVariableEntry, toPromptFileVariableEntry, toPromptTextVariableEntry, PromptFileVariableKind } from '../chatVariableEntries.js';
import { IToolData } from '../languageModelToolsService.js';
import { PromptsConfig } from './config/config.js';
import { COPILOT_CUSTOM_INSTRUCTIONS_FILENAME, isPromptOrInstructionsFile } from './config/promptFileLocations.js';
import { PromptsType } from './promptTypes.js';
import { IPromptParserResult, IPromptPath, IPromptsService } from './service/promptsService.js';

/**
 * Counters reported with the `instructionsCollected` telemetry event.
 * Each counter is incremented by the collection step of the same name;
 * `totalInstructionsCount` is computed as the sum of the other four right
 * before the event is published.
 */
export type InstructionsCollectionEvent = {
	applyingInstructionsCount: number;
	referencedInstructionsCount: number;
	agentInstructionsCount: number;
	listedInstructionsCount: number;
	totalInstructionsCount: number;
};

/**
 * Creates a telemetry event with every counter initialised to zero.
 */
export function newInstructionsCollectionEvent(): InstructionsCollectionEvent {
	return { applyingInstructionsCount: 0, referencedInstructionsCount: 0, agentInstructionsCount: 0, listedInstructionsCount: 0, totalInstructionsCount: 0 };
}

type InstructionsCollectionClassification = {
	applyingInstructionsCount: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'Number of instructions added via pattern matching.' };
	referencedInstructionsCount: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'Number of instructions added via references from other instruction files.' };
	agentInstructionsCount: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'Number of agent instructions added (copilot-instructions.md and agents.md).' };
	listedInstructionsCount: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'Number of instruction patterns added.' };
	totalInstructionsCount: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'Total number of instruction entries added to variables.' };
	owner: 'digitarald';
	comment: 'Tracks automatic instruction collection usage in chat prompt system.';
};

/**
 * Computes the instruction files that are attached automatically to a chat
 * request and adds them to a {@link ChatRequestVariableSet}:
 *
 * - instruction files whose `applyTo` glob matches an attached file,
 * - files referenced (transitively, for prompt/instruction files) by
 *   instruction files already in the set,
 * - agent instructions (the copilot instructions file under `.github/` and
 *   a root-level `agents.md`), depending on settings,
 * - a text entry listing the known instruction files with their patterns.
 */
export class ComputeAutomaticInstructions {

	/** Cache of successfully parsed instruction files, keyed by URI. */
	private _parseResults: ResourceMap<IPromptParserResult> = new ResourceMap();

	/**
	 * @param _readFileTool When `undefined`, the list of instruction files
	 * with patterns is not produced (see `_getInstructionsWithPatternsList`).
	 */
	constructor(
		private readonly _readFileTool: IToolData | undefined,
		@IPromptsService private readonly _promptsService: IPromptsService,
		@ILogService public readonly _logService: ILogService,
		@ILabelService private readonly _labelService: ILabelService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IWorkspaceContextService private readonly _workspaceService: IWorkspaceContextService,
		@IFileService private readonly _fileService: IFileService,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
	) {
	}

	/**
	 * Parses the given file as an instructions file, reusing a previous
	 * result for the same URI when one is cached.
	 *
	 * @returns The parse result, or `undefined` when parsing threw. Failures
	 * are logged and not cached, so a later call parses the file again.
	 */
	private async _parseInstructionsFile(uri: URI, token: CancellationToken): Promise<IPromptParserResult | undefined> {
		// only successful results are ever stored, so a single lookup is enough
		const cached = this._parseResults.get(uri);
		if (cached) {
			return cached;
		}
		try {
			const result = await this._promptsService.parse(uri, PromptsType.instructions, token);
			this._parseResults.set(uri, result);
			return result;
		} catch (error) {
			this._logService.error(`[InstructionsContextComputer] Failed to parse instruction file: ${uri}`, error);
			return undefined;
		}
	}

	/**
	 * Runs all collection steps and adds the resulting entries to `variables`,
	 * then publishes the telemetry event.
	 *
	 * @param variables The variables of the request; read to determine the
	 * attached context and extended in place with the collected entries.
	 */
	public async collect(variables: ChatRequestVariableSet, token: CancellationToken): Promise<void> {

		const instructionFiles = await this._promptsService.listPromptFiles(PromptsType.instructions, token);

		this._logService.trace(`[InstructionsContextComputer] ${instructionFiles.length} instruction files available.`);

		const telemetryEvent: InstructionsCollectionEvent = newInstructionsCollectionEvent();
		const context = this._getContext(variables);

		// find instructions where the `applyTo` matches the attached context
		await this.addApplyingInstructions(instructionFiles, context, variables, telemetryEvent, token);

		// add all instructions referenced by all instruction files that are in the context
		await this._addReferencedInstructions(variables, telemetryEvent, token);

		// get copilot instructions
		await this._addAgentInstructions(variables, telemetryEvent, token);

		// add a single text entry that lists the instruction files and their patterns
		const instructionsWithPatternsList = await this._getInstructionsWithPatternsList(instructionFiles, variables, token);
		if (instructionsWithPatternsList.length > 0) {
			const text = instructionsWithPatternsList.join('\n');
			variables.add(toPromptTextVariableEntry(text, true));
			telemetryEvent.listedInstructionsCount++;
		}

		this.sendTelemetry(telemetryEvent);
	}

	/**
	 * Adds only the agent instructions (and, for the copilot instructions
	 * files, the files they reference) to `variables`, then publishes the
	 * telemetry event.
	 */
	public async collectAgentInstructionsOnly(variables: ChatRequestVariableSet, token: CancellationToken): Promise<void> {
		const telemetryEvent: InstructionsCollectionEvent = newInstructionsCollectionEvent();
		await this._addAgentInstructions(variables, telemetryEvent, token);
		this.sendTelemetry(telemetryEvent);
	}

	/**
	 * Fills in `totalInstructionsCount` as the sum of the other counters and
	 * publishes the event as `instructionsCollected`.
	 */
	private sendTelemetry(telemetryEvent: InstructionsCollectionEvent): void {
		// Emit telemetry
		telemetryEvent.totalInstructionsCount = telemetryEvent.agentInstructionsCount + telemetryEvent.referencedInstructionsCount + telemetryEvent.applyingInstructionsCount + telemetryEvent.listedInstructionsCount;
		this._telemetryService.publicLog2<InstructionsCollectionEvent, InstructionsCollectionClassification>('instructionsCollected', telemetryEvent);
	}

	/**
	 * Adds every instruction file whose `applyTo` pattern matches the
	 * attached files (see `_matches`). Files that cannot be parsed, are not
	 * of type instructions, have no `applyTo`, or are already contained in
	 * `context.instructions` are skipped.
	 *
	 * public for testing
	 *
	 * @param instructionFiles The candidate instruction files.
	 * @param context The attached files and the instruction files already attached.
	 * @param variables Receives one entry per matching instruction file.
	 * @param telemetryEvent `applyingInstructionsCount` is incremented per added file.
	 */
	public async addApplyingInstructions(instructionFiles: readonly IPromptPath[], context: { files: ResourceSet; instructions: ResourceSet }, variables: ChatRequestVariableSet, telemetryEvent: InstructionsCollectionEvent, token: CancellationToken): Promise<void> {

		for (const { uri } of instructionFiles) {
			const parsedFile = await this._parseInstructionsFile(uri, token);
			if (!parsedFile) {
				this._logService.trace(`[InstructionsContextComputer] Unable to read: ${uri}`);
				continue;
			}

			if (parsedFile.metadata?.promptType !== PromptsType.instructions) {
				this._logService.trace(`[InstructionsContextComputer] Not an instruction file: ${uri}`);
				continue;
			}
			const applyTo = parsedFile.metadata.applyTo;

			if (!applyTo) {
				this._logService.trace(`[InstructionsContextComputer] No 'applyTo' found: ${uri}`);
				continue;
			}

			if (context.instructions.has(uri)) {
				// the instruction file is already part of the input or has already been processed
				this._logService.trace(`[InstructionsContextComputer] Skipping already processed instruction file: ${uri}`);
				continue;
			}

			// named `matched` so the imported glob `match` function is not shadowed
			const matched = this._matches(context.files, applyTo);
			if (matched) {
				this._logService.trace(`[InstructionsContextComputer] Match for ${uri} with ${matched.pattern}${matched.file ? ` for file ${matched.file}` : ''}`);

				// no file in the result means one of the match-all wildcards was hit
				const reason = !matched.file ?
					localize('instruction.file.reason.allFiles', 'Automatically attached as pattern is **') :
					localize('instruction.file.reason.specificFile', 'Automatically attached as pattern {0} matches {1}', applyTo, this._labelService.getUriLabel(matched.file, { relative: true }));

				variables.add(toPromptFileVariableEntry(uri, PromptFileVariableKind.Instruction, reason, true));
				telemetryEvent.applyingInstructionsCount++;
			} else {
				this._logService.trace(`[InstructionsContextComputer] No match for ${uri} with ${applyTo}`);
			}
		}
	}

	/**
	 * Splits the attached variables into the URIs of prompt file entries
	 * (`instructions`) and the URIs of all other entries that resolve to a
	 * URI (`files`). Entries without a URI are ignored.
	 */
	private _getContext(attachedContext: ChatRequestVariableSet): { files: ResourceSet; instructions: ResourceSet } {
		const files = new ResourceSet();
		const instructions = new ResourceSet();
		for (const variable of attachedContext.asArray()) {
			if (isPromptFileVariableEntry(variable)) {
				instructions.add(variable.value);
			} else {
				const uri = IChatRequestVariableEntry.toUri(variable);
				if (uri) {
					files.add(uri);
				}
			}
		}

		return { files, instructions };
	}

	/**
	 * Adds the agent instruction files found in the workspace folders:
	 *
	 * - when `PromptsConfig.USE_COPILOT_INSTRUCTION_FILES` is enabled, the
	 *   copilot instructions file under `.github/` of each folder, plus the
	 *   files referenced by them,
	 * - when `PromptsConfig.USE_AGENT_MD` is enabled, a direct child file of
	 *   each folder named `agents.md` (compared case-insensitively).
	 *
	 * Entries are gathered in a separate set first and then copied into
	 * `variables`, so reference resolution only looks at the entries added here.
	 */
	private async _addAgentInstructions(variables: ChatRequestVariableSet, telemetryEvent: InstructionsCollectionEvent, token: CancellationToken): Promise<void> {
		const useCopilotInstructionsFiles = this._configurationService.getValue(PromptsConfig.USE_COPILOT_INSTRUCTION_FILES);
		const useAgentMd = this._configurationService.getValue(PromptsConfig.USE_AGENT_MD);
		if (!useCopilotInstructionsFiles && !useAgentMd) {
			this._logService.trace(`[InstructionsContextComputer] No agent instructions files added (settings disabled).`);
			return;
		}

		const { folders } = this._workspaceService.getWorkspace();
		const entries: ChatRequestVariableSet = new ChatRequestVariableSet();
		if (useCopilotInstructionsFiles) {
			for (const folder of folders) {
				const file = joinPath(folder.uri, `.github/` + COPILOT_CUSTOM_INSTRUCTIONS_FILENAME);
				if (await this._fileService.exists(file)) {
					entries.add(toPromptFileVariableEntry(file, PromptFileVariableKind.Instruction, localize('instruction.file.reason.copilot', 'Automatically attached as setting {0} is enabled', PromptsConfig.USE_COPILOT_INSTRUCTION_FILES), true));
					telemetryEvent.agentInstructionsCount++;
					this._logService.trace(`[InstructionsContextComputer] copilot-instruction.md files added: ${file.toString()}`);
				}
			}
			// runs before the agents.md entries are added, so only the copilot
			// instructions files have their references followed
			await this._addReferencedInstructions(entries, telemetryEvent, token);
		}
		if (useAgentMd) {
			const resolvedRoots = await this._fileService.resolveAll(folders.map(f => ({ resource: f.uri })));
			for (const root of resolvedRoots) {
				if (root.success && root.stat?.children) {
					const agentMd = root.stat.children.find(c => c.isFile && c.name.toLowerCase() === 'agents.md');
					if (agentMd) {
						entries.add(toPromptFileVariableEntry(agentMd.resource, PromptFileVariableKind.Instruction, localize('instruction.file.reason.agentsmd', 'Automatically attached as setting {0} is enabled', PromptsConfig.USE_AGENT_MD), true));
						telemetryEvent.agentInstructionsCount++;
						this._logService.trace(`[InstructionsContextComputer] AGENTS.md files added: ${agentMd.resource.toString()}`);
					}
				}
			}
		}
		for (const entry of entries.asArray()) {
			variables.add(entry);
		}
	}

	/**
	 * Tests a comma separated list of glob patterns against the attached files.
	 *
	 * @param files The attached files; matching is done on `file.path`.
	 * @param applyToPattern The `applyTo` value, split with `splitGlobAware`.
	 * @returns The first pattern that matches. `file` is the matched file, and
	 * is absent when the pattern is one of the match-all wildcards (`**`,
	 * `**` + `/*`, `*`), which match even when no files are attached. The
	 * returned pattern is trimmed and, for relative patterns, prefixed with
	 * `**` + `/`. Returns `undefined` when no pattern matches.
	 */
	private _matches(files: ResourceSet, applyToPattern: string): { pattern: string; file?: URI } | undefined {
		const patterns = splitGlobAware(applyToPattern, ',');
		const patternMatches = (pattern: string): { pattern: string; file?: URI } | undefined => {
			pattern = pattern.trim();
			if (pattern.length === 0) {
				// if glob pattern is empty, skip it
				return undefined;
			}
			if (pattern === '**' || pattern === '**/*' || pattern === '*') {
				// if glob pattern is one of the special wildcard values,
				// add the instructions file even if no files are attached
				return { pattern };
			}
			if (!pattern.startsWith('/') && !pattern.startsWith('**/')) {
				// support relative glob patterns, e.g. `src/**/*.js`
				pattern = '**/' + pattern;
			}

			// match each attached file with the glob pattern and
			// report the first file the pattern matches
			for (const file of files) {
				if (match(pattern, file.path)) {
					return { pattern, file }; // return the matched pattern and file URI
				}
			}
			return undefined;
		};
		for (const pattern of patterns) {
			const matchResult = patternMatches(pattern);
			if (matchResult) {
				return matchResult; // return the first matched pattern and file URI
			}
		}
		return undefined;
	}

	/**
	 * Builds the lines of a text block that lists the instruction files as a
	 * table with file path, `applyTo` pattern (defaulting to `**` + `/*`) and
	 * description (defaulting to the empty string), preceded by guidance on
	 * reading the files with the `read_file` tool.
	 *
	 * @param _existingVariables Not used.
	 * @returns The lines of the block, or an empty array when no read-file
	 * tool was provided or no file parsed as an instructions file.
	 */
	private async _getInstructionsWithPatternsList(instructionFiles: readonly IPromptPath[], _existingVariables: ChatRequestVariableSet, token: CancellationToken): Promise<string[]> {
		if (!this._readFileTool) {
			this._logService.trace('[InstructionsContextComputer] No readFile tool available, skipping instructions with patterns list.');
			return [];
		}

		const entries: string[] = [];
		for (const { uri } of instructionFiles) {
			const parsedFile = await this._parseInstructionsFile(uri, token);
			if (parsedFile?.metadata?.promptType !== PromptsType.instructions) {
				continue;
			}
			const applyTo = parsedFile.metadata.applyTo ?? '**/*';
			const description = parsedFile.metadata.description ?? '';
			entries.push(`| '${getFilePath(uri)}' | ${applyTo} | ${description} |`);
		}
		if (entries.length === 0) {
			return entries;
		}

		const toolName = 'read_file'; // workaround https://github.com/microsoft/vscode/issues/252167
		return [
			'Here is a list of instruction files that contain rules for modifying or creating new code.',
			'These files are important for ensuring that the code is modified or created correctly.',
			'Please make sure to follow the rules specified in these files when working with the codebase.',
			`If the file is not already available as attachment, use the \`${toolName}\` tool to acquire it.`,
			'Make sure to acquire the instructions before making any changes to the code.',
			'| File | Applies To | Description |',
			'| ------- | --------- | ----------- |',
		].concat(entries);
	}

	/**
	 * Follows the file references of the prompt file entries in
	 * `attachedContext` and adds every referenced file that exists as an
	 * `InstructionReference` entry. A reference is considered only when it is
	 * a prompt or instructions file or lies inside a workspace folder; only
	 * prompt or instructions files are parsed further for their own references.
	 *
	 * @param attachedContext Read for the starting set and extended in place.
	 * @param telemetryEvent `referencedInstructionsCount` is incremented per added file.
	 */
	private async _addReferencedInstructions(attachedContext: ChatRequestVariableSet, telemetryEvent: InstructionsCollectionEvent, token: CancellationToken): Promise<void> {
		// `seen` guards against visiting a URI twice, which also breaks reference cycles
		const seen = new ResourceSet();
		const todo: URI[] = [];
		for (const variable of attachedContext.asArray()) {
			if (isPromptFileVariableEntry(variable)) {
				if (!seen.has(variable.value)) {
					todo.push(variable.value);
					seen.add(variable.value);
				}
			}
		}
		let next = todo.pop();
		while (next) {
			const result = await this._parseInstructionsFile(next, token);
			if (result) {
				const refsToCheck: { resource: URI }[] = [];
				for (const ref of result.fileReferences) {
					if (!seen.has(ref) && (isPromptOrInstructionsFile(ref) || this._workspaceService.getWorkspaceFolder(ref) !== undefined)) {
						// only add references that are either prompt or instruction files or are part of the workspace
						refsToCheck.push({ resource: ref });
						seen.add(ref);
					}
				}
				if (refsToCheck.length > 0) {
					// results are read by index, pairing stats[i] with refsToCheck[i]
					const stats = await this._fileService.resolveAll(refsToCheck);
					for (let i = 0; i < stats.length; i++) {
						const stat = stats[i];
						const uri = refsToCheck[i].resource;
						if (stat.success && stat.stat?.isFile) {
							if (isPromptOrInstructionsFile(uri)) {
								// only recursively parse instruction files
								todo.push(uri);
							}
							const reason = localize('instruction.file.reason.referenced', 'Referenced by {0}', basename(next));
							attachedContext.add(toPromptFileVariableEntry(uri, PromptFileVariableKind.InstructionReference, reason, true));
							telemetryEvent.referencedInstructionsCount++;
							this._logService.trace(`[InstructionsContextComputer] ${uri.toString()} added, referenced by ${next.toString()}`);
						}
					}
				}
			}
			next = todo.pop();
		}
	}
}


/**
 * Returns the string used to identify an instruction file in the list of
 * instruction files: `fsPath` for `file` and `vscode-remote` URIs, the full
 * URI string for every other scheme.
 */
function getFilePath(uri: URI): string {
	if (uri.scheme === Schemas.file || uri.scheme === Schemas.vscodeRemote) {
		return uri.fsPath;
	}
	return uri.toString();
}