GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/intents/node/agentIntent.ts
/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import * as l10n from '@vscode/l10n';
import { Raw, RenderPromptResult } from '@vscode/prompt-tsx';
import { BudgetExceededError } from '@vscode/prompt-tsx/dist/base/materialized';
import type * as vscode from 'vscode';
import { IChatSessionService } from '../../../platform/chat/common/chatSessionService';
import { ChatFetchResponseType, ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes';
import { ISessionTranscriptService } from '../../../platform/chat/common/sessionTranscriptService';
import { getTextPart } from '../../../platform/chat/common/globalStringUtils';
import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService';
import { isAnthropicFamily, isGptFamily, modelCanUseApplyPatchExclusively, modelCanUseReplaceStringExclusively, modelSupportsApplyPatch, modelSupportsMultiReplaceString, modelSupportsReplaceString, modelSupportsSimplifiedApplyPatchInstructions } from '../../../platform/endpoint/common/chatModelCapabilities';
import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
import { IAutomodeService } from '../../../platform/endpoint/node/automodeService';
import { IEnvService } from '../../../platform/env/common/envService';
import { ILogService } from '../../../platform/log/common/logService';
import { IEditLogService } from '../../../platform/multiFileEdit/common/editLogService';
import { CUSTOM_TOOL_SEARCH_NAME, isAnthropicContextEditingEnabled } from '../../../platform/networking/common/anthropic';
import { IChatEndpoint } from '../../../platform/networking/common/networking';
import { modelsWithoutResponsesContextManagement } from '../../../platform/networking/common/openai';
import { INotebookService } from '../../../platform/notebook/common/notebookService';
import { GenAiMetrics } from '../../../platform/otel/common/genAiMetrics';
import { IOTelService } from '../../../platform/otel/common/otelService';
import { IPromptPathRepresentationService } from '../../../platform/prompts/common/promptPathRepresentationService';
import { ITasksService } from '../../../platform/tasks/common/tasksService';
import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
import { ITestProvider } from '../../../platform/testing/common/testProvider';
import { IWorkspaceService } from '../../../platform/workspace/common/workspaceService';

import { isCancellationError } from '../../../util/vs/base/common/errors';
import { Iterable } from '../../../util/vs/base/common/iterator';
import { IInstantiationService, ServicesAccessor } from '../../../util/vs/platform/instantiation/common/instantiation';

import { ChatResponseProgressPart2 } from '../../../vscodeTypes';
import { ICommandService } from '../../commands/node/commandService';
import { Intent } from '../../common/constants';
import { ChatVariablesCollection } from '../../prompt/common/chatVariablesCollection';
import { Conversation, normalizeSummariesOnRounds, RenderedUserMessageMetadata, TurnStatus } from '../../prompt/common/conversation';
import { IBuildPromptContext } from '../../prompt/common/intents';
import { getRequestedToolCallIterationLimit, IContinueOnErrorConfirmation } from '../../prompt/common/specialRequestTypes';
import { ChatTelemetryBuilder } from '../../prompt/node/chatParticipantTelemetry';
import { IDefaultIntentRequestHandlerOptions } from '../../prompt/node/defaultIntentRequestHandler';
import { IDocumentContext } from '../../prompt/node/documentContext';
import { IBuildPromptResult, IIntent, IIntentInvocation } from '../../prompt/node/intents';
import { AgentPrompt, AgentPromptProps } from '../../prompts/node/agent/agentPrompt';
import { BackgroundSummarizationState, BackgroundSummarizer, IBackgroundSummarizationResult, shouldKickOffBackgroundSummarization } from '../../prompts/node/agent/backgroundSummarizer';
import { AgentPromptCustomizations, PromptRegistry } from '../../prompts/node/agent/promptRegistry';
import { extractInlineSummary, InlineSummarizationUserMessage, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder, appendTranscriptHintToSummary, computeSummarizationRoundCounts } from '../../prompts/node/agent/summarizedConversationHistory';
import { PromptRenderer, renderPromptElement } from '../../prompts/node/base/promptRenderer';
import { ICodeMapperService } from '../../prompts/node/codeMapper/codeMapperService';
import { EditCodePrompt2 } from '../../prompts/node/panel/editCodePrompt2';
import { NotebookInlinePrompt } from '../../prompts/node/panel/notebookInlinePrompt';
import { ToolResultMetadata } from '../../prompts/node/panel/toolCalling';
import { IEditToolLearningService } from '../../tools/common/editToolLearningService';
import { normalizeToolSchema } from '../../tools/common/toolSchemaNormalizer';
import { ContributedToolName, ToolName } from '../../tools/common/toolNames';
import { IToolsService } from '../../tools/common/toolsService';
import { applyPatch5Description } from '../../tools/node/applyPatchTool';
import { multiReplaceStringPrimaryDescription } from '../../tools/node/multiReplaceStringTool';
import { replaceStringBatchDescription } from '../../tools/node/replaceStringTool';
import { getAgentMaxRequests } from '../common/agentConfig';
import { addCacheBreakpoints } from './cacheBreakpoints';
import { EditCodeIntent, EditCodeIntentInvocation, EditCodeIntentInvocationOptions, mergeMetadata, toNewChatReferences } from './editCodeIntent';
import { ToolCallingLoop } from './toolCallingLoop';

function isResponsesCompactionContextManagementEnabled(endpoint: IChatEndpoint, configurationService: IConfigurationService, experimentationService: IExperimentationService): boolean {
	return endpoint.apiType === 'responses'
		&& configurationService.getExperimentBasedConfig(ConfigKey.ResponsesApiContextManagementEnabled, experimentationService)
		&& !modelsWithoutResponsesContextManagement.has(endpoint.family);
}
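
// Note (added commentary, not in the original source): this gate is consulted
// twice below. handleSummarizeCommand() refuses a manual /compact when the
// Responses API already manages compaction for the session, and buildPrompt()
// disables client-side history summarization for the same reason.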

export const getAgentTools = async (accessor: ServicesAccessor, request: vscode.ChatRequest, model?: IChatEndpoint) => {
	const toolsService = accessor.get<IToolsService>(IToolsService);
	const testService = accessor.get<ITestProvider>(ITestProvider);
	const tasksService = accessor.get<ITasksService>(ITasksService);
	const configurationService = accessor.get<IConfigurationService>(IConfigurationService);
	const experimentationService = accessor.get<IExperimentationService>(IExperimentationService);
	const endpointProvider = accessor.get<IEndpointProvider>(IEndpointProvider);
	const editToolLearningService = accessor.get<IEditToolLearningService>(IEditToolLearningService);
	model ??= await endpointProvider.getChatEndpoint(request);

	const allowTools: Record<string, boolean> = {};

	const learned = editToolLearningService.getPreferredEndpointEditTool(model);
	if (learned) { // a learning-enabled (BYOK) model; go with what it prefers
		allowTools[ToolName.EditFile] = learned.includes(ToolName.EditFile);
		allowTools[ToolName.ReplaceString] = learned.includes(ToolName.ReplaceString);
		allowTools[ToolName.MultiReplaceString] = learned.includes(ToolName.MultiReplaceString);
		allowTools[ToolName.ApplyPatch] = learned.includes(ToolName.ApplyPatch);
	} else {
		allowTools[ToolName.EditFile] = true;
		allowTools[ToolName.ReplaceString] = modelSupportsReplaceString(model);
		allowTools[ToolName.ApplyPatch] = modelSupportsApplyPatch(model) && !!toolsService.getTool(ToolName.ApplyPatch);

		if (allowTools[ToolName.ApplyPatch] && modelCanUseApplyPatchExclusively(model)) {
			allowTools[ToolName.EditFile] = false;
		}

		if (modelCanUseReplaceStringExclusively(model)) {
			allowTools[ToolName.ReplaceString] = true;
			allowTools[ToolName.EditFile] = false;
		}

		if (allowTools[ToolName.ReplaceString] && modelSupportsMultiReplaceString(model)) {
			allowTools[ToolName.MultiReplaceString] = true;
		}
	}

	allowTools[ToolName.CoreRunTest] = await testService.hasAnyTests();
	allowTools[ToolName.CoreRunTask] = tasksService.getTasks().length > 0;

	const searchSubagentEnabled = configurationService.getExperimentBasedConfig(ConfigKey.Advanced.SearchSubagentToolEnabled, experimentationService);
	const isGptOrAnthropic = isGptFamily(model) || isAnthropicFamily(model);
	allowTools[ToolName.SearchSubagent] = isGptOrAnthropic && searchSubagentEnabled;

	const executionSubagentEnabled = configurationService.getExperimentBasedConfig(ConfigKey.Advanced.ExecutionSubagentToolEnabled, experimentationService);
	allowTools[ToolName.ExecutionSubagent] = isGptOrAnthropic && executionSubagentEnabled;

	const skillToolEnabled = configurationService.getExperimentBasedConfig(ConfigKey.Advanced.SkillToolEnabled, experimentationService);
	allowTools[ToolName.Skill] = skillToolEnabled;

	allowTools[CUSTOM_TOOL_SEARCH_NAME] = !!model.supportsToolSearch;

	if (model.family.includes('grok-code')) {
		allowTools[ToolName.CoreManageTodoList] = false;
	}

	// Enable task_complete in autopilot mode so the model can signal task completion.
	// The tool is registered in core as a built-in but needs explicit opt-in here.
	allowTools['task_complete'] = request.permissionLevel === 'autopilot';

	allowTools[ToolName.EditFilesPlaceholder] = false;
	allowTools[ToolName.SessionStoreSql] = false; // Only available via /chronicle
	// todo@connor4312: string check here is for back-compat for 1.109 Insiders
	if (Iterable.some(request.tools, ([t, enabled]) => (typeof t === 'string' ? t : t.name) === ContributedToolName.EditFilesPlaceholder && enabled === false)) {
		allowTools[ToolName.ApplyPatch] = false;
		allowTools[ToolName.EditFile] = false;
		allowTools[ToolName.ReplaceString] = false;
		allowTools[ToolName.MultiReplaceString] = false;
	}

	if (model.family.toLowerCase().includes('gemini-3') && configurationService.getExperimentBasedConfig(ConfigKey.Advanced.Gemini3MultiReplaceString, experimentationService)) {
		allowTools[ToolName.MultiReplaceString] = true;
	}

	const tools = toolsService.getEnabledTools(request, model, tool => {
		if (typeof allowTools[tool.name] === 'boolean') {
			return allowTools[tool.name];
		}

		// Must return undefined to fall back to other checks
		return undefined;
	});

	if (modelSupportsSimplifiedApplyPatchInstructions(model) && configurationService.getExperimentBasedConfig(ConfigKey.Advanced.Gpt5AlternativePatch, experimentationService)) {
		const ap = tools.findIndex(t => t.name === ToolName.ApplyPatch);
		if (ap !== -1) {
			tools[ap] = { ...tools[ap], description: applyPatch5Description };
		}
	}

	if (configurationService.getExperimentBasedConfig(ConfigKey.Advanced.BatchReplaceStringDescriptions, experimentationService)) {
		const rs = tools.findIndex(t => t.name === ToolName.ReplaceString);
		if (rs !== -1) {
			tools[rs] = { ...tools[rs], description: replaceStringBatchDescription };
		}
		const mrs = tools.findIndex(t => t.name === ToolName.MultiReplaceString);
		if (mrs !== -1) {
			tools[mrs] = { ...tools[mrs], description: multiReplaceStringPrimaryDescription };
		}
	}

	return tools;
};

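// Illustrative usage (a sketch added for this write-up, not in the original):
// getAgentTools is designed to run through the instantiation service so it can
// pull the services it needs off the accessor. AgentIntentInvocation does
// exactly this in getAvailableTools() below:
//
//   const tools = await instantiationService.invokeFunction(getAgentTools, request);
//
// Returning `undefined` from the getEnabledTools callback (rather than false)
// defers tools that have no entry in `allowTools` to the service's own checks.
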
export class AgentIntent extends EditCodeIntent {

	static override readonly ID = Intent.Agent;

	override readonly id = AgentIntent.ID;

	private readonly _backgroundSummarizers = new Map<string, BackgroundSummarizer>();

	constructor(
		@IInstantiationService instantiationService: IInstantiationService,
		@IEndpointProvider endpointProvider: IEndpointProvider,
		@IConfigurationService configurationService: IConfigurationService,
		@IExperimentationService expService: IExperimentationService,
		@ICodeMapperService codeMapperService: ICodeMapperService,
		@IWorkspaceService workspaceService: IWorkspaceService,
		@IChatSessionService chatSessionService: IChatSessionService,
		@IAutomodeService private readonly _automodeService: IAutomodeService,
	) {
		super(instantiationService, endpointProvider, configurationService, expService, codeMapperService, workspaceService, { intentInvocation: AgentIntentInvocation, processCodeblocks: false });
		chatSessionService.onDidDisposeChatSession(sessionId => {
			const summarizer = this._backgroundSummarizers.get(sessionId);
			if (summarizer) {
				summarizer.cancel();
				this._backgroundSummarizers.delete(sessionId);
			}
		});
	}

	getOrCreateBackgroundSummarizer(sessionId: string, modelMaxPromptTokens: number): BackgroundSummarizer {
		let summarizer = this._backgroundSummarizers.get(sessionId);
		if (!summarizer) {
			summarizer = new BackgroundSummarizer(modelMaxPromptTokens);
			this._backgroundSummarizers.set(sessionId, summarizer);
		}
		return summarizer;
	}

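	// Note (added commentary, not in the original): exactly one
	// BackgroundSummarizer is kept per chat session. The constructor's
	// onDidDisposeChatSession listener cancels and drops it when the session
	// goes away, so a summarization started near the end of a session cannot
	// leak or write into a disposed conversation.
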
	protected override getIntentHandlerOptions(request: vscode.ChatRequest): IDefaultIntentRequestHandlerOptions | undefined {
		return {
			maxToolCallIterations: getRequestedToolCallIterationLimit(request) ??
				this.instantiationService.invokeFunction(getAgentMaxRequests),
			temperature: this.configurationService.getConfig(ConfigKey.Advanced.AgentTemperature) ?? 0,
			overrideRequestLocation: ChatLocation.Agent
		};
	}

	override async handleRequest(
		conversation: Conversation,
		request: vscode.ChatRequest,
		stream: vscode.ChatResponseStream,
		token: vscode.CancellationToken,
		documentContext: IDocumentContext | undefined,
		agentName: string,
		location: ChatLocation,
		chatTelemetry: ChatTelemetryBuilder,
		yieldRequested: () => boolean
	): Promise<vscode.ChatResult> {
		if (request.command === 'compact') {
			return this.handleSummarizeCommand(conversation, request, stream, token);
		}

		return super.handleRequest(conversation, request, stream, token, documentContext, agentName, location, chatTelemetry, yieldRequested);
	}

	private async handleSummarizeCommand(
		conversation: Conversation,
		request: vscode.ChatRequest,
		stream: vscode.ChatResponseStream,
		token: vscode.CancellationToken
	): Promise<vscode.ChatResult> {
		normalizeSummariesOnRounds(conversation.turns);

		// Exclude the current /compact turn.
		const history = conversation.turns.slice(0, -1);
		if (history.length === 0) {
			stream.markdown(l10n.t('Nothing to compact. Start a conversation first.'));
			return {};
		}

		// The summarization metadata needs to be associated with a tool call round.
		const lastRoundId = history.at(-1)?.rounds.at(-1)?.id;
		if (!lastRoundId) {
			stream.markdown(l10n.t('Nothing to compact. Start a conversation with tool calls first.'));
			return {};
		}

		const endpoint = await this.endpointProvider.getChatEndpoint(request);
		if (isResponsesCompactionContextManagementEnabled(endpoint, this.configurationService, this.expService)) {
			stream.markdown(l10n.t('Compaction is already managed by context management for this session.'));
			return {};
		}

		const promptContext: IBuildPromptContext = {
			history,
			chatVariables: new ChatVariablesCollection([]),
			query: '',
			toolCallRounds: [],
			conversation,
		};

		try {
			const propsBuilder = this.instantiationService.createInstance(SummarizedConversationHistoryPropsBuilder);
			const propsInfo = propsBuilder.getProps({
				priority: 1,
				endpoint,
				location: ChatLocation.Agent,
				promptContext,
				maxToolResultLength: Infinity,
			});

			stream.progress(l10n.t('Compacting conversation...'));

			const progress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart> = {
				report: () => { }
			};
			const renderer = PromptRenderer.create(this.instantiationService, endpoint, SummarizedConversationHistory, {
				...propsInfo.props,
				triggerSummarize: true,
				summarizationInstructions: request.prompt || undefined,
			});
			const result = await renderer.render(progress, token);
			const summaryMetadata = result.metadata.get(SummarizedConversationHistoryMetadata);
			if (!summaryMetadata) {
				stream.markdown(l10n.t('Unable to compact conversation.'));
				return {};
			}

			if (summaryMetadata.usage) {
				stream.usage({
					promptTokens: summaryMetadata.usage.prompt_tokens,
					completionTokens: summaryMetadata.usage.completion_tokens,
					promptTokenDetails: summaryMetadata.promptTokenDetails,
				});
			}

			stream.markdown(l10n.t('Compacted conversation.'));
			const lastTurn = conversation.getLatestTurn();
			// If using auto mode, the next turn will select a new endpoint.
			this._automodeService.invalidateRouterCache(request);

			const chatResult: vscode.ChatResult = {
				metadata: {
					summary: {
						toolCallRoundId: summaryMetadata.toolCallRoundId,
						text: summaryMetadata.text,
					}
				}
			};

			// setResponse must be called so that turn.resultMetadata?.summary
			// is available for normalizeSummariesOnRounds on subsequent turns.
			lastTurn.setResponse(
				TurnStatus.Success,
				{ type: 'model', message: '' },
				undefined,
				chatResult,
			);

			lastTurn.setMetadata(summaryMetadata);

			return chatResult;
		} catch (e) {
			if (isCancellationError(e)) {
				return {};
			}

			const message = e instanceof Error ? e.message : String(e);
			stream.markdown(l10n.t('Failed to compact conversation: {0}', message));
			return {};
		}
	}
}

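// Illustrative sketch (added; the `summary` shape is taken from the code
// above, the consuming snippet is an assumption based on the comments): the
// /compact result metadata written by handleSummarizeCommand round-trips
// through the chat host and is read back on later turns, roughly:
//
//   const summary = turn.resultMetadata?.summary;
//   // -> { toolCallRoundId: string; text: string }
//
// normalizeSummariesOnRounds() uses this to re-attach summaries to their
// tool call rounds before the next prompt render.
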
export class AgentIntentInvocation extends EditCodeIntentInvocation implements IIntentInvocation {

	public override readonly codeblocksRepresentEdits = false;

	protected prompt: typeof AgentPrompt | typeof EditCodePrompt2 | typeof NotebookInlinePrompt = AgentPrompt;

	protected extraPromptProps: Partial<AgentPromptProps> | undefined;

	private _resolvedCustomizations: AgentPromptCustomizations | undefined;

	private _lastRenderTokenCount: number = 0;

	/** Cached model capabilities from the most recent main agent render, reused by the background summarizer. */
	private _lastModelCapabilities: { enableThinking: boolean; reasoningEffort: string | undefined; enableToolSearch: boolean; enableContextEditing: boolean } | undefined;

	/**
	 * RNG used to jitter the inline-summarization trigger threshold around 0.80.
	 * Tests may overwrite this directly (e.g. `(invocation as any)._thresholdRng = () => 0.5`).
	 */
	private _thresholdRng: () => number = Math.random;

	constructor(
		intent: IIntent,
		location: ChatLocation,
		endpoint: IChatEndpoint,
		request: vscode.ChatRequest,
		intentOptions: EditCodeIntentInvocationOptions,
		@IInstantiationService instantiationService: IInstantiationService,
		@ICodeMapperService codeMapperService: ICodeMapperService,
		@IEnvService envService: IEnvService,
		@IPromptPathRepresentationService promptPathRepresentationService: IPromptPathRepresentationService,
		@IEndpointProvider endpointProvider: IEndpointProvider,
		@IWorkspaceService workspaceService: IWorkspaceService,
		@IToolsService toolsService: IToolsService,
		@IConfigurationService configurationService: IConfigurationService,
		@IEditLogService editLogService: IEditLogService,
		@ICommandService commandService: ICommandService,
		@ITelemetryService telemetryService: ITelemetryService,
		@INotebookService notebookService: INotebookService,
		@ILogService private readonly logService: ILogService,
		@IExperimentationService private readonly expService: IExperimentationService,
		@IAutomodeService private readonly automodeService: IAutomodeService,
		@IOTelService protected override readonly otelService: IOTelService,
		@ISessionTranscriptService private readonly sessionTranscriptService: ISessionTranscriptService,
	) {
		super(intent, location, endpoint, request, intentOptions, instantiationService, codeMapperService, envService, promptPathRepresentationService, endpointProvider, workspaceService, toolsService, configurationService, editLogService, commandService, telemetryService, notebookService, otelService);
	}

	public override getAvailableTools(): Promise<vscode.LanguageModelToolInformation[]> {
		return this.instantiationService.invokeFunction(getAgentTools, this.request);
	}

	override async buildPrompt(
		promptContext: IBuildPromptContext,
		progress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart>,
		token: vscode.CancellationToken
	): Promise<IBuildPromptResult> {
		this._resolvedCustomizations = await PromptRegistry.resolveAllCustomizations(this.instantiationService, this.endpoint);
		// Add any references from the codebase invocation to the request
		const codebase = await this._getCodebaseReferences(promptContext, token);

		let variables = promptContext.chatVariables;
		let toolReferences: vscode.ChatPromptReference[] = [];
		if (codebase) {
			toolReferences = toNewChatReferences(variables, codebase.references);
			variables = new ChatVariablesCollection([...this.request.references, ...toolReferences]);
		}

		const tools = promptContext.tools?.availableTools;
		const toolSearchEnabled = !!this.endpoint.supportsToolSearch;
		const toolTokens = tools?.length ? await this.endpoint.acquireTokenizer().countToolTokens(tools) : 0;

		const summarizeThresholdOverride = this.configurationService.getConfig<number | undefined>(ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold);
		if (typeof summarizeThresholdOverride === 'number' && summarizeThresholdOverride < 100 && summarizeThresholdOverride > 0) {
			throw new Error(`Setting github.copilot.${ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold.id} is too low`);
		}

		const baseBudget = Math.min(
			this.configurationService.getConfig<number | undefined>(ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold) ?? this.endpoint.modelMaxPromptTokens,
			this.endpoint.modelMaxPromptTokens
		);
		const useTruncation = this.endpoint.apiType === 'responses' && this.configurationService.getConfig(ConfigKey.Advanced.UseResponsesApiTruncation);
		const responsesCompactionContextManagementEnabled = isResponsesCompactionContextManagementEnabled(this.endpoint, this.configurationService, this.expService);
		const summarizationEnabled = this.configurationService.getConfig(ConfigKey.SummarizeAgentConversationHistory) && this.prompt === AgentPrompt && !responsesCompactionContextManagementEnabled;
		const useInlineSummarization = summarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.Advanced.AgentHistorySummarizationInline, this.expService);

		// When tools are present, apply a 10% safety margin on the message portion
		// to account for tokenizer discrepancies between our tool-token counter and
		// the model's actual tokenizer. Without this, an undercount could cause an
		// API-level context_length_exceeded error instead of a graceful
		// BudgetExceededError from prompt-tsx. When there are no tools the endpoint's
		// own modelMaxPromptTokens is used unchanged.
		const messageBudget = Math.max(1, Math.floor((baseBudget - toolTokens) * 0.9));
		const safeBudget = useTruncation ? Number.MAX_SAFE_INTEGER : messageBudget;
		const endpoint = toolTokens > 0 ? this.endpoint.cloneWithTokenOverride(safeBudget) : this.endpoint;

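		// Worked example (added; the numbers are hypothetical): with a
		// 128,000-token model budget and 5,000 tokens of tool schemas,
		//   messageBudget = floor((128000 - 5000) * 0.9) = 110,700
		// so messages get 110,700 tokens and roughly 12,300 tokens remain as
		// headroom for the tool schemas plus tokenizer drift.
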
		this.logService.debug(`[Agent] rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}, totalTools: ${tools?.length ?? 0}, toolSearchEnabled: ${toolSearchEnabled}), summarizationEnabled=${summarizationEnabled}`);
		let result: RenderPromptResult;
		// When the "last two messages" cache breakpoint strategy is enabled,
		// suppress prompt-tsx and heuristic cache breakpoints — messagesApi.ts
		// will place breakpoints on the last two merged messages instead.
		const useLastTwoMessagesCacheBPs = isAnthropicFamily(this.endpoint)
			&& this.configurationService.getExperimentBasedConfig(ConfigKey.AnthropicCacheBreakpointsLastTwoMessages, this.expService);
		const props: AgentPromptProps = {
			endpoint,
			promptContext: {
				...promptContext,
				tools: promptContext.tools && {
					...promptContext.tools,
					toolReferences: this.stableToolReferences.filter((r) => r.name !== ToolName.Codebase),
				}
			},
			location: this.location,
			enableCacheBreakpoints: summarizationEnabled && !useLastTwoMessagesCacheBPs,
			...this.extraPromptProps,
			customizations: this._resolvedCustomizations
		};

		// ── Background compaction ────────────────────────────────────────
		//
		// Pre-render: if a previous bg pass completed, apply it now.
		//
		// BudgetExceeded: if bg is InProgress/Completed, wait/apply.
		// Otherwise fall back to foreground summarization.
		//
		// Post-render (≥ 80% + Idle): kick off background compaction
		// so it is ready for a future turn.
		//
		const backgroundSummarizer = summarizationEnabled ? this._getOrCreateBackgroundSummarizer(promptContext.conversation?.sessionId) : undefined;
		const contextRatio = backgroundSummarizer && baseBudget > 0
			? (this._lastRenderTokenCount + toolTokens) / baseBudget
			: 0;

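		// Sketch of the summarizer lifecycle as used below (added commentary;
		// the states and methods are the ones referenced in this file, the
		// transitions are inferred from the call sites):
		//
		//   Idle --start()--> InProgress --> Completed --consumeAndReset()--> Idle
		//                          |
		//                          +--> Failed (eligible for a new kick-off)
		//
		// waitForCompletion() blocks on an InProgress pass; cancel() is invoked
		// by AgentIntent when the chat session is disposed.
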
		// Track whether this iteration already performed compaction-related work
		// (including applying a summary or using a foreground fallback path) so
		// we don't immediately re-trigger background compaction in the post-render check.
		let didSummarizeThisIteration = false;

		// If a previous background pass completed, apply its summary now.
		if (summarizationEnabled && backgroundSummarizer?.state === BackgroundSummarizationState.Completed) {
			const bgResult = backgroundSummarizer.consumeAndReset();
			if (bgResult) {
				this.logService.debug(`[ConversationHistorySummarizer] applying completed background summary (roundId=${bgResult.toolCallRoundId})`);
				progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation')));
				this._applySummaryToRounds(bgResult, promptContext);
				this._persistSummaryOnTurn(bgResult, promptContext, this._lastRenderTokenCount);
				this._sendBackgroundCompactionTelemetry('preRender', 'applied', contextRatio, promptContext);
				didSummarizeThisIteration = true;
			} else {
				this.logService.warn(`[ConversationHistorySummarizer] background compaction state was Completed but consumeAndReset returned no result`);
				this._sendBackgroundCompactionTelemetry('preRender', 'noResult', contextRatio, promptContext);
				this._recordBackgroundCompactionFailure(promptContext, 'preRender');
			}
		}

		// Render the prompt without summarization or cache breakpoints, using
		// the original endpoint (not reduced for tools/safety buffer).
		const renderWithoutSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise<RenderPromptResult> => {
			this.logService.debug(`[Agent] ${reason}, rendering without summarization`);
			const renderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
				...renderProps,
				endpoint: this.endpoint,
				enableCacheBreakpoints: false
			});
			try {
				return await renderer.render(progress, token);
			} catch (e) {
				if (e instanceof BudgetExceededError) {
					this.logService.error(e, `[Agent] fallback render failed due to budget exceeded`);
					const maxTokens = this.endpoint.modelMaxPromptTokens;
					throw new Error(`Unable to build prompt, modelMaxPromptTokens = ${maxTokens} (${e.message})`);
				}
				throw e;
			}
		};

		// Helper function for synchronous summarization flow with fallbacks
		const renderWithSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise<RenderPromptResult> => {
			// Check if a previous foreground summarization already failed in this
			// turn. The metadata is set on the turn returned by getLatestTurn(),
			// which is the same turn throughout a single buildPrompt call since
			// the conversation doesn't advance mid-render.
			const turn = promptContext.conversation?.getLatestTurn();
			const previousForegroundSummary = turn?.getMetadata(SummarizedConversationHistoryMetadata);
			if (previousForegroundSummary?.source === 'foreground' && previousForegroundSummary.outcome && previousForegroundSummary.outcome !== 'success') {
				this.logService.debug(`[ConversationHistorySummarizer] ${reason}, skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`);
				/* __GDPR__
					"triggerSummarizeSkipped" : {
						"owner": "bhavyau",
						"comment": "Tracks when foreground summarization was skipped because a previous attempt already failed in this turn.",
						"previousOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The outcome of the previous failed summarization attempt." },
						"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." }
					}
				*/
				this.telemetryService.sendMSFTTelemetryEvent('triggerSummarizeSkipped', { previousOutcome: previousForegroundSummary.outcome, model: renderProps.endpoint.model });
				GenAiMetrics.incrementAgentSummarizationCount(this.otelService, 'skipped');
				return renderWithoutSummarization(`skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`, renderProps);
			}

			this.logService.debug(`[ConversationHistorySummarizer] ${reason}, triggering summarization`);
			try {
				const renderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
					...renderProps,
					endpoint: this.endpoint,
					promptContext: renderProps.promptContext,
					triggerSummarize: true,
					forceSimpleSummary: true,
				});
				return await renderer.render(progress, token);
			} catch (e) {
				this.logService.error(e, `[ConversationHistorySummarizer] summarization failed`);
				const errorKind = e instanceof BudgetExceededError ? 'budgetExceeded' : 'error';
				/* __GDPR__
					"triggerSummarizeFailed" : {
						"owner": "roblourens",
						"comment": "Tracks when triggering summarization failed - for example, a summary was created but not applied successfully.",
						"errorKind": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state or failure reason of the summarization." },
						"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID used for the summarization." }
					}
				*/
				this.telemetryService.sendMSFTTelemetryEvent('triggerSummarizeFailed', { errorKind, model: renderProps.endpoint.model });
				GenAiMetrics.incrementAgentSummarizationCount(this.otelService, 'failed');

				// Track failed foreground compaction
				const turn = promptContext.conversation?.getLatestTurn();
				turn?.setMetadata(new SummarizedConversationHistoryMetadata(
					'', // no toolCallRoundId for failures
					'', // no summary text for failures
					{
						model: renderProps.endpoint.model,
						source: 'foreground',
						outcome: errorKind,
						contextLengthBefore: this._lastRenderTokenCount,
					},
				));

				return renderWithoutSummarization(`summarization failed (${errorKind})`, renderProps);
			}
		};

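		// Overview of the fallback chain that follows (added commentary
		// summarizing the control flow below):
		//
		//   1. render with summarization enabled and the reduced budget
		//   2. on BudgetExceededError: wait for / apply a background summary,
		//      then re-render
		//   3. if that is unavailable or still over budget: foreground
		//      summarization via renderWithSummarization()
		//   4. last resort: renderWithoutSummarization(), which throws a
		//      descriptive error if even that cannot fit.
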
		const contextLengthBefore = this._lastRenderTokenCount;

		try {
			const renderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, props);
			result = await renderer.render(progress, token);
		} catch (e) {
			if (e instanceof BudgetExceededError && summarizationEnabled) {
				if (!promptContext.toolCallResults) {
					promptContext = {
						...promptContext,
						toolCallResults: {}
					};
				}
				e.metadata.getAll(ToolResultMetadata).forEach((metadata) => {
					promptContext.toolCallResults![metadata.toolCallId] = metadata.result;
				});

				// If a background compaction is already running or completed,
				// wait for / apply it instead of firing another LLM request.
				if (backgroundSummarizer && (backgroundSummarizer.state === BackgroundSummarizationState.InProgress || backgroundSummarizer.state === BackgroundSummarizationState.Completed)) {
					let budgetExceededTrigger: string;
					if (backgroundSummarizer.state === BackgroundSummarizationState.InProgress) {
						budgetExceededTrigger = 'budgetExceededWaited';
						this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — waiting on in-progress background compaction instead of new request`);
						const summaryPromise = backgroundSummarizer.waitForCompletion();
						progress.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {
							try { await summaryPromise; } catch { }
							return l10n.t('Compacted conversation');
						}));
						await summaryPromise;
					} else {
						budgetExceededTrigger = 'budgetExceededReady';
						this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — applying already-completed background compaction`);
						progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation')));
					}
					const bgResult = backgroundSummarizer.consumeAndReset();
					if (bgResult) {
						this.logService.debug(`[ConversationHistorySummarizer] background compaction applied after budget exceeded (roundId=${bgResult.toolCallRoundId})`);
						this._applySummaryToRounds(bgResult, promptContext);
						this._persistSummaryOnTurn(bgResult, promptContext, contextLengthBefore);
						didSummarizeThisIteration = true;
						try {
							const reRenderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, { ...props, promptContext });
							result = await reRenderer.render(progress, token);
							this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'applied', contextRatio, promptContext);
						} catch (reRenderError) {
							if (reRenderError instanceof BudgetExceededError) {
								this.logService.debug(`[ConversationHistorySummarizer] re-render after background compaction still exceeded budget — falling back`);
								this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'appliedButReRenderFailed', contextRatio, promptContext);
								result = await renderWithoutSummarization('budget exceeded after background compaction applied', { ...props, promptContext });
							} else {
								throw reRenderError;
							}
						}
					} else {
						this.logService.debug(`[ConversationHistorySummarizer] background compaction produced no usable result after budget exceeded — falling back to synchronous summarization`);
						this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'noResult', contextRatio, promptContext);
						this._recordBackgroundCompactionFailure(promptContext, budgetExceededTrigger);
						// Background compaction failed — fall back to synchronous summarization
						result = await renderWithSummarization(`budget exceeded (${e.message}), background compaction failed`);
						didSummarizeThisIteration = true;
					}
				} else {
					result = await renderWithSummarization(`budget exceeded (${e.message})`);
					didSummarizeThisIteration = true;
				}
			} else {
				throw e;
			}
		}

		this._lastRenderTokenCount = result.tokenCount;

		// Track foreground compaction if summarization happened during rendering
		const summaryMeta = result.metadata.get(SummarizedConversationHistoryMetadata);
		if (summaryMeta) {
			const turn = promptContext.conversation?.getLatestTurn();
			turn?.setMetadata(new SummarizedConversationHistoryMetadata(
				summaryMeta.toolCallRoundId,
				summaryMeta.text,
				{
					thinking: summaryMeta.thinking,
					usage: summaryMeta.usage,
					promptTokenDetails: summaryMeta.promptTokenDetails,
					model: summaryMeta.model,
					summarizationMode: summaryMeta.summarizationMode,
					numRounds: summaryMeta.numRounds,
					numRoundsSinceLastSummarization: summaryMeta.numRoundsSinceLastSummarization,
					durationMs: summaryMeta.durationMs,
					source: 'foreground',
					outcome: 'success',
					contextLengthBefore,
				},
			));
		}

		// Post-render: kick off background compaction if idle and over the
		// threshold. For the inline-summarization path we care about prompt
		// cache parity with the main agent fetch — so we gate kick-off on a
		// completed tool call (cache has been warmed) and jitter the threshold
		// around 0.80 to avoid firing at the same exact boundary every time.
		// The non-inline path forks its own prompt and sees no cache benefit,
		// so it keeps the simple >= 0.80 behavior.
		if (summarizationEnabled && backgroundSummarizer && !didSummarizeThisIteration) {
			const postRenderRatio = baseBudget > 0
				? (result.tokenCount + toolTokens) / baseBudget
				: 0;

			const idleOrFailed = backgroundSummarizer.state === BackgroundSummarizationState.Idle
				|| backgroundSummarizer.state === BackgroundSummarizationState.Failed;

			const cacheWarm = (promptContext.toolCallRounds?.length ?? 0) > 0;

			const kickOff = shouldKickOffBackgroundSummarization(postRenderRatio, useInlineSummarization, cacheWarm, this._thresholdRng);

			if (kickOff && idleOrFailed) {
				if (useInlineSummarization) {
					// Compute and cache model capabilities from the current render's
					// messages. These must match the main agent fetch for cache parity.
					const strippedMessages = ToolCallingLoop.stripInternalToolCallIds(result.messages);
					const rawEffort = this.request.modelConfiguration?.reasoningEffort;
					const isSubagent = !!this.request.subAgentInvocationId;
					// Must match the main agent's enableThinking logic in
					// toolCallingLoop.ts runOne() — thinking is only disabled
					// on continuation turns for Anthropic when no thinking
					// blocks exist yet in the messages.
					const shouldDisableThinking = !!promptContext.isContinuation && isAnthropicFamily(this.endpoint) && !ToolCallingLoop.messagesContainThinking(strippedMessages);
					this._lastModelCapabilities = {
						enableThinking: !shouldDisableThinking,
						reasoningEffort: typeof rawEffort === 'string' ? rawEffort : undefined,
						enableToolSearch: !isSubagent && !!this.endpoint.supportsToolSearch,
						enableContextEditing: !isSubagent && isAnthropicContextEditingEnabled(this.endpoint, this.configurationService, this.expService),
					};
				}
				this._startBackgroundSummarization(backgroundSummarizer, result.messages, promptContext, props, token, postRenderRatio, useInlineSummarization);
			}
		}

		const lastMessage = result.messages.at(-1);
		if (lastMessage?.role === Raw.ChatRole.User) {
			const currentTurn = promptContext.conversation?.getLatestTurn();
			if (currentTurn && !currentTurn.getMetadata(RenderedUserMessageMetadata)) {
				currentTurn.setMetadata(new RenderedUserMessageMetadata(lastMessage.content));
			}
		}

		if (!useLastTwoMessagesCacheBPs) {
			addCacheBreakpoints(result.messages);
		}

		if (this.request.command === 'error') {
			// Should trigger a 400
			result.messages.push({
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: [{ type: 'function', id: '', function: { name: 'tool', arguments: '{' } }]
			});
		}

		return {
			...result,
			// The codebase tool is not actually called/referenced in the edit prompt, so we need to
			// merge its metadata so that its output is not lost and it's not called repeatedly every turn
			// todo@connor4312/joycerhl: this seems a bit janky
			metadata: codebase ? mergeMetadata(result.metadata, codebase.metadatas) : result.metadata,
			// Don't report file references that came in via chat variables in an editing session, unless they have warnings,
			// because they are already displayed as part of the working set
			// references: result.references.filter((ref) => this.shouldKeepReference(editCodeStep, ref, toolReferences, chatVariables)),
		};
	}

	modifyErrorDetails(errorDetails: vscode.ChatErrorDetails, response: ChatResponse): vscode.ChatErrorDetails {
		if (!errorDetails.responseIsFiltered) {
			errorDetails.confirmationButtons = [
				...(errorDetails.confirmationButtons ?? []),
				{ data: { copilotContinueOnError: true } satisfies IContinueOnErrorConfirmation, label: l10n.t('Try Again') },
			];
		}
		return errorDetails;
	}

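	// Note (added commentary): the { copilotContinueOnError: true } payload on
	// the Try Again button is typed by IContinueOnErrorConfirmation (imported
	// from specialRequestTypes above); the button is only offered when the
	// response failed for a reason other than content filtering.
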
	getAdditionalVariables(promptContext: IBuildPromptContext): ChatVariablesCollection | undefined {
		const lastTurn = promptContext.conversation?.turns.at(-1);
		if (!lastTurn) {
			return;
		}

		// Search backwards to find the first real request and return those variables too.
		// Variables aren't re-attached to requests from confirmations.
		// TODO https://github.com/microsoft/vscode/issues/262858, more to do here
		if (lastTurn.acceptedConfirmationData) {
			const turns = promptContext.conversation!.turns.slice(0, -1);
			for (const turn of Iterable.reverse(turns)) {
				if (!turn.acceptedConfirmationData) {
					return turn.promptVariables;
				}
			}
		}
	}

	private _startBackgroundSummarization(
		backgroundSummarizer: BackgroundSummarizer,
		mainRenderMessages: Raw.ChatMessage[],
		promptContext: IBuildPromptContext,
		props: AgentPromptProps,
		token: vscode.CancellationToken,
		contextRatio: number,
		useInlineSummarization: boolean,
	): void {
		this.logService.debug(`[ConversationHistorySummarizer] context at ${(contextRatio * 100).toFixed(0)}% starting background compaction (inline=${useInlineSummarization})`);

		const bgStartTime = Date.now();

		// Snapshot rounds so telemetry reflects state at kick-off time, not at
		// completion time (the main loop mutates toolCallRounds). History is
		// stable across a single user turn so a reference is sufficient.
		const rounds = [...(promptContext.toolCallRounds ?? [])];
		const history = promptContext.history;
		let toolCallRoundId: string | undefined;
		if (rounds.length >= 2) {
			// Mark the round before the last, preserving the last round verbatim
			toolCallRoundId = rounds[rounds.length - 2].id;
		} else if (rounds.length === 1) {
			toolCallRoundId = rounds[0].id;
		} else {
			for (let i = history.length - 1; i >= 0 && !toolCallRoundId; i--) {
				const lastRound = history[i].rounds.at(-1);
				if (lastRound) {
					toolCallRoundId = lastRound.id;
				}
			}
		}

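		// Worked example (added; round ids are hypothetical): with current-turn
		// rounds [r1, r2, r3] the summary is anchored at r2, keeping r3 verbatim;
		// with a single round [r1] it anchors at r1; with no rounds in the
		// current turn, the most recent round found in prior history turns wins.
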
		// Build tool schemas matching the main agent loop so the prompt
		// prefix (system + tools + messages) is identical for cache hits.
		const availableTools = promptContext.tools?.availableTools;
		const normalizedTools = availableTools?.length ? normalizeToolSchema(
			this.endpoint.family,
			availableTools.map(tool => ({
				function: {
					name: tool.name,
					description: tool.description,
					parameters: tool.inputSchema && Object.keys(tool.inputSchema).length ? tool.inputSchema : undefined
				},
				type: 'function' as const,
			})),
			(tool, rule) => {
				this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`);
			},
		) : undefined;
		const toolOpts = normalizedTools?.length ? {
			tools: normalizedTools,
		} : undefined;

		const associatedRequestId = promptContext.conversation?.getLatestTurn()?.id;
		const conversationId = promptContext.conversation?.sessionId;
		const modelCapabilities = this._lastModelCapabilities;

		backgroundSummarizer.start(async bgToken => {
			try {
				if (useInlineSummarization) {
					// Inline mode: fork the exact messages from the main render
					// and append a summary user message. The prompt prefix is
					// byte-identical to the main agent loop for cache hits.
					const strippedMainMessages = ToolCallingLoop.stripInternalToolCallIds(mainRenderMessages);
					const summaryMsgResult = await renderPromptElement(
						this.instantiationService,
						this.endpoint,
						InlineSummarizationUserMessage,
						{ endpoint: this.endpoint },
						undefined,
						bgToken,
					);
					const messages = [
						...strippedMainMessages,
						...summaryMsgResult.messages,
					];

					const response = await this.endpoint.makeChatRequest2({
						debugName: 'summarizeConversationHistory-inline',
						messages,
						finishedCb: undefined,
						location: ChatLocation.Agent,
						conversationId,
						requestOptions: {
							temperature: 0,
							stream: false,
							...toolOpts,
						},
						modelCapabilities,
						telemetryProperties: associatedRequestId ? { associatedRequestId } : undefined,
						enableRetryOnFilter: true,
					}, bgToken);
					if (response.type !== ChatFetchResponseType.Success) {
						throw new Error(`Background inline summarization request failed: ${response.type}`);
					}
					const rawSummaryText = extractInlineSummary(response.value);
					if (!rawSummaryText) {
						throw new Error('Background inline summarization: no <summary> tags found in response');
					}
					if (!toolCallRoundId) {
						throw new Error('Background inline summarization: no round ID to apply summary to');
					}
					// Flush the transcript before snapshotting the line count so
					// the baked "N lines" hint matches the on-disk file at this
					// moment (mirrors the full/simple path in SummarizedConversationHistory.render).
					if (conversationId && this.sessionTranscriptService.getTranscriptPath(conversationId)) {
						await this.sessionTranscriptService.flush(conversationId);
					}
					const summaryText = conversationId
						? appendTranscriptHintToSummary(rawSummaryText, conversationId, this.sessionTranscriptService)
						: rawSummaryText;
					this.logService.debug(`[ConversationHistorySummarizer] background inline compaction completed (${summaryText.length} chars, roundId=${toolCallRoundId})`);

					// Send summarizedConversationHistory telemetry for parity
					// with the standard ConversationHistorySummarizer path.
					const { numRounds, numRoundsSinceLastSummarization } = computeSummarizationRoundCounts(history, rounds);
					const numRoundsInCurrentTurn = rounds.length;
					const lastUsedTool = rounds.at(-1)?.toolCalls?.at(-1)?.name
						?? history.at(-1)?.rounds.at(-1)?.toolCalls?.at(-1)?.name ?? 'none';
					const promptTypes = messages.map(msg => `${msg.role}${'name' in msg && msg.name ? `-${msg.name}` : ''}:${getTextPart(msg.content).length}`).join(',');
					/* __GDPR__
						"summarizedConversationHistory" : {
							"owner": "bhavyau",
							"comment": "Tracks background inline summarization outcome",
							"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
							"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
							"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
							"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
							"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
							"lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The last tool used before summarization." },
							"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
							"promptTypes": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Role and character count of each prompt message in order, as a proxy for cache hit rate (e.g. system:1234,user:567)." },
							"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." },
							"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
							"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." },
							"isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this was triggered during tool calling." },
							"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." },
							"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." },
							"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." },
							"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." }
						}
					*/
					this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
						outcome: 'success',
						model: this.endpoint.model,
						summarizationMode: 'inline',
						conversationId,
						chatRequestId: associatedRequestId,
						lastUsedTool,
						requestId: response.requestId,
						promptTypes,
					}, {
						numRounds,
						turnIndex: history.length,
						curTurnRoundIndex: numRoundsInCurrentTurn,
						isDuringToolCalling: numRoundsInCurrentTurn > 0 ? 1 : 0,
						duration: Date.now() - bgStartTime,
						promptTokenCount: response.usage?.prompt_tokens,
						promptCacheTokenCount: response.usage?.prompt_tokens_details?.cached_tokens,
						responseTokenCount: response.usage?.completion_tokens,
					});

					return {
						summary: summaryText,
						toolCallRoundId,
						promptTokens: response.usage?.prompt_tokens,
						promptCacheTokens: response.usage?.prompt_tokens_details?.cached_tokens,
						outputTokens: response.usage?.completion_tokens,
						durationMs: Date.now() - bgStartTime,
						model: this.endpoint.model,
						summarizationMode: 'inline',
						numRounds,
						numRoundsSinceLastSummarization,
					};
				} else {
					// Standard mode: use triggerSummarize which makes a separate
					// LLM call with a summarization-specific prompt during render.
					const snapshotProps: AgentPromptProps = {
						...props,
						promptContext: {
							...promptContext,
							toolCallRounds: promptContext.toolCallRounds ? [...promptContext.toolCallRounds] : undefined,
							toolCallResults: promptContext.toolCallResults ? { ...promptContext.toolCallResults } : undefined,
						}
					};
					const bgRenderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
						...snapshotProps,
						endpoint: this.endpoint,
						promptContext: snapshotProps.promptContext,
						triggerSummarize: true,
					});
					const bgProgress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart> = { report: () => { } };
					const bgRenderResult = await bgRenderer.render(bgProgress, bgToken);
					const summaryMetadata = bgRenderResult.metadata.get(SummarizedConversationHistoryMetadata);
					if (!summaryMetadata) {
						throw new Error('Background compaction produced no summary metadata');
					}
					this.logService.debug(`[ConversationHistorySummarizer] background compaction completed successfully (roundId=${summaryMetadata.toolCallRoundId})`);
					return {
						summary: summaryMetadata.text,
						toolCallRoundId: summaryMetadata.toolCallRoundId,
						promptTokens: summaryMetadata.usage?.prompt_tokens,
						promptCacheTokens: summaryMetadata.usage?.prompt_tokens_details?.cached_tokens,
						outputTokens: summaryMetadata.usage?.completion_tokens,
						durationMs: Date.now() - bgStartTime,
						model: summaryMetadata.model,
						summarizationMode: summaryMetadata.summarizationMode,
						numRounds: summaryMetadata.numRounds,
						numRoundsSinceLastSummarization: summaryMetadata.numRoundsSinceLastSummarization,
					};
				}
			} catch (err) {
				this.logService.error(err, `[ConversationHistorySummarizer] background compaction failed`);

				// Send failure telemetry for inline background summarization
				if (useInlineSummarization) {
					/* __GDPR__
						"summarizedConversationHistory" : {
							"owner": "bhavyau",
							"comment": "Tracks background inline summarization failure",
							"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
							"detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Detailed failure reason." },
							"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
							"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
							"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
							"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
							"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." }
						}
					*/
					this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
						outcome: 'failed',
						detailedOutcome: err instanceof Error ? err.message : String(err),
						model: this.endpoint.model,
						summarizationMode: 'inline',
						conversationId,
						chatRequestId: associatedRequestId,
					}, {
						duration: Date.now() - bgStartTime,
					});
				}

				throw err;
			}
		}, token);
	}

	/**
	 * Returns the `BackgroundSummarizer` for this session, or `undefined` if
	 * the intent is not an `AgentIntent` (e.g. `AskAgentIntent`).
	 */
	private _getOrCreateBackgroundSummarizer(sessionId: string | undefined): BackgroundSummarizer | undefined {
		if (!sessionId || !(this.intent instanceof AgentIntent)) {
			return undefined;
		}
		return this.intent.getOrCreateBackgroundSummarizer(sessionId, this.endpoint.modelMaxPromptTokens);
	}

	/**
	 * Apply a background-compaction result onto the in-memory rounds so
	 * that the next render picks up the `<conversation-summary>` element.
	 */
	private _applySummaryToRounds(bgResult: { summary: string; toolCallRoundId: string }, promptContext: IBuildPromptContext): void {
		// Check current-turn rounds first
		const currentRound = promptContext.toolCallRounds?.find(r => r.id === bgResult.toolCallRoundId);
		if (currentRound) {
			currentRound.summary = bgResult.summary;
		} else {
			// Fall back to history turns
			let found = false;
			for (const turn of [...promptContext.history].reverse()) {
				const round = turn.rounds.find(r => r.id === bgResult.toolCallRoundId);
				if (round) {
					round.summary = bgResult.summary;
					found = true;
					break;
				}
			}
			if (!found) {
				this.logService.warn(`[ConversationHistorySummarizer] background compaction round ${bgResult.toolCallRoundId} not found in toolCallRounds or history; summary dropped`);
			}
		}
		// Invalidate the auto mode router cache so the next getChatEndpoint()
		// call re-evaluates which model to use after compaction.
		this.automodeService.invalidateRouterCache(this.request);
	}

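	// Illustrative effect (added commentary): after _applySummaryToRounds runs,
	// the targeted round carries `round.summary = '<text>'`; per the doc
	// comment above, the next render then picks the summary up via the
	// <conversation-summary> element instead of replaying the full history.
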
	/**
	 * Persist the summary on the current turn's `resultMetadata` so that
	 * `normalizeSummariesOnRounds` restores it on subsequent turns.
	 */
	private _persistSummaryOnTurn(bgResult: IBackgroundSummarizationResult, promptContext: IBuildPromptContext, contextLengthBefore?: number): void {
		const turn = promptContext.conversation?.getLatestTurn();
		const chatResult = turn?.responseChatResult;
		if (chatResult) {
			const metadata = (chatResult.metadata ?? {}) as Record<string, unknown>;
			const existingSummaries = (metadata['summaries'] as unknown[] ?? []);
			existingSummaries.push({ toolCallRoundId: bgResult.toolCallRoundId, text: bgResult.summary });
			metadata['summaries'] = existingSummaries;
			(chatResult as { metadata: unknown }).metadata = metadata;
		}
		// Also store as a pending summary on the turn so normalizeSummariesOnRounds
		// can restore it even when chatResult doesn't exist yet (mid-tool-call-loop).
		turn?.addPendingSummary(bgResult.toolCallRoundId, bgResult.summary);
		const usage = bgResult.promptTokens !== undefined && bgResult.outputTokens !== undefined
			? { prompt_tokens: bgResult.promptTokens, completion_tokens: bgResult.outputTokens, total_tokens: bgResult.promptTokens + bgResult.outputTokens, ...(bgResult.promptCacheTokens !== undefined ? { prompt_tokens_details: { cached_tokens: bgResult.promptCacheTokens } } : {}) }
			: undefined;
		turn?.setMetadata(new SummarizedConversationHistoryMetadata(
			bgResult.toolCallRoundId,
			bgResult.summary,
			{
				usage,
				model: bgResult.model,
				summarizationMode: bgResult.summarizationMode,
				numRounds: bgResult.numRounds,
				numRoundsSinceLastSummarization: bgResult.numRoundsSinceLastSummarization,
				durationMs: bgResult.durationMs,
				source: 'background',
				outcome: 'success',
				contextLengthBefore,
			},
		));
	}
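
	// Worked example (added; the numbers are hypothetical): promptTokens=50000,
	// outputTokens=1200, promptCacheTokens=45000 yields
	//   { prompt_tokens: 50000, completion_tokens: 1200, total_tokens: 51200,
	//     prompt_tokens_details: { cached_tokens: 45000 } }
	// while a missing promptTokens or outputTokens leaves `usage` undefined.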

	/**
	 * Record a background compaction failure on the current turn's metadata,
	 * matching how foreground compaction records its failures.
	 */
	private _recordBackgroundCompactionFailure(promptContext: IBuildPromptContext, trigger: string): void {
		const turn = promptContext.conversation?.getLatestTurn();
		turn?.setMetadata(new SummarizedConversationHistoryMetadata(
			'', // no toolCallRoundId for failures
			'', // no summary text for failures
			{
				model: this.endpoint.model,
				source: 'background',
				outcome: `noResult_${trigger}`,
				contextLengthBefore: this._lastRenderTokenCount,
			},
		));
	}

	private _sendBackgroundCompactionTelemetry(
		trigger: string,
		outcome: string,
		contextRatio: number,
		promptContext: IBuildPromptContext,
	): void {
		/* __GDPR__
			"backgroundSummarizationApplied" : {
				"owner": "bhavyau",
				"comment": "Tracks background compaction orchestration decisions and outcomes in the agent loop.",
				"trigger": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The code path that triggered background compaction consumption." },
				"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Outcome of the background compaction consumption. One of: 'applied' (result applied and re-render succeeded), 'appliedButReRenderFailed' (result applied but the subsequent re-render still exceeded budget and required a fallback), 'noResult' (no usable result was produced)." },
				"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Id for the current chat conversation." },
				"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID that this background compaction was consumed during." },
				"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID used." },
				"contextRatio": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The context window usage ratio when background compaction was consumed." }
			}
		*/
		this.telemetryService.sendMSFTTelemetryEvent('backgroundSummarizationApplied', {
			trigger,
			outcome,
			conversationId: promptContext.conversation?.sessionId,
			chatRequestId: promptContext.conversation?.getLatestTurn()?.id,
			model: this.endpoint.model,
		}, {
			contextRatio,
		});
		GenAiMetrics.incrementAgentSummarizationCount(this.otelService, outcome);
	}

	override processResponse = undefined;
}