GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/intents/node/agentIntent.ts
/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import * as l10n from '@vscode/l10n';
import { Raw, RenderPromptResult } from '@vscode/prompt-tsx';
import { BudgetExceededError } from '@vscode/prompt-tsx/dist/base/materialized';
import type * as vscode from 'vscode';
import { IChatSessionService } from '../../../platform/chat/common/chatSessionService';
import { ChatFetchResponseType, ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes';
import { ISessionTranscriptService } from '../../../platform/chat/common/sessionTranscriptService';
import { getTextPart } from '../../../platform/chat/common/globalStringUtils';
import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService';
import { isAnthropicFamily, isGptFamily, modelCanUseApplyPatchExclusively, modelCanUseReplaceStringExclusively, modelSupportsApplyPatch, modelSupportsMultiReplaceString, modelSupportsReplaceString, modelSupportsSimplifiedApplyPatchInstructions } from '../../../platform/endpoint/common/chatModelCapabilities';
import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
import { IAutomodeService } from '../../../platform/endpoint/node/automodeService';
import { IEnvService } from '../../../platform/env/common/envService';
import { ILogService } from '../../../platform/log/common/logService';
import { IEditLogService } from '../../../platform/multiFileEdit/common/editLogService';
import { CUSTOM_TOOL_SEARCH_NAME, isAnthropicContextEditingEnabled } from '../../../platform/networking/common/anthropic';
import { IChatEndpoint } from '../../../platform/networking/common/networking';
import { modelsWithoutResponsesContextManagement } from '../../../platform/networking/common/openai';
import { INotebookService } from '../../../platform/notebook/common/notebookService';
import { GenAiMetrics } from '../../../platform/otel/common/genAiMetrics';
import { IOTelService } from '../../../platform/otel/common/otelService';
import { IPromptPathRepresentationService } from '../../../platform/prompts/common/promptPathRepresentationService';
import { ITasksService } from '../../../platform/tasks/common/tasksService';
import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
import { ITestProvider } from '../../../platform/testing/common/testProvider';
import { IWorkspaceService } from '../../../platform/workspace/common/workspaceService';

import { isCancellationError } from '../../../util/vs/base/common/errors';
import { Iterable } from '../../../util/vs/base/common/iterator';
import { IInstantiationService, ServicesAccessor } from '../../../util/vs/platform/instantiation/common/instantiation';

import { ChatResponseProgressPart2 } from '../../../vscodeTypes';
import { ICommandService } from '../../commands/node/commandService';
import { Intent } from '../../common/constants';
import { ChatVariablesCollection } from '../../prompt/common/chatVariablesCollection';
import { Conversation, normalizeSummariesOnRounds, RenderedUserMessageMetadata, TurnStatus } from '../../prompt/common/conversation';
import { IBuildPromptContext } from '../../prompt/common/intents';
import { getRequestedToolCallIterationLimit, IContinueOnErrorConfirmation } from '../../prompt/common/specialRequestTypes';
import { ChatTelemetryBuilder } from '../../prompt/node/chatParticipantTelemetry';
import { IDefaultIntentRequestHandlerOptions } from '../../prompt/node/defaultIntentRequestHandler';
import { IDocumentContext } from '../../prompt/node/documentContext';
import { IBuildPromptResult, IIntent, IIntentInvocation } from '../../prompt/node/intents';
import { AgentPrompt, AgentPromptProps } from '../../prompts/node/agent/agentPrompt';
import { BackgroundSummarizationState, BackgroundSummarizer, IBackgroundSummarizationResult, shouldKickOffBackgroundSummarization } from '../../prompts/node/agent/backgroundSummarizer';
import { AgentPromptCustomizations, PromptRegistry } from '../../prompts/node/agent/promptRegistry';
import { extractInlineSummary, InlineSummarizationUserMessage, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder, appendTranscriptHintToSummary, computeSummarizationRoundCounts } from '../../prompts/node/agent/summarizedConversationHistory';
import { PromptRenderer, renderPromptElement } from '../../prompts/node/base/promptRenderer';
import { ICodeMapperService } from '../../prompts/node/codeMapper/codeMapperService';
import { EditCodePrompt2 } from '../../prompts/node/panel/editCodePrompt2';
import { NotebookInlinePrompt } from '../../prompts/node/panel/notebookInlinePrompt';
import { ToolResultMetadata } from '../../prompts/node/panel/toolCalling';
import { IEditToolLearningService } from '../../tools/common/editToolLearningService';
import { normalizeToolSchema } from '../../tools/common/toolSchemaNormalizer';
import { ContributedToolName, ToolName } from '../../tools/common/toolNames';
import { IToolsService } from '../../tools/common/toolsService';
import { applyPatch5Description } from '../../tools/node/applyPatchTool';
import { multiReplaceStringPrimaryDescription } from '../../tools/node/multiReplaceStringTool';
import { replaceStringBatchDescription } from '../../tools/node/replaceStringTool';
import { getAgentMaxRequests } from '../common/agentConfig';
import { addCacheBreakpoints } from './cacheBreakpoints';
import { EditCodeIntent, EditCodeIntentInvocation, EditCodeIntentInvocationOptions, mergeMetadata, toNewChatReferences } from './editCodeIntent';
import { ToolCallingLoop } from './toolCallingLoop';

function isResponsesCompactionContextManagementEnabled(endpoint: IChatEndpoint, configurationService: IConfigurationService, experimentationService: IExperimentationService): boolean {
	return endpoint.apiType === 'responses'
		&& configurationService.getExperimentBasedConfig(ConfigKey.ResponsesApiContextManagementEnabled, experimentationService)
		&& !modelsWithoutResponsesContextManagement.has(endpoint.family);
}
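
// Note (added commentary, not in the original source): this gate is consulted
// twice below. handleSummarizeCommand() refuses a manual /compact when the
// Responses API already manages compaction for the session, and buildPrompt()
// disables client-side history summarization for the same reason.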

export const getAgentTools = async (accessor: ServicesAccessor, request: vscode.ChatRequest, model?: IChatEndpoint) => {
	const toolsService = accessor.get<IToolsService>(IToolsService);
	const testService = accessor.get<ITestProvider>(ITestProvider);
	const tasksService = accessor.get<ITasksService>(ITasksService);
	const configurationService = accessor.get<IConfigurationService>(IConfigurationService);
	const experimentationService = accessor.get<IExperimentationService>(IExperimentationService);
	const endpointProvider = accessor.get<IEndpointProvider>(IEndpointProvider);
	const editToolLearningService = accessor.get<IEditToolLearningService>(IEditToolLearningService);
	model ??= await endpointProvider.getChatEndpoint(request);

	const allowTools: Record<string, boolean> = {};

	const learned = editToolLearningService.getPreferredEndpointEditTool(model);
	if (learned) { // a learning-enabled (BYOK) model; go with what it prefers
		allowTools[ToolName.EditFile] = learned.includes(ToolName.EditFile);
		allowTools[ToolName.ReplaceString] = learned.includes(ToolName.ReplaceString);
		allowTools[ToolName.MultiReplaceString] = learned.includes(ToolName.MultiReplaceString);
		allowTools[ToolName.ApplyPatch] = learned.includes(ToolName.ApplyPatch);
	} else {
		allowTools[ToolName.EditFile] = true;
		allowTools[ToolName.ReplaceString] = modelSupportsReplaceString(model);
		allowTools[ToolName.ApplyPatch] = modelSupportsApplyPatch(model) && !!toolsService.getTool(ToolName.ApplyPatch);

		if (allowTools[ToolName.ApplyPatch] && modelCanUseApplyPatchExclusively(model)) {
			allowTools[ToolName.EditFile] = false;
		}

		if (modelCanUseReplaceStringExclusively(model)) {
			allowTools[ToolName.ReplaceString] = true;
			allowTools[ToolName.EditFile] = false;
		}

		if (allowTools[ToolName.ReplaceString] && modelSupportsMultiReplaceString(model)) {
			allowTools[ToolName.MultiReplaceString] = true;
		}
	}

	allowTools[ToolName.CoreRunTest] = await testService.hasAnyTests();
	allowTools[ToolName.CoreRunTask] = tasksService.getTasks().length > 0;

	const searchSubagentEnabled = configurationService.getExperimentBasedConfig(ConfigKey.Advanced.SearchSubagentToolEnabled, experimentationService);
	const isGptOrAnthropic = isGptFamily(model) || isAnthropicFamily(model);
	allowTools[ToolName.SearchSubagent] = isGptOrAnthropic && searchSubagentEnabled;

	const executionSubagentEnabled = configurationService.getExperimentBasedConfig(ConfigKey.Advanced.ExecutionSubagentToolEnabled, experimentationService);
	allowTools[ToolName.ExecutionSubagent] = isGptOrAnthropic && executionSubagentEnabled;

	const skillToolEnabled = configurationService.getExperimentBasedConfig(ConfigKey.Advanced.SkillToolEnabled, experimentationService);
	allowTools[ToolName.Skill] = skillToolEnabled;

	allowTools[CUSTOM_TOOL_SEARCH_NAME] = !!model.supportsToolSearch;

	if (model.family.includes('grok-code')) {
		allowTools[ToolName.CoreManageTodoList] = false;
	}

	// Enable task_complete in autopilot mode so the model can signal task completion.
	// The tool is registered in core as a built-in but needs explicit opt-in here.
	allowTools['task_complete'] = request.permissionLevel === 'autopilot';

	allowTools[ToolName.EditFilesPlaceholder] = false;
	allowTools[ToolName.SessionStoreSql] = false; // Only available via /chronicle
	// todo@connor4312: string check here is for back-compat for 1.109 Insiders
	if (Iterable.some(request.tools, ([t, enabled]) => (typeof t === 'string' ? t : t.name) === ContributedToolName.EditFilesPlaceholder && enabled === false)) {
		allowTools[ToolName.ApplyPatch] = false;
		allowTools[ToolName.EditFile] = false;
		allowTools[ToolName.ReplaceString] = false;
		allowTools[ToolName.MultiReplaceString] = false;
	}

	if (model.family.toLowerCase().includes('gemini-3') && configurationService.getExperimentBasedConfig(ConfigKey.Advanced.Gemini3MultiReplaceString, experimentationService)) {
		allowTools[ToolName.MultiReplaceString] = true;
	}

	const tools = toolsService.getEnabledTools(request, model, tool => {
		if (typeof allowTools[tool.name] === 'boolean') {
			return allowTools[tool.name];
		}

		// Must return undefined to fall back to other checks
		return undefined;
	});

	if (modelSupportsSimplifiedApplyPatchInstructions(model) && configurationService.getExperimentBasedConfig(ConfigKey.Advanced.Gpt5AlternativePatch, experimentationService)) {
		const ap = tools.findIndex(t => t.name === ToolName.ApplyPatch);
		if (ap !== -1) {
			tools[ap] = { ...tools[ap], description: applyPatch5Description };
		}
	}

	if (configurationService.getExperimentBasedConfig(ConfigKey.Advanced.BatchReplaceStringDescriptions, experimentationService)) {
		const rs = tools.findIndex(t => t.name === ToolName.ReplaceString);
		if (rs !== -1) {
			tools[rs] = { ...tools[rs], description: replaceStringBatchDescription };
		}
		const mrs = tools.findIndex(t => t.name === ToolName.MultiReplaceString);
		if (mrs !== -1) {
			tools[mrs] = { ...tools[mrs], description: multiReplaceStringPrimaryDescription };
		}
	}

	return tools;
};

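// Illustrative usage (a sketch added for this write-up, not in the original):
// getAgentTools is designed to run through the instantiation service so it can
// pull the services it needs off the accessor. AgentIntentInvocation does
// exactly this in getAvailableTools() below:
//
//   const tools = await instantiationService.invokeFunction(getAgentTools, request);
//
// Returning `undefined` from the getEnabledTools callback (rather than false)
// defers tools that have no entry in `allowTools` to the service's own checks.
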
export class AgentIntent extends EditCodeIntent {

	static override readonly ID = Intent.Agent;

	override readonly id = AgentIntent.ID;

	private readonly _backgroundSummarizers = new Map<string, BackgroundSummarizer>();

	constructor(
		@IInstantiationService instantiationService: IInstantiationService,
		@IEndpointProvider endpointProvider: IEndpointProvider,
		@IConfigurationService configurationService: IConfigurationService,
		@IExperimentationService expService: IExperimentationService,
		@ICodeMapperService codeMapperService: ICodeMapperService,
		@IWorkspaceService workspaceService: IWorkspaceService,
		@IChatSessionService chatSessionService: IChatSessionService,
		@IAutomodeService private readonly _automodeService: IAutomodeService,
	) {
		super(instantiationService, endpointProvider, configurationService, expService, codeMapperService, workspaceService, { intentInvocation: AgentIntentInvocation, processCodeblocks: false });
		chatSessionService.onDidDisposeChatSession(sessionId => {
			const summarizer = this._backgroundSummarizers.get(sessionId);
			if (summarizer) {
				summarizer.cancel();
				this._backgroundSummarizers.delete(sessionId);
			}
		});
	}

	getOrCreateBackgroundSummarizer(sessionId: string, modelMaxPromptTokens: number): BackgroundSummarizer {
		let summarizer = this._backgroundSummarizers.get(sessionId);
		if (!summarizer) {
			summarizer = new BackgroundSummarizer(modelMaxPromptTokens);
			this._backgroundSummarizers.set(sessionId, summarizer);
		}
		return summarizer;
	}

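	// Note (added commentary, not in the original): exactly one
	// BackgroundSummarizer is kept per chat session. The constructor's
	// onDidDisposeChatSession listener cancels and drops it when the session
	// goes away, so a summarization started near the end of a session cannot
	// leak or write into a disposed conversation.
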
	protected override getIntentHandlerOptions(request: vscode.ChatRequest): IDefaultIntentRequestHandlerOptions | undefined {
		return {
			maxToolCallIterations: getRequestedToolCallIterationLimit(request) ??
				this.instantiationService.invokeFunction(getAgentMaxRequests),
			temperature: this.configurationService.getConfig(ConfigKey.Advanced.AgentTemperature) ?? 0,
			overrideRequestLocation: ChatLocation.Agent
		};
	}

	override async handleRequest(
		conversation: Conversation,
		request: vscode.ChatRequest,
		stream: vscode.ChatResponseStream,
		token: vscode.CancellationToken,
		documentContext: IDocumentContext | undefined,
		agentName: string,
		location: ChatLocation,
		chatTelemetry: ChatTelemetryBuilder,
		yieldRequested: () => boolean
	): Promise<vscode.ChatResult> {
		if (request.command === 'compact') {
			return this.handleSummarizeCommand(conversation, request, stream, token);
		}

		return super.handleRequest(conversation, request, stream, token, documentContext, agentName, location, chatTelemetry, yieldRequested);
	}

	private async handleSummarizeCommand(
		conversation: Conversation,
		request: vscode.ChatRequest,
		stream: vscode.ChatResponseStream,
		token: vscode.CancellationToken
	): Promise<vscode.ChatResult> {
		normalizeSummariesOnRounds(conversation.turns);

		// Exclude the current /compact turn.
		const history = conversation.turns.slice(0, -1);
		if (history.length === 0) {
			stream.markdown(l10n.t('Nothing to compact. Start a conversation first.'));
			return {};
		}

		// The summarization metadata needs to be associated with a tool call round.
		const lastRoundId = history.at(-1)?.rounds.at(-1)?.id;
		if (!lastRoundId) {
			stream.markdown(l10n.t('Nothing to compact. Start a conversation with tool calls first.'));
			return {};
		}

		const endpoint = await this.endpointProvider.getChatEndpoint(request);
		if (isResponsesCompactionContextManagementEnabled(endpoint, this.configurationService, this.expService)) {
			stream.markdown(l10n.t('Compaction is already managed by context management for this session.'));
			return {};
		}

		const promptContext: IBuildPromptContext = {
			history,
			chatVariables: new ChatVariablesCollection([]),
			query: '',
			toolCallRounds: [],
			conversation,
		};

		try {
			const propsBuilder = this.instantiationService.createInstance(SummarizedConversationHistoryPropsBuilder);
			const propsInfo = propsBuilder.getProps({
				priority: 1,
				endpoint,
				location: ChatLocation.Agent,
				promptContext,
				maxToolResultLength: Infinity,
			});

			stream.progress(l10n.t('Compacting conversation...'));

			const progress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart> = {
				report: () => { }
			};
			const renderer = PromptRenderer.create(this.instantiationService, endpoint, SummarizedConversationHistory, {
				...propsInfo.props,
				triggerSummarize: true,
				summarizationInstructions: request.prompt || undefined,
			});
			const result = await renderer.render(progress, token);
			const summaryMetadata = result.metadata.get(SummarizedConversationHistoryMetadata);
			if (!summaryMetadata) {
				stream.markdown(l10n.t('Unable to compact conversation.'));
				return {};
			}

			if (summaryMetadata.usage) {
				stream.usage({
					promptTokens: summaryMetadata.usage.prompt_tokens,
					completionTokens: summaryMetadata.usage.completion_tokens,
					promptTokenDetails: summaryMetadata.promptTokenDetails,
				});
			}

			stream.markdown(l10n.t('Compacted conversation.'));
			const lastTurn = conversation.getLatestTurn();
			// If using auto mode, the next turn will select a new endpoint.
			this._automodeService.invalidateRouterCache(request);

			const chatResult: vscode.ChatResult = {
				metadata: {
					summary: {
						toolCallRoundId: summaryMetadata.toolCallRoundId,
						text: summaryMetadata.text,
					}
				}
			};

			// setResponse must be called so that turn.resultMetadata?.summary
			// is available for normalizeSummariesOnRounds on subsequent turns.
			lastTurn.setResponse(
				TurnStatus.Success,
				{ type: 'model', message: '' },
				undefined,
				chatResult,
			);

			lastTurn.setMetadata(summaryMetadata);

			return chatResult;
		} catch (e) {
			if (isCancellationError(e)) {
				return {};
			}

			const message = e instanceof Error ? e.message : String(e);
			stream.markdown(l10n.t('Failed to compact conversation: {0}', message));
			return {};
		}
	}
}

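// Illustrative sketch (added; the `summary` shape is taken from the code
// above, the consuming snippet is an assumption based on the comments): the
// /compact result metadata written by handleSummarizeCommand round-trips
// through the chat host and is read back on later turns, roughly:
//
//   const summary = turn.resultMetadata?.summary;
//   // -> { toolCallRoundId: string; text: string }
//
// normalizeSummariesOnRounds() uses this to re-attach summaries to their
// tool call rounds before the next prompt render.
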
export class AgentIntentInvocation extends EditCodeIntentInvocation implements IIntentInvocation {

	public override readonly codeblocksRepresentEdits = false;

	protected prompt: typeof AgentPrompt | typeof EditCodePrompt2 | typeof NotebookInlinePrompt = AgentPrompt;

	protected extraPromptProps: Partial<AgentPromptProps> | undefined;

	private _resolvedCustomizations: AgentPromptCustomizations | undefined;

	private _lastRenderTokenCount: number = 0;

	/** Cached model capabilities from the most recent main agent render, reused by the background summarizer. */
	private _lastModelCapabilities: { enableThinking: boolean; reasoningEffort: string | undefined; enableToolSearch: boolean; enableContextEditing: boolean } | undefined;

	/**
	 * RNG used to jitter the inline-summarization trigger threshold around 0.80.
	 * Tests may overwrite this directly (e.g. `(invocation as any)._thresholdRng = () => 0.5`).
	 */
	private _thresholdRng: () => number = Math.random;

	constructor(
		intent: IIntent,
		location: ChatLocation,
		endpoint: IChatEndpoint,
		request: vscode.ChatRequest,
		intentOptions: EditCodeIntentInvocationOptions,
		@IInstantiationService instantiationService: IInstantiationService,
		@ICodeMapperService codeMapperService: ICodeMapperService,
		@IEnvService envService: IEnvService,
		@IPromptPathRepresentationService promptPathRepresentationService: IPromptPathRepresentationService,
		@IEndpointProvider endpointProvider: IEndpointProvider,
		@IWorkspaceService workspaceService: IWorkspaceService,
		@IToolsService toolsService: IToolsService,
		@IConfigurationService configurationService: IConfigurationService,
		@IEditLogService editLogService: IEditLogService,
		@ICommandService commandService: ICommandService,
		@ITelemetryService telemetryService: ITelemetryService,
		@INotebookService notebookService: INotebookService,
		@ILogService private readonly logService: ILogService,
		@IExperimentationService private readonly expService: IExperimentationService,
		@IAutomodeService private readonly automodeService: IAutomodeService,
		@IOTelService protected override readonly otelService: IOTelService,
		@ISessionTranscriptService private readonly sessionTranscriptService: ISessionTranscriptService,
	) {
		super(intent, location, endpoint, request, intentOptions, instantiationService, codeMapperService, envService, promptPathRepresentationService, endpointProvider, workspaceService, toolsService, configurationService, editLogService, commandService, telemetryService, notebookService, otelService);
	}

	public override getAvailableTools(): Promise<vscode.LanguageModelToolInformation[]> {
		return this.instantiationService.invokeFunction(getAgentTools, this.request);
	}

	override async buildPrompt(
		promptContext: IBuildPromptContext,
		progress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart>,
		token: vscode.CancellationToken
	): Promise<IBuildPromptResult> {
		this._resolvedCustomizations = await PromptRegistry.resolveAllCustomizations(this.instantiationService, this.endpoint);
		// Add any references from the codebase invocation to the request
		const codebase = await this._getCodebaseReferences(promptContext, token);

		let variables = promptContext.chatVariables;
		let toolReferences: vscode.ChatPromptReference[] = [];
		if (codebase) {
			toolReferences = toNewChatReferences(variables, codebase.references);
			variables = new ChatVariablesCollection([...this.request.references, ...toolReferences]);
		}

		const tools = promptContext.tools?.availableTools;
		const toolSearchEnabled = !!this.endpoint.supportsToolSearch;
		const toolTokens = tools?.length ? await this.endpoint.acquireTokenizer().countToolTokens(tools) : 0;

		const summarizeThresholdOverride = this.configurationService.getConfig<number | undefined>(ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold);
		if (typeof summarizeThresholdOverride === 'number' && summarizeThresholdOverride < 100 && summarizeThresholdOverride > 0) {
			throw new Error(`Setting github.copilot.${ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold.id} is too low`);
		}

		const baseBudget = Math.min(
			this.configurationService.getConfig<number | undefined>(ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold) ?? this.endpoint.modelMaxPromptTokens,
			this.endpoint.modelMaxPromptTokens
		);
		const useTruncation = this.endpoint.apiType === 'responses' && this.configurationService.getConfig(ConfigKey.Advanced.UseResponsesApiTruncation);
		const responsesCompactionContextManagementEnabled = isResponsesCompactionContextManagementEnabled(this.endpoint, this.configurationService, this.expService);
		const summarizationEnabled = this.configurationService.getConfig(ConfigKey.SummarizeAgentConversationHistory) && this.prompt === AgentPrompt && !responsesCompactionContextManagementEnabled;
		const useInlineSummarization = summarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.Advanced.AgentHistorySummarizationInline, this.expService);

		// When tools are present, apply a 10% safety margin on the message portion
		// to account for tokenizer discrepancies between our tool-token counter and
		// the model's actual tokenizer. Without this, an undercount could cause an
		// API-level context_length_exceeded error instead of a graceful
		// BudgetExceededError from prompt-tsx. When there are no tools the endpoint's
		// own modelMaxPromptTokens is used unchanged.
		const messageBudget = Math.max(1, Math.floor((baseBudget - toolTokens) * 0.9));
		const safeBudget = useTruncation ? Number.MAX_SAFE_INTEGER : messageBudget;
		const endpoint = toolTokens > 0 ? this.endpoint.cloneWithTokenOverride(safeBudget) : this.endpoint;

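		// Worked example (added; the numbers are hypothetical): with a
		// 128,000-token model budget and 5,000 tokens of tool schemas,
		//   messageBudget = floor((128000 - 5000) * 0.9) = 110,700
		// so messages get 110,700 tokens and roughly 12,300 tokens remain as
		// headroom for the tool schemas plus tokenizer drift.
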
		this.logService.debug(`[Agent] rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}, totalTools: ${tools?.length ?? 0}, toolSearchEnabled: ${toolSearchEnabled}), summarizationEnabled=${summarizationEnabled}`);
		let result: RenderPromptResult;
		// When the "last two messages" cache breakpoint strategy is enabled,
		// suppress prompt-tsx and heuristic cache breakpoints — messagesApi.ts
		// will place breakpoints on the last two merged messages instead.
		const useLastTwoMessagesCacheBPs = isAnthropicFamily(this.endpoint)
			&& this.configurationService.getExperimentBasedConfig(ConfigKey.AnthropicCacheBreakpointsLastTwoMessages, this.expService);
		const props: AgentPromptProps = {
			endpoint,
			promptContext: {
				...promptContext,
				tools: promptContext.tools && {
					...promptContext.tools,
					toolReferences: this.stableToolReferences.filter((r) => r.name !== ToolName.Codebase),
				}
			},
			location: this.location,
			enableCacheBreakpoints: summarizationEnabled && !useLastTwoMessagesCacheBPs,
			...this.extraPromptProps,
			customizations: this._resolvedCustomizations
		};

		// ── Background compaction ────────────────────────────────────────
		//
		// Pre-render: if a previous bg pass completed, apply it now.
		//
		// BudgetExceeded: if bg is InProgress/Completed, wait/apply.
		// Otherwise fall back to foreground summarization.
		//
		// Post-render (≥ 80% + Idle): kick off background compaction
		// so it is ready for a future turn.
		//
		const backgroundSummarizer = summarizationEnabled ? this._getOrCreateBackgroundSummarizer(promptContext.conversation?.sessionId) : undefined;
		const contextRatio = backgroundSummarizer && baseBudget > 0
			? (this._lastRenderTokenCount + toolTokens) / baseBudget
			: 0;

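		// Sketch of the summarizer lifecycle as used below (added commentary;
		// the states and methods are the ones referenced in this file, the
		// transitions are inferred from the call sites):
		//
		//   Idle --start()--> InProgress --> Completed --consumeAndReset()--> Idle
		//                          |
		//                          +--> Failed (eligible for a new kick-off)
		//
		// waitForCompletion() blocks on an InProgress pass; cancel() is invoked
		// by AgentIntent when the chat session is disposed.
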
		// Track whether this iteration already performed compaction-related work
		// (including applying a summary or using a foreground fallback path) so
		// we don't immediately re-trigger background compaction in the post-render check.
		let didSummarizeThisIteration = false;

		// If a previous background pass completed, apply its summary now.
		if (summarizationEnabled && backgroundSummarizer?.state === BackgroundSummarizationState.Completed) {
			const bgResult = backgroundSummarizer.consumeAndReset();
			if (bgResult) {
				this.logService.debug(`[ConversationHistorySummarizer] applying completed background summary (roundId=${bgResult.toolCallRoundId})`);
				progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation')));
				this._applySummaryToRounds(bgResult, promptContext);
				this._persistSummaryOnTurn(bgResult, promptContext, this._lastRenderTokenCount);
				this._sendBackgroundCompactionTelemetry('preRender', 'applied', contextRatio, promptContext);
				didSummarizeThisIteration = true;
			} else {
				this.logService.warn(`[ConversationHistorySummarizer] background compaction state was Completed but consumeAndReset returned no result`);
				this._sendBackgroundCompactionTelemetry('preRender', 'noResult', contextRatio, promptContext);
				this._recordBackgroundCompactionFailure(promptContext, 'preRender');
			}
		}

		// Render the prompt without summarization or cache breakpoints, using
		// the original endpoint (not reduced for tools/safety buffer).
		const renderWithoutSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise<RenderPromptResult> => {
			this.logService.debug(`[Agent] ${reason}, rendering without summarization`);
			const renderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
				...renderProps,
				endpoint: this.endpoint,
				enableCacheBreakpoints: false
			});
			try {
				return await renderer.render(progress, token);
			} catch (e) {
				if (e instanceof BudgetExceededError) {
					this.logService.error(e, `[Agent] fallback render failed due to budget exceeded`);
					const maxTokens = this.endpoint.modelMaxPromptTokens;
					throw new Error(`Unable to build prompt, modelMaxPromptTokens = ${maxTokens} (${e.message})`);
				}
				throw e;
			}
		};

		// Helper function for synchronous summarization flow with fallbacks
		const renderWithSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise<RenderPromptResult> => {
			// Check if a previous foreground summarization already failed in this
			// turn. The metadata is set on the turn returned by getLatestTurn(),
			// which is the same turn throughout a single buildPrompt call since
			// the conversation doesn't advance mid-render.
			const turn = promptContext.conversation?.getLatestTurn();
			const previousForegroundSummary = turn?.getMetadata(SummarizedConversationHistoryMetadata);
			if (previousForegroundSummary?.source === 'foreground' && previousForegroundSummary.outcome && previousForegroundSummary.outcome !== 'success') {
				this.logService.debug(`[ConversationHistorySummarizer] ${reason}, skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`);
				/* __GDPR__
					"triggerSummarizeSkipped" : {
						"owner": "bhavyau",
						"comment": "Tracks when foreground summarization was skipped because a previous attempt already failed in this turn.",
						"previousOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The outcome of the previous failed summarization attempt." },
						"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." }
					}
				*/
				this.telemetryService.sendMSFTTelemetryEvent('triggerSummarizeSkipped', { previousOutcome: previousForegroundSummary.outcome, model: renderProps.endpoint.model });
				GenAiMetrics.incrementAgentSummarizationCount(this.otelService, 'skipped');
				return renderWithoutSummarization(`skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`, renderProps);
			}

			this.logService.debug(`[ConversationHistorySummarizer] ${reason}, triggering summarization`);
			try {
				const renderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
					...renderProps,
					endpoint: this.endpoint,
					promptContext: renderProps.promptContext,
					triggerSummarize: true,
					forceSimpleSummary: true,
				});
				return await renderer.render(progress, token);
			} catch (e) {
				this.logService.error(e, `[ConversationHistorySummarizer] summarization failed`);
				const errorKind = e instanceof BudgetExceededError ? 'budgetExceeded' : 'error';
				/* __GDPR__
					"triggerSummarizeFailed" : {
						"owner": "roblourens",
						"comment": "Tracks when triggering summarization failed - for example, a summary was created but not applied successfully.",
						"errorKind": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state or failure reason of the summarization." },
						"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID used for the summarization." }
					}
				*/
				this.telemetryService.sendMSFTTelemetryEvent('triggerSummarizeFailed', { errorKind, model: renderProps.endpoint.model });
				GenAiMetrics.incrementAgentSummarizationCount(this.otelService, 'failed');

				// Track failed foreground compaction
				const turn = promptContext.conversation?.getLatestTurn();
				turn?.setMetadata(new SummarizedConversationHistoryMetadata(
					'', // no toolCallRoundId for failures
					'', // no summary text for failures
					{
						model: renderProps.endpoint.model,
						source: 'foreground',
						outcome: errorKind,
						contextLengthBefore: this._lastRenderTokenCount,
					},
				));

				return renderWithoutSummarization(`summarization failed (${errorKind})`, renderProps);
			}
		};

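		// Overview of the fallback chain that follows (added commentary
		// summarizing the control flow below):
		//
		//   1. render with summarization enabled and the reduced budget
		//   2. on BudgetExceededError: wait for / apply a background summary,
		//      then re-render
		//   3. if that is unavailable or still over budget: foreground
		//      summarization via renderWithSummarization()
		//   4. last resort: renderWithoutSummarization(), which throws a
		//      descriptive error if even that cannot fit.
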
		const contextLengthBefore = this._lastRenderTokenCount;

		try {
			const renderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, props);
			result = await renderer.render(progress, token);
		} catch (e) {
			if (e instanceof BudgetExceededError && summarizationEnabled) {
				if (!promptContext.toolCallResults) {
					promptContext = {
						...promptContext,
						toolCallResults: {}
					};
				}
				e.metadata.getAll(ToolResultMetadata).forEach((metadata) => {
					promptContext.toolCallResults![metadata.toolCallId] = metadata.result;
				});

				// If a background compaction is already running or completed,
				// wait for / apply it instead of firing another LLM request.
				if (backgroundSummarizer && (backgroundSummarizer.state === BackgroundSummarizationState.InProgress || backgroundSummarizer.state === BackgroundSummarizationState.Completed)) {
					let budgetExceededTrigger: string;
					if (backgroundSummarizer.state === BackgroundSummarizationState.InProgress) {
						budgetExceededTrigger = 'budgetExceededWaited';
						this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — waiting on in-progress background compaction instead of new request`);
						const summaryPromise = backgroundSummarizer.waitForCompletion();
						progress.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {
							try { await summaryPromise; } catch { }
							return l10n.t('Compacted conversation');
						}));
						await summaryPromise;
					} else {
						budgetExceededTrigger = 'budgetExceededReady';
						this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — applying already-completed background compaction`);
						progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation')));
					}
					const bgResult = backgroundSummarizer.consumeAndReset();
					if (bgResult) {
						this.logService.debug(`[ConversationHistorySummarizer] background compaction applied after budget exceeded (roundId=${bgResult.toolCallRoundId})`);
						this._applySummaryToRounds(bgResult, promptContext);
						this._persistSummaryOnTurn(bgResult, promptContext, contextLengthBefore);
						didSummarizeThisIteration = true;
						try {
							const reRenderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, { ...props, promptContext });
							result = await reRenderer.render(progress, token);
							this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'applied', contextRatio, promptContext);
						} catch (reRenderError) {
							if (reRenderError instanceof BudgetExceededError) {
								this.logService.debug(`[ConversationHistorySummarizer] re-render after background compaction still exceeded budget — falling back`);
								this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'appliedButReRenderFailed', contextRatio, promptContext);
								result = await renderWithoutSummarization('budget exceeded after background compaction applied', { ...props, promptContext });
							} else {
								throw reRenderError;
							}
						}
					} else {
						this.logService.debug(`[ConversationHistorySummarizer] background compaction produced no usable result after budget exceeded — falling back to synchronous summarization`);
						this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'noResult', contextRatio, promptContext);
						this._recordBackgroundCompactionFailure(promptContext, budgetExceededTrigger);
						// Background compaction failed — fall back to synchronous summarization
						result = await renderWithSummarization(`budget exceeded (${e.message}), background compaction failed`);
						didSummarizeThisIteration = true;
					}
				} else {
					result = await renderWithSummarization(`budget exceeded (${e.message})`);
					didSummarizeThisIteration = true;
				}
			} else {
				throw e;
			}
		}

		this._lastRenderTokenCount = result.tokenCount;

		// Track foreground compaction if summarization happened during rendering
		const summaryMeta = result.metadata.get(SummarizedConversationHistoryMetadata);
		if (summaryMeta) {
			const turn = promptContext.conversation?.getLatestTurn();
			turn?.setMetadata(new SummarizedConversationHistoryMetadata(
				summaryMeta.toolCallRoundId,
				summaryMeta.text,
				{
					thinking: summaryMeta.thinking,
					usage: summaryMeta.usage,
					promptTokenDetails: summaryMeta.promptTokenDetails,
					model: summaryMeta.model,
					summarizationMode: summaryMeta.summarizationMode,
					numRounds: summaryMeta.numRounds,
					numRoundsSinceLastSummarization: summaryMeta.numRoundsSinceLastSummarization,
					durationMs: summaryMeta.durationMs,
					source: 'foreground',
					outcome: 'success',
					contextLengthBefore,
				},
			));
		}

		// Post-render: kick off background compaction if idle and over the
		// threshold. For the inline-summarization path we care about prompt
		// cache parity with the main agent fetch — so we gate kick-off on a
		// completed tool call (cache has been warmed) and jitter the threshold
		// around 0.80 to avoid firing at the same exact boundary every time.
		// The non-inline path forks its own prompt and sees no cache benefit,
		// so it keeps the simple >= 0.80 behavior.
		if (summarizationEnabled && backgroundSummarizer && !didSummarizeThisIteration) {
			const postRenderRatio = baseBudget > 0
				? (result.tokenCount + toolTokens) / baseBudget
				: 0;

			const idleOrFailed = backgroundSummarizer.state === BackgroundSummarizationState.Idle
				|| backgroundSummarizer.state === BackgroundSummarizationState.Failed;

			const cacheWarm = (promptContext.toolCallRounds?.length ?? 0) > 0;

			const kickOff = shouldKickOffBackgroundSummarization(postRenderRatio, useInlineSummarization, cacheWarm, this._thresholdRng);

			if (kickOff && idleOrFailed) {
				if (useInlineSummarization) {
					// Compute and cache model capabilities from the current render's
					// messages. These must match the main agent fetch for cache parity.
					const strippedMessages = ToolCallingLoop.stripInternalToolCallIds(result.messages);
					const rawEffort = this.request.modelConfiguration?.reasoningEffort;
					const isSubagent = !!this.request.subAgentInvocationId;
					// Must match the main agent's enableThinking logic in
					// toolCallingLoop.ts runOne() — thinking is only disabled
					// on continuation turns for Anthropic when no thinking
					// blocks exist yet in the messages.
					const shouldDisableThinking = !!promptContext.isContinuation && isAnthropicFamily(this.endpoint) && !ToolCallingLoop.messagesContainThinking(strippedMessages);
					this._lastModelCapabilities = {
						enableThinking: !shouldDisableThinking,
						reasoningEffort: typeof rawEffort === 'string' ? rawEffort : undefined,
						enableToolSearch: !isSubagent && !!this.endpoint.supportsToolSearch,
						enableContextEditing: !isSubagent && isAnthropicContextEditingEnabled(this.endpoint, this.configurationService, this.expService),
					};
				}
				this._startBackgroundSummarization(backgroundSummarizer, result.messages, promptContext, props, token, postRenderRatio, useInlineSummarization);
			}
		}

		const lastMessage = result.messages.at(-1);
		if (lastMessage?.role === Raw.ChatRole.User) {
			const currentTurn = promptContext.conversation?.getLatestTurn();
			if (currentTurn && !currentTurn.getMetadata(RenderedUserMessageMetadata)) {
				currentTurn.setMetadata(new RenderedUserMessageMetadata(lastMessage.content));
			}
		}

		if (!useLastTwoMessagesCacheBPs) {
			addCacheBreakpoints(result.messages);
		}

		if (this.request.command === 'error') {
			// Should trigger a 400
			result.messages.push({
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: [{ type: 'function', id: '', function: { name: 'tool', arguments: '{' } }]
			});
		}

		return {
			...result,
			// The codebase tool is not actually called/referenced in the edit prompt, so we need to
			// merge its metadata so that its output is not lost and it's not called repeatedly every turn
			// todo@connor4312/joycerhl: this seems a bit janky
			metadata: codebase ? mergeMetadata(result.metadata, codebase.metadatas) : result.metadata,
			// Don't report file references that came in via chat variables in an editing session, unless they have warnings,
			// because they are already displayed as part of the working set
			// references: result.references.filter((ref) => this.shouldKeepReference(editCodeStep, ref, toolReferences, chatVariables)),
		};
	}

	modifyErrorDetails(errorDetails: vscode.ChatErrorDetails, response: ChatResponse): vscode.ChatErrorDetails {
		if (!errorDetails.responseIsFiltered) {
			errorDetails.confirmationButtons = [
				...(errorDetails.confirmationButtons ?? []),
				{ data: { copilotContinueOnError: true } satisfies IContinueOnErrorConfirmation, label: l10n.t('Try Again') },
			];
		}
		return errorDetails;
	}

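	// Note (added commentary): the { copilotContinueOnError: true } payload on
	// the Try Again button is typed by IContinueOnErrorConfirmation (imported
	// from specialRequestTypes above); the button is only offered when the
	// response failed for a reason other than content filtering.
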
	getAdditionalVariables(promptContext: IBuildPromptContext): ChatVariablesCollection | undefined {
		const lastTurn = promptContext.conversation?.turns.at(-1);
		if (!lastTurn) {
			return;
		}

		// Search backwards to find the first real request and return those variables too.
		// Variables aren't re-attached to requests from confirmations.
		// TODO https://github.com/microsoft/vscode/issues/262858, more to do here
		if (lastTurn.acceptedConfirmationData) {
			const turns = promptContext.conversation!.turns.slice(0, -1);
			for (const turn of Iterable.reverse(turns)) {
				if (!turn.acceptedConfirmationData) {
					return turn.promptVariables;
				}
			}
		}
	}

	private _startBackgroundSummarization(
		backgroundSummarizer: BackgroundSummarizer,
		mainRenderMessages: Raw.ChatMessage[],
		promptContext: IBuildPromptContext,
		props: AgentPromptProps,
		token: vscode.CancellationToken,
		contextRatio: number,
		useInlineSummarization: boolean,
	): void {
		this.logService.debug(`[ConversationHistorySummarizer] context at ${(contextRatio * 100).toFixed(0)}% starting background compaction (inline=${useInlineSummarization})`);

		const bgStartTime = Date.now();

		// Snapshot rounds so telemetry reflects state at kick-off time, not at
		// completion time (the main loop mutates toolCallRounds). History is
		// stable across a single user turn so a reference is sufficient.
		const rounds = [...(promptContext.toolCallRounds ?? [])];
		const history = promptContext.history;
		let toolCallRoundId: string | undefined;
		if (rounds.length >= 2) {
			// Mark the round before the last, preserving the last round verbatim
			toolCallRoundId = rounds[rounds.length - 2].id;
		} else if (rounds.length === 1) {
			toolCallRoundId = rounds[0].id;
		} else {
			for (let i = history.length - 1; i >= 0 && !toolCallRoundId; i--) {
				const lastRound = history[i].rounds.at(-1);
				if (lastRound) {
					toolCallRoundId = lastRound.id;
				}
			}
		}

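		// Worked example (added; round ids are hypothetical): with current-turn
		// rounds [r1, r2, r3] the summary is anchored at r2, keeping r3 verbatim;
		// with a single round [r1] it anchors at r1; with no rounds in the
		// current turn, the most recent round found in prior history turns wins.
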
		// Build tool schemas matching the main agent loop so the prompt
		// prefix (system + tools + messages) is identical for cache hits.
		const availableTools = promptContext.tools?.availableTools;
		const normalizedTools = availableTools?.length ? normalizeToolSchema(
			this.endpoint.family,
			availableTools.map(tool => ({
				function: {
					name: tool.name,
					description: tool.description,
					parameters: tool.inputSchema && Object.keys(tool.inputSchema).length ? tool.inputSchema : undefined
				},
				type: 'function' as const,
			})),
			(tool, rule) => {
				this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`);
			},
		) : undefined;
		const toolOpts = normalizedTools?.length ? {
			tools: normalizedTools,
		} : undefined;

		const associatedRequestId = promptContext.conversation?.getLatestTurn()?.id;
		const conversationId = promptContext.conversation?.sessionId;
		const modelCapabilities = this._lastModelCapabilities;

		backgroundSummarizer.start(async bgToken => {
			try {
				if (useInlineSummarization) {
					// Inline mode: fork the exact messages from the main render
					// and append a summary user message. The prompt prefix is
					// byte-identical to the main agent loop for cache hits.
					const strippedMainMessages = ToolCallingLoop.stripInternalToolCallIds(mainRenderMessages);
					const summaryMsgResult = await renderPromptElement(
						this.instantiationService,
						this.endpoint,
						InlineSummarizationUserMessage,
						{ endpoint: this.endpoint },
						undefined,
						bgToken,
					);
					const messages = [
						...strippedMainMessages,
						...summaryMsgResult.messages,
					];

					const response = await this.endpoint.makeChatRequest2({
						debugName: 'summarizeConversationHistory-inline',
						messages,
						finishedCb: undefined,
						location: ChatLocation.Agent,
						conversationId,
						requestOptions: {
							temperature: 0,
							stream: false,
							...toolOpts,
						},
						modelCapabilities,
						telemetryProperties: associatedRequestId ? { associatedRequestId } : undefined,
						enableRetryOnFilter: true,
					}, bgToken);
					if (response.type !== ChatFetchResponseType.Success) {
						throw new Error(`Background inline summarization request failed: ${response.type}`);
					}
					const rawSummaryText = extractInlineSummary(response.value);
					if (!rawSummaryText) {
						throw new Error('Background inline summarization: no <summary> tags found in response');
					}
					if (!toolCallRoundId) {
						throw new Error('Background inline summarization: no round ID to apply summary to');
					}
					// Flush the transcript before snapshotting the line count so
					// the baked "N lines" hint matches the on-disk file at this
					// moment (mirrors the full/simple path in SummarizedConversationHistory.render).
					if (conversationId && this.sessionTranscriptService.getTranscriptPath(conversationId)) {
						await this.sessionTranscriptService.flush(conversationId);
					}
					const summaryText = conversationId
						? appendTranscriptHintToSummary(rawSummaryText, conversationId, this.sessionTranscriptService)
						: rawSummaryText;
					this.logService.debug(`[ConversationHistorySummarizer] background inline compaction completed (${summaryText.length} chars, roundId=${toolCallRoundId})`);

					// Send summarizedConversationHistory telemetry for parity
					// with the standard ConversationHistorySummarizer path.
					const { numRounds, numRoundsSinceLastSummarization } = computeSummarizationRoundCounts(history, rounds);
					const numRoundsInCurrentTurn = rounds.length;
					const lastUsedTool = rounds.at(-1)?.toolCalls?.at(-1)?.name
						?? history.at(-1)?.rounds.at(-1)?.toolCalls?.at(-1)?.name ?? 'none';
					const promptTypes = messages.map(msg => `${msg.role}${'name' in msg && msg.name ? `-${msg.name}` : ''}:${getTextPart(msg.content).length}`).join(',');
					/* __GDPR__
						"summarizedConversationHistory" : {
							"owner": "bhavyau",
							"comment": "Tracks background inline summarization outcome",
							"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
							"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
							"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
							"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
							"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
							"lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The last tool used before summarization." },
							"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
							"promptTypes": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Role and character count of each prompt message in order, as a proxy for cache hit rate (e.g. system:1234,user:567)." },
							"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." },
							"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
							"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." },
							"isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this was triggered during tool calling." },
							"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." },
							"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." },
							"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." },
							"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." }
						}
					*/
					this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
						outcome: 'success',
						model: this.endpoint.model,
						summarizationMode: 'inline',
						conversationId,
						chatRequestId: associatedRequestId,
						lastUsedTool,
						requestId: response.requestId,
						promptTypes,
					}, {
						numRounds,
						turnIndex: history.length,
						curTurnRoundIndex: numRoundsInCurrentTurn,
						isDuringToolCalling: numRoundsInCurrentTurn > 0 ? 1 : 0,
						duration: Date.now() - bgStartTime,
						promptTokenCount: response.usage?.prompt_tokens,
						promptCacheTokenCount: response.usage?.prompt_tokens_details?.cached_tokens,
						responseTokenCount: response.usage?.completion_tokens,
					});

					return {
						summary: summaryText,
						toolCallRoundId,
						promptTokens: response.usage?.prompt_tokens,
						promptCacheTokens: response.usage?.prompt_tokens_details?.cached_tokens,
						outputTokens: response.usage?.completion_tokens,
						durationMs: Date.now() - bgStartTime,
						model: this.endpoint.model,
						summarizationMode: 'inline',
						numRounds,
						numRoundsSinceLastSummarization,
					};
				} else {
					// Standard mode: use triggerSummarize which makes a separate
					// LLM call with a summarization-specific prompt during render.
					const snapshotProps: AgentPromptProps = {
						...props,
						promptContext: {
							...promptContext,
							toolCallRounds: promptContext.toolCallRounds ? [...promptContext.toolCallRounds] : undefined,
							toolCallResults: promptContext.toolCallResults ? { ...promptContext.toolCallResults } : undefined,
						}
					};
					const bgRenderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
						...snapshotProps,
						endpoint: this.endpoint,
						promptContext: snapshotProps.promptContext,
						triggerSummarize: true,
					});
					const bgProgress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart> = { report: () => { } };
					const bgRenderResult = await bgRenderer.render(bgProgress, bgToken);
					const summaryMetadata = bgRenderResult.metadata.get(SummarizedConversationHistoryMetadata);
					if (!summaryMetadata) {
						throw new Error('Background compaction produced no summary metadata');
					}
					this.logService.debug(`[ConversationHistorySummarizer] background compaction completed successfully (roundId=${summaryMetadata.toolCallRoundId})`);
					return {
						summary: summaryMetadata.text,
						toolCallRoundId: summaryMetadata.toolCallRoundId,
						promptTokens: summaryMetadata.usage?.prompt_tokens,
						promptCacheTokens: summaryMetadata.usage?.prompt_tokens_details?.cached_tokens,
						outputTokens: summaryMetadata.usage?.completion_tokens,
						durationMs: Date.now() - bgStartTime,
						model: summaryMetadata.model,
						summarizationMode: summaryMetadata.summarizationMode,
						numRounds: summaryMetadata.numRounds,
						numRoundsSinceLastSummarization: summaryMetadata.numRoundsSinceLastSummarization,
					};
				}
			} catch (err) {
				this.logService.error(err, `[ConversationHistorySummarizer] background compaction failed`);

				// Send failure telemetry for inline background summarization
				if (useInlineSummarization) {
					/* __GDPR__
						"summarizedConversationHistory" : {
							"owner": "bhavyau",
							"comment": "Tracks background inline summarization failure",
							"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
							"detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Detailed failure reason." },
							"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
							"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
							"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
							"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
							"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." }
						}
					*/
					this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
						outcome: 'failed',
						detailedOutcome: err instanceof Error ? err.message : String(err),
						model: this.endpoint.model,
						summarizationMode: 'inline',
						conversationId,
						chatRequestId: associatedRequestId,
					}, {
						duration: Date.now() - bgStartTime,
					});
				}

				throw err;
			}
		}, token);
	}

	/**
	 * Returns the `BackgroundSummarizer` for this session, or `undefined` if
	 * the intent is not an `AgentIntent` (e.g. `AskAgentIntent`).
	 */
	private _getOrCreateBackgroundSummarizer(sessionId: string | undefined): BackgroundSummarizer | undefined {
		if (!sessionId || !(this.intent instanceof AgentIntent)) {
			return undefined;
		}
		return this.intent.getOrCreateBackgroundSummarizer(sessionId, this.endpoint.modelMaxPromptTokens);
	}

	/**
	 * Apply a background-compaction result onto the in-memory rounds so
	 * that the next render picks up the `<conversation-summary>` element.
	 */
	private _applySummaryToRounds(bgResult: { summary: string; toolCallRoundId: string }, promptContext: IBuildPromptContext): void {
		// Check current-turn rounds first
		const currentRound = promptContext.toolCallRounds?.find(r => r.id === bgResult.toolCallRoundId);
		if (currentRound) {
			currentRound.summary = bgResult.summary;
		} else {
			// Fall back to history turns
			let found = false;
			for (const turn of [...promptContext.history].reverse()) {
				const round = turn.rounds.find(r => r.id === bgResult.toolCallRoundId);
				if (round) {
					round.summary = bgResult.summary;
					found = true;
					break;
				}
			}
			if (!found) {
				this.logService.warn(`[ConversationHistorySummarizer] background compaction round ${bgResult.toolCallRoundId} not found in toolCallRounds or history; summary dropped`);
			}
		}
		// Invalidate the auto mode router cache so the next getChatEndpoint()
		// call re-evaluates which model to use after compaction.
		this.automodeService.invalidateRouterCache(this.request);
	}

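	// Illustrative effect (added commentary): after _applySummaryToRounds runs,
	// the targeted round carries `round.summary = '<text>'`; per the doc
	// comment above, the next render then picks the summary up via the
	// <conversation-summary> element instead of replaying the full history.
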
	/**
	 * Persist the summary on the current turn's `resultMetadata` so that
	 * `normalizeSummariesOnRounds` restores it on subsequent turns.
	 */
	private _persistSummaryOnTurn(bgResult: IBackgroundSummarizationResult, promptContext: IBuildPromptContext, contextLengthBefore?: number): void {
		const turn = promptContext.conversation?.getLatestTurn();
		const chatResult = turn?.responseChatResult;
		if (chatResult) {
			const metadata = (chatResult.metadata ?? {}) as Record<string, unknown>;
			const existingSummaries = (metadata['summaries'] as unknown[] ?? []);
			existingSummaries.push({ toolCallRoundId: bgResult.toolCallRoundId, text: bgResult.summary });
			metadata['summaries'] = existingSummaries;
			(chatResult as { metadata: unknown }).metadata = metadata;
		}
		// Also store as a pending summary on the turn so normalizeSummariesOnRounds
		// can restore it even when chatResult doesn't exist yet (mid-tool-call-loop).
		turn?.addPendingSummary(bgResult.toolCallRoundId, bgResult.summary);
		const usage = bgResult.promptTokens !== undefined && bgResult.outputTokens !== undefined
			? { prompt_tokens: bgResult.promptTokens, completion_tokens: bgResult.outputTokens, total_tokens: bgResult.promptTokens + bgResult.outputTokens, ...(bgResult.promptCacheTokens !== undefined ? { prompt_tokens_details: { cached_tokens: bgResult.promptCacheTokens } } : {}) }
			: undefined;
		turn?.setMetadata(new SummarizedConversationHistoryMetadata(
			bgResult.toolCallRoundId,
			bgResult.summary,
			{
				usage,
				model: bgResult.model,
				summarizationMode: bgResult.summarizationMode,
				numRounds: bgResult.numRounds,
				numRoundsSinceLastSummarization: bgResult.numRoundsSinceLastSummarization,
				durationMs: bgResult.durationMs,
				source: 'background',
				outcome: 'success',
				contextLengthBefore,
			},
		));
	}
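
	// Worked example (added; the numbers are hypothetical): promptTokens=50000,
	// outputTokens=1200, promptCacheTokens=45000 yields
	//   { prompt_tokens: 50000, completion_tokens: 1200, total_tokens: 51200,
	//     prompt_tokens_details: { cached_tokens: 45000 } }
	// while a missing promptTokens or outputTokens leaves `usage` undefined.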

	/**
	 * Record a background compaction failure on the current turn's metadata,
	 * matching how foreground compaction records its failures.
	 */
	private _recordBackgroundCompactionFailure(promptContext: IBuildPromptContext, trigger: string): void {
		const turn = promptContext.conversation?.getLatestTurn();
		turn?.setMetadata(new SummarizedConversationHistoryMetadata(
			'', // no toolCallRoundId for failures
			'', // no summary text for failures
			{
				model: this.endpoint.model,
				source: 'background',
				outcome: `noResult_${trigger}`,
				contextLengthBefore: this._lastRenderTokenCount,
			},
		));
	}

	private _sendBackgroundCompactionTelemetry(
		trigger: string,
		outcome: string,
		contextRatio: number,
		promptContext: IBuildPromptContext,
	): void {
		/* __GDPR__
			"backgroundSummarizationApplied" : {
				"owner": "bhavyau",
				"comment": "Tracks background compaction orchestration decisions and outcomes in the agent loop.",
				"trigger": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The code path that triggered background compaction consumption." },
				"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Outcome of the background compaction consumption. One of: 'applied' (result applied and re-render succeeded), 'appliedButReRenderFailed' (result applied but the subsequent re-render still exceeded budget and required a fallback), 'noResult' (no usable result was produced)." },
				"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Id for the current chat conversation." },
				"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID that this background compaction was consumed during." },
				"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID used." },
				"contextRatio": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The context window usage ratio when background compaction was consumed." }
			}
		*/
		this.telemetryService.sendMSFTTelemetryEvent('backgroundSummarizationApplied', {
			trigger,
			outcome,
			conversationId: promptContext.conversation?.sessionId,
			chatRequestId: promptContext.conversation?.getLatestTurn()?.id,
			model: this.endpoint.model,
		}, {
			contextRatio,
		});
		GenAiMetrics.incrementAgentSummarizationCount(this.otelService, outcome);
	}

	override processResponse = undefined;
}