// Path: blob/main/extensions/copilot/src/extension/intents/node/agentIntent.ts
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import * as l10n from '@vscode/l10n';6import { Raw, RenderPromptResult } from '@vscode/prompt-tsx';7import { BudgetExceededError } from '@vscode/prompt-tsx/dist/base/materialized';8import type * as vscode from 'vscode';9import { IChatSessionService } from '../../../platform/chat/common/chatSessionService';10import { ChatFetchResponseType, ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes';11import { ISessionTranscriptService } from '../../../platform/chat/common/sessionTranscriptService';12import { getTextPart } from '../../../platform/chat/common/globalStringUtils';13import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService';14import { isAnthropicFamily, isGptFamily, modelCanUseApplyPatchExclusively, modelCanUseReplaceStringExclusively, modelSupportsApplyPatch, modelSupportsMultiReplaceString, modelSupportsReplaceString, modelSupportsSimplifiedApplyPatchInstructions } from '../../../platform/endpoint/common/chatModelCapabilities';15import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';16import { IAutomodeService } from '../../../platform/endpoint/node/automodeService';17import { IEnvService } from '../../../platform/env/common/envService';18import { ILogService } from '../../../platform/log/common/logService';19import { IEditLogService } from '../../../platform/multiFileEdit/common/editLogService';20import { CUSTOM_TOOL_SEARCH_NAME, isAnthropicContextEditingEnabled } from '../../../platform/networking/common/anthropic';21import { IChatEndpoint } from '../../../platform/networking/common/networking';22import { 
modelsWithoutResponsesContextManagement } from '../../../platform/networking/common/openai';23import { INotebookService } from '../../../platform/notebook/common/notebookService';24import { GenAiMetrics } from '../../../platform/otel/common/genAiMetrics';25import { IOTelService } from '../../../platform/otel/common/otelService';26import { IPromptPathRepresentationService } from '../../../platform/prompts/common/promptPathRepresentationService';27import { ITasksService } from '../../../platform/tasks/common/tasksService';28import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';29import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';30import { ITestProvider } from '../../../platform/testing/common/testProvider';31import { IWorkspaceService } from '../../../platform/workspace/common/workspaceService';3233import { isCancellationError } from '../../../util/vs/base/common/errors';34import { Iterable } from '../../../util/vs/base/common/iterator';35import { IInstantiationService, ServicesAccessor } from '../../../util/vs/platform/instantiation/common/instantiation';3637import { ChatResponseProgressPart2 } from '../../../vscodeTypes';38import { ICommandService } from '../../commands/node/commandService';39import { Intent } from '../../common/constants';40import { ChatVariablesCollection } from '../../prompt/common/chatVariablesCollection';41import { Conversation, normalizeSummariesOnRounds, RenderedUserMessageMetadata, TurnStatus } from '../../prompt/common/conversation';42import { IBuildPromptContext } from '../../prompt/common/intents';43import { getRequestedToolCallIterationLimit, IContinueOnErrorConfirmation } from '../../prompt/common/specialRequestTypes';44import { ChatTelemetryBuilder } from '../../prompt/node/chatParticipantTelemetry';45import { IDefaultIntentRequestHandlerOptions } from '../../prompt/node/defaultIntentRequestHandler';46import { IDocumentContext } from 
'../../prompt/node/documentContext';47import { IBuildPromptResult, IIntent, IIntentInvocation } from '../../prompt/node/intents';48import { AgentPrompt, AgentPromptProps } from '../../prompts/node/agent/agentPrompt';49import { BackgroundSummarizationState, BackgroundSummarizer, IBackgroundSummarizationResult, shouldKickOffBackgroundSummarization } from '../../prompts/node/agent/backgroundSummarizer';50import { AgentPromptCustomizations, PromptRegistry } from '../../prompts/node/agent/promptRegistry';51import { extractInlineSummary, InlineSummarizationUserMessage, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder, appendTranscriptHintToSummary, computeSummarizationRoundCounts } from '../../prompts/node/agent/summarizedConversationHistory';52import { PromptRenderer, renderPromptElement } from '../../prompts/node/base/promptRenderer';53import { ICodeMapperService } from '../../prompts/node/codeMapper/codeMapperService';54import { EditCodePrompt2 } from '../../prompts/node/panel/editCodePrompt2';55import { NotebookInlinePrompt } from '../../prompts/node/panel/notebookInlinePrompt';56import { ToolResultMetadata } from '../../prompts/node/panel/toolCalling';57import { IEditToolLearningService } from '../../tools/common/editToolLearningService';58import { normalizeToolSchema } from '../../tools/common/toolSchemaNormalizer';59import { ContributedToolName, ToolName } from '../../tools/common/toolNames';60import { IToolsService } from '../../tools/common/toolsService';61import { applyPatch5Description } from '../../tools/node/applyPatchTool';62import { multiReplaceStringPrimaryDescription } from '../../tools/node/multiReplaceStringTool';63import { replaceStringBatchDescription } from '../../tools/node/replaceStringTool';64import { getAgentMaxRequests } from '../common/agentConfig';65import { addCacheBreakpoints } from './cacheBreakpoints';66import { EditCodeIntent, EditCodeIntentInvocation, 
EditCodeIntentInvocationOptions, mergeMetadata, toNewChatReferences } from './editCodeIntent';67import { ToolCallingLoop } from './toolCallingLoop';6869function isResponsesCompactionContextManagementEnabled(endpoint: IChatEndpoint, configurationService: IConfigurationService, experimentationService: IExperimentationService): boolean {70return endpoint.apiType === 'responses'71&& configurationService.getExperimentBasedConfig(ConfigKey.ResponsesApiContextManagementEnabled, experimentationService)72&& !modelsWithoutResponsesContextManagement.has(endpoint.family);73}7475export const getAgentTools = async (accessor: ServicesAccessor, request: vscode.ChatRequest, model?: IChatEndpoint) => {76const toolsService = accessor.get<IToolsService>(IToolsService);77const testService = accessor.get<ITestProvider>(ITestProvider);78const tasksService = accessor.get<ITasksService>(ITasksService);79const configurationService = accessor.get<IConfigurationService>(IConfigurationService);80const experimentationService = accessor.get<IExperimentationService>(IExperimentationService);81const endpointProvider = accessor.get<IEndpointProvider>(IEndpointProvider);82const editToolLearningService = accessor.get<IEditToolLearningService>(IEditToolLearningService);83model ??= await endpointProvider.getChatEndpoint(request);8485const allowTools: Record<string, boolean> = {};8687const learned = editToolLearningService.getPreferredEndpointEditTool(model);88if (learned) { // a learning-enabled (BYOK) model, we should go with what it prefers89allowTools[ToolName.EditFile] = learned.includes(ToolName.EditFile);90allowTools[ToolName.ReplaceString] = learned.includes(ToolName.ReplaceString);91allowTools[ToolName.MultiReplaceString] = learned.includes(ToolName.MultiReplaceString);92allowTools[ToolName.ApplyPatch] = learned.includes(ToolName.ApplyPatch);93} else {94allowTools[ToolName.EditFile] = true;95allowTools[ToolName.ReplaceString] = modelSupportsReplaceString(model);96allowTools[ToolName.ApplyPatch] 
= modelSupportsApplyPatch(model) && !!toolsService.getTool(ToolName.ApplyPatch);9798if (allowTools[ToolName.ApplyPatch] && modelCanUseApplyPatchExclusively(model)) {99allowTools[ToolName.EditFile] = false;100}101102if (modelCanUseReplaceStringExclusively(model)) {103allowTools[ToolName.ReplaceString] = true;104allowTools[ToolName.EditFile] = false;105}106107if (allowTools[ToolName.ReplaceString] && modelSupportsMultiReplaceString(model)) {108allowTools[ToolName.MultiReplaceString] = true;109}110}111112allowTools[ToolName.CoreRunTest] = await testService.hasAnyTests();113allowTools[ToolName.CoreRunTask] = tasksService.getTasks().length > 0;114115const searchSubagentEnabled = configurationService.getExperimentBasedConfig(ConfigKey.Advanced.SearchSubagentToolEnabled, experimentationService);116const isGptOrAnthropic = isGptFamily(model) || isAnthropicFamily(model);117allowTools[ToolName.SearchSubagent] = isGptOrAnthropic && searchSubagentEnabled;118119const executionSubagentEnabled = configurationService.getExperimentBasedConfig(ConfigKey.Advanced.ExecutionSubagentToolEnabled, experimentationService);120allowTools[ToolName.ExecutionSubagent] = isGptOrAnthropic && executionSubagentEnabled;121122const skillToolEnabled = configurationService.getExperimentBasedConfig(ConfigKey.Advanced.SkillToolEnabled, experimentationService);123allowTools[ToolName.Skill] = skillToolEnabled;124125allowTools[CUSTOM_TOOL_SEARCH_NAME] = !!model.supportsToolSearch;126127if (model.family.includes('grok-code')) {128allowTools[ToolName.CoreManageTodoList] = false;129}130131// Enable task_complete in autopilot mode so the model can signal task completion.132// The tool is registered in core as a built-in but needs explicit opt-in here.133allowTools['task_complete'] = request.permissionLevel === 'autopilot';134135allowTools[ToolName.EditFilesPlaceholder] = false;136allowTools[ToolName.SessionStoreSql] = false; // Only available via /chronicle137// todo@connor4312: string check here is for 
back-compat for 1.109 Insiders138if (Iterable.some(request.tools, ([t, enabled]) => (typeof t === 'string' ? t : t.name) === ContributedToolName.EditFilesPlaceholder && enabled === false)) {139allowTools[ToolName.ApplyPatch] = false;140allowTools[ToolName.EditFile] = false;141allowTools[ToolName.ReplaceString] = false;142allowTools[ToolName.MultiReplaceString] = false;143}144145if (model.family.toLowerCase().includes('gemini-3') && configurationService.getExperimentBasedConfig(ConfigKey.Advanced.Gemini3MultiReplaceString, experimentationService)) {146allowTools[ToolName.MultiReplaceString] = true;147}148149const tools = toolsService.getEnabledTools(request, model, tool => {150if (typeof allowTools[tool.name] === 'boolean') {151return allowTools[tool.name];152}153154// Must return undefined to fall back to other checks155return undefined;156});157158if (modelSupportsSimplifiedApplyPatchInstructions(model) && configurationService.getExperimentBasedConfig(ConfigKey.Advanced.Gpt5AlternativePatch, experimentationService)) {159const ap = tools.findIndex(t => t.name === ToolName.ApplyPatch);160if (ap !== -1) {161tools[ap] = { ...tools[ap], description: applyPatch5Description };162}163}164165if (configurationService.getExperimentBasedConfig(ConfigKey.Advanced.BatchReplaceStringDescriptions, experimentationService)) {166const rs = tools.findIndex(t => t.name === ToolName.ReplaceString);167if (rs !== -1) {168tools[rs] = { ...tools[rs], description: replaceStringBatchDescription };169}170const mrs = tools.findIndex(t => t.name === ToolName.MultiReplaceString);171if (mrs !== -1) {172tools[mrs] = { ...tools[mrs], description: multiReplaceStringPrimaryDescription };173}174}175176return tools;177};178179export class AgentIntent extends EditCodeIntent {180181static override readonly ID = Intent.Agent;182183override readonly id = AgentIntent.ID;184185private readonly _backgroundSummarizers = new Map<string, BackgroundSummarizer>();186187constructor(188@IInstantiationService 
instantiationService: IInstantiationService,189@IEndpointProvider endpointProvider: IEndpointProvider,190@IConfigurationService configurationService: IConfigurationService,191@IExperimentationService expService: IExperimentationService,192@ICodeMapperService codeMapperService: ICodeMapperService,193@IWorkspaceService workspaceService: IWorkspaceService,194@IChatSessionService chatSessionService: IChatSessionService,195@IAutomodeService private readonly _automodeService: IAutomodeService,196) {197super(instantiationService, endpointProvider, configurationService, expService, codeMapperService, workspaceService, { intentInvocation: AgentIntentInvocation, processCodeblocks: false });198chatSessionService.onDidDisposeChatSession(sessionId => {199const summarizer = this._backgroundSummarizers.get(sessionId);200if (summarizer) {201summarizer.cancel();202this._backgroundSummarizers.delete(sessionId);203}204});205}206207getOrCreateBackgroundSummarizer(sessionId: string, modelMaxPromptTokens: number): BackgroundSummarizer {208let summarizer = this._backgroundSummarizers.get(sessionId);209if (!summarizer) {210summarizer = new BackgroundSummarizer(modelMaxPromptTokens);211this._backgroundSummarizers.set(sessionId, summarizer);212}213return summarizer;214}215216protected override getIntentHandlerOptions(request: vscode.ChatRequest): IDefaultIntentRequestHandlerOptions | undefined {217return {218maxToolCallIterations: getRequestedToolCallIterationLimit(request) ??219this.instantiationService.invokeFunction(getAgentMaxRequests),220temperature: this.configurationService.getConfig(ConfigKey.Advanced.AgentTemperature) ?? 
0,221overrideRequestLocation: ChatLocation.Agent222};223}224225override async handleRequest(226conversation: Conversation,227request: vscode.ChatRequest,228stream: vscode.ChatResponseStream,229token: vscode.CancellationToken,230documentContext: IDocumentContext | undefined,231agentName: string,232location: ChatLocation,233chatTelemetry: ChatTelemetryBuilder,234yieldRequested: () => boolean235): Promise<vscode.ChatResult> {236if (request.command === 'compact') {237return this.handleSummarizeCommand(conversation, request, stream, token);238}239240return super.handleRequest(conversation, request, stream, token, documentContext, agentName, location, chatTelemetry, yieldRequested);241}242243private async handleSummarizeCommand(244conversation: Conversation,245request: vscode.ChatRequest,246stream: vscode.ChatResponseStream,247token: vscode.CancellationToken248): Promise<vscode.ChatResult> {249normalizeSummariesOnRounds(conversation.turns);250251// Exclude the current /compact turn.252const history = conversation.turns.slice(0, -1);253if (history.length === 0) {254stream.markdown(l10n.t('Nothing to compact. Start a conversation first.'));255return {};256}257258// The summarization metadata needs to be associated with a tool call round.259const lastRoundId = history.at(-1)?.rounds.at(-1)?.id;260if (!lastRoundId) {261stream.markdown(l10n.t('Nothing to compact. 
Start a conversation with tool calls first.'));262return {};263}264265const endpoint = await this.endpointProvider.getChatEndpoint(request);266if (isResponsesCompactionContextManagementEnabled(endpoint, this.configurationService, this.expService)) {267stream.markdown(l10n.t('Compaction is already managed by context management for this session.'));268return {};269}270271const promptContext: IBuildPromptContext = {272history,273chatVariables: new ChatVariablesCollection([]),274query: '',275toolCallRounds: [],276conversation,277};278279try {280const propsBuilder = this.instantiationService.createInstance(SummarizedConversationHistoryPropsBuilder);281const propsInfo = propsBuilder.getProps({282priority: 1,283endpoint,284location: ChatLocation.Agent,285promptContext,286maxToolResultLength: Infinity,287});288289stream.progress(l10n.t('Compacting conversation...'));290291const progress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart> = {292report: () => { }293};294const renderer = PromptRenderer.create(this.instantiationService, endpoint, SummarizedConversationHistory, {295...propsInfo.props,296triggerSummarize: true,297summarizationInstructions: request.prompt || undefined,298});299const result = await renderer.render(progress, token);300const summaryMetadata = result.metadata.get(SummarizedConversationHistoryMetadata);301if (!summaryMetadata) {302stream.markdown(l10n.t('Unable to compact conversation.'));303return {};304}305306if (summaryMetadata.usage) {307stream.usage({308promptTokens: summaryMetadata.usage.prompt_tokens,309completionTokens: summaryMetadata.usage.completion_tokens,310promptTokenDetails: summaryMetadata.promptTokenDetails,311});312}313314stream.markdown(l10n.t('Compacted conversation.'));315const lastTurn = conversation.getLatestTurn();316// Next turn if using auto will select a new endpoint317this._automodeService.invalidateRouterCache(request);318319const chatResult: vscode.ChatResult = {320metadata: {321summary: 
{322toolCallRoundId: summaryMetadata.toolCallRoundId,323text: summaryMetadata.text,324}325}326};327328// setResponse must be called so that turn.resultMetadata?.summary329// is available for normalizeSummariesOnRounds on subsequent turns.330lastTurn.setResponse(331TurnStatus.Success,332{ type: 'model', message: '' },333undefined,334chatResult,335);336337lastTurn.setMetadata(summaryMetadata);338339return chatResult;340} catch (e) {341if (isCancellationError(e)) {342return {};343}344345const message = e instanceof Error ? e.message : String(e);346stream.markdown(l10n.t('Failed to compact conversation: {0}', message));347return {};348}349}350}351352export class AgentIntentInvocation extends EditCodeIntentInvocation implements IIntentInvocation {353354public override readonly codeblocksRepresentEdits = false;355356protected prompt: typeof AgentPrompt | typeof EditCodePrompt2 | typeof NotebookInlinePrompt = AgentPrompt;357358protected extraPromptProps: Partial<AgentPromptProps> | undefined;359360private _resolvedCustomizations: AgentPromptCustomizations | undefined;361362private _lastRenderTokenCount: number = 0;363364/** Cached model capabilities from the most recent main agent render, reused by the background summarizer. */365private _lastModelCapabilities: { enableThinking: boolean; reasoningEffort: string | undefined; enableToolSearch: boolean; enableContextEditing: boolean } | undefined;366367/**368* RNG used to jitter the inline-summarization trigger threshold around 0.80.369* Tests may overwrite this directly (e.g. 
`(invocation as any)._thresholdRng = () => 0.5`).370*/371private _thresholdRng: () => number = Math.random;372373constructor(374intent: IIntent,375location: ChatLocation,376endpoint: IChatEndpoint,377request: vscode.ChatRequest,378intentOptions: EditCodeIntentInvocationOptions,379@IInstantiationService instantiationService: IInstantiationService,380@ICodeMapperService codeMapperService: ICodeMapperService,381@IEnvService envService: IEnvService,382@IPromptPathRepresentationService promptPathRepresentationService: IPromptPathRepresentationService,383@IEndpointProvider endpointProvider: IEndpointProvider,384@IWorkspaceService workspaceService: IWorkspaceService,385@IToolsService toolsService: IToolsService,386@IConfigurationService configurationService: IConfigurationService,387@IEditLogService editLogService: IEditLogService,388@ICommandService commandService: ICommandService,389@ITelemetryService telemetryService: ITelemetryService,390@INotebookService notebookService: INotebookService,391@ILogService private readonly logService: ILogService,392@IExperimentationService private readonly expService: IExperimentationService,393@IAutomodeService private readonly automodeService: IAutomodeService,394@IOTelService protected override readonly otelService: IOTelService,395@ISessionTranscriptService private readonly sessionTranscriptService: ISessionTranscriptService,396) {397super(intent, location, endpoint, request, intentOptions, instantiationService, codeMapperService, envService, promptPathRepresentationService, endpointProvider, workspaceService, toolsService, configurationService, editLogService, commandService, telemetryService, notebookService, otelService);398}399400public override getAvailableTools(): Promise<vscode.LanguageModelToolInformation[]> {401return this.instantiationService.invokeFunction(getAgentTools, this.request);402}403404override async buildPrompt(405promptContext: IBuildPromptContext,406progress: vscode.Progress<vscode.ChatResponseReferencePart | 
vscode.ChatResponseProgressPart>,407token: vscode.CancellationToken408): Promise<IBuildPromptResult> {409this._resolvedCustomizations = await PromptRegistry.resolveAllCustomizations(this.instantiationService, this.endpoint);410// Add any references from the codebase invocation to the request411const codebase = await this._getCodebaseReferences(promptContext, token);412413let variables = promptContext.chatVariables;414let toolReferences: vscode.ChatPromptReference[] = [];415if (codebase) {416toolReferences = toNewChatReferences(variables, codebase.references);417variables = new ChatVariablesCollection([...this.request.references, ...toolReferences]);418}419420const tools = promptContext.tools?.availableTools;421const toolSearchEnabled = !!this.endpoint.supportsToolSearch;422const toolTokens = tools?.length ? await this.endpoint.acquireTokenizer().countToolTokens(tools) : 0;423424const summarizeThresholdOverride = this.configurationService.getConfig<number | undefined>(ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold);425if (typeof summarizeThresholdOverride === 'number' && summarizeThresholdOverride < 100 && summarizeThresholdOverride > 0) {426throw new Error(`Setting github.copilot.${ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold.id} is too low`);427}428429const baseBudget = Math.min(430this.configurationService.getConfig<number | undefined>(ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold) ?? 
this.endpoint.modelMaxPromptTokens,431this.endpoint.modelMaxPromptTokens432);433const useTruncation = this.endpoint.apiType === 'responses' && this.configurationService.getConfig(ConfigKey.Advanced.UseResponsesApiTruncation);434const responsesCompactionContextManagementEnabled = isResponsesCompactionContextManagementEnabled(this.endpoint, this.configurationService, this.expService);435const summarizationEnabled = this.configurationService.getConfig(ConfigKey.SummarizeAgentConversationHistory) && this.prompt === AgentPrompt && !responsesCompactionContextManagementEnabled;436const useInlineSummarization = summarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.Advanced.AgentHistorySummarizationInline, this.expService);437438// When tools are present, apply a 10% safety margin on the message portion439// to account for tokenizer discrepancies between our tool-token counter and440// the model's actual tokenizer. Without this, an undercount could cause an441// API-level context_length_exceeded error instead of a graceful442// BudgetExceededError from prompt-tsx. When there are no tools the endpoint's443// own modelMaxPromptTokens is used unchanged.444const messageBudget = Math.max(1, Math.floor((baseBudget - toolTokens) * 0.9));445const safeBudget = useTruncation ? Number.MAX_SAFE_INTEGER : messageBudget;446const endpoint = toolTokens > 0 ? this.endpoint.cloneWithTokenOverride(safeBudget) : this.endpoint;447448this.logService.debug(`[Agent] rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}, totalTools: ${tools?.length ?? 
0}, toolSearchEnabled: ${toolSearchEnabled}), summarizationEnabled=${summarizationEnabled}`);449let result: RenderPromptResult;450// When the "last two messages" cache breakpoint strategy is enabled,451// suppress prompt-tsx and heuristic cache breakpoints — messagesApi.ts452// will place breakpoints on the last two merged messages instead.453const useLastTwoMessagesCacheBPs = isAnthropicFamily(this.endpoint)454&& this.configurationService.getExperimentBasedConfig(ConfigKey.AnthropicCacheBreakpointsLastTwoMessages, this.expService);455const props: AgentPromptProps = {456endpoint,457promptContext: {458...promptContext,459tools: promptContext.tools && {460...promptContext.tools,461toolReferences: this.stableToolReferences.filter((r) => r.name !== ToolName.Codebase),462}463},464location: this.location,465enableCacheBreakpoints: summarizationEnabled && !useLastTwoMessagesCacheBPs,466...this.extraPromptProps,467customizations: this._resolvedCustomizations468};469470// ── Background compaction ────────────────────────────────────────471//472// Pre-render: if a previous bg pass completed, apply it now.473//474// BudgetExceeded: if bg is InProgress/Completed, wait/apply.475// Otherwise fall back to foreground summarization.476//477// Post-render (≥ 80% + Idle): kick off background compaction478// so it is ready for a future turn.479//480const backgroundSummarizer = summarizationEnabled ? this._getOrCreateBackgroundSummarizer(promptContext.conversation?.sessionId) : undefined;481const contextRatio = backgroundSummarizer && baseBudget > 0482? 
(this._lastRenderTokenCount + toolTokens) / baseBudget483: 0;484485// Track whether this iteration already performed compaction-related work486// (including applying a summary or using a foreground fallback path) so487// we don't immediately re-trigger background compaction in the post-render check.488let didSummarizeThisIteration = false;489490// If a previous background pass completed, apply its summary now.491if (summarizationEnabled && backgroundSummarizer?.state === BackgroundSummarizationState.Completed) {492const bgResult = backgroundSummarizer.consumeAndReset();493if (bgResult) {494this.logService.debug(`[ConversationHistorySummarizer] applying completed background summary (roundId=${bgResult.toolCallRoundId})`);495progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation')));496this._applySummaryToRounds(bgResult, promptContext);497this._persistSummaryOnTurn(bgResult, promptContext, this._lastRenderTokenCount);498this._sendBackgroundCompactionTelemetry('preRender', 'applied', contextRatio, promptContext);499didSummarizeThisIteration = true;500} else {501this.logService.warn(`[ConversationHistorySummarizer] background compaction state was Completed but consumeAndReset returned no result`);502this._sendBackgroundCompactionTelemetry('preRender', 'noResult', contextRatio, promptContext);503this._recordBackgroundCompactionFailure(promptContext, 'preRender');504}505}506507// Render the prompt without summarization or cache breakpoints, using508// the original endpoint (not reduced for tools/safety buffer).509const renderWithoutSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise<RenderPromptResult> => {510this.logService.debug(`[Agent] ${reason}, rendering without summarization`);511const renderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {512...renderProps,513endpoint: this.endpoint,514enableCacheBreakpoints: false515});516try 
{517return await renderer.render(progress, token);518} catch (e) {519if (e instanceof BudgetExceededError) {520this.logService.error(e, `[Agent] fallback render failed due to budget exceeded`);521const maxTokens = this.endpoint.modelMaxPromptTokens;522throw new Error(`Unable to build prompt, modelMaxPromptTokens = ${maxTokens} (${e.message})`);523}524throw e;525}526};527528// Helper function for synchronous summarization flow with fallbacks529const renderWithSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise<RenderPromptResult> => {530// Check if a previous foreground summarization already failed in this531// turn. The metadata is set on the turn returned by getLatestTurn(),532// which is the same turn throughout a single buildPrompt call since533// the conversation doesn't advance mid-render.534const turn = promptContext.conversation?.getLatestTurn();535const previousForegroundSummary = turn?.getMetadata(SummarizedConversationHistoryMetadata);536if (previousForegroundSummary?.source === 'foreground' && previousForegroundSummary.outcome && previousForegroundSummary.outcome !== 'success') {537this.logService.debug(`[ConversationHistorySummarizer] ${reason}, skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`);538/* __GDPR__539"triggerSummarizeSkipped" : {540"owner": "bhavyau",541"comment": "Tracks when foreground summarization was skipped because a previous attempt already failed in this turn.",542"previousOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The outcome of the previous failed summarization attempt." },543"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." 
}544}545*/546this.telemetryService.sendMSFTTelemetryEvent('triggerSummarizeSkipped', { previousOutcome: previousForegroundSummary.outcome, model: renderProps.endpoint.model });547GenAiMetrics.incrementAgentSummarizationCount(this.otelService, 'skipped');548return renderWithoutSummarization(`skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`, renderProps);549}550551this.logService.debug(`[ConversationHistorySummarizer] ${reason}, triggering summarization`);552try {553const renderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {554...renderProps,555endpoint: this.endpoint,556promptContext: renderProps.promptContext,557triggerSummarize: true,558forceSimpleSummary: true,559});560return await renderer.render(progress, token);561} catch (e) {562this.logService.error(e, `[ConversationHistorySummarizer] summarization failed`);563const errorKind = e instanceof BudgetExceededError ? 'budgetExceeded' : 'error';564/* __GDPR__565"triggerSummarizeFailed" : {566"owner": "roblourens",567"comment": "Tracks when triggering summarization failed - for example, a summary was created but not applied successfully.",568"errorKind": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state or failure reason of the summarization." },569"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID used for the summarization." 
}570}571*/572this.telemetryService.sendMSFTTelemetryEvent('triggerSummarizeFailed', { errorKind, model: renderProps.endpoint.model });573GenAiMetrics.incrementAgentSummarizationCount(this.otelService, 'failed');574575// Track failed foreground compaction576const turn = promptContext.conversation?.getLatestTurn();577turn?.setMetadata(new SummarizedConversationHistoryMetadata(578'', // no toolCallRoundId for failures579'', // no summary text for failures580{581model: renderProps.endpoint.model,582source: 'foreground',583outcome: errorKind,584contextLengthBefore: this._lastRenderTokenCount,585},586));587588return renderWithoutSummarization(`summarization failed (${errorKind})`, renderProps);589}590};591592const contextLengthBefore = this._lastRenderTokenCount;593594try {595const renderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, props);596result = await renderer.render(progress, token);597} catch (e) {598if (e instanceof BudgetExceededError && summarizationEnabled) {599if (!promptContext.toolCallResults) {600promptContext = {601...promptContext,602toolCallResults: {}603};604}605e.metadata.getAll(ToolResultMetadata).forEach((metadata) => {606promptContext.toolCallResults![metadata.toolCallId] = metadata.result;607});608609// If a background compaction is already running or completed,610// wait for / apply it instead of firing another LLM request.611if (backgroundSummarizer && (backgroundSummarizer.state === BackgroundSummarizationState.InProgress || backgroundSummarizer.state === BackgroundSummarizationState.Completed)) {612let budgetExceededTrigger: string;613if (backgroundSummarizer.state === BackgroundSummarizationState.InProgress) {614budgetExceededTrigger = 'budgetExceededWaited';615this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — waiting on in-progress background compaction instead of new request`);616const summaryPromise = backgroundSummarizer.waitForCompletion();617progress.report(new 
ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {618try { await summaryPromise; } catch { }619return l10n.t('Compacted conversation');620}));621await summaryPromise;622} else {623budgetExceededTrigger = 'budgetExceededReady';624this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — applying already-completed background compaction`);625progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation')));626}627const bgResult = backgroundSummarizer.consumeAndReset();628if (bgResult) {629this.logService.debug(`[ConversationHistorySummarizer] background compaction applied after budget exceeded (roundId=${bgResult.toolCallRoundId})`);630this._applySummaryToRounds(bgResult, promptContext);631this._persistSummaryOnTurn(bgResult, promptContext, contextLengthBefore);632didSummarizeThisIteration = true;633try {634const reRenderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, { ...props, promptContext });635result = await reRenderer.render(progress, token);636this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'applied', contextRatio, promptContext);637} catch (reRenderError) {638if (reRenderError instanceof BudgetExceededError) {639this.logService.debug(`[ConversationHistorySummarizer] re-render after background compaction still exceeded budget — falling back`);640this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'appliedButReRenderFailed', contextRatio, promptContext);641result = await renderWithoutSummarization('budget exceeded after background compaction applied', { ...props, promptContext });642} else {643throw reRenderError;644}645}646} else {647this.logService.debug(`[ConversationHistorySummarizer] background compaction produced no usable result after budget exceeded — falling back to synchronous summarization`);648this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'noResult', contextRatio, 
promptContext);649this._recordBackgroundCompactionFailure(promptContext, budgetExceededTrigger);650// Background compaction failed — fall back to synchronous summarization651result = await renderWithSummarization(`budget exceeded(${e.message}), background compaction failed`);652didSummarizeThisIteration = true;653}654} else {655result = await renderWithSummarization(`budget exceeded(${e.message})`);656didSummarizeThisIteration = true;657}658} else {659throw e;660}661}662663this._lastRenderTokenCount = result.tokenCount;664665// Track foreground compaction if summarization happened during rendering666const summaryMeta = result.metadata.get(SummarizedConversationHistoryMetadata);667if (summaryMeta) {668const turn = promptContext.conversation?.getLatestTurn();669turn?.setMetadata(new SummarizedConversationHistoryMetadata(670summaryMeta.toolCallRoundId,671summaryMeta.text,672{673thinking: summaryMeta.thinking,674usage: summaryMeta.usage,675promptTokenDetails: summaryMeta.promptTokenDetails,676model: summaryMeta.model,677summarizationMode: summaryMeta.summarizationMode,678numRounds: summaryMeta.numRounds,679numRoundsSinceLastSummarization: summaryMeta.numRoundsSinceLastSummarization,680durationMs: summaryMeta.durationMs,681source: 'foreground',682outcome: 'success',683contextLengthBefore,684},685));686}687688// Post-render: kick off background compaction if idle and over the689// threshold. For the inline-summarization path we care about prompt690// cache parity with the main agent fetch — so we gate kick-off on a691// completed tool call (cache has been warmed) and jitter the threshold692// around 0.80 to avoid firing at the same exact boundary every time.693// The non-inline path forks its own prompt and sees no cache benefit,694// so it keeps the simple >= 0.80 behavior.695if (summarizationEnabled && backgroundSummarizer && !didSummarizeThisIteration) {696const postRenderRatio = baseBudget > 0697? 
(result.tokenCount + toolTokens) / baseBudget698: 0;699700const idleOrFailed = backgroundSummarizer.state === BackgroundSummarizationState.Idle701|| backgroundSummarizer.state === BackgroundSummarizationState.Failed;702703const cacheWarm = (promptContext.toolCallRounds?.length ?? 0) > 0;704705const kickOff = shouldKickOffBackgroundSummarization(postRenderRatio, useInlineSummarization, cacheWarm, this._thresholdRng);706707if (kickOff && idleOrFailed) {708if (useInlineSummarization) {709// Compute and cache model capabilities from the current render's710// messages. These must match the main agent fetch for cache parity.711const strippedMessages = ToolCallingLoop.stripInternalToolCallIds(result.messages);712const rawEffort = this.request.modelConfiguration?.reasoningEffort;713const isSubagent = !!this.request.subAgentInvocationId;714// Must match the main agent's enableThinking logic in715// toolCallingLoop.ts runOne() — thinking is only disabled716// on continuation turns for Anthropic when no thinking717// blocks exist yet in the messages.718const shouldDisableThinking = !!promptContext.isContinuation && isAnthropicFamily(this.endpoint) && !ToolCallingLoop.messagesContainThinking(strippedMessages);719this._lastModelCapabilities = {720enableThinking: !shouldDisableThinking,721reasoningEffort: typeof rawEffort === 'string' ? 
								rawEffort : undefined,
						enableToolSearch: !isSubagent && !!this.endpoint.supportsToolSearch,
						enableContextEditing: !isSubagent && isAnthropicContextEditingEnabled(this.endpoint, this.configurationService, this.expService),
					};
				}
				this._startBackgroundSummarization(backgroundSummarizer, result.messages, promptContext, props, token, postRenderRatio, useInlineSummarization);
			}
		}

		// Record the rendered user message on the current turn, at most once per
		// turn (only when no RenderedUserMessageMetadata exists yet).
		const lastMessage = result.messages.at(-1);
		if (lastMessage?.role === Raw.ChatRole.User) {
			const currentTurn = promptContext.conversation?.getLatestTurn();
			if (currentTurn && !currentTurn.getMetadata(RenderedUserMessageMetadata)) {
				currentTurn.setMetadata(new RenderedUserMessageMetadata(lastMessage.content));
			}
		}

		if (!useLastTwoMessagesCacheBPs) {
			addCacheBreakpoints(result.messages);
		}

		if (this.request.command === 'error') {
			// Should trigger a 400
			result.messages.push({
				role: Raw.ChatRole.Assistant,
				content: [],
				toolCalls: [{ type: 'function', id: '', function: { name: 'tool', arguments: '{' } }]
			});
		}

		return {
			...result,
			// The codebase tool is not actually called/referenced in the edit prompt, so we need to
			// merge its metadata so that its output is not lost and it's not called repeatedly every turn
			// todo@connor4312/joycerhl: this seems a bit janky
			metadata: codebase ? mergeMetadata(result.metadata, codebase.metadatas) : result.metadata,
			// Don't report file references that came in via chat variables in an editing session, unless they have warnings,
			// because they are already displayed as part of the working set
			// references: result.references.filter((ref) => this.shouldKeepReference(editCodeStep, ref, toolReferences, chatVariables)),
		};
	}

	/**
	 * Adds a "Try Again" confirmation button to the error details so the user
	 * can retry, unless the response was filtered.
	 */
	modifyErrorDetails(errorDetails: vscode.ChatErrorDetails, response: ChatResponse): vscode.ChatErrorDetails {
		if (!errorDetails.responseIsFiltered) {
			errorDetails.confirmationButtons = [
				...(errorDetails.confirmationButtons ?? []),
				{ data: { copilotContinueOnError: true } satisfies IContinueOnErrorConfirmation, label: l10n.t('Try Again') },
			];
		}
		return errorDetails;
	}

	/**
	 * When the latest turn accepted a confirmation, returns the prompt
	 * variables of the most recent non-confirmation turn (variables are not
	 * re-attached to confirmation requests). Otherwise returns undefined.
	 */
	getAdditionalVariables(promptContext: IBuildPromptContext): ChatVariablesCollection | undefined {
		const lastTurn = promptContext.conversation?.turns.at(-1);
		if (!lastTurn) {
			return;
		}

		// Search backwards to find the first real request and return those variables too.
		// Variables aren't re-attached to requests from confirmations.
		// TODO https://github.com/microsoft/vscode/issues/262858, more to do here
		if (lastTurn.acceptedConfirmationData) {
			const turns = promptContext.conversation!.turns.slice(0, -1);
			for (const turn of Iterable.reverse(turns)) {
				if (!turn.acceptedConfirmationData) {
					return turn.promptVariables;
				}
			}
		}
	}

	/**
	 * Kicks off a background (asynchronous) conversation compaction on the
	 * given summarizer, using either the inline path (forks the main render's
	 * messages for prompt-cache parity) or the standard triggerSummarize path.
	 */
	private _startBackgroundSummarization(
		backgroundSummarizer: BackgroundSummarizer,
		mainRenderMessages: Raw.ChatMessage[],
		promptContext: IBuildPromptContext,
		props: AgentPromptProps,
		token: vscode.CancellationToken,
		contextRatio: number,
		useInlineSummarization: boolean,
	): void {
		this.logService.debug(`[ConversationHistorySummarizer] context at ${(contextRatio * 100).toFixed(0)}% — starting background compaction (inline=${useInlineSummarization})`);

		const bgStartTime = Date.now();

		// Snapshot rounds so telemetry reflects state at kick-off time, not at
		// completion time (the main loop mutates toolCallRounds). History is
		// stable across a single user turn so a reference is sufficient.
		const rounds = [...(promptContext.toolCallRounds ??
[])];
		const history = promptContext.history;
		// Pick the round the summary will be attached to: prefer the round
		// before the last in the current turn (keeping the last round
		// verbatim); fall back to the single current round, then to the most
		// recent round found in history.
		let toolCallRoundId: string | undefined;
		if (rounds.length >= 2) {
			// Mark the round before the last, preserving the last round verbatim
			toolCallRoundId = rounds[rounds.length - 2].id;
		} else if (rounds.length === 1) {
			toolCallRoundId = rounds[0].id;
		} else {
			// No rounds in the current turn: walk history backwards until a
			// turn with at least one round is found.
			for (let i = history.length - 1; i >= 0 && !toolCallRoundId; i--) {
				const lastRound = history[i].rounds.at(-1);
				if (lastRound) {
					toolCallRoundId = lastRound.id;
				}
			}
		}

		// Build tool schemas matching the main agent loop so the prompt
		// prefix (system + tools + messages) is identical for cache hits.
		const availableTools = promptContext.tools?.availableTools;
		const normalizedTools = availableTools?.length ? normalizeToolSchema(
			this.endpoint.family,
			availableTools.map(tool => ({
				function: {
					name: tool.name,
					description: tool.description,
					parameters: tool.inputSchema && Object.keys(tool.inputSchema).length ? tool.inputSchema : undefined
				},
				type: 'function' as const,
			})),
			(tool, rule) => {
				this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`);
			},
		) : undefined;
		const toolOpts = normalizedTools?.length ? {
			tools: normalizedTools,
		} : undefined;

		const associatedRequestId = promptContext.conversation?.getLatestTurn()?.id;
		const conversationId = promptContext.conversation?.sessionId;
		const modelCapabilities = this._lastModelCapabilities;

		backgroundSummarizer.start(async bgToken => {
			try {
				if (useInlineSummarization) {
					// Inline mode: fork the exact messages from the main render
					// and append a summary user message. The prompt prefix is
					// byte-identical to the main agent loop for cache hits.
					const strippedMainMessages = ToolCallingLoop.stripInternalToolCallIds(mainRenderMessages);
					const summaryMsgResult = await renderPromptElement(
						this.instantiationService,
						this.endpoint,
						InlineSummarizationUserMessage,
						{ endpoint: this.endpoint },
						undefined,
						bgToken,
					);
					const messages = [
						...strippedMainMessages,
						...summaryMsgResult.messages,
					];

					const response = await this.endpoint.makeChatRequest2({
						debugName: 'summarizeConversationHistory-inline',
						messages,
						finishedCb: undefined,
						location: ChatLocation.Agent,
						conversationId,
						requestOptions: {
							temperature: 0,
							stream: false,
							...toolOpts,
						},
						modelCapabilities,
						telemetryProperties: associatedRequestId ? { associatedRequestId } : undefined,
						enableRetryOnFilter: true,
					}, bgToken);
					if (response.type !== ChatFetchResponseType.Success) {
						throw new Error(`Background inline summarization request failed: ${response.type}`);
					}
					const rawSummaryText = extractInlineSummary(response.value);
					if (!rawSummaryText) {
						throw new Error('Background inline summarization: no <summary> tags found in response');
					}
					if (!toolCallRoundId) {
						throw new Error('Background inline summarization: no round ID to apply summary to');
					}
					// Flush the transcript before snapshotting the line count so
					// the baked "N lines" hint matches the on-disk file at this
					// moment (mirrors the full/simple path in SummarizedConversationHistory.render).
					if (conversationId && this.sessionTranscriptService.getTranscriptPath(conversationId)) {
						await this.sessionTranscriptService.flush(conversationId);
					}
					const summaryText = conversationId
						? appendTranscriptHintToSummary(rawSummaryText, conversationId, this.sessionTranscriptService)
						: rawSummaryText;
					this.logService.debug(`[ConversationHistorySummarizer] background inline compaction completed (${summaryText.length} chars, roundId=${toolCallRoundId})`);

					// Send summarizedConversationHistory telemetry for parity
					// with the standard ConversationHistorySummarizer path.
					const { numRounds, numRoundsSinceLastSummarization } = computeSummarizationRoundCounts(history, rounds);
					const numRoundsInCurrentTurn = rounds.length;
					const lastUsedTool = rounds.at(-1)?.toolCalls?.at(-1)?.name
						?? history.at(-1)?.rounds.at(-1)?.toolCalls?.at(-1)?.name ?? 'none';
					const promptTypes = messages.map(msg => `${msg.role}${'name' in msg && msg.name ? `-${msg.name}` : ''}:${getTextPart(msg.content).length}`).join(',');
					/* __GDPR__
						"summarizedConversationHistory" : {
							"owner": "bhavyau",
							"comment": "Tracks background inline summarization outcome",
							"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
							"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
							"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
							"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
							"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
							"lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The last tool used before summarization." },
							"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
							"promptTypes": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Role and character count of each prompt message in order, as a proxy for cache hit rate (e.g. system:1234,user:567)." },
							"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." },
							"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
							"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." },
							"isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this was triggered during tool calling." },
							"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." },
							"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." },
							"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." },
							"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." }
						}
					*/
					this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
						outcome: 'success',
						model: this.endpoint.model,
						summarizationMode: 'inline',
						conversationId,
						chatRequestId: associatedRequestId,
						lastUsedTool,
						requestId: response.requestId,
						promptTypes,
					}, {
						numRounds,
						turnIndex: history.length,
						curTurnRoundIndex: numRoundsInCurrentTurn,
						isDuringToolCalling: numRoundsInCurrentTurn > 0 ? 1 : 0,
						duration: Date.now() - bgStartTime,
						promptTokenCount: response.usage?.prompt_tokens,
						promptCacheTokenCount: response.usage?.prompt_tokens_details?.cached_tokens,
						responseTokenCount: response.usage?.completion_tokens,
					});

					return {
						summary: summaryText,
						toolCallRoundId,
						promptTokens: response.usage?.prompt_tokens,
						promptCacheTokens: response.usage?.prompt_tokens_details?.cached_tokens,
						outputTokens: response.usage?.completion_tokens,
						durationMs: Date.now() - bgStartTime,
						model: this.endpoint.model,
						summarizationMode: 'inline',
						numRounds,
						numRoundsSinceLastSummarization,
					};
				} else {
					// Standard mode: use triggerSummarize which makes a separate
					// LLM call with a summarization-specific prompt during render.
					// Shallow-copy the rounds/results so the main loop's mutations
					// don't race with this background render.
					const snapshotProps: AgentPromptProps = {
						...props,
						promptContext: {
							...promptContext,
							toolCallRounds: promptContext.toolCallRounds ? [...promptContext.toolCallRounds] : undefined,
							toolCallResults: promptContext.toolCallResults ? { ...promptContext.toolCallResults } : undefined,
						}
					};
					const bgRenderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
						...snapshotProps,
						endpoint: this.endpoint,
						promptContext: snapshotProps.promptContext,
						triggerSummarize: true,
					});
					// Progress is intentionally discarded — this render is invisible to the user.
					const bgProgress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart> = { report: () => { } };
					const bgRenderResult = await bgRenderer.render(bgProgress, bgToken);
					const summaryMetadata = bgRenderResult.metadata.get(SummarizedConversationHistoryMetadata);
					if (!summaryMetadata) {
						throw new Error('Background compaction produced no summary metadata');
					}
					this.logService.debug(`[ConversationHistorySummarizer] background compaction completed successfully (roundId=${summaryMetadata.toolCallRoundId})`);
					return {
						summary: summaryMetadata.text,
						toolCallRoundId: summaryMetadata.toolCallRoundId,
						promptTokens: summaryMetadata.usage?.prompt_tokens,
						promptCacheTokens: summaryMetadata.usage?.prompt_tokens_details?.cached_tokens,
						outputTokens: summaryMetadata.usage?.completion_tokens,
						durationMs: Date.now() - bgStartTime,
						model: summaryMetadata.model,
						summarizationMode: summaryMetadata.summarizationMode,
						numRounds: summaryMetadata.numRounds,
						numRoundsSinceLastSummarization: summaryMetadata.numRoundsSinceLastSummarization,
					};
				}
			} catch (err) {
				this.logService.error(err, `[ConversationHistorySummarizer] background compaction failed`);

				// Send failure telemetry for inline background summarization
				if (useInlineSummarization) {
					/* __GDPR__
						"summarizedConversationHistory" : {
							"owner": "bhavyau",
							"comment": "Tracks background inline summarization failure",
							"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
							"detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Detailed failure reason." },
							"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
							"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
							"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
							"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
							"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." }
						}
					*/
					this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
						outcome: 'failed',
						detailedOutcome: err instanceof Error ? err.message : String(err),
						model: this.endpoint.model,
						summarizationMode: 'inline',
						conversationId,
						chatRequestId: associatedRequestId,
					}, {
						duration: Date.now() - bgStartTime,
					});
				}

				throw err;
			}
		}, token);
	}

	/**
	 * Returns the `BackgroundSummarizer` for this session, or `undefined` if
	 * the intent is not an `AgentIntent` (e.g.
`AskAgentIntent`).
	 */
	private _getOrCreateBackgroundSummarizer(sessionId: string | undefined): BackgroundSummarizer | undefined {
		if (!sessionId || !(this.intent instanceof AgentIntent)) {
			return undefined;
		}
		return this.intent.getOrCreateBackgroundSummarizer(sessionId, this.endpoint.modelMaxPromptTokens);
	}

	/**
	 * Apply a background-compaction result onto the in-memory rounds so
	 * that the next render picks up the `<conversation-summary>` element.
	 */
	private _applySummaryToRounds(bgResult: { summary: string; toolCallRoundId: string }, promptContext: IBuildPromptContext): void {
		// Check current-turn rounds first
		const currentRound = promptContext.toolCallRounds?.find(r => r.id === bgResult.toolCallRoundId);
		if (currentRound) {
			currentRound.summary = bgResult.summary;
		} else {
			// Fall back to history turns, newest first
			let found = false;
			for (const turn of [...promptContext.history].reverse()) {
				const round = turn.rounds.find(r => r.id === bgResult.toolCallRoundId);
				if (round) {
					round.summary = bgResult.summary;
					found = true;
					break;
				}
			}
			if (!found) {
				this.logService.warn(`[ConversationHistorySummarizer] background compaction round ${bgResult.toolCallRoundId} not found in toolCallRounds or history — summary dropped`);
			}
		}
		// Invalidate the auto mode router cache so the next getChatEndpoint()
		// call re-evaluates which model to use after compaction.
		this.automodeService.invalidateRouterCache(this.request);
	}

	/**
	 * Persist the summary on the current turn's `resultMetadata` so that
	 * `normalizeSummariesOnRounds` restores it on subsequent turns.
	 */
	private _persistSummaryOnTurn(bgResult: IBackgroundSummarizationResult, promptContext: IBuildPromptContext, contextLengthBefore?: number): void {
		const turn = promptContext.conversation?.getLatestTurn();
		const chatResult = turn?.responseChatResult;
		if (chatResult) {
			// Append to any summaries already recorded on the chat result.
			const metadata = (chatResult.metadata ?? {}) as Record<string, unknown>;
			const existingSummaries = (metadata['summaries'] as unknown[] ?? []);
			existingSummaries.push({ toolCallRoundId: bgResult.toolCallRoundId, text: bgResult.summary });
			metadata['summaries'] = existingSummaries;
			(chatResult as { metadata: unknown }).metadata = metadata;
		}
		// Also store as a pending summary on the turn so normalizeSummariesOnRounds
		// can restore it even when chatResult doesn't exist yet (mid-tool-call-loop).
		turn?.addPendingSummary(bgResult.toolCallRoundId, bgResult.summary);
		// Reconstruct a usage object only when both token counts are known.
		const usage = bgResult.promptTokens !== undefined && bgResult.outputTokens !== undefined
			? { prompt_tokens: bgResult.promptTokens, completion_tokens: bgResult.outputTokens, total_tokens: bgResult.promptTokens + bgResult.outputTokens, ...(bgResult.promptCacheTokens !== undefined ? { prompt_tokens_details: { cached_tokens: bgResult.promptCacheTokens } } : {}) }
			: undefined;
		turn?.setMetadata(new SummarizedConversationHistoryMetadata(
			bgResult.toolCallRoundId,
			bgResult.summary,
			{
				usage,
				model: bgResult.model,
				summarizationMode: bgResult.summarizationMode,
				numRounds: bgResult.numRounds,
				numRoundsSinceLastSummarization: bgResult.numRoundsSinceLastSummarization,
				durationMs: bgResult.durationMs,
				source: 'background',
				outcome: 'success',
				contextLengthBefore,
			},
		));
	}

	/**
	 * Record a background compaction failure on the current turn's metadata,
	 * matching how foreground compaction records its failures.
	 */
	private _recordBackgroundCompactionFailure(promptContext: IBuildPromptContext, trigger: string): void {
		const turn = promptContext.conversation?.getLatestTurn();
		turn?.setMetadata(new SummarizedConversationHistoryMetadata(
			'', // no toolCallRoundId for failures
			'', // no summary text for failures
			{
				model: this.endpoint.model,
				source: 'background',
				outcome: `noResult_${trigger}`,
				contextLengthBefore: this._lastRenderTokenCount,
			},
		));
	}

	/**
	 * Emits the `backgroundSummarizationApplied` telemetry event and bumps the
	 * GenAI summarization counter with the given outcome.
	 */
	private _sendBackgroundCompactionTelemetry(
		trigger: string,
		outcome: string,
		contextRatio: number,
		promptContext: IBuildPromptContext,
	): void {
		/* __GDPR__
			"backgroundSummarizationApplied" : {
				"owner": "bhavyau",
				"comment": "Tracks background compaction orchestration decisions and outcomes in the agent loop.",
				"trigger": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The code path that triggered background compaction consumption." },
				"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Outcome of the background compaction consumption. One of: 'applied' (result applied and re-render succeeded), 'appliedButReRenderFailed' (result applied but the subsequent re-render still exceeded budget and required a fallback), 'noResult' (no usable result was produced)." },
				"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Id for the current chat conversation." },
				"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID that this background compaction was consumed during." },
				"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID used." },
				"contextRatio": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The context window usage ratio when background compaction was consumed." }
			}
		*/
		this.telemetryService.sendMSFTTelemetryEvent('backgroundSummarizationApplied', {
			trigger,
			outcome,
			conversationId: promptContext.conversation?.sessionId,
			chatRequestId: promptContext.conversation?.getLatestTurn()?.id,
			model: this.endpoint.model,
		}, {
			contextRatio,
		});
		GenAiMetrics.incrementAgentSummarizationCount(this.otelService, outcome);
	}

	// Explicitly opt out of response post-processing for this invocation.
	override processResponse = undefined;
}