Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/prompts/node/agent/summarizedConversationHistory.tsx
13405 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import * as l10n from '@vscode/l10n';
7
import { BasePromptElementProps, PrioritizedList, PromptElement, PromptMetadata, PromptSizing, Raw, SystemMessage, UserMessage } from '@vscode/prompt-tsx';
8
import { BudgetExceededError } from '@vscode/prompt-tsx/dist/base/materialized';
9
import { ChatMessage } from '@vscode/prompt-tsx/dist/base/output/rawTypes';
10
import type { ChatResponsePart, ChatResultPromptTokenDetail, LanguageModelToolInformation, NotebookDocument, Progress } from 'vscode';
11
import { IChatHookService, PreCompactHookInput } from '../../../../platform/chat/common/chatHookService';
12
import { ChatFetchResponseType, ChatLocation, ChatResponse, FetchSuccess } from '../../../../platform/chat/common/commonTypes';
13
import { getTextPart } from '../../../../platform/chat/common/globalStringUtils';
14
import { IHistoricalTurn, ISessionTranscriptService } from '../../../../platform/chat/common/sessionTranscriptService';
15
import { ConfigKey, IConfigurationService } from '../../../../platform/configuration/common/configurationService';
16
import { isAnthropicFamily, isGeminiFamily } from '../../../../platform/endpoint/common/chatModelCapabilities';
17
import { ILogService } from '../../../../platform/log/common/logService';
18
import { CUSTOM_TOOL_SEARCH_NAME } from '../../../../platform/networking/common/anthropic';
19
import { IChatEndpoint } from '../../../../platform/networking/common/networking';
20
import { APIUsage } from '../../../../platform/networking/common/openai';
21
import { IPromptPathRepresentationService } from '../../../../platform/prompts/common/promptPathRepresentationService';
22
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry';
23
import { ThinkingData } from '../../../../platform/thinking/common/thinking';
24
import { computePromptTokenDetails } from '../../../../platform/tokenizer/node/promptTokenDetails';
25
import { IWorkspaceService } from '../../../../platform/workspace/common/workspaceService';
26
import { CancellationToken } from '../../../../util/vs/base/common/cancellation';
27
import { CancellationError, isCancellationError } from '../../../../util/vs/base/common/errors';
28
import { Iterable } from '../../../../util/vs/base/common/iterator';
29
import { StopWatch } from '../../../../util/vs/base/common/stopwatch';
30
import { generateUuid } from '../../../../util/vs/base/common/uuid';
31
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
32
import { ChatResponseProgressPart2 } from '../../../../vscodeTypes';
33
import { ToolCallingLoop } from '../../../intents/node/toolCallingLoop';
34
import { IResultMetadata } from '../../../prompt/common/conversation';
35
import { IBuildPromptContext, IToolCallRound } from '../../../prompt/common/intents';
36
import { ToolName } from '../../../tools/common/toolNames';
37
import { normalizeToolSchema } from '../../../tools/common/toolSchemaNormalizer';
38
import { NotebookSummary } from '../../../tools/node/notebookSummaryTool';
39
import { renderPromptElement } from '../base/promptRenderer';
40
import { Tag } from '../base/tag';
41
import { ChatToolCalls } from '../panel/toolCalling';
42
import { AgentUserMessage, AgentUserMessageCustomizations, getUserMessagePropsFromAgentProps, getUserMessagePropsFromTurn } from './agentPrompt';
43
import { DefaultOpenAIKeepGoingReminder } from './openai/defaultOpenAIPrompt';
44
import { SimpleSummarizedHistory } from './simpleSummarizedHistoryPrompt';
45
46
/**
 * Props for {@link ConversationHistorySummarizationPrompt}.
 */
export interface ConversationHistorySummarizationPromptProps extends SummarizedAgentHistoryProps {
	/** When true, history is rendered via SimpleSummarizedHistory instead of the full ConversationHistory element. */
	readonly simpleMode?: boolean;
}
49
50
// System-prompt body instructing the model how to produce a structured,
// comprehensive summary of the conversation so it can be compacted without
// losing context. Rendered inside the SystemMessage of
// ConversationHistorySummarizationPrompt below.
const SummaryPrompt = <>
	Your task is to create a comprehensive, detailed summary of the entire conversation that captures all essential information needed to seamlessly continue the work without any loss of context. This summary will be used to compact the conversation while preserving critical technical details, decisions, and progress.<br />

	## Recent Context Analysis<br />

	Pay special attention to the most recent agent commands and tool executions that led to this summarization being triggered. Include:<br />
	- **Last Agent Commands**: What specific actions/tools were just executed<br />
	- **Tool Results**: Key outcomes from recent tool calls (truncate if very long, but preserve essential information)<br />
	- **Immediate State**: What was the system doing right before summarization<br />
	- **Triggering Context**: What caused the token budget to be exceeded<br />

	## Analysis Process<br />

	Before providing your final summary, wrap your analysis in `&lt;analysis&gt;` tags to organize your thoughts systematically:<br />

	1. **Chronological Review**: Go through the conversation chronologically, identifying key phases and transitions<br />
	2. **Intent Mapping**: Extract all explicit and implicit user requests, goals, and expectations<br />
	3. **Technical Inventory**: Catalog all technical concepts, tools, frameworks, and architectural decisions<br />
	4. **Code Archaeology**: Document all files, functions, and code patterns that were discussed or modified<br />
	5. **Progress Assessment**: Evaluate what has been completed vs. what remains pending<br />
	6. **Context Validation**: Ensure all critical information for continuation is captured<br />
	7. **Recent Commands Analysis**: Document the specific agent commands and tool results from the most recent operations<br />

	## Summary Structure<br />

	Your summary must include these sections in order, following the exact format below:<br />

	<Tag name='analysis'>
		[Chronological Review: Walk through conversation phases: initial request → exploration → implementation → debugging → current state]<br />
		[Intent Mapping: List each explicit user request with message context]<br />
		[Technical Inventory: Catalog all technologies, patterns, and decisions mentioned]<br />
		[Code Archaeology: Document every file, function, and code change discussed]<br />
		[Progress Assessment: What's done vs. pending with specific status]<br />
		[Context Validation: Verify all continuation context is captured]<br />
		[Recent Commands Analysis: Last agent commands executed, tool results (truncated if long), immediate pre-summarization state]<br />
	</Tag><br />

	<Tag name='summary'>
		1. Conversation Overview:<br />
		- Primary Objectives: [All explicit user requests and overarching goals with exact quotes]<br />
		- Session Context: [High-level narrative of conversation flow and key phases]<br />
		- User Intent Evolution: [How user's needs or direction changed throughout conversation]<br />

		2. Technical Foundation:<br />
		- [Core Technology 1]: [Version/details and purpose]<br />
		- [Framework/Library 2]: [Configuration and usage context]<br />
		- [Architectural Pattern 3]: [Implementation approach and reasoning]<br />
		- [Environment Detail 4]: [Setup specifics and constraints]<br />

		3. Codebase Status:<br />
		- [File Name 1]:<br />
		- Purpose: [Why this file is important to the project]<br />
		- Current State: [Summary of recent changes or modifications]<br />
		- Key Code Segments: [Important functions/classes with brief explanations]<br />
		- Dependencies: [How this relates to other components]<br />
		- [File Name 2]:<br />
		- Purpose: [Role in the project]<br />
		- Current State: [Modification status]<br />
		- Key Code Segments: [Critical code blocks]<br />
		- [Additional files as needed]<br />

		4. Problem Resolution:<br />
		- Issues Encountered: [Technical problems, bugs, or challenges faced]<br />
		- Solutions Implemented: [How problems were resolved and reasoning]<br />
		- Debugging Context: [Ongoing troubleshooting efforts or known issues]<br />
		- Lessons Learned: [Important insights or patterns discovered]<br />

		5. Progress Tracking:<br />
		- Completed Tasks: [What has been successfully implemented with status indicators]<br />
		- Partially Complete Work: [Tasks in progress with current completion status]<br />
		- Validated Outcomes: [Features or code confirmed working through testing]<br />

		6. Active Work State:<br />
		- Current Focus: [Precisely what was being worked on in most recent messages]<br />
		- Recent Context: [Detailed description of last few conversation exchanges]<br />
		- Working Code: [Code snippets being modified or discussed recently]<br />
		- Immediate Context: [Specific problem or feature being addressed before summary]<br />

		7. Recent Operations:<br />
		- Last Agent Commands: [Specific tools/actions executed just before summarization with exact command names]<br />
		- Tool Results Summary: [Key outcomes from recent tool executions - truncate long results but keep essential info]<br />
		- Pre-Summary State: [What the agent was actively doing when token budget was exceeded]<br />
		- Operation Context: [Why these specific commands were executed and their relationship to user goals]<br />

		8. Continuation Plan:<br />
		- [Pending Task 1]: [Details and specific next steps with verbatim quotes]<br />
		- [Pending Task 2]: [Requirements and continuation context]<br />
		- [Priority Information]: [Which tasks are most urgent or logically sequential]<br />
		- [Next Action]: [Immediate next step with direct quotes from recent messages]<br />
	</Tag><br />

	## Quality Guidelines<br />

	- **Precision**: Include exact filenames, function names, variable names, and technical terms<br />
	- **Completeness**: Capture all context needed to continue without re-reading the full conversation<br />
	- **Clarity**: Write for someone who needs to pick up exactly where the conversation left off<br />
	- **Verbatim Accuracy**: Use direct quotes for task specifications and recent work context<br />
	- **Technical Depth**: Include enough detail for complex technical decisions and code patterns<br />
	- **Logical Flow**: Present information in a way that builds understanding progressively<br />

	This summary should serve as a comprehensive handoff document that enables seamless continuation of all active work streams while preserving the full technical and contextual richness of the original conversation.<br />
</>;
152
153
/**
 * Prompt used to summarize conversation history when the context window is exceeded.
 *
 * Renders the summarization instructions (SummaryPrompt, plus any user-supplied
 * summarizationInstructions) as the system message, the history to be summarized
 * (full or simple form depending on `simpleMode`), an optional snapshot of the
 * working notebook, and a final user message requesting the summary.
 */
export class ConversationHistorySummarizationPrompt extends PromptElement<ConversationHistorySummarizationPromptProps> {
	override async render(state: void, sizing: PromptSizing) {
		// simpleMode uses the cheaper, flattened history renderer.
		const history = this.props.simpleMode ?
			<SimpleSummarizedHistory priority={1} promptContext={this.props.promptContext} location={this.props.location} endpoint={this.props.endpoint} maxToolResultLength={this.props.maxToolResultLength} /> :
			<ConversationHistory priority={1} promptContext={this.props.promptContext} location={this.props.location} endpoint={this.props.endpoint} maxToolResultLength={this.props.maxToolResultLength} enableCacheBreakpoints={this.props.enableCacheBreakpoints} />;
		// claude-opus models get an extra explicit instruction below not to call tools while summarizing.
		const isOpus = this.props.endpoint.model.startsWith('claude-opus');
		return (
			<>
				<SystemMessage priority={this.props.priority}>
					{SummaryPrompt}
					{this.props.summarizationInstructions && <>
						<br /><br />
						## Additional instructions from the user:<br />
						{this.props.summarizationInstructions}
					</>}
				</SystemMessage>
				{history}
				{this.props.workingNotebook && <WorkingNotebookSummary priority={this.props.priority - 2} notebook={this.props.workingNotebook} />}
				<UserMessage priority={this.props.priority}>
					Summarize the conversation history so far, paying special attention to the most recent agent commands and tool results that triggered this summarization. Structure your summary using the enhanced format provided in the system message.<br />
					{isOpus && <>
						<br />
						IMPORTANT: Do NOT call any tools. Your only task is to generate a text summary of the conversation. Do not attempt to execute any actions or make any tool calls.<br />
					</>}
					Focus particularly on:<br />
					- The specific agent commands/tools that were just executed<br />
					- The results returned from these recent tool calls (truncate if very long but preserve key information)<br />
					- What the agent was actively working on when the token budget was exceeded<br />
					- How these recent operations connect to the overall user goals<br />

					Include all important tool calls and their results as part of the appropriate sections, with special emphasis on the most recent operations.
				</UserMessage>
			</>
		);
	}
}
192
193
class WorkingNotebookSummary extends PromptElement<NotebookSummaryProps> {
194
override async render(state: void, sizing: PromptSizing) {
195
return (
196
<UserMessage>
197
This is the current state of the notebook that you have been working on:<br />
198
<NotebookSummary notebook={this.props.notebook} includeCellLines={false} altDoc={undefined} />
199
</UserMessage>
200
);
201
}
202
}
203
204
/** Props for {@link WorkingNotebookSummary}. */
export interface NotebookSummaryProps extends BasePromptElementProps {
	/** The notebook whose current state should be summarized. */
	readonly notebook: NotebookDocument;
}
207
208
/**
 * Conversation history rendered with tool calls and summaries.
 *
 * Walks the history backwards until it finds a tool call round carrying a
 * summary; everything before that round is represented by the summary text
 * alone, everything after is rendered verbatim. Elements are pushed in
 * reverse and flipped into chronological order at the end via
 * `history.reverse()`.
 */
class ConversationHistory extends PromptElement<SummarizedAgentHistoryProps> {
	override async render(state: void, sizing: PromptSizing) {
		// Iterate over the turns in reverse order until we find a turn with a tool call round that was summarized
		const history: PromptElement[] = [];

		// If we have a stop hook query, add it as a new user message at the very end of the conversation.
		// Push it first so that after history.reverse() it will be last.
		// Priorities used below: stop-hook query (901) > current user message (900) > current-turn tool calls (899).
		if (this.props.promptContext.hasStopHookQuery) {
			history.push(<UserMessage priority={901}>{this.props.promptContext.query}</UserMessage>);
		}

		// Handle the possibility that we summarized partway through the current turn (e.g. if we accumulated many tool call rounds)
		let summaryForCurrentTurn: string | undefined = undefined;
		let thinkingForFirstRoundAfterSummarization: ThinkingData | undefined = undefined;
		if (this.props.promptContext.toolCallRounds?.length) {
			// Collect the rounds after the most recent summarized round (scanning newest -> oldest).
			const toolCallRounds: IToolCallRound[] = [];
			for (let i = this.props.promptContext.toolCallRounds.length - 1; i >= 0; i--) {
				const toolCallRound = this.props.promptContext.toolCallRounds[i];
				if (toolCallRound.summary) {
					// This tool call round was summarized
					summaryForCurrentTurn = toolCallRound.summary;
					thinkingForFirstRoundAfterSummarization = toolCallRound.thinking;
					break;
				}
				toolCallRounds.push(toolCallRound);
			}

			// Reverse the tool call rounds so they are in chronological order
			toolCallRounds.reverse();

			// For Anthropic models with thinking enabled, set the thinking on the first round
			// so it gets rendered as the first thinking block after summarization
			if (isAnthropicFamily(this.props.endpoint) && thinkingForFirstRoundAfterSummarization && toolCallRounds.length > 0 && !toolCallRounds[0].thinking) {
				toolCallRounds[0].thinking = thinkingForFirstRoundAfterSummarization;
			}

			history.push(<ChatToolCalls priority={899} flexGrow={2} promptContext={this.props.promptContext} toolCallRounds={toolCallRounds} toolCallResults={this.props.promptContext.toolCallResults} enableCacheBreakpoints={this.props.enableCacheBreakpoints} truncateAt={this.props.maxToolResultLength} />);
		}

		if (summaryForCurrentTurn) {
			// The summary stands in for the user message and all earlier turns,
			// so we can return immediately without rendering any older history.
			history.push(<SummaryMessageElement endpoint={this.props.endpoint} summaryText={summaryForCurrentTurn} />);

			return (<PrioritizedList priority={this.props.priority} descending={false} passPriority={true}>
				{history.reverse()}
			</PrioritizedList>);
		}

		// Render the original user message:
		// - Always render for non-continuation (normal first iteration)
		// - Also render for stop hook continuation (the original message is needed, frozen content will provide it)
		if (!this.props.promptContext.isContinuation || this.props.promptContext.hasStopHookQuery) {
			history.push(<AgentUserMessage flexGrow={2} priority={900} {...getUserMessagePropsFromAgentProps(this.props, {
				userQueryTagName: this.props.userQueryTagName,
				ReminderInstructionsClass: this.props.ReminderInstructionsClass,
				ToolReferencesHintClass: this.props.ToolReferencesHintClass,
			})} />);
		}

		// We may have a summary from earlier in the conversation, but skip history if we have a new summary
		for (const [i, turn] of [...this.props.promptContext.history.entries()].reverse()) {
			const metadata = turn.resultMetadata;

			// Build this list in chronological order
			const turnComponents: PromptElement[] = [];

			// Turn anatomy
			// ______________
			// |            |
			// |    USER    |
			// |            |
			// | ASSISTANT  |
			// |            |
			// |    TOOL    | <-- { summary: ..., toolCallRoundId: ... }
			// | ASSISTANT  |
			// |____________|

			let summaryForTurn: SummarizedConversationHistoryMetadata | undefined;
			// If a tool call limit is exceeded, the tool call from this turn will
			// have been aborted and any result should be found in the next turn.
			const toolCallResultInNextTurn = metadata?.maxToolCallsExceeded;
			let toolCallResults = metadata?.toolCallResults;
			if (toolCallResultInNextTurn) {
				// For the last history turn the "next turn" is the current one, whose
				// results live on the prompt context rather than in turn metadata.
				const nextMetadata = this.props.promptContext.history.at(i + 1)?.responseChatResult?.metadata as IResultMetadata | undefined;
				const mergeFrom = i === this.props.promptContext.history.length - 1 ? this.props.promptContext.toolCallResults : nextMetadata?.toolCallResults;
				toolCallResults = { ...toolCallResults, ...mergeFrom };
			}

			// Find the latest tool call round that was summarized
			// NOTE(review): this inner `i` shadows the outer turn index; it only indexes this turn's rounds.
			const toolCallRounds: IToolCallRound[] = [];
			for (let i = turn.rounds.length - 1; i >= 0; i--) {
				const round = turn.rounds[i];
				summaryForTurn = round.summary ? new SummarizedConversationHistoryMetadata(round.id, round.summary) : undefined;
				if (summaryForTurn) {
					break;
				}
				toolCallRounds.push(round);
			}

			if (summaryForTurn) {
				// We have a summary for a tool call round that was part of this turn
				turnComponents.push(<SummaryMessageElement endpoint={this.props.endpoint} summaryText={summaryForTurn.text} />);
			} else if (!turn.isContinuation) {
				turnComponents.push(<AgentUserMessage flexGrow={1} {...getUserMessagePropsFromTurn(turn, this.props.endpoint, {
					userQueryTagName: this.props.userQueryTagName,
					ReminderInstructionsClass: this.props.ReminderInstructionsClass,
					ToolReferencesHintClass: this.props.ToolReferencesHintClass,
				})} />);
			}

			// Reverse the tool call rounds so they are in chronological order
			toolCallRounds.reverse();
			turnComponents.push(<ChatToolCalls
				flexGrow={1}
				promptContext={this.props.promptContext}
				toolCallRounds={toolCallRounds}
				toolCallResults={toolCallResults}
				isHistorical={!(toolCallResultInNextTurn && i === this.props.promptContext.history.length - 1)}
				truncateAt={this.props.maxToolResultLength}
			/>);

			history.push(...turnComponents.reverse());
			if (summaryForTurn) {
				// All preceding turns are covered by the summary and shouldn't be included verbatim
				break;
			}
		}

		return (<PrioritizedList priority={this.props.priority} descending={false} passPriority={true}>
			{history.reverse()}
		</PrioritizedList>);
	}
}
343
344
/**
 * Optional details recorded on {@link SummarizedConversationHistoryMetadata},
 * describing how and why a summarization run happened. All fields are copied
 * verbatim onto the metadata object.
 */
export interface ISummarizedConversationHistoryMetadataOptions {
	/** Thinking data associated with the summarized round. */
	readonly thinking?: ThinkingData;
	/** API usage reported by the summarization request. */
	readonly usage?: APIUsage;
	/** Token breakdown of the summarization prompt. */
	readonly promptTokenDetails?: readonly ChatResultPromptTokenDetail[];
	/** Model that produced the summary. */
	readonly model?: string;
	/** Summarization mode used ('simple' or 'full' — see SummaryMode). */
	readonly summarizationMode?: string;
	/** Total number of tool call rounds in the conversation. */
	readonly numRounds?: number;
	/** Rounds accumulated since the previous summarization. */
	readonly numRoundsSinceLastSummarization?: number;
	/** Wall-clock duration of the summarization run. */
	readonly durationMs?: number;
	/** Whether summarization ran in the foreground or background. */
	readonly source?: 'foreground' | 'background';
	/** Outcome label for the run — NOTE(review): consumed elsewhere; confirm the value set. */
	readonly outcome?: string;
	/** Context length before compaction — NOTE(review): presumably in tokens; confirm. */
	readonly contextLengthBefore?: number;
}
357
358
export class SummarizedConversationHistoryMetadata extends PromptMetadata {
359
public readonly toolCallRoundId: string;
360
public readonly text: string;
361
public readonly thinking?: ThinkingData;
362
public readonly usage?: APIUsage;
363
public readonly promptTokenDetails?: readonly ChatResultPromptTokenDetail[];
364
public readonly model?: string;
365
public readonly summarizationMode?: string;
366
public readonly numRounds?: number;
367
public readonly numRoundsSinceLastSummarization?: number;
368
public readonly durationMs?: number;
369
public readonly source?: 'foreground' | 'background';
370
public readonly outcome?: string;
371
public readonly contextLengthBefore?: number;
372
373
constructor(
374
toolCallRoundId: string,
375
text: string,
376
options?: ISummarizedConversationHistoryMetadataOptions,
377
) {
378
super();
379
this.toolCallRoundId = toolCallRoundId;
380
this.text = text;
381
this.thinking = options?.thinking;
382
this.usage = options?.usage;
383
this.promptTokenDetails = options?.promptTokenDetails;
384
this.model = options?.model;
385
this.summarizationMode = options?.summarizationMode;
386
this.numRounds = options?.numRounds;
387
this.numRoundsSinceLastSummarization = options?.numRoundsSinceLastSummarization;
388
this.durationMs = options?.durationMs;
389
this.source = options?.source;
390
this.outcome = options?.outcome;
391
this.contextLengthBefore = options?.contextLengthBefore;
392
}
393
}
394
395
/**
 * Props shared by the summarized-history prompt elements in this file.
 */
export interface SummarizedAgentHistoryProps extends BasePromptElementProps, AgentUserMessageCustomizations {
	/** Base priority applied to the rendered history/messages. */
	readonly priority: number;
	/** Endpoint whose model renders and (when triggered) produces the summary. */
	readonly endpoint: IChatEndpoint;
	readonly location: ChatLocation;
	/** Prompt context carrying the query, history, and current tool call rounds. */
	readonly promptContext: IBuildPromptContext;
	/** When true, SummarizedConversationHistory runs the summarizer before rendering. */
	readonly triggerSummarize?: boolean;
	/** Tools whose token cost is reserved when summarizing in Full mode. */
	readonly tools?: ReadonlyArray<LanguageModelToolInformation> | undefined;
	/** Forwarded to ChatToolCalls when rendering tool call rounds. */
	readonly enableCacheBreakpoints?: boolean;
	/** If set, its current state is rendered via WorkingNotebookSummary. */
	readonly workingNotebook?: NotebookDocument;
	/** Truncation threshold for rendered tool results (ChatToolCalls truncateAt). */
	readonly maxToolResultLength: number;
	/** Optional hard cap on summary tokens; effective budget = min(prompt sizing tokenBudget, this value) */
	readonly maxSummaryTokens?: number;
	/** Optional custom instructions to include in the summarization prompt */
	readonly summarizationInstructions?: string;
	/** Skip Full mode and go straight to Simple mode for foreground budget-exceeded recovery. */
	readonly forceSimpleSummary?: boolean;
}
412
413
/**
 * Renders conversation history with tool calls and summaries, triggering summarization while rendering if necessary.
 */
export class SummarizedConversationHistory extends PromptElement<SummarizedAgentHistoryProps> {
	constructor(
		props: SummarizedAgentHistoryProps,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@ISessionTranscriptService private readonly sessionTranscriptService: ISessionTranscriptService,
	) {
		super(props);
	}

	/**
	 * Flushes the session transcript, optionally runs the summarizer (when
	 * `triggerSummarize` is set), then renders the (possibly newly summarized)
	 * history. A successful summarization is surfaced to callers as
	 * SummarizedConversationHistoryMetadata via a <meta /> element and also
	 * written back onto the matching tool call round.
	 */
	override async render(state: void, sizing: PromptSizing, progress: Progress<ChatResponsePart> | undefined, token: CancellationToken | undefined) {
		// Shallow copy of the context handed to ConversationHistory below.
		const promptContext = { ...this.props.promptContext };
		let historyMetadata: SummarizedConversationHistoryMetadata | undefined;
		const sessionId = this.props.promptContext.conversation?.sessionId;
		if (sessionId) {
			// Lazily start the transcript session now (before summarization) so it
			// captures the full pre-compaction conversation. startSession is
			// idempotent — if hooks already started it, this is a no-op.
			await this.ensureTranscriptSession();

			if (this.sessionTranscriptService.getTranscriptPath(sessionId)) {
				await this.sessionTranscriptService.flush(sessionId);
			}
		}

		if (this.props.triggerSummarize) {

			const summarizer = this.instantiationService.createInstance(ConversationHistorySummarizer, this.props, sizing, progress, token);
			const summResult = await summarizer.summarizeHistory();
			if (summResult) {
				historyMetadata = new SummarizedConversationHistoryMetadata(summResult.toolCallRoundId, summResult.summary, {
					thinking: summResult.thinking,
					usage: summResult.usage,
					promptTokenDetails: summResult.promptTokenDetails,
					model: summResult.model,
					summarizationMode: summResult.summarizationMode,
					numRounds: summResult.numRounds,
					numRoundsSinceLastSummarization: summResult.numRoundsSinceLastSummarization,
					durationMs: summResult.durationMs,
				});
				// Persist the summary onto the round so the rendered history picks it up.
				this.addSummaryToHistory(summResult.summary, summResult.toolCallRoundId, summResult.thinking);
			}
		}

		return <>
			{historyMetadata && <meta value={historyMetadata} />}
			<ConversationHistory
				{...this.props}
				promptContext={promptContext}
				enableCacheBreakpoints={this.props.enableCacheBreakpoints} />
		</>;
	}

	/**
	 * Lazily starts a transcript session with the full conversation history.
	 * This is called just before summarization so that the transcript file
	 * contains the complete pre-compaction conversation. If a session was
	 * already started (e.g. by hooks), this is a no-op.
	 */
	private async ensureTranscriptSession(): Promise<void> {
		const sessionId = this.props.promptContext.conversation?.sessionId;
		if (!sessionId) {
			return;
		}

		// Short-circuit if session already exists — avoids rebuilding
		// the full IHistoricalTurn[] array on every render.
		if (this.sessionTranscriptService.getTranscriptPath(sessionId)) {
			return;
		}

		// Build IHistoricalTurn[] from the prompt context's Turn[] history
		const history: IHistoricalTurn[] = this.props.promptContext.history.map(turn => ({
			userMessage: turn.request.message,
			timestamp: turn.startTime,
			rounds: turn.rounds.map(round => ({
				response: round.response,
				toolCalls: round.toolCalls.map(tc => ({
					name: tc.name,
					arguments: tc.arguments,
					id: tc.id,
				})),
				// Thinking text may be a single string or a list of fragments; join fragments.
				reasoningText: round.thinking
					? (Array.isArray(round.thinking.text) ? round.thinking.text.join('') : round.thinking.text)
					: undefined,
				timestamp: round.timestamp,
			})),
		}));

		await this.sessionTranscriptService.startSession(sessionId, undefined, history.length > 0 ? history : undefined);
	}

	/**
	 * Attaches a freshly produced summary (and its thinking data) to the tool
	 * call round it belongs to — first checking the current turn's rounds, then
	 * searching earlier turns newest-first.
	 */
	private addSummaryToHistory(summary: string, toolCallRoundId: string, thinking?: ThinkingData): void {
		const round = this.props.promptContext.toolCallRounds?.find(round => round.id === toolCallRoundId);
		if (round) {
			round.summary = summary;
			round.thinking = thinking;
			return;
		}

		// Adding summaries to rounds in previous turns will only be persisted during the current session.
		// For the next turn, need to restore them from metadata (see normalizeSummariesOnRounds).
		for (const turn of [...this.props.promptContext.history].reverse()) {
			const round = turn.rounds.find(round => round.id === toolCallRoundId);
			if (round) {
				round.summary = summary;
				round.thinking = thinking;
				break;
			}
		}
	}
}
527
528
/**
 * Summarization strategies. 'full' renders the complete history via
 * ConversationHistory; 'simple' uses the cheaper SimpleSummarizedHistory
 * fallback. Values double as the AgentHistorySummarizationMode config values
 * (see getSummaryWithFallback).
 */
enum SummaryMode {
	Simple = 'simple',
	Full = 'full'
}
532
533
/**
 * Result of a single summarization attempt, bundling the fetched summary with
 * details used for metadata/telemetry.
 */
interface SummarizationResult {
	/** Successful fetch whose value is the summary text. */
	result: FetchSuccess<string>;
	/** Token breakdown of the summarization prompt. */
	promptTokenDetails?: readonly ChatResultPromptTokenDetail[];
	/** Model that produced the summary. */
	model?: string;
	/** Mode used for this attempt ('simple' or 'full'). */
	summarizationMode?: string;
	/** Total number of tool call rounds in the conversation. */
	numRounds?: number;
	/** Rounds accumulated since the previous summarization. */
	numRoundsSinceLastSummarization?: number;
	/** Wall-clock duration of the summarization run. */
	durationMs?: number;
}
542
543
/**
 * Performs the actual conversation summarization: runs the PreCompact hook,
 * builds summarization props, then requests a summary from the endpoint —
 * trying Full mode and falling back to Simple mode on failure (see
 * getSummaryWithFallback).
 */
class ConversationHistorySummarizer {
	// Unique id generated per summarization run.
	private readonly summarizationId = generateUuid();

	constructor(
		private readonly props: SummarizedAgentHistoryProps,
		private readonly sizing: PromptSizing,
		private readonly progress: Progress<ChatResponsePart> | undefined,
		private readonly token: CancellationToken | undefined,
		// Services below are injected via instantiation-service decorators.
		@ITelemetryService private readonly telemetryService: ITelemetryService,
		@ILogService private readonly logService: ILogService,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@IConfigurationService private readonly configurationService: IConfigurationService,
		@IChatHookService private readonly chatHookService: IChatHookService,
		@ISessionTranscriptService private readonly sessionTranscriptService: ISessionTranscriptService,
	) { }
558
559
	/**
	 * Produces a summary of the conversation history.
	 *
	 * Runs the PreCompact hook first, then starts summarization while reporting
	 * a "Compacting conversation..." progress part whose completion awaits the
	 * summary (errors are swallowed there so the progress part always resolves;
	 * the real error surfaces from the awaited promise below). The returned
	 * summary text has a transcript hint appended when a session is available.
	 *
	 * @returns the summary text, the tool call round it is anchored to, and
	 * details (thinking, usage, token breakdown, model, mode, round counts,
	 * duration) for metadata/telemetry.
	 */
	async summarizeHistory(): Promise<{ summary: string; toolCallRoundId: string; thinking?: ThinkingData; usage?: APIUsage; promptTokenDetails?: readonly ChatResultPromptTokenDetail[]; model?: string; summarizationMode?: string; numRounds?: number; numRoundsSinceLastSummarization?: number; durationMs?: number }> {
		// Execute pre-compact hook before summarization to allow hooks to archive transcripts or perform cleanup
		await this.executePreCompactHook();

		// The props builder is a separate unit so tests can create props and call this directly.
		const propsInfo = this.instantiationService.createInstance(SummarizedConversationHistoryPropsBuilder).getProps(this.props);

		const summaryPromise = this.getSummaryWithFallback(propsInfo);
		this.progress?.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {
			try {
				await summaryPromise;
			} catch { }
			return l10n.t('Compacted conversation');
		}));

		const summary = await summaryPromise;
		const { numRounds, numRoundsSinceLastSummarization } = computeSummarizationRoundCounts(this.props.promptContext.history, this.props.promptContext.toolCallRounds);
		return {
			summary: this.appendTranscriptHint(summary.result.value),
			toolCallRoundId: propsInfo.summarizedToolCallRoundId,
			thinking: propsInfo.summarizedThinking,
			usage: summary.result.usage,
			promptTokenDetails: summary.promptTokenDetails,
			model: summary.model,
			summarizationMode: summary.summarizationMode,
			numRounds,
			numRoundsSinceLastSummarization,
			durationMs: summary.durationMs,
		};
	}
589
590
private appendTranscriptHint(summary: string): string {
591
const sessionId = this.props.promptContext.conversation?.sessionId;
592
if (!sessionId) {
593
return summary;
594
}
595
return appendTranscriptHintToSummary(summary, sessionId, this.sessionTranscriptService);
596
}
597
598
private async getSummaryWithFallback(propsInfo: ISummarizedConversationHistoryInfo): Promise<SummarizationResult> {
599
const forceMode = this.configurationService.getConfig<string | undefined>(ConfigKey.Advanced.AgentHistorySummarizationMode);
600
if (this.props.forceSimpleSummary && forceMode !== SummaryMode.Full) {
601
// Foreground budget-exceeded recovery — go straight to Simple.
602
return await this.getSummary(SummaryMode.Simple, propsInfo);
603
}
604
if (forceMode === SummaryMode.Simple) {
605
return await this.getSummary(SummaryMode.Simple, propsInfo);
606
} else {
607
try {
608
return await this.getSummary(SummaryMode.Full, propsInfo);
609
} catch (e) {
610
if (isCancellationError(e)) {
611
throw e;
612
}
613
614
return await this.getSummary(SummaryMode.Simple, propsInfo);
615
}
616
}
617
}
618
619
private logInfo(message: string, mode: SummaryMode): void {
620
this.logService.info(`[ConversationHistorySummarizer] [${mode}] ${message}`);
621
}
622
623
/**
 * Executes the PreCompact hook before summarization starts, giving hook
 * scripts a chance to archive the transcript or perform cleanup before the
 * conversation is compacted. Hook failures are logged and never block
 * summarization.
 */
private async executePreCompactHook(): Promise<void> {
	const hooks = this.props.promptContext.request?.hooks;
	if (!hooks) {
		return;
	}

	try {
		const input = { trigger: 'auto' } satisfies PreCompactHookInput;
		const sessionId = this.props.promptContext.conversation?.sessionId;
		const results = await this.chatHookService.executeHook('PreCompact', hooks, input, sessionId, this.token ?? CancellationToken.None);

		// Surface individual hook errors in the log, but keep going.
		for (const result of results) {
			if (result.resultKind !== 'error') {
				continue;
			}
			const errorMessage = typeof result.output === 'string' ? result.output : 'Unknown error';
			this.logService.error(`[ConversationHistorySummarizer] PreCompact hook error: ${errorMessage}`);
		}
	} catch (error) {
		this.logService.error('[ConversationHistorySummarizer] Error executing PreCompact hook', error);
	}
}
649
650
/**
 * Performs a single summarization attempt in the given mode.
 *
 * Renders the summarization prompt, sanitizes the rendered messages for the
 * target model family, issues the chat request, and returns the validated
 * result together with token details and timing. Render and request failures
 * are reported via telemetry and rethrown to the caller (which may fall back
 * to Simple mode).
 *
 * @param mode Full (tools attached, tool_choice 'none') or Simple.
 * @param propsInfo Adjusted prompt props describing what to summarize.
 * @throws Any render/request error, CancellationError on cancel, or an error
 * from response validation (failed request / oversized summary).
 */
private async getSummary(mode: SummaryMode, propsInfo: ISummarizedConversationHistoryInfo): Promise<SummarizationResult> {
	const stopwatch = new StopWatch(false);

	// In Full mode, tools are sent alongside the summarization prompt with
	// tool_choice: 'none'. Reserve budget for them so the rendered messages
	// plus tools don't exceed the model's context window.
	const tools = this.props.tools;
	const toolTokens = mode === SummaryMode.Full && tools?.length
		? await this.props.endpoint.acquireTokenizer().countToolTokens(tools)
		: 0;
	// Clone the endpoint with a reduced token budget (90% of what remains after
	// reserving tool tokens, never below 1) only when tools actually cost tokens.
	const endpoint = toolTokens > 0
		? this.props.endpoint.cloneWithTokenOverride(
			Math.max(1, Math.floor((this.props.endpoint.modelMaxPromptTokens - toolTokens) * 0.9)))
		: this.props.endpoint;

	let summarizationPrompt: ChatMessage[];
	const associatedRequestId = this.props.promptContext.conversation?.getLatestTurn().id;
	try {
		// Render without cache breakpoints; Simple mode is signaled to the prompt element.
		summarizationPrompt = (await renderPromptElement(this.instantiationService, endpoint, ConversationHistorySummarizationPrompt, { ...propsInfo.props, enableCacheBreakpoints: false, simpleMode: mode === SummaryMode.Simple }, undefined, this.token)).messages;
		this.logInfo(`summarization prompt rendered in ${stopwatch.elapsed()}ms.`, mode);
	} catch (e) {
		// Distinguish budget overruns from other render failures in telemetry.
		const budgetExceeded = e instanceof BudgetExceededError;
		const outcome = budgetExceeded ? 'budget_exceeded' : 'renderError';
		this.logInfo(`Error rendering summarization prompt in mode: ${mode}. ${e.stack}`, mode);
		this.sendSummarizationTelemetry(outcome, '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e));
		throw e;
	}

	let summaryResponse: ChatResponse;
	let promptTypes: string | undefined;
	try {
		// Full mode sends the tool schemas (validated/normalized per model family)
		// so the model retains context about available tools while summarizing.
		const normalizedTools = mode === SummaryMode.Full ? normalizeToolSchema(
			endpoint.family,
			this.props.tools?.map(tool => ({
				function:
				{
					name: tool.name,
					description: tool.description,
					parameters: tool.inputSchema && Object.keys(tool.inputSchema).length ? tool.inputSchema : undefined
				}, type: 'function'
			})),
			(tool, rule) => {
				this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`);
			},
		) : undefined;
		// tool_choice 'none': tools are informational only — no calls allowed.
		const toolOpts = normalizedTools?.length ? {
			tool_choice: 'none' as const,
			tools: normalizedTools,
		} : undefined;

		// Sanitize the rendered prompt in place before converting to request messages.
		stripCacheBreakpoints(summarizationPrompt);
		replaceImageContentWithPlaceholders(summarizationPrompt);

		let messages = ToolCallingLoop.stripInternalToolCallIds(summarizationPrompt);

		// Strip custom client-side tool search (tool_search) tool_use/tool_result
		// pairs. The summarization call uses ChatLocation.Other but
		// createMessagesRequestBody still converts tool_search results to
		// tool_reference blocks (customToolSearchEnabled isn't gated by location).
		// Without tool search enabled in the request, Anthropic rejects them.
		if (isAnthropicFamily(endpoint)) {
			messages = stripToolSearchMessages(messages);
		}

		// Gemini strictly requires every function_call to have a matching function_response.
		// When prompt-tsx prunes tool result messages due to token budget, orphaned tool_calls
		// can remain, causing a 400 INVALID_ARGUMENT error. Strip them for Gemini models.
		if (isGeminiFamily(endpoint)) {
			const validationResult = ToolCallingLoop.validateToolMessagesCore(messages, { stripOrphanedToolCalls: true });
			messages = validationResult.messages;
			if (validationResult.strippedToolCallCount > 0) {
				this.logInfo(`Stripped ${validationResult.strippedToolCallCount} orphaned tool calls from summarization prompt`, mode);
				/* __GDPR__
					"summarization.strippedOrphanedToolCalls" : {
						"owner": "vijayu",
						"comment": "Tracks when orphaned tool calls are stripped from the summarization prompt for Gemini models",
						"strippedToolCallCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Number of orphaned tool_calls stripped from the summarization prompt." },
						"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
						"mode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode (simple or full)." }
					}
				*/
				this.telemetryService.sendMSFTTelemetryEvent('summarization.strippedOrphanedToolCalls', {
					model: endpoint.model,
					mode,
				}, {
					strippedToolCallCount: validationResult.strippedToolCallCount,
				});
			}
		}

		// Compact per-message fingerprint (role[-name]:charCount) used by telemetry
		// as a proxy for prompt-cache hit rate.
		promptTypes = messages.map(msg => `${msg.role}${'name' in msg && msg.name ? `-${msg.name}` : ''}:${getTextPart(msg.content).length}`).join(',');
		summaryResponse = await endpoint.makeChatRequest2({
			debugName: `summarizeConversationHistory-${mode}`,
			messages,
			finishedCb: undefined,
			location: ChatLocation.Other,
			requestOptions: {
				temperature: 0,
				stream: false,
				...toolOpts
			},
			telemetryProperties: associatedRequestId ? { associatedRequestId } : undefined,
			enableRetryOnFilter: true
		}, this.token ?? CancellationToken.None);
	} catch (e) {
		this.logInfo(`Error from summarization request. ${e.message}`, mode);
		this.sendSummarizationTelemetry('requestThrow', '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e));
		throw e;
	}

	// Compute a client-side token breakdown; the server-reported prompt token
	// total is attached only on success.
	const tokenizer = endpoint.acquireTokenizer();
	const promptTokenDetails = await computePromptTokenDetails({
		messages: summarizationPrompt,
		tokenizer,
		tools: this.props.tools ?? undefined,
		totalPromptTokens: summaryResponse.type === ChatFetchResponseType.Success ? summaryResponse.usage?.prompt_tokens : undefined,
	});

	const durationMs = stopwatch.elapsed();
	return {
		// handleSummarizationResponse throws on failure/oversize, so a returned
		// result is always a validated success.
		result: await this.handleSummarizationResponse(summaryResponse, mode, durationMs, promptTypes),
		promptTokenDetails,
		model: endpoint.model,
		summarizationMode: mode,
		durationMs,
	};
}
777
778
/**
 * Validates the raw summarization response. Failed requests and oversized
 * summaries emit telemetry and throw; a valid response is returned unchanged.
 * Cancellation is surfaced as a CancellationError so callers don't fall back.
 */
private async handleSummarizationResponse(response: ChatResponse, mode: SummaryMode, elapsedTime: number, promptTypes?: string): Promise<FetchSuccess<string>> {
	if (response.type !== ChatFetchResponseType.Success) {
		const detail = response.reason ?? response.type;
		this.sendSummarizationTelemetry(response.type, response.requestId, this.props.endpoint.model, mode, elapsedTime, undefined, detail);
		this.logInfo(`Summarization request failed. ${response.type} ${detail}`, mode);
		if (response.type === ChatFetchResponseType.Canceled) {
			throw new CancellationError();
		}

		throw new Error('Summarization request failed');
	}

	// Reject summaries that would not fit the (possibly capped) token budget.
	const summarySize = await this.sizing.countTokens(response.value);
	let effectiveBudget = this.sizing.tokenBudget;
	if (this.props.maxSummaryTokens) {
		effectiveBudget = Math.min(effectiveBudget, this.props.maxSummaryTokens);
	}
	if (summarySize > effectiveBudget) {
		this.sendSummarizationTelemetry('too_large', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage, `${summarySize} tokens exceeds budget ${effectiveBudget}`);
		this.logInfo(`Summary too large: ${summarySize} tokens (effective budget ${effectiveBudget})`, mode);
		throw new Error('Summary too large');
	}

	this.sendSummarizationTelemetry('success', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage, undefined, promptTypes);
	this.logInfo(`Summarization usage: prompt=${response.usage?.prompt_tokens ?? '?'}, cached=${response.usage?.prompt_tokens_details?.cached_tokens ?? '?'}, completion=${response.usage?.completion_tokens ?? '?'}`, mode);
	return response;
}
805
806
/**
 * Send telemetry for conversation summarization.
 * @param outcome High-level result of the summarization (for example, 'success', 'too_large', or the ChatFetchResponseType value)
 * @param requestId Unique identifier of the underlying chat request used for summarization
 * @param model Identifier of the language model used to generate the summary
 * @param mode Summarization mode indicating how the conversation was summarized
 * @param elapsedTime Total time in milliseconds taken for the summarization request
 * @param usage Token usage information for the summarization request, if available
 * @param detailedOutcome Optional detailed reason for non-success outcomes (for example, error or cancellation reason)
 * @param promptTypes Optional pre-computed promptTypes string for the summarization request
 */
private sendSummarizationTelemetry(outcome: string, requestId: string, model: string, mode: SummaryMode, elapsedTime: number, usage: APIUsage | undefined, detailedOutcome?: string, promptTypes?: string): void {
	const { numRounds, numRoundsSinceLastSummarization } = computeSummarizationRoundCounts(this.props.promptContext.history, this.props.promptContext.toolCallRounds);

	const turnIndex = this.props.promptContext.history.length;
	const curTurnRoundIndex = this.props.promptContext.toolCallRounds?.length ?? 0;

	// Prefer the current turn's most recent tool call; fall back to the last
	// call of the last round in history, then 'none'.
	const lastUsedTool = this.props.promptContext.toolCallRounds?.at(-1)?.toolCalls?.at(-1)?.name ??
		this.props.promptContext.history?.at(-1)?.rounds.at(-1)?.toolCalls?.at(-1)?.name ?? 'none';

	// Measurements must be numeric, so booleans are encoded as 0/1.
	const isDuringToolCalling = !!this.props.promptContext.toolCallRounds?.length ? 1 : 0;
	const conversationId = this.props.promptContext.conversation?.sessionId;
	const hasWorkingNotebook = this.props.workingNotebook ? 1 : 0;

	/* __GDPR__
		"summarizedConversationHistory" : {
			"owner": "roblourens",
			"comment": "Tracks when summarization happens and what the outcome was",
			"summarizationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "An ID to join all attempts of this summarization task." },
			"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state or failure reason of the summarization." },
			"detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "A more detailed error message." },
			"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID used for the summarization." },
			"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
			"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID that this summarization ran during." },
			"promptTypes": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Role and character count of each prompt message in order, as a proxy for cache hit rate (e.g. system:1234,user:567)." },
			"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The number of tool call rounds before this summarization was triggered." },
			"numRoundsSinceLastSummarization": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The number of tool call rounds since the last summarization." },
			"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
			"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn" },
			"lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The name of the last tool used before summarization." },
			"isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this summarization was triggered during a tool calling loop." },
			"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Id for the current chat conversation." },
			"hasWorkingNotebook": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether the conversation summary includes a working notebook." },
			"mode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The mode of the conversation summary." },
			"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The mode of the conversation summary." },
			"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The duration of the summarization attempt in ms." },
			"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, server side counted", "isMeasurement": true },
			"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens hitting cache as reported by server", "isMeasurement": true },
			"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of generated tokens", "isMeasurement": true }
		}
	*/
	this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
		summarizationId: this.summarizationId,
		outcome,
		detailedOutcome,
		requestId,
		chatRequestId: this.props.promptContext.conversation?.getLatestTurn().id,
		model,
		lastUsedTool,
		conversationId,
		mode,
		summarizationMode: mode, // Try to unstick GDPR
		promptTypes,
	}, {
		numRounds,
		numRoundsSinceLastSummarization,
		turnIndex,
		curTurnRoundIndex,
		isDuringToolCalling,
		hasWorkingNotebook,
		duration: elapsedTime,
		promptTokenCount: usage?.prompt_tokens,
		promptCacheTokenCount: usage?.prompt_tokens_details?.cached_tokens,
		responseTokenCount: usage?.completion_tokens,
	});
}
882
}
883
884
/**
 * Removes all cache-breakpoint content parts from every message, mutating the
 * message list in place.
 */
function stripCacheBreakpoints(messages: ChatMessage[]): void {
	for (const message of messages) {
		message.content = message.content.filter(
			part => part.type !== Raw.ChatCompletionContentPartKind.CacheBreakpoint);
	}
}
891
892
/**
 * Replaces every image content part with a short text placeholder, mutating
 * the messages in place. Non-image parts are left untouched.
 */
function replaceImageContentWithPlaceholders(messages: ChatMessage[]): void {
	for (const message of messages) {
		message.content = message.content.map(part =>
			part.type === Raw.ChatCompletionContentPartKind.Image
				? { type: Raw.ChatCompletionContentPartKind.Text, text: '[Image was attached]' }
				: part);
	}
}
902
903
/**
 * Bake a stable transcript pointer into a freshly-produced summary text.
 *
 * Shared by both the full/simple summarization path
 * ({@link ConversationHistorySummarizer}) and the inline background
 * summarization path in `agentIntent.ts`. The hint is appended exactly once,
 * at summary creation time, so the resulting string is frozen from then on
 * and replayed verbatim — preserving Anthropic prompt cache hits across
 * subsequent renders.
 *
 * Returns the input unchanged when there is no transcript on disk for the
 * session.
 */
export function appendTranscriptHintToSummary(summary: string, sessionId: string, sessionTranscriptService: ISessionTranscriptService): string {
	const transcriptUri = sessionTranscriptService.getTranscriptPath(sessionId);
	if (!transcriptUri) {
		return summary;
	}

	const transcriptPath = transcriptUri.fsPath;
	const lineCount = sessionTranscriptService.getLineCount(sessionId);

	// Assemble the hint line-by-line and join with newlines, so the output is
	// byte-identical regardless of whether a line count is available.
	const lines = [summary];
	lines.push(`If you need specific details from before compaction (such as exact code snippets, error messages, tool results, or content you previously generated), use the ${ToolName.ReadFile} tool to look up the full uncompacted conversation transcript at: "${transcriptPath}"`);
	if (lineCount !== undefined) {
		lines.push(`At the time this summary was created, the transcript had ${lineCount} lines.`);
	}
	lines.push(`Example usage: ${ToolName.ReadFile}(filePath: "${transcriptPath}")`);
	return lines.join('\n');
}
931
932
export function computeSummarizationRoundCounts(
933
history: IBuildPromptContext['history'],
934
currentRounds: readonly IToolCallRound[] | undefined,
935
): { numRounds: number; numRoundsSinceLastSummarization: number } {
936
const numRoundsInHistory = history.reduce((sum, turn) => sum + turn.rounds.length, 0);
937
const numRoundsInCurrentTurn = currentRounds?.length ?? 0;
938
const numRounds = numRoundsInHistory + numRoundsInCurrentTurn;
939
940
const reversedCurrentRounds = [...(currentRounds ?? [])].reverse();
941
let numRoundsSinceLastSummarization = reversedCurrentRounds.findIndex(round => round.summary);
942
if (numRoundsSinceLastSummarization === -1) {
943
let count = numRoundsInCurrentTurn;
944
outer: for (const turn of Iterable.reverse(Array.from(history))) {
945
for (const round of Iterable.reverse(Array.from(turn.rounds ?? []))) {
946
if (round.summary) {
947
numRoundsSinceLastSummarization = count;
948
break outer;
949
}
950
count++;
951
}
952
}
953
}
954
return { numRounds, numRoundsSinceLastSummarization };
955
}
956
957
/**
 * Strip custom client-side tool search (tool_search) tool_use and tool_result
 * messages from the conversation. The summarization call uses ChatLocation.Other
 * but createMessagesRequestBody still converts tool_search results to
 * tool_reference blocks (customToolSearchEnabled isn't gated by location).
 * Without tool search enabled in the request, Anthropic rejects tool_reference
 * content blocks with: "Input tag 'tool_reference' found using 'type' does not
 * match any of the expected tags".
 */
export function stripToolSearchMessages(messages: ChatMessage[]): ChatMessage[] {
	// First pass: collect the call ids of every tool_search invocation.
	const toolSearchIds = new Set<string>();
	for (const message of messages) {
		if (message.role !== Raw.ChatRole.Assistant || !message.toolCalls) {
			continue;
		}
		for (const tc of message.toolCalls) {
			if (tc.function.name === CUSTOM_TOOL_SEARCH_NAME) {
				toolSearchIds.add(tc.id);
			}
		}
	}

	// Nothing to strip — return the input untouched.
	if (toolSearchIds.size === 0) {
		return messages;
	}

	// Second pass: drop tool result messages with matching ids, and filter the
	// matching calls off of assistant messages (dropping the property entirely
	// when no calls remain).
	const result: ChatMessage[] = [];
	for (const message of messages) {
		if (message.role === Raw.ChatRole.Assistant && message.toolCalls) {
			const kept = message.toolCalls.filter(tc => !toolSearchIds.has(tc.id));
			if (kept.length !== message.toolCalls.length) {
				result.push({ ...message, toolCalls: kept.length > 0 ? kept : undefined });
				continue;
			}
		} else if (message.role === Raw.ChatRole.Tool && message.toolCallId && toolSearchIds.has(message.toolCallId)) {
			continue;
		}
		result.push(message);
	}
	return result;
}
994
995
/**
 * Inputs for a single summarization attempt: the adjusted prompt props plus
 * bookkeeping about which round the resulting summary will be attached to.
 */
export interface ISummarizedConversationHistoryInfo {
	// Prompt props adjusted so the rounds being summarized are what gets rendered.
	readonly props: SummarizedAgentHistoryProps;
	// Id of the tool call round that the summary anchors to (the last round
	// included in the summary).
	readonly summarizedToolCallRoundId: string;
	// Most recent thinking data from the current turn's rounds; only populated
	// for Anthropic-family endpoints (see SummarizedConversationHistoryPropsBuilder).
	readonly summarizedThinking?: ThinkingData;
}
1000
1001
/**
 * Computes the adjusted prompt props and anchor round for a summarization
 * attempt.
 *
 * Exported for test
 */
export class SummarizedConversationHistoryPropsBuilder {
	constructor(
		@IPromptPathRepresentationService private readonly _promptPathRepresentationService: IPromptPathRepresentationService,
		@IWorkspaceService private readonly _workspaceService: IWorkspaceService,
	) { }

	/**
	 * Derives the props for the summarization prompt and records the id of the
	 * last round covered by the summary (`summarizedToolCallRoundId`), so the
	 * summary can later be attached to that round.
	 *
	 * @throws Error('Nothing to summarize') when there are no prior rounds and
	 * no history.
	 */
	getProps(
		props: SummarizedAgentHistoryProps
	): ISummarizedConversationHistoryInfo {
		let toolCallRounds = props.promptContext.toolCallRounds;
		let isContinuation = props.promptContext.isContinuation;
		let summarizedToolCallRoundId = '';
		if (toolCallRounds && toolCallRounds.length > 1) {
			// If there are multiple tool call rounds, exclude the last one, because it must have put us over the limit.
			// Summarize from the previous round in this turn.
			toolCallRounds = toolCallRounds.slice(0, -1);
			summarizedToolCallRoundId = toolCallRounds.at(-1)!.id;
		} else if (props.promptContext.history.length > 0) {
			// If there is only one tool call round, then summarize from the last round of the last turn.
			// Or if there are no tool call rounds, then the new user message put us over the limit. (or the last assistant message?)
			// This flag excludes the last user message from the summary.
			isContinuation = true;
			toolCallRounds = [];
			summarizedToolCallRoundId = props.promptContext.history.at(-1)!.rounds.at(-1)!.id;
		} else {
			throw new Error('Nothing to summarize');
		}

		// For Anthropic models with thinking enabled, find the last assistant message with thinking
		// from all rounds being summarized (both current toolCallRounds and history).
		// This thinking will be used as the first thinking block after summarization.
		// NOTE(review): findLastThinking below only scans the current turn's
		// toolCallRounds — history is never searched; confirm whether the
		// comment or the code reflects the intent.
		const summarizedThinking = isAnthropicFamily(props.endpoint) ? this.findLastThinking(props) : undefined;
		const promptContext = {
			...props.promptContext,
			toolCallRounds,
			isContinuation,
		};
		return {
			props: {
				...props,
				workingNotebook: this.getWorkingNotebook(props),
				promptContext
			},
			summarizedToolCallRoundId,
			summarizedThinking
		};
	}

	/**
	 * Returns the thinking data of the newest tool call round in the current
	 * turn that has any, scanning newest-to-oldest. Does not search history.
	 */
	private findLastThinking(props: SummarizedAgentHistoryProps): ThinkingData | undefined {
		if (props.promptContext.toolCallRounds) {
			for (let i = props.promptContext.toolCallRounds.length - 1; i >= 0; i--) {
				const round = props.promptContext.toolCallRounds[i];
				if (round.thinking) {
					return round.thinking;
				}
			}
		}
		return undefined;
	}

	/**
	 * Resolves the notebook document targeted by the most recent
	 * RunNotebookCell tool call, if its `filePath` argument parses, resolves,
	 * and matches an open notebook; otherwise undefined.
	 */
	private getWorkingNotebook(props: SummarizedAgentHistoryProps): NotebookDocument | undefined {
		// Newest round (if any) that invoked RunNotebookCell.
		const toolCallRound = props.promptContext.toolCallRounds && [...props.promptContext.toolCallRounds].reverse().find(round => round.toolCalls.some(call => call.name === ToolName.RunNotebookCell));
		const toolCall = toolCallRound?.toolCalls.find(call => call.name === ToolName.RunNotebookCell);
		if (toolCall && toolCall.arguments) {
			try {
				const args = JSON.parse(toolCall.arguments);
				if (typeof args.filePath === 'string') {
					const uri = this._promptPathRepresentationService.resolveFilePath(args.filePath);
					if (!uri) {
						return undefined;
					}
					return this._workspaceService.notebookDocuments.find(doc => doc.uri.toString() === uri.toString());
				}
			} catch (e) {
				// Ignore parsing errors
			}
		}

		return undefined;
	}
}
1085
1086
/** Props for {@link SummaryMessageElement}. */
interface SummaryMessageProps extends BasePromptElementProps {
	// The summary text to render inside the conversation-summary tag.
	readonly summaryText: string;
	// Endpoint whose model family decides whether extra reminder instructions are added.
	readonly endpoint: IChatEndpoint;
}
1090
1091
class SummaryMessageElement extends PromptElement<SummaryMessageProps> {
1092
override async render(state: void, sizing: PromptSizing) {
1093
return <UserMessage>
1094
<Tag name='conversation-summary'>
1095
{this.props.summaryText}
1096
</Tag>
1097
{this.props.endpoint.family === 'gpt-4.1' && <Tag name='reminderInstructions'>
1098
<DefaultOpenAIKeepGoingReminder />
1099
</Tag>}
1100
</UserMessage>;
1101
}
1102
}
1103
1104
/** Props for {@link InlineSummarizationUserMessage}. */
export interface InlineSummarizationUserMessageProps extends BasePromptElementProps {
	// Endpoint for the current request; its model id selects model-specific instruction tweaks.
	readonly endpoint: IChatEndpoint;
}
1107
1108
/**
 * User message appended to the agent prompt when inline summarization is triggered.
 * Instructs the model to output ONLY a summary wrapped in `<summary>` tags, with
 * no tool calls. The summary is extracted from the response and stored on the round
 * for the next iteration.
 */
export class InlineSummarizationUserMessage extends PromptElement<InlineSummarizationUserMessageProps> {
	override async render(state: void, sizing: PromptSizing) {
		const isOpus = this.props.endpoint.model.startsWith('claude-opus');
		return <UserMessage priority={1000}>
			The conversation has grown too large for the context window and must be compacted now.<br />
			<br />
			{SummaryPrompt}
			<br />
			<br />
			IMPORTANT: Output your summary wrapped in {'<summary>'} and {'</summary>'} tags. Do NOT call any tools. Your ONLY task right now is to produce a comprehensive summary of the conversation so far.<br />
			{/* Opus models get the no-tools instruction repeated for extra emphasis. */}
			{isOpus && <>
				<br />
				IMPORTANT: Do NOT call any tools. Your only task is to generate a text summary of the conversation. Do not attempt to execute any actions or make any tool calls.<br />
			</>}
		</UserMessage>;
	}
}
1131
1132
/**
 * Extracts an inline summary from the model's response text.
 *
 * Parsing strategy (multi-level fallback):
 * 1. Clean `<summary>...</summary>` tags → extracts content between them
 * 2. `<summary>` found but no closing tag → takes everything after `<summary>`
 * 3. No tags found → returns undefined (caller falls back to separate-call summarization)
 *
 * @returns The extracted summary text, or `undefined` if no summary could be found.
 */
export function extractInlineSummary(responseText: string): string | undefined {
	const openTag = '<summary>';
	const closeTag = '</summary>';

	const openIdx = responseText.indexOf(openTag);
	if (openIdx === -1) {
		// No opening tag anywhere — nothing to extract.
		return undefined;
	}

	// Content runs from just past the opening tag to the closing tag if one
	// exists, or to the end of the text otherwise.
	const contentStart = openIdx + openTag.length;
	const closeIdx = responseText.indexOf(closeTag, contentStart);
	const contentEnd = closeIdx === -1 ? responseText.length : closeIdx;
	return responseText.substring(contentStart, contentEnd).trim();
}
1161
1162