Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/prompts/node/agent/summarizedConversationHistory.tsx
13405 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import * as l10n from '@vscode/l10n';
7
import { BasePromptElementProps, PrioritizedList, PromptElement, PromptMetadata, PromptSizing, Raw, SystemMessage, UserMessage } from '@vscode/prompt-tsx';
8
import { BudgetExceededError } from '@vscode/prompt-tsx/dist/base/materialized';
9
import { ChatMessage } from '@vscode/prompt-tsx/dist/base/output/rawTypes';
10
import type { ChatResponsePart, ChatResultPromptTokenDetail, LanguageModelToolInformation, NotebookDocument, Progress } from 'vscode';
11
import { IChatHookService, PreCompactHookInput } from '../../../../platform/chat/common/chatHookService';
12
import { ChatFetchResponseType, ChatLocation, ChatResponse, FetchSuccess } from '../../../../platform/chat/common/commonTypes';
13
import { getTextPart } from '../../../../platform/chat/common/globalStringUtils';
14
import { IHistoricalTurn, ISessionTranscriptService } from '../../../../platform/chat/common/sessionTranscriptService';
15
import { ConfigKey, IConfigurationService } from '../../../../platform/configuration/common/configurationService';
16
import { isAnthropicFamily, isGeminiFamily } from '../../../../platform/endpoint/common/chatModelCapabilities';
17
import { ILogService } from '../../../../platform/log/common/logService';
18
import { CUSTOM_TOOL_SEARCH_NAME } from '../../../../platform/networking/common/anthropic';
19
import { IChatEndpoint } from '../../../../platform/networking/common/networking';
20
import { APIUsage } from '../../../../platform/networking/common/openai';
21
import { IPromptPathRepresentationService } from '../../../../platform/prompts/common/promptPathRepresentationService';
22
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry';
23
import { ThinkingData } from '../../../../platform/thinking/common/thinking';
24
import { computePromptTokenDetails } from '../../../../platform/tokenizer/node/promptTokenDetails';
25
import { IWorkspaceService } from '../../../../platform/workspace/common/workspaceService';
26
import { CancellationToken } from '../../../../util/vs/base/common/cancellation';
27
import { CancellationError, isCancellationError } from '../../../../util/vs/base/common/errors';
28
import { Iterable } from '../../../../util/vs/base/common/iterator';
29
import { StopWatch } from '../../../../util/vs/base/common/stopwatch';
30
import { generateUuid } from '../../../../util/vs/base/common/uuid';
31
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
32
import { ChatResponseProgressPart2 } from '../../../../vscodeTypes';
33
import { ToolCallingLoop } from '../../../intents/node/toolCallingLoop';
34
import { IResultMetadata } from '../../../prompt/common/conversation';
35
import { IBuildPromptContext, IToolCallRound } from '../../../prompt/common/intents';
36
import { ToolName } from '../../../tools/common/toolNames';
37
import { normalizeToolSchema } from '../../../tools/common/toolSchemaNormalizer';
38
import { NotebookSummary } from '../../../tools/node/notebookSummaryTool';
39
import { renderPromptElement } from '../base/promptRenderer';
40
import { Tag } from '../base/tag';
41
import { ChatToolCalls } from '../panel/toolCalling';
42
import { AgentUserMessage, AgentUserMessageCustomizations, getUserMessagePropsFromAgentProps, getUserMessagePropsFromTurn } from './agentPrompt';
43
import { DefaultOpenAIKeepGoingReminder } from './openai/defaultOpenAIPrompt';
44
import { SimpleSummarizedHistory } from './simpleSummarizedHistoryPrompt';
45
46
/**
 * Props for {@link ConversationHistorySummarizationPrompt}.
 */
export interface ConversationHistorySummarizationPromptProps extends SummarizedAgentHistoryProps {
	/** When true, history is rendered via SimpleSummarizedHistory instead of the full ConversationHistory element. */
	readonly simpleMode?: boolean;
}
49
50
// System-prompt body instructing the model how to produce a structured,
// comprehensive summary of the conversation so it can be compacted without
// losing context. Rendered inside the SystemMessage of
// ConversationHistorySummarizationPrompt below.
const SummaryPrompt = <>
	Your task is to create a comprehensive, detailed summary of the entire conversation that captures all essential information needed to seamlessly continue the work without any loss of context. This summary will be used to compact the conversation while preserving critical technical details, decisions, and progress.<br />

	## Recent Context Analysis<br />

	Pay special attention to the most recent agent commands and tool executions that led to this summarization being triggered. Include:<br />
	- **Last Agent Commands**: What specific actions/tools were just executed<br />
	- **Tool Results**: Key outcomes from recent tool calls (truncate if very long, but preserve essential information)<br />
	- **Immediate State**: What was the system doing right before summarization<br />
	- **Triggering Context**: What caused the token budget to be exceeded<br />

	## Analysis Process<br />

	Before providing your final summary, wrap your analysis in `&lt;analysis&gt;` tags to organize your thoughts systematically:<br />

	1. **Chronological Review**: Go through the conversation chronologically, identifying key phases and transitions<br />
	2. **Intent Mapping**: Extract all explicit and implicit user requests, goals, and expectations<br />
	3. **Technical Inventory**: Catalog all technical concepts, tools, frameworks, and architectural decisions<br />
	4. **Code Archaeology**: Document all files, functions, and code patterns that were discussed or modified<br />
	5. **Progress Assessment**: Evaluate what has been completed vs. what remains pending<br />
	6. **Context Validation**: Ensure all critical information for continuation is captured<br />
	7. **Recent Commands Analysis**: Document the specific agent commands and tool results from the most recent operations<br />

	## Summary Structure<br />

	Your summary must include these sections in order, following the exact format below:<br />

	<Tag name='analysis'>
		[Chronological Review: Walk through conversation phases: initial request → exploration → implementation → debugging → current state]<br />
		[Intent Mapping: List each explicit user request with message context]<br />
		[Technical Inventory: Catalog all technologies, patterns, and decisions mentioned]<br />
		[Code Archaeology: Document every file, function, and code change discussed]<br />
		[Progress Assessment: What's done vs. pending with specific status]<br />
		[Context Validation: Verify all continuation context is captured]<br />
		[Recent Commands Analysis: Last agent commands executed, tool results (truncated if long), immediate pre-summarization state]<br />
	</Tag><br />

	<Tag name='summary'>
		1. Conversation Overview:<br />
		- Primary Objectives: [All explicit user requests and overarching goals with exact quotes]<br />
		- Session Context: [High-level narrative of conversation flow and key phases]<br />
		- User Intent Evolution: [How user's needs or direction changed throughout conversation]<br />

		2. Technical Foundation:<br />
		- [Core Technology 1]: [Version/details and purpose]<br />
		- [Framework/Library 2]: [Configuration and usage context]<br />
		- [Architectural Pattern 3]: [Implementation approach and reasoning]<br />
		- [Environment Detail 4]: [Setup specifics and constraints]<br />

		3. Codebase Status:<br />
		- [File Name 1]:<br />
		- Purpose: [Why this file is important to the project]<br />
		- Current State: [Summary of recent changes or modifications]<br />
		- Key Code Segments: [Important functions/classes with brief explanations]<br />
		- Dependencies: [How this relates to other components]<br />
		- [File Name 2]:<br />
		- Purpose: [Role in the project]<br />
		- Current State: [Modification status]<br />
		- Key Code Segments: [Critical code blocks]<br />
		- [Additional files as needed]<br />

		4. Problem Resolution:<br />
		- Issues Encountered: [Technical problems, bugs, or challenges faced]<br />
		- Solutions Implemented: [How problems were resolved and reasoning]<br />
		- Debugging Context: [Ongoing troubleshooting efforts or known issues]<br />
		- Lessons Learned: [Important insights or patterns discovered]<br />

		5. Progress Tracking:<br />
		- Completed Tasks: [What has been successfully implemented with status indicators]<br />
		- Partially Complete Work: [Tasks in progress with current completion status]<br />
		- Validated Outcomes: [Features or code confirmed working through testing]<br />

		6. Active Work State:<br />
		- Current Focus: [Precisely what was being worked on in most recent messages]<br />
		- Recent Context: [Detailed description of last few conversation exchanges]<br />
		- Working Code: [Code snippets being modified or discussed recently]<br />
		- Immediate Context: [Specific problem or feature being addressed before summary]<br />

		7. Recent Operations:<br />
		- Last Agent Commands: [Specific tools/actions executed just before summarization with exact command names]<br />
		- Tool Results Summary: [Key outcomes from recent tool executions - truncate long results but keep essential info]<br />
		- Pre-Summary State: [What the agent was actively doing when token budget was exceeded]<br />
		- Operation Context: [Why these specific commands were executed and their relationship to user goals]<br />

		8. Continuation Plan:<br />
		- [Pending Task 1]: [Details and specific next steps with verbatim quotes]<br />
		- [Pending Task 2]: [Requirements and continuation context]<br />
		- [Priority Information]: [Which tasks are most urgent or logically sequential]<br />
		- [Next Action]: [Immediate next step with direct quotes from recent messages]<br />
	</Tag><br />

	## Quality Guidelines<br />

	- **Precision**: Include exact filenames, function names, variable names, and technical terms<br />
	- **Completeness**: Capture all context needed to continue without re-reading the full conversation<br />
	- **Clarity**: Write for someone who needs to pick up exactly where the conversation left off<br />
	- **Verbatim Accuracy**: Use direct quotes for task specifications and recent work context<br />
	- **Technical Depth**: Include enough detail for complex technical decisions and code patterns<br />
	- **Logical Flow**: Present information in a way that builds understanding progressively<br />

	This summary should serve as a comprehensive handoff document that enables seamless continuation of all active work streams while preserving the full technical and contextual richness of the original conversation.<br />
</>;
152
153
/**
 * Prompt used to summarize conversation history when the context window is exceeded.
 *
 * Renders the summarization instructions (SummaryPrompt, plus any user-supplied
 * summarizationInstructions) as the system message, the history to be summarized
 * (full or simple form depending on `simpleMode`), an optional snapshot of the
 * working notebook, and a final user message requesting the summary.
 */
export class ConversationHistorySummarizationPrompt extends PromptElement<ConversationHistorySummarizationPromptProps> {
	override async render(state: void, sizing: PromptSizing) {
		// simpleMode uses the cheaper, flattened history renderer.
		const history = this.props.simpleMode ?
			<SimpleSummarizedHistory priority={1} promptContext={this.props.promptContext} location={this.props.location} endpoint={this.props.endpoint} maxToolResultLength={this.props.maxToolResultLength} /> :
			<ConversationHistory priority={1} promptContext={this.props.promptContext} location={this.props.location} endpoint={this.props.endpoint} maxToolResultLength={this.props.maxToolResultLength} enableCacheBreakpoints={this.props.enableCacheBreakpoints} />;
		// claude-opus models get an extra explicit instruction below not to call tools while summarizing.
		const isOpus = this.props.endpoint.model.startsWith('claude-opus');
		return (
			<>
				<SystemMessage priority={this.props.priority}>
					{SummaryPrompt}
					{this.props.summarizationInstructions && <>
						<br /><br />
						## Additional instructions from the user:<br />
						{this.props.summarizationInstructions}
					</>}
				</SystemMessage>
				{history}
				{this.props.workingNotebook && <WorkingNotebookSummary priority={this.props.priority - 2} notebook={this.props.workingNotebook} />}
				<UserMessage priority={this.props.priority}>
					Summarize the conversation history so far, paying special attention to the most recent agent commands and tool results that triggered this summarization. Structure your summary using the enhanced format provided in the system message.<br />
					{isOpus && <>
						<br />
						IMPORTANT: Do NOT call any tools. Your only task is to generate a text summary of the conversation. Do not attempt to execute any actions or make any tool calls.<br />
					</>}
					Focus particularly on:<br />
					- The specific agent commands/tools that were just executed<br />
					- The results returned from these recent tool calls (truncate if very long but preserve key information)<br />
					- What the agent was actively working on when the token budget was exceeded<br />
					- How these recent operations connect to the overall user goals<br />

					Include all important tool calls and their results as part of the appropriate sections, with special emphasis on the most recent operations.
				</UserMessage>
			</>
		);
	}
}
192
193
class WorkingNotebookSummary extends PromptElement<NotebookSummaryProps> {
194
override async render(state: void, sizing: PromptSizing) {
195
return (
196
<UserMessage>
197
This is the current state of the notebook that you have been working on:<br />
198
<NotebookSummary notebook={this.props.notebook} includeCellLines={false} altDoc={undefined} />
199
</UserMessage>
200
);
201
}
202
}
203
204
/** Props for {@link WorkingNotebookSummary}. */
export interface NotebookSummaryProps extends BasePromptElementProps {
	/** The notebook whose current state should be summarized. */
	readonly notebook: NotebookDocument;
}
207
208
/**
 * Conversation history rendered with tool calls and summaries.
 *
 * Walks the history backwards until it finds a tool call round carrying a
 * summary; everything before that round is represented by the summary text
 * alone, everything after is rendered verbatim. Elements are pushed in
 * reverse and flipped into chronological order at the end via
 * `history.reverse()`.
 */
class ConversationHistory extends PromptElement<SummarizedAgentHistoryProps> {
	override async render(state: void, sizing: PromptSizing) {
		// Iterate over the turns in reverse order until we find a turn with a tool call round that was summarized
		const history: PromptElement[] = [];

		// If we have a stop hook query, add it as a new user message at the very end of the conversation.
		// Push it first so that after history.reverse() it will be last.
		// Priorities used below: stop-hook query (901) > current user message (900) > current-turn tool calls (899).
		if (this.props.promptContext.hasStopHookQuery) {
			history.push(<UserMessage priority={901}>{this.props.promptContext.query}</UserMessage>);
		}

		// Handle the possibility that we summarized partway through the current turn (e.g. if we accumulated many tool call rounds)
		let summaryForCurrentTurn: string | undefined = undefined;
		let thinkingForFirstRoundAfterSummarization: ThinkingData | undefined = undefined;
		if (this.props.promptContext.toolCallRounds?.length) {
			// Collect the rounds after the most recent summarized round (scanning newest -> oldest).
			const toolCallRounds: IToolCallRound[] = [];
			for (let i = this.props.promptContext.toolCallRounds.length - 1; i >= 0; i--) {
				const toolCallRound = this.props.promptContext.toolCallRounds[i];
				if (toolCallRound.summary) {
					// This tool call round was summarized
					summaryForCurrentTurn = toolCallRound.summary;
					thinkingForFirstRoundAfterSummarization = toolCallRound.thinking;
					break;
				}
				toolCallRounds.push(toolCallRound);
			}

			// Reverse the tool call rounds so they are in chronological order
			toolCallRounds.reverse();

			// For Anthropic models with thinking enabled, set the thinking on the first round
			// so it gets rendered as the first thinking block after summarization
			if (isAnthropicFamily(this.props.endpoint) && thinkingForFirstRoundAfterSummarization && toolCallRounds.length > 0 && !toolCallRounds[0].thinking) {
				toolCallRounds[0].thinking = thinkingForFirstRoundAfterSummarization;
			}

			history.push(<ChatToolCalls priority={899} flexGrow={2} promptContext={this.props.promptContext} toolCallRounds={toolCallRounds} toolCallResults={this.props.promptContext.toolCallResults} enableCacheBreakpoints={this.props.enableCacheBreakpoints} truncateAt={this.props.maxToolResultLength} />);
		}

		if (summaryForCurrentTurn) {
			// The summary stands in for the user message and all earlier turns,
			// so we can return immediately without rendering any older history.
			history.push(<SummaryMessageElement endpoint={this.props.endpoint} summaryText={summaryForCurrentTurn} />);

			return (<PrioritizedList priority={this.props.priority} descending={false} passPriority={true}>
				{history.reverse()}
			</PrioritizedList>);
		}

		// Render the original user message:
		// - Always render for non-continuation (normal first iteration)
		// - Also render for stop hook continuation (the original message is needed, frozen content will provide it)
		if (!this.props.promptContext.isContinuation || this.props.promptContext.hasStopHookQuery) {
			history.push(<AgentUserMessage flexGrow={2} priority={900} {...getUserMessagePropsFromAgentProps(this.props, {
				userQueryTagName: this.props.userQueryTagName,
				ReminderInstructionsClass: this.props.ReminderInstructionsClass,
				ToolReferencesHintClass: this.props.ToolReferencesHintClass,
			})} />);
		}

		// We may have a summary from earlier in the conversation, but skip history if we have a new summary
		for (const [i, turn] of [...this.props.promptContext.history.entries()].reverse()) {
			const metadata = turn.resultMetadata;

			// Build this list in chronological order
			const turnComponents: PromptElement[] = [];

			// Turn anatomy
			// ______________
			// |            |
			// |    USER    |
			// |            |
			// | ASSISTANT  |
			// |            |
			// |    TOOL    | <-- { summary: ..., toolCallRoundId: ... }
			// | ASSISTANT  |
			// |____________|

			let summaryForTurn: SummarizedConversationHistoryMetadata | undefined;
			// If a tool call limit is exceeded, the tool call from this turn will
			// have been aborted and any result should be found in the next turn.
			const toolCallResultInNextTurn = metadata?.maxToolCallsExceeded;
			let toolCallResults = metadata?.toolCallResults;
			if (toolCallResultInNextTurn) {
				// For the last history turn the "next turn" is the current one, whose
				// results live on the prompt context rather than in turn metadata.
				const nextMetadata = this.props.promptContext.history.at(i + 1)?.responseChatResult?.metadata as IResultMetadata | undefined;
				const mergeFrom = i === this.props.promptContext.history.length - 1 ? this.props.promptContext.toolCallResults : nextMetadata?.toolCallResults;
				toolCallResults = { ...toolCallResults, ...mergeFrom };
			}

			// Find the latest tool call round that was summarized
			// NOTE(review): this inner `i` shadows the outer turn index; it only indexes this turn's rounds.
			const toolCallRounds: IToolCallRound[] = [];
			for (let i = turn.rounds.length - 1; i >= 0; i--) {
				const round = turn.rounds[i];
				summaryForTurn = round.summary ? new SummarizedConversationHistoryMetadata(round.id, round.summary) : undefined;
				if (summaryForTurn) {
					break;
				}
				toolCallRounds.push(round);
			}

			if (summaryForTurn) {
				// We have a summary for a tool call round that was part of this turn
				turnComponents.push(<SummaryMessageElement endpoint={this.props.endpoint} summaryText={summaryForTurn.text} />);
			} else if (!turn.isContinuation) {
				turnComponents.push(<AgentUserMessage flexGrow={1} {...getUserMessagePropsFromTurn(turn, this.props.endpoint, {
					userQueryTagName: this.props.userQueryTagName,
					ReminderInstructionsClass: this.props.ReminderInstructionsClass,
					ToolReferencesHintClass: this.props.ToolReferencesHintClass,
				})} />);
			}

			// Reverse the tool call rounds so they are in chronological order
			toolCallRounds.reverse();
			turnComponents.push(<ChatToolCalls
				flexGrow={1}
				promptContext={this.props.promptContext}
				toolCallRounds={toolCallRounds}
				toolCallResults={toolCallResults}
				isHistorical={!(toolCallResultInNextTurn && i === this.props.promptContext.history.length - 1)}
				truncateAt={this.props.maxToolResultLength}
			/>);

			history.push(...turnComponents.reverse());
			if (summaryForTurn) {
				// All preceding turns are covered by the summary and shouldn't be included verbatim
				break;
			}
		}

		return (<PrioritizedList priority={this.props.priority} descending={false} passPriority={true}>
			{history.reverse()}
		</PrioritizedList>);
	}
}
343
344
/**
 * Optional details recorded on {@link SummarizedConversationHistoryMetadata},
 * describing how and why a summarization run happened. All fields are copied
 * verbatim onto the metadata object.
 */
export interface ISummarizedConversationHistoryMetadataOptions {
	/** Thinking data associated with the summarized round. */
	readonly thinking?: ThinkingData;
	/** API usage reported by the summarization request. */
	readonly usage?: APIUsage;
	/** Token breakdown of the summarization prompt. */
	readonly promptTokenDetails?: readonly ChatResultPromptTokenDetail[];
	/** Model that produced the summary. */
	readonly model?: string;
	/** Summarization mode used ('simple' or 'full' — see SummaryMode). */
	readonly summarizationMode?: string;
	/** Total number of tool call rounds in the conversation. */
	readonly numRounds?: number;
	/** Rounds accumulated since the previous summarization. */
	readonly numRoundsSinceLastSummarization?: number;
	/** Wall-clock duration of the summarization run. */
	readonly durationMs?: number;
	/** Whether summarization ran in the foreground or background. */
	readonly source?: 'foreground' | 'background';
	/** Outcome label for the run — NOTE(review): consumed elsewhere; confirm the value set. */
	readonly outcome?: string;
	/** Context length before compaction — NOTE(review): presumably in tokens; confirm. */
	readonly contextLengthBefore?: number;
}
357
358
export class SummarizedConversationHistoryMetadata extends PromptMetadata {
359
public readonly toolCallRoundId: string;
360
public readonly text: string;
361
public readonly thinking?: ThinkingData;
362
public readonly usage?: APIUsage;
363
public readonly promptTokenDetails?: readonly ChatResultPromptTokenDetail[];
364
public readonly model?: string;
365
public readonly summarizationMode?: string;
366
public readonly numRounds?: number;
367
public readonly numRoundsSinceLastSummarization?: number;
368
public readonly durationMs?: number;
369
public readonly source?: 'foreground' | 'background';
370
public readonly outcome?: string;
371
public readonly contextLengthBefore?: number;
372
373
constructor(
374
toolCallRoundId: string,
375
text: string,
376
options?: ISummarizedConversationHistoryMetadataOptions,
377
) {
378
super();
379
this.toolCallRoundId = toolCallRoundId;
380
this.text = text;
381
this.thinking = options?.thinking;
382
this.usage = options?.usage;
383
this.promptTokenDetails = options?.promptTokenDetails;
384
this.model = options?.model;
385
this.summarizationMode = options?.summarizationMode;
386
this.numRounds = options?.numRounds;
387
this.numRoundsSinceLastSummarization = options?.numRoundsSinceLastSummarization;
388
this.durationMs = options?.durationMs;
389
this.source = options?.source;
390
this.outcome = options?.outcome;
391
this.contextLengthBefore = options?.contextLengthBefore;
392
}
393
}
394
395
/**
 * Props shared by the summarized-history prompt elements in this file.
 */
export interface SummarizedAgentHistoryProps extends BasePromptElementProps, AgentUserMessageCustomizations {
	/** Base priority applied to the rendered history/messages. */
	readonly priority: number;
	/** Endpoint whose model renders and (when triggered) produces the summary. */
	readonly endpoint: IChatEndpoint;
	readonly location: ChatLocation;
	/** Prompt context carrying the query, history, and current tool call rounds. */
	readonly promptContext: IBuildPromptContext;
	/** When true, SummarizedConversationHistory runs the summarizer before rendering. */
	readonly triggerSummarize?: boolean;
	/** Tools whose token cost is reserved when summarizing in Full mode. */
	readonly tools?: ReadonlyArray<LanguageModelToolInformation> | undefined;
	/** Forwarded to ChatToolCalls when rendering tool call rounds. */
	readonly enableCacheBreakpoints?: boolean;
	/** If set, its current state is rendered via WorkingNotebookSummary. */
	readonly workingNotebook?: NotebookDocument;
	/** Truncation threshold for rendered tool results (ChatToolCalls truncateAt). */
	readonly maxToolResultLength: number;
	/** Optional hard cap on summary tokens; effective budget = min(prompt sizing tokenBudget, this value) */
	readonly maxSummaryTokens?: number;
	/** Optional custom instructions to include in the summarization prompt */
	readonly summarizationInstructions?: string;
	/** Skip Full mode and go straight to Simple mode for foreground budget-exceeded recovery. */
	readonly forceSimpleSummary?: boolean;
}
412
413
/**
 * Renders conversation history with tool calls and summaries, triggering summarization while rendering if necessary.
 */
export class SummarizedConversationHistory extends PromptElement<SummarizedAgentHistoryProps> {
	constructor(
		props: SummarizedAgentHistoryProps,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@ISessionTranscriptService private readonly sessionTranscriptService: ISessionTranscriptService,
	) {
		super(props);
	}

	/**
	 * Flushes the session transcript, optionally runs the summarizer (when
	 * `triggerSummarize` is set), then renders the (possibly newly summarized)
	 * history. A successful summarization is surfaced to callers as
	 * SummarizedConversationHistoryMetadata via a <meta /> element and also
	 * written back onto the matching tool call round.
	 */
	override async render(state: void, sizing: PromptSizing, progress: Progress<ChatResponsePart> | undefined, token: CancellationToken | undefined) {
		// Shallow copy of the context handed to ConversationHistory below.
		const promptContext = { ...this.props.promptContext };
		let historyMetadata: SummarizedConversationHistoryMetadata | undefined;
		const sessionId = this.props.promptContext.conversation?.sessionId;
		if (sessionId) {
			// Lazily start the transcript session now (before summarization) so it
			// captures the full pre-compaction conversation. startSession is
			// idempotent — if hooks already started it, this is a no-op.
			await this.ensureTranscriptSession();

			if (this.sessionTranscriptService.getTranscriptPath(sessionId)) {
				await this.sessionTranscriptService.flush(sessionId);
			}
		}

		if (this.props.triggerSummarize) {

			const summarizer = this.instantiationService.createInstance(ConversationHistorySummarizer, this.props, sizing, progress, token);
			const summResult = await summarizer.summarizeHistory();
			if (summResult) {
				historyMetadata = new SummarizedConversationHistoryMetadata(summResult.toolCallRoundId, summResult.summary, {
					thinking: summResult.thinking,
					usage: summResult.usage,
					promptTokenDetails: summResult.promptTokenDetails,
					model: summResult.model,
					summarizationMode: summResult.summarizationMode,
					numRounds: summResult.numRounds,
					numRoundsSinceLastSummarization: summResult.numRoundsSinceLastSummarization,
					durationMs: summResult.durationMs,
				});
				// Persist the summary onto the round so the rendered history picks it up.
				this.addSummaryToHistory(summResult.summary, summResult.toolCallRoundId, summResult.thinking);
			}
		}

		return <>
			{historyMetadata && <meta value={historyMetadata} />}
			<ConversationHistory
				{...this.props}
				promptContext={promptContext}
				enableCacheBreakpoints={this.props.enableCacheBreakpoints} />
		</>;
	}

	/**
	 * Lazily starts a transcript session with the full conversation history.
	 * This is called just before summarization so that the transcript file
	 * contains the complete pre-compaction conversation. If a session was
	 * already started (e.g. by hooks), this is a no-op.
	 */
	private async ensureTranscriptSession(): Promise<void> {
		const sessionId = this.props.promptContext.conversation?.sessionId;
		if (!sessionId) {
			return;
		}

		// Short-circuit if session already exists — avoids rebuilding
		// the full IHistoricalTurn[] array on every render.
		if (this.sessionTranscriptService.getTranscriptPath(sessionId)) {
			return;
		}

		// Build IHistoricalTurn[] from the prompt context's Turn[] history
		const history: IHistoricalTurn[] = this.props.promptContext.history.map(turn => ({
			userMessage: turn.request.message,
			timestamp: turn.startTime,
			rounds: turn.rounds.map(round => ({
				response: round.response,
				toolCalls: round.toolCalls.map(tc => ({
					name: tc.name,
					arguments: tc.arguments,
					id: tc.id,
				})),
				// Thinking text may be a single string or a list of fragments; join fragments.
				reasoningText: round.thinking
					? (Array.isArray(round.thinking.text) ? round.thinking.text.join('') : round.thinking.text)
					: undefined,
				timestamp: round.timestamp,
			})),
		}));

		await this.sessionTranscriptService.startSession(sessionId, undefined, history.length > 0 ? history : undefined);
	}

	/**
	 * Attaches a freshly produced summary (and its thinking data) to the tool
	 * call round it belongs to — first checking the current turn's rounds, then
	 * searching earlier turns newest-first.
	 */
	private addSummaryToHistory(summary: string, toolCallRoundId: string, thinking?: ThinkingData): void {
		const round = this.props.promptContext.toolCallRounds?.find(round => round.id === toolCallRoundId);
		if (round) {
			round.summary = summary;
			round.thinking = thinking;
			return;
		}

		// Adding summaries to rounds in previous turns will only be persisted during the current session.
		// For the next turn, need to restore them from metadata (see normalizeSummariesOnRounds).
		for (const turn of [...this.props.promptContext.history].reverse()) {
			const round = turn.rounds.find(round => round.id === toolCallRoundId);
			if (round) {
				round.summary = summary;
				round.thinking = thinking;
				break;
			}
		}
	}
}
527
528
/**
 * Summarization strategies. 'full' renders the complete history via
 * ConversationHistory; 'simple' uses the cheaper SimpleSummarizedHistory
 * fallback. Values double as the AgentHistorySummarizationMode config values
 * (see getSummaryWithFallback).
 */
enum SummaryMode {
	Simple = 'simple',
	Full = 'full'
}
532
533
/**
 * Result of a single summarization attempt, bundling the fetched summary with
 * details used for metadata/telemetry.
 */
interface SummarizationResult {
	/** Successful fetch whose value is the summary text. */
	result: FetchSuccess<string>;
	/** Token breakdown of the summarization prompt. */
	promptTokenDetails?: readonly ChatResultPromptTokenDetail[];
	/** Model that produced the summary. */
	model?: string;
	/** Mode used for this attempt ('simple' or 'full'). */
	summarizationMode?: string;
	/** Total number of tool call rounds in the conversation. */
	numRounds?: number;
	/** Rounds accumulated since the previous summarization. */
	numRoundsSinceLastSummarization?: number;
	/** Wall-clock duration of the summarization run. */
	durationMs?: number;
}
542
543
/**
 * Performs the actual conversation summarization: runs the PreCompact hook,
 * builds summarization props, then requests a summary from the endpoint —
 * trying Full mode and falling back to Simple mode on failure (see
 * getSummaryWithFallback).
 */
class ConversationHistorySummarizer {
	// Unique id generated per summarization run.
	private readonly summarizationId = generateUuid();

	constructor(
		private readonly props: SummarizedAgentHistoryProps,
		private readonly sizing: PromptSizing,
		private readonly progress: Progress<ChatResponsePart> | undefined,
		private readonly token: CancellationToken | undefined,
		// Services below are injected via instantiation-service decorators.
		@ITelemetryService private readonly telemetryService: ITelemetryService,
		@ILogService private readonly logService: ILogService,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@IConfigurationService private readonly configurationService: IConfigurationService,
		@IChatHookService private readonly chatHookService: IChatHookService,
		@ISessionTranscriptService private readonly sessionTranscriptService: ISessionTranscriptService,
	) { }
558
559
	/**
	 * Produces a summary of the conversation history.
	 *
	 * Runs the PreCompact hook first, then starts summarization while reporting
	 * a "Compacting conversation..." progress part whose completion awaits the
	 * summary (errors are swallowed there so the progress part always resolves;
	 * the real error surfaces from the awaited promise below). The returned
	 * summary text has a transcript hint appended when a session is available.
	 *
	 * @returns the summary text, the tool call round it is anchored to, and
	 * details (thinking, usage, token breakdown, model, mode, round counts,
	 * duration) for metadata/telemetry.
	 */
	async summarizeHistory(): Promise<{ summary: string; toolCallRoundId: string; thinking?: ThinkingData; usage?: APIUsage; promptTokenDetails?: readonly ChatResultPromptTokenDetail[]; model?: string; summarizationMode?: string; numRounds?: number; numRoundsSinceLastSummarization?: number; durationMs?: number }> {
		// Execute pre-compact hook before summarization to allow hooks to archive transcripts or perform cleanup
		await this.executePreCompactHook();

		// The props builder is a separate unit so tests can create props and call this directly.
		const propsInfo = this.instantiationService.createInstance(SummarizedConversationHistoryPropsBuilder).getProps(this.props);

		const summaryPromise = this.getSummaryWithFallback(propsInfo);
		this.progress?.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {
			try {
				await summaryPromise;
			} catch { }
			return l10n.t('Compacted conversation');
		}));

		const summary = await summaryPromise;
		const { numRounds, numRoundsSinceLastSummarization } = computeSummarizationRoundCounts(this.props.promptContext.history, this.props.promptContext.toolCallRounds);
		return {
			summary: this.appendTranscriptHint(summary.result.value),
			toolCallRoundId: propsInfo.summarizedToolCallRoundId,
			thinking: propsInfo.summarizedThinking,
			usage: summary.result.usage,
			promptTokenDetails: summary.promptTokenDetails,
			model: summary.model,
			summarizationMode: summary.summarizationMode,
			numRounds,
			numRoundsSinceLastSummarization,
			durationMs: summary.durationMs,
		};
	}
589
590
private appendTranscriptHint(summary: string): string {
591
const sessionId = this.props.promptContext.conversation?.sessionId;
592
if (!sessionId) {
593
return summary;
594
}
595
return appendTranscriptHintToSummary(summary, sessionId, this.sessionTranscriptService);
596
}
597
598
private async getSummaryWithFallback(propsInfo: ISummarizedConversationHistoryInfo): Promise<SummarizationResult> {
599
const forceMode = this.configurationService.getConfig<string | undefined>(ConfigKey.Advanced.AgentHistorySummarizationMode);
600
if (this.props.forceSimpleSummary && forceMode !== SummaryMode.Full) {
601
// Foreground budget-exceeded recovery — go straight to Simple.
602
return await this.getSummary(SummaryMode.Simple, propsInfo);
603
}
604
if (forceMode === SummaryMode.Simple) {
605
return await this.getSummary(SummaryMode.Simple, propsInfo);
606
} else {
607
try {
608
return await this.getSummary(SummaryMode.Full, propsInfo);
609
} catch (e) {
610
if (isCancellationError(e)) {
611
throw e;
612
}
613
614
return await this.getSummary(SummaryMode.Simple, propsInfo);
615
}
616
}
617
}
618
619
private logInfo(message: string, mode: SummaryMode): void {
620
this.logService.info(`[ConversationHistorySummarizer] [${mode}] ${message}`);
621
}
622
623
/**
 * Executes the PreCompact hook before summarization starts, giving hook
 * scripts a chance to archive the transcript or perform cleanup before the
 * conversation is compacted. Hook failures are logged and never block
 * summarization.
 */
private async executePreCompactHook(): Promise<void> {
	const hooks = this.props.promptContext.request?.hooks;
	if (!hooks) {
		return;
	}

	try {
		const input = { trigger: 'auto' } satisfies PreCompactHookInput;
		const sessionId = this.props.promptContext.conversation?.sessionId;
		const results = await this.chatHookService.executeHook('PreCompact', hooks, input, sessionId, this.token ?? CancellationToken.None);

		// Surface individual hook errors in the log, but keep going.
		for (const result of results) {
			if (result.resultKind !== 'error') {
				continue;
			}
			const errorMessage = typeof result.output === 'string' ? result.output : 'Unknown error';
			this.logService.error(`[ConversationHistorySummarizer] PreCompact hook error: ${errorMessage}`);
		}
	} catch (error) {
		this.logService.error('[ConversationHistorySummarizer] Error executing PreCompact hook', error);
	}
}
649
650
/**
 * Performs a single summarization attempt in the given mode.
 *
 * Renders the summarization prompt, sanitizes the rendered messages for the
 * target model family, issues the chat request, and returns the validated
 * result together with token details and timing. Render and request failures
 * are reported via telemetry and rethrown to the caller (which may fall back
 * to Simple mode).
 *
 * @param mode Full (tools attached, tool_choice 'none') or Simple.
 * @param propsInfo Adjusted prompt props describing what to summarize.
 * @throws Any render/request error, CancellationError on cancel, or an error
 * from response validation (failed request / oversized summary).
 */
private async getSummary(mode: SummaryMode, propsInfo: ISummarizedConversationHistoryInfo): Promise<SummarizationResult> {
	const stopwatch = new StopWatch(false);

	// In Full mode, tools are sent alongside the summarization prompt with
	// tool_choice: 'none'. Reserve budget for them so the rendered messages
	// plus tools don't exceed the model's context window.
	const tools = this.props.tools;
	const toolTokens = mode === SummaryMode.Full && tools?.length
		? await this.props.endpoint.acquireTokenizer().countToolTokens(tools)
		: 0;
	// Clone the endpoint with a reduced token budget (90% of what remains after
	// reserving tool tokens, never below 1) only when tools actually cost tokens.
	const endpoint = toolTokens > 0
		? this.props.endpoint.cloneWithTokenOverride(
			Math.max(1, Math.floor((this.props.endpoint.modelMaxPromptTokens - toolTokens) * 0.9)))
		: this.props.endpoint;

	let summarizationPrompt: ChatMessage[];
	const associatedRequestId = this.props.promptContext.conversation?.getLatestTurn().id;
	try {
		// Render without cache breakpoints; Simple mode is signaled to the prompt element.
		summarizationPrompt = (await renderPromptElement(this.instantiationService, endpoint, ConversationHistorySummarizationPrompt, { ...propsInfo.props, enableCacheBreakpoints: false, simpleMode: mode === SummaryMode.Simple }, undefined, this.token)).messages;
		this.logInfo(`summarization prompt rendered in ${stopwatch.elapsed()}ms.`, mode);
	} catch (e) {
		// Distinguish budget overruns from other render failures in telemetry.
		const budgetExceeded = e instanceof BudgetExceededError;
		const outcome = budgetExceeded ? 'budget_exceeded' : 'renderError';
		this.logInfo(`Error rendering summarization prompt in mode: ${mode}. ${e.stack}`, mode);
		this.sendSummarizationTelemetry(outcome, '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e));
		throw e;
	}

	let summaryResponse: ChatResponse;
	let promptTypes: string | undefined;
	try {
		// Full mode sends the tool schemas (validated/normalized per model family)
		// so the model retains context about available tools while summarizing.
		const normalizedTools = mode === SummaryMode.Full ? normalizeToolSchema(
			endpoint.family,
			this.props.tools?.map(tool => ({
				function:
				{
					name: tool.name,
					description: tool.description,
					parameters: tool.inputSchema && Object.keys(tool.inputSchema).length ? tool.inputSchema : undefined
				}, type: 'function'
			})),
			(tool, rule) => {
				this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`);
			},
		) : undefined;
		// tool_choice 'none': tools are informational only — no calls allowed.
		const toolOpts = normalizedTools?.length ? {
			tool_choice: 'none' as const,
			tools: normalizedTools,
		} : undefined;

		// Sanitize the rendered prompt in place before converting to request messages.
		stripCacheBreakpoints(summarizationPrompt);
		replaceImageContentWithPlaceholders(summarizationPrompt);

		let messages = ToolCallingLoop.stripInternalToolCallIds(summarizationPrompt);

		// Strip custom client-side tool search (tool_search) tool_use/tool_result
		// pairs. The summarization call uses ChatLocation.Other but
		// createMessagesRequestBody still converts tool_search results to
		// tool_reference blocks (customToolSearchEnabled isn't gated by location).
		// Without tool search enabled in the request, Anthropic rejects them.
		if (isAnthropicFamily(endpoint)) {
			messages = stripToolSearchMessages(messages);
		}

		// Gemini strictly requires every function_call to have a matching function_response.
		// When prompt-tsx prunes tool result messages due to token budget, orphaned tool_calls
		// can remain, causing a 400 INVALID_ARGUMENT error. Strip them for Gemini models.
		if (isGeminiFamily(endpoint)) {
			const validationResult = ToolCallingLoop.validateToolMessagesCore(messages, { stripOrphanedToolCalls: true });
			messages = validationResult.messages;
			if (validationResult.strippedToolCallCount > 0) {
				this.logInfo(`Stripped ${validationResult.strippedToolCallCount} orphaned tool calls from summarization prompt`, mode);
				/* __GDPR__
					"summarization.strippedOrphanedToolCalls" : {
						"owner": "vijayu",
						"comment": "Tracks when orphaned tool calls are stripped from the summarization prompt for Gemini models",
						"strippedToolCallCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Number of orphaned tool_calls stripped from the summarization prompt." },
						"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
						"mode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode (simple or full)." }
					}
				*/
				this.telemetryService.sendMSFTTelemetryEvent('summarization.strippedOrphanedToolCalls', {
					model: endpoint.model,
					mode,
				}, {
					strippedToolCallCount: validationResult.strippedToolCallCount,
				});
			}
		}

		// Compact per-message fingerprint (role[-name]:charCount) used by telemetry
		// as a proxy for prompt-cache hit rate.
		promptTypes = messages.map(msg => `${msg.role}${'name' in msg && msg.name ? `-${msg.name}` : ''}:${getTextPart(msg.content).length}`).join(',');
		summaryResponse = await endpoint.makeChatRequest2({
			debugName: `summarizeConversationHistory-${mode}`,
			messages,
			finishedCb: undefined,
			location: ChatLocation.Other,
			requestOptions: {
				temperature: 0,
				stream: false,
				...toolOpts
			},
			telemetryProperties: associatedRequestId ? { associatedRequestId } : undefined,
			enableRetryOnFilter: true
		}, this.token ?? CancellationToken.None);
	} catch (e) {
		this.logInfo(`Error from summarization request. ${e.message}`, mode);
		this.sendSummarizationTelemetry('requestThrow', '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e));
		throw e;
	}

	// Compute a client-side token breakdown; the server-reported prompt token
	// total is attached only on success.
	const tokenizer = endpoint.acquireTokenizer();
	const promptTokenDetails = await computePromptTokenDetails({
		messages: summarizationPrompt,
		tokenizer,
		tools: this.props.tools ?? undefined,
		totalPromptTokens: summaryResponse.type === ChatFetchResponseType.Success ? summaryResponse.usage?.prompt_tokens : undefined,
	});

	const durationMs = stopwatch.elapsed();
	return {
		// handleSummarizationResponse throws on failure/oversize, so a returned
		// result is always a validated success.
		result: await this.handleSummarizationResponse(summaryResponse, mode, durationMs, promptTypes),
		promptTokenDetails,
		model: endpoint.model,
		summarizationMode: mode,
		durationMs,
	};
}
777
778
/**
 * Validates the raw summarization response. Failed requests and oversized
 * summaries emit telemetry and throw; a valid response is returned unchanged.
 * Cancellation is surfaced as a CancellationError so callers don't fall back.
 */
private async handleSummarizationResponse(response: ChatResponse, mode: SummaryMode, elapsedTime: number, promptTypes?: string): Promise<FetchSuccess<string>> {
	if (response.type !== ChatFetchResponseType.Success) {
		const detail = response.reason ?? response.type;
		this.sendSummarizationTelemetry(response.type, response.requestId, this.props.endpoint.model, mode, elapsedTime, undefined, detail);
		this.logInfo(`Summarization request failed. ${response.type} ${detail}`, mode);
		if (response.type === ChatFetchResponseType.Canceled) {
			throw new CancellationError();
		}

		throw new Error('Summarization request failed');
	}

	// Reject summaries that would not fit the (possibly capped) token budget.
	const summarySize = await this.sizing.countTokens(response.value);
	let effectiveBudget = this.sizing.tokenBudget;
	if (this.props.maxSummaryTokens) {
		effectiveBudget = Math.min(effectiveBudget, this.props.maxSummaryTokens);
	}
	if (summarySize > effectiveBudget) {
		this.sendSummarizationTelemetry('too_large', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage, `${summarySize} tokens exceeds budget ${effectiveBudget}`);
		this.logInfo(`Summary too large: ${summarySize} tokens (effective budget ${effectiveBudget})`, mode);
		throw new Error('Summary too large');
	}

	this.sendSummarizationTelemetry('success', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage, undefined, promptTypes);
	this.logInfo(`Summarization usage: prompt=${response.usage?.prompt_tokens ?? '?'}, cached=${response.usage?.prompt_tokens_details?.cached_tokens ?? '?'}, completion=${response.usage?.completion_tokens ?? '?'}`, mode);
	return response;
}
805
806
/**
 * Send telemetry for conversation summarization.
 * @param outcome High-level result of the summarization (for example, 'success', 'too_large', or the ChatFetchResponseType value)
 * @param requestId Unique identifier of the underlying chat request used for summarization
 * @param model Identifier of the language model used to generate the summary
 * @param mode Summarization mode indicating how the conversation was summarized
 * @param elapsedTime Total time in milliseconds taken for the summarization request
 * @param usage Token usage information for the summarization request, if available
 * @param detailedOutcome Optional detailed reason for non-success outcomes (for example, error or cancellation reason)
 * @param promptTypes Optional pre-computed promptTypes string for the summarization request
 */
private sendSummarizationTelemetry(outcome: string, requestId: string, model: string, mode: SummaryMode, elapsedTime: number, usage: APIUsage | undefined, detailedOutcome?: string, promptTypes?: string): void {
	const { numRounds, numRoundsSinceLastSummarization } = computeSummarizationRoundCounts(this.props.promptContext.history, this.props.promptContext.toolCallRounds);

	const turnIndex = this.props.promptContext.history.length;
	const curTurnRoundIndex = this.props.promptContext.toolCallRounds?.length ?? 0;

	// Prefer the current turn's most recent tool call; fall back to the last
	// call of the last round in history, then 'none'.
	const lastUsedTool = this.props.promptContext.toolCallRounds?.at(-1)?.toolCalls?.at(-1)?.name ??
		this.props.promptContext.history?.at(-1)?.rounds.at(-1)?.toolCalls?.at(-1)?.name ?? 'none';

	// Measurements must be numeric, so booleans are encoded as 0/1.
	const isDuringToolCalling = !!this.props.promptContext.toolCallRounds?.length ? 1 : 0;
	const conversationId = this.props.promptContext.conversation?.sessionId;
	const hasWorkingNotebook = this.props.workingNotebook ? 1 : 0;

	/* __GDPR__
		"summarizedConversationHistory" : {
			"owner": "roblourens",
			"comment": "Tracks when summarization happens and what the outcome was",
			"summarizationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "An ID to join all attempts of this summarization task." },
			"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state or failure reason of the summarization." },
			"detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "A more detailed error message." },
			"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID used for the summarization." },
			"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
			"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID that this summarization ran during." },
			"promptTypes": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Role and character count of each prompt message in order, as a proxy for cache hit rate (e.g. system:1234,user:567)." },
			"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The number of tool call rounds before this summarization was triggered." },
			"numRoundsSinceLastSummarization": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The number of tool call rounds since the last summarization." },
			"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
			"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn" },
			"lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The name of the last tool used before summarization." },
			"isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this summarization was triggered during a tool calling loop." },
			"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Id for the current chat conversation." },
			"hasWorkingNotebook": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether the conversation summary includes a working notebook." },
			"mode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The mode of the conversation summary." },
			"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The mode of the conversation summary." },
			"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The duration of the summarization attempt in ms." },
			"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, server side counted", "isMeasurement": true },
			"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens hitting cache as reported by server", "isMeasurement": true },
			"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of generated tokens", "isMeasurement": true }
		}
	*/
	this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
		summarizationId: this.summarizationId,
		outcome,
		detailedOutcome,
		requestId,
		chatRequestId: this.props.promptContext.conversation?.getLatestTurn().id,
		model,
		lastUsedTool,
		conversationId,
		mode,
		summarizationMode: mode, // Try to unstick GDPR
		promptTypes,
	}, {
		numRounds,
		numRoundsSinceLastSummarization,
		turnIndex,
		curTurnRoundIndex,
		isDuringToolCalling,
		hasWorkingNotebook,
		duration: elapsedTime,
		promptTokenCount: usage?.prompt_tokens,
		promptCacheTokenCount: usage?.prompt_tokens_details?.cached_tokens,
		responseTokenCount: usage?.completion_tokens,
	});
}
882
}
883
884
/**
 * Removes all cache-breakpoint content parts from every message, mutating the
 * message list in place.
 */
function stripCacheBreakpoints(messages: ChatMessage[]): void {
	for (const message of messages) {
		message.content = message.content.filter(
			part => part.type !== Raw.ChatCompletionContentPartKind.CacheBreakpoint);
	}
}
891
892
/**
 * Replaces every image content part with a short text placeholder, mutating
 * the messages in place. Non-image parts are left untouched.
 */
function replaceImageContentWithPlaceholders(messages: ChatMessage[]): void {
	for (const message of messages) {
		message.content = message.content.map(part =>
			part.type === Raw.ChatCompletionContentPartKind.Image
				? { type: Raw.ChatCompletionContentPartKind.Text, text: '[Image was attached]' }
				: part);
	}
}
902
903
/**
 * Bake a stable transcript pointer into a freshly-produced summary text.
 *
 * Shared by both the full/simple summarization path
 * ({@link ConversationHistorySummarizer}) and the inline background
 * summarization path in `agentIntent.ts`. The hint is appended exactly once,
 * at summary creation time, so the resulting string is frozen from then on
 * and replayed verbatim — preserving Anthropic prompt cache hits across
 * subsequent renders.
 *
 * Returns the input unchanged when there is no transcript on disk for the
 * session.
 */
export function appendTranscriptHintToSummary(summary: string, sessionId: string, sessionTranscriptService: ISessionTranscriptService): string {
	const transcriptUri = sessionTranscriptService.getTranscriptPath(sessionId);
	if (!transcriptUri) {
		return summary;
	}

	const transcriptPath = transcriptUri.fsPath;
	const lineCount = sessionTranscriptService.getLineCount(sessionId);

	// Assemble the hint line-by-line and join with newlines, so the output is
	// byte-identical regardless of whether a line count is available.
	const lines = [summary];
	lines.push(`If you need specific details from before compaction (such as exact code snippets, error messages, tool results, or content you previously generated), use the ${ToolName.ReadFile} tool to look up the full uncompacted conversation transcript at: "${transcriptPath}"`);
	if (lineCount !== undefined) {
		lines.push(`At the time this summary was created, the transcript had ${lineCount} lines.`);
	}
	lines.push(`Example usage: ${ToolName.ReadFile}(filePath: "${transcriptPath}")`);
	return lines.join('\n');
}
931
932
export function computeSummarizationRoundCounts(
933
history: IBuildPromptContext['history'],
934
currentRounds: readonly IToolCallRound[] | undefined,
935
): { numRounds: number; numRoundsSinceLastSummarization: number } {
936
const numRoundsInHistory = history.reduce((sum, turn) => sum + turn.rounds.length, 0);
937
const numRoundsInCurrentTurn = currentRounds?.length ?? 0;
938
const numRounds = numRoundsInHistory + numRoundsInCurrentTurn;
939
940
const reversedCurrentRounds = [...(currentRounds ?? [])].reverse();
941
let numRoundsSinceLastSummarization = reversedCurrentRounds.findIndex(round => round.summary);
942
if (numRoundsSinceLastSummarization === -1) {
943
let count = numRoundsInCurrentTurn;
944
outer: for (const turn of Iterable.reverse(Array.from(history))) {
945
for (const round of Iterable.reverse(Array.from(turn.rounds ?? []))) {
946
if (round.summary) {
947
numRoundsSinceLastSummarization = count;
948
break outer;
949
}
950
count++;
951
}
952
}
953
}
954
return { numRounds, numRoundsSinceLastSummarization };
955
}
956
957
/**
 * Strip custom client-side tool search (tool_search) tool_use and tool_result
 * messages from the conversation. The summarization call uses ChatLocation.Other
 * but createMessagesRequestBody still converts tool_search results to
 * tool_reference blocks (customToolSearchEnabled isn't gated by location).
 * Without tool search enabled in the request, Anthropic rejects tool_reference
 * content blocks with: "Input tag 'tool_reference' found using 'type' does not
 * match any of the expected tags".
 */
export function stripToolSearchMessages(messages: ChatMessage[]): ChatMessage[] {
	// First pass: collect the call ids of every tool_search invocation.
	const toolSearchIds = new Set<string>();
	for (const message of messages) {
		if (message.role !== Raw.ChatRole.Assistant || !message.toolCalls) {
			continue;
		}
		for (const tc of message.toolCalls) {
			if (tc.function.name === CUSTOM_TOOL_SEARCH_NAME) {
				toolSearchIds.add(tc.id);
			}
		}
	}

	// Nothing to strip — return the input untouched.
	if (toolSearchIds.size === 0) {
		return messages;
	}

	// Second pass: drop tool result messages with matching ids, and filter the
	// matching calls off of assistant messages (dropping the property entirely
	// when no calls remain).
	const result: ChatMessage[] = [];
	for (const message of messages) {
		if (message.role === Raw.ChatRole.Assistant && message.toolCalls) {
			const kept = message.toolCalls.filter(tc => !toolSearchIds.has(tc.id));
			if (kept.length !== message.toolCalls.length) {
				result.push({ ...message, toolCalls: kept.length > 0 ? kept : undefined });
				continue;
			}
		} else if (message.role === Raw.ChatRole.Tool && message.toolCallId && toolSearchIds.has(message.toolCallId)) {
			continue;
		}
		result.push(message);
	}
	return result;
}
994
995
/**
 * Inputs for a single summarization attempt: the adjusted prompt props plus
 * bookkeeping about which round the resulting summary will be attached to.
 */
export interface ISummarizedConversationHistoryInfo {
	// Prompt props adjusted so the rounds being summarized are what gets rendered.
	readonly props: SummarizedAgentHistoryProps;
	// Id of the tool call round that the summary anchors to (the last round
	// included in the summary).
	readonly summarizedToolCallRoundId: string;
	// Most recent thinking data from the current turn's rounds; only populated
	// for Anthropic-family endpoints (see SummarizedConversationHistoryPropsBuilder).
	readonly summarizedThinking?: ThinkingData;
}
1000
1001
/**
 * Computes the adjusted prompt props and anchor round for a summarization
 * attempt.
 *
 * Exported for test
 */
export class SummarizedConversationHistoryPropsBuilder {
	constructor(
		@IPromptPathRepresentationService private readonly _promptPathRepresentationService: IPromptPathRepresentationService,
		@IWorkspaceService private readonly _workspaceService: IWorkspaceService,
	) { }

	/**
	 * Derives the props for the summarization prompt and records the id of the
	 * last round covered by the summary (`summarizedToolCallRoundId`), so the
	 * summary can later be attached to that round.
	 *
	 * @throws Error('Nothing to summarize') when there are no prior rounds and
	 * no history.
	 */
	getProps(
		props: SummarizedAgentHistoryProps
	): ISummarizedConversationHistoryInfo {
		let toolCallRounds = props.promptContext.toolCallRounds;
		let isContinuation = props.promptContext.isContinuation;
		let summarizedToolCallRoundId = '';
		if (toolCallRounds && toolCallRounds.length > 1) {
			// If there are multiple tool call rounds, exclude the last one, because it must have put us over the limit.
			// Summarize from the previous round in this turn.
			toolCallRounds = toolCallRounds.slice(0, -1);
			summarizedToolCallRoundId = toolCallRounds.at(-1)!.id;
		} else if (props.promptContext.history.length > 0) {
			// If there is only one tool call round, then summarize from the last round of the last turn.
			// Or if there are no tool call rounds, then the new user message put us over the limit. (or the last assistant message?)
			// This flag excludes the last user message from the summary.
			isContinuation = true;
			toolCallRounds = [];
			summarizedToolCallRoundId = props.promptContext.history.at(-1)!.rounds.at(-1)!.id;
		} else {
			throw new Error('Nothing to summarize');
		}

		// For Anthropic models with thinking enabled, find the last assistant message with thinking
		// from all rounds being summarized (both current toolCallRounds and history).
		// This thinking will be used as the first thinking block after summarization.
		// NOTE(review): findLastThinking below only scans the current turn's
		// toolCallRounds — history is never searched; confirm whether the
		// comment or the code reflects the intent.
		const summarizedThinking = isAnthropicFamily(props.endpoint) ? this.findLastThinking(props) : undefined;
		const promptContext = {
			...props.promptContext,
			toolCallRounds,
			isContinuation,
		};
		return {
			props: {
				...props,
				workingNotebook: this.getWorkingNotebook(props),
				promptContext
			},
			summarizedToolCallRoundId,
			summarizedThinking
		};
	}

	/**
	 * Returns the thinking data of the newest tool call round in the current
	 * turn that has any, scanning newest-to-oldest. Does not search history.
	 */
	private findLastThinking(props: SummarizedAgentHistoryProps): ThinkingData | undefined {
		if (props.promptContext.toolCallRounds) {
			for (let i = props.promptContext.toolCallRounds.length - 1; i >= 0; i--) {
				const round = props.promptContext.toolCallRounds[i];
				if (round.thinking) {
					return round.thinking;
				}
			}
		}
		return undefined;
	}

	/**
	 * Resolves the notebook document targeted by the most recent
	 * RunNotebookCell tool call, if its `filePath` argument parses, resolves,
	 * and matches an open notebook; otherwise undefined.
	 */
	private getWorkingNotebook(props: SummarizedAgentHistoryProps): NotebookDocument | undefined {
		// Newest round (if any) that invoked RunNotebookCell.
		const toolCallRound = props.promptContext.toolCallRounds && [...props.promptContext.toolCallRounds].reverse().find(round => round.toolCalls.some(call => call.name === ToolName.RunNotebookCell));
		const toolCall = toolCallRound?.toolCalls.find(call => call.name === ToolName.RunNotebookCell);
		if (toolCall && toolCall.arguments) {
			try {
				const args = JSON.parse(toolCall.arguments);
				if (typeof args.filePath === 'string') {
					const uri = this._promptPathRepresentationService.resolveFilePath(args.filePath);
					if (!uri) {
						return undefined;
					}
					return this._workspaceService.notebookDocuments.find(doc => doc.uri.toString() === uri.toString());
				}
			} catch (e) {
				// Ignore parsing errors
			}
		}

		return undefined;
	}
}
1085
1086
/** Props for {@link SummaryMessageElement}. */
interface SummaryMessageProps extends BasePromptElementProps {
	// The summary text to render inside the conversation-summary tag.
	readonly summaryText: string;
	// Endpoint whose model family decides whether extra reminder instructions are added.
	readonly endpoint: IChatEndpoint;
}
1090
1091
class SummaryMessageElement extends PromptElement<SummaryMessageProps> {
1092
override async render(state: void, sizing: PromptSizing) {
1093
return <UserMessage>
1094
<Tag name='conversation-summary'>
1095
{this.props.summaryText}
1096
</Tag>
1097
{this.props.endpoint.family === 'gpt-4.1' && <Tag name='reminderInstructions'>
1098
<DefaultOpenAIKeepGoingReminder />
1099
</Tag>}
1100
</UserMessage>;
1101
}
1102
}
1103
1104
/** Props for {@link InlineSummarizationUserMessage}. */
export interface InlineSummarizationUserMessageProps extends BasePromptElementProps {
	// Endpoint for the current request; its model id selects model-specific instruction tweaks.
	readonly endpoint: IChatEndpoint;
}
1107
1108
/**
 * User message appended to the agent prompt when inline summarization is triggered.
 * Instructs the model to output ONLY a summary wrapped in `<summary>` tags, with
 * no tool calls. The summary is extracted from the response and stored on the round
 * for the next iteration.
 */
export class InlineSummarizationUserMessage extends PromptElement<InlineSummarizationUserMessageProps> {
	override async render(state: void, sizing: PromptSizing) {
		const isOpus = this.props.endpoint.model.startsWith('claude-opus');
		return <UserMessage priority={1000}>
			The conversation has grown too large for the context window and must be compacted now.<br />
			<br />
			{SummaryPrompt}
			<br />
			<br />
			IMPORTANT: Output your summary wrapped in {'<summary>'} and {'</summary>'} tags. Do NOT call any tools. Your ONLY task right now is to produce a comprehensive summary of the conversation so far.<br />
			{/* Opus models get the no-tools instruction repeated for extra emphasis. */}
			{isOpus && <>
				<br />
				IMPORTANT: Do NOT call any tools. Your only task is to generate a text summary of the conversation. Do not attempt to execute any actions or make any tool calls.<br />
			</>}
		</UserMessage>;
	}
}
1131
1132
/**
 * Extracts an inline summary from the model's response text.
 *
 * Parsing strategy (multi-level fallback):
 * 1. Clean `<summary>...</summary>` tags → extracts content between them
 * 2. `<summary>` found but no closing tag → takes everything after `<summary>`
 * 3. No tags found → returns undefined (caller falls back to separate-call summarization)
 *
 * @returns The extracted summary text, or `undefined` if no summary could be found.
 */
export function extractInlineSummary(responseText: string): string | undefined {
	const openTag = '<summary>';
	const closeTag = '</summary>';

	const openIdx = responseText.indexOf(openTag);
	if (openIdx === -1) {
		// No opening tag anywhere — nothing to extract.
		return undefined;
	}

	// Content runs from just past the opening tag to the closing tag if one
	// exists, or to the end of the text otherwise.
	const contentStart = openIdx + openTag.length;
	const closeIdx = responseText.indexOf(closeTag, contentStart);
	const contentEnd = closeIdx === -1 ? responseText.length : closeIdx;
	return responseText.substring(contentStart, contentEnd).trim();
}
1161
1162