// Path: extensions/copilot/src/extension/prompts/node/agent/summarizedConversationHistory.tsx
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import * as l10n from '@vscode/l10n';6import { BasePromptElementProps, PrioritizedList, PromptElement, PromptMetadata, PromptSizing, Raw, SystemMessage, UserMessage } from '@vscode/prompt-tsx';7import { BudgetExceededError } from '@vscode/prompt-tsx/dist/base/materialized';8import { ChatMessage } from '@vscode/prompt-tsx/dist/base/output/rawTypes';9import type { ChatResponsePart, ChatResultPromptTokenDetail, LanguageModelToolInformation, NotebookDocument, Progress } from 'vscode';10import { IChatHookService, PreCompactHookInput } from '../../../../platform/chat/common/chatHookService';11import { ChatFetchResponseType, ChatLocation, ChatResponse, FetchSuccess } from '../../../../platform/chat/common/commonTypes';12import { getTextPart } from '../../../../platform/chat/common/globalStringUtils';13import { IHistoricalTurn, ISessionTranscriptService } from '../../../../platform/chat/common/sessionTranscriptService';14import { ConfigKey, IConfigurationService } from '../../../../platform/configuration/common/configurationService';15import { isAnthropicFamily, isGeminiFamily } from '../../../../platform/endpoint/common/chatModelCapabilities';16import { ILogService } from '../../../../platform/log/common/logService';17import { CUSTOM_TOOL_SEARCH_NAME } from '../../../../platform/networking/common/anthropic';18import { IChatEndpoint } from '../../../../platform/networking/common/networking';19import { APIUsage } from '../../../../platform/networking/common/openai';20import { IPromptPathRepresentationService } from '../../../../platform/prompts/common/promptPathRepresentationService';21import { ITelemetryService } from 
'../../../../platform/telemetry/common/telemetry';22import { ThinkingData } from '../../../../platform/thinking/common/thinking';23import { computePromptTokenDetails } from '../../../../platform/tokenizer/node/promptTokenDetails';24import { IWorkspaceService } from '../../../../platform/workspace/common/workspaceService';25import { CancellationToken } from '../../../../util/vs/base/common/cancellation';26import { CancellationError, isCancellationError } from '../../../../util/vs/base/common/errors';27import { Iterable } from '../../../../util/vs/base/common/iterator';28import { StopWatch } from '../../../../util/vs/base/common/stopwatch';29import { generateUuid } from '../../../../util/vs/base/common/uuid';30import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';31import { ChatResponseProgressPart2 } from '../../../../vscodeTypes';32import { ToolCallingLoop } from '../../../intents/node/toolCallingLoop';33import { IResultMetadata } from '../../../prompt/common/conversation';34import { IBuildPromptContext, IToolCallRound } from '../../../prompt/common/intents';35import { ToolName } from '../../../tools/common/toolNames';36import { normalizeToolSchema } from '../../../tools/common/toolSchemaNormalizer';37import { NotebookSummary } from '../../../tools/node/notebookSummaryTool';38import { renderPromptElement } from '../base/promptRenderer';39import { Tag } from '../base/tag';40import { ChatToolCalls } from '../panel/toolCalling';41import { AgentUserMessage, AgentUserMessageCustomizations, getUserMessagePropsFromAgentProps, getUserMessagePropsFromTurn } from './agentPrompt';42import { DefaultOpenAIKeepGoingReminder } from './openai/defaultOpenAIPrompt';43import { SimpleSummarizedHistory } from './simpleSummarizedHistoryPrompt';4445export interface ConversationHistorySummarizationPromptProps extends SummarizedAgentHistoryProps {46readonly simpleMode?: boolean;47}4849const SummaryPrompt = <>50Your task is to create a 
comprehensive, detailed summary of the entire conversation that captures all essential information needed to seamlessly continue the work without any loss of context. This summary will be used to compact the conversation while preserving critical technical details, decisions, and progress.<br />5152## Recent Context Analysis<br />5354Pay special attention to the most recent agent commands and tool executions that led to this summarization being triggered. Include:<br />55- **Last Agent Commands**: What specific actions/tools were just executed<br />56- **Tool Results**: Key outcomes from recent tool calls (truncate if very long, but preserve essential information)<br />57- **Immediate State**: What was the system doing right before summarization<br />58- **Triggering Context**: What caused the token budget to be exceeded<br />5960## Analysis Process<br />6162Before providing your final summary, wrap your analysis in `<analysis>` tags to organize your thoughts systematically:<br />63641. **Chronological Review**: Go through the conversation chronologically, identifying key phases and transitions<br />652. **Intent Mapping**: Extract all explicit and implicit user requests, goals, and expectations<br />663. **Technical Inventory**: Catalog all technical concepts, tools, frameworks, and architectural decisions<br />674. **Code Archaeology**: Document all files, functions, and code patterns that were discussed or modified<br />685. **Progress Assessment**: Evaluate what has been completed vs. what remains pending<br />696. **Context Validation**: Ensure all critical information for continuation is captured<br />707. 
**Recent Commands Analysis**: Document the specific agent commands and tool results from the most recent operations<br />7172## Summary Structure<br />7374Your summary must include these sections in order, following the exact format below:<br />7576<Tag name='analysis'>77[Chronological Review: Walk through conversation phases: initial request → exploration → implementation → debugging → current state]<br />78[Intent Mapping: List each explicit user request with message context]<br />79[Technical Inventory: Catalog all technologies, patterns, and decisions mentioned]<br />80[Code Archaeology: Document every file, function, and code change discussed]<br />81[Progress Assessment: What's done vs. pending with specific status]<br />82[Context Validation: Verify all continuation context is captured]<br />83[Recent Commands Analysis: Last agent commands executed, tool results (truncated if long), immediate pre-summarization state]<br />84</Tag><br />8586<Tag name='summary'>871. Conversation Overview:<br />88- Primary Objectives: [All explicit user requests and overarching goals with exact quotes]<br />89- Session Context: [High-level narrative of conversation flow and key phases]<br />90- User Intent Evolution: [How user's needs or direction changed throughout conversation]<br />91922. Technical Foundation:<br />93- [Core Technology 1]: [Version/details and purpose]<br />94- [Framework/Library 2]: [Configuration and usage context]<br />95- [Architectural Pattern 3]: [Implementation approach and reasoning]<br />96- [Environment Detail 4]: [Setup specifics and constraints]<br />97983. 
Codebase Status:<br />99- [File Name 1]:<br />100- Purpose: [Why this file is important to the project]<br />101- Current State: [Summary of recent changes or modifications]<br />102- Key Code Segments: [Important functions/classes with brief explanations]<br />103- Dependencies: [How this relates to other components]<br />104- [File Name 2]:<br />105- Purpose: [Role in the project]<br />106- Current State: [Modification status]<br />107- Key Code Segments: [Critical code blocks]<br />108- [Additional files as needed]<br />1091104. Problem Resolution:<br />111- Issues Encountered: [Technical problems, bugs, or challenges faced]<br />112- Solutions Implemented: [How problems were resolved and reasoning]<br />113- Debugging Context: [Ongoing troubleshooting efforts or known issues]<br />114- Lessons Learned: [Important insights or patterns discovered]<br />1151165. Progress Tracking:<br />117- Completed Tasks: [What has been successfully implemented with status indicators]<br />118- Partially Complete Work: [Tasks in progress with current completion status]<br />119- Validated Outcomes: [Features or code confirmed working through testing]<br />1201216. Active Work State:<br />122- Current Focus: [Precisely what was being worked on in most recent messages]<br />123- Recent Context: [Detailed description of last few conversation exchanges]<br />124- Working Code: [Code snippets being modified or discussed recently]<br />125- Immediate Context: [Specific problem or feature being addressed before summary]<br />1261277. 
Recent Operations:<br />128- Last Agent Commands: [Specific tools/actions executed just before summarization with exact command names]<br />129- Tool Results Summary: [Key outcomes from recent tool executions - truncate long results but keep essential info]<br />130- Pre-Summary State: [What the agent was actively doing when token budget was exceeded]<br />131- Operation Context: [Why these specific commands were executed and their relationship to user goals]<br />1321338. Continuation Plan:<br />134- [Pending Task 1]: [Details and specific next steps with verbatim quotes]<br />135- [Pending Task 2]: [Requirements and continuation context]<br />136- [Priority Information]: [Which tasks are most urgent or logically sequential]<br />137- [Next Action]: [Immediate next step with direct quotes from recent messages]<br />138</Tag><br />139140## Quality Guidelines<br />141142- **Precision**: Include exact filenames, function names, variable names, and technical terms<br />143- **Completeness**: Capture all context needed to continue without re-reading the full conversation<br />144- **Clarity**: Write for someone who needs to pick up exactly where the conversation left off<br />145- **Verbatim Accuracy**: Use direct quotes for task specifications and recent work context<br />146- **Technical Depth**: Include enough detail for complex technical decisions and code patterns<br />147- **Logical Flow**: Present information in a way that builds understanding progressively<br />148149This summary should serve as a comprehensive handoff document that enables seamless continuation of all active work streams while preserving the full technical and contextual richness of the original conversation.<br />150</>;151152/**153* Prompt used to summarize conversation history when the context window is exceeded.154*/155export class ConversationHistorySummarizationPrompt extends PromptElement<ConversationHistorySummarizationPromptProps> {156override async render(state: void, sizing: 
PromptSizing) {157const history = this.props.simpleMode ?158<SimpleSummarizedHistory priority={1} promptContext={this.props.promptContext} location={this.props.location} endpoint={this.props.endpoint} maxToolResultLength={this.props.maxToolResultLength} /> :159<ConversationHistory priority={1} promptContext={this.props.promptContext} location={this.props.location} endpoint={this.props.endpoint} maxToolResultLength={this.props.maxToolResultLength} enableCacheBreakpoints={this.props.enableCacheBreakpoints} />;160const isOpus = this.props.endpoint.model.startsWith('claude-opus');161return (162<>163<SystemMessage priority={this.props.priority}>164{SummaryPrompt}165{this.props.summarizationInstructions && <>166<br /><br />167## Additional instructions from the user:<br />168{this.props.summarizationInstructions}169</>}170</SystemMessage>171{history}172{this.props.workingNotebook && <WorkingNotebookSummary priority={this.props.priority - 2} notebook={this.props.workingNotebook} />}173<UserMessage priority={this.props.priority}>174Summarize the conversation history so far, paying special attention to the most recent agent commands and tool results that triggered this summarization. Structure your summary using the enhanced format provided in the system message.<br />175{isOpus && <>176<br />177IMPORTANT: Do NOT call any tools. Your only task is to generate a text summary of the conversation. 
Do not attempt to execute any actions or make any tool calls.<br />178</>}179Focus particularly on:<br />180- The specific agent commands/tools that were just executed<br />181- The results returned from these recent tool calls (truncate if very long but preserve key information)<br />182- What the agent was actively working on when the token budget was exceeded<br />183- How these recent operations connect to the overall user goals<br />184185Include all important tool calls and their results as part of the appropriate sections, with special emphasis on the most recent operations.186</UserMessage>187</>188);189}190}191192class WorkingNotebookSummary extends PromptElement<NotebookSummaryProps> {193override async render(state: void, sizing: PromptSizing) {194return (195<UserMessage>196This is the current state of the notebook that you have been working on:<br />197<NotebookSummary notebook={this.props.notebook} includeCellLines={false} altDoc={undefined} />198</UserMessage>199);200}201}202203export interface NotebookSummaryProps extends BasePromptElementProps {204readonly notebook: NotebookDocument;205}206207/**208* Conversation history rendered with tool calls and summaries.209*/210class ConversationHistory extends PromptElement<SummarizedAgentHistoryProps> {211override async render(state: void, sizing: PromptSizing) {212// Iterate over the turns in reverse order until we find a turn with a tool call round that was summarized213const history: PromptElement[] = [];214215// If we have a stop hook query, add it as a new user message at the very end of the conversation.216// Push it first so that after history.reverse() it will be last.217if (this.props.promptContext.hasStopHookQuery) {218history.push(<UserMessage priority={901}>{this.props.promptContext.query}</UserMessage>);219}220221// Handle the possibility that we summarized partway through the current turn (e.g. 
if we accumulated many tool call rounds)222let summaryForCurrentTurn: string | undefined = undefined;223let thinkingForFirstRoundAfterSummarization: ThinkingData | undefined = undefined;224if (this.props.promptContext.toolCallRounds?.length) {225const toolCallRounds: IToolCallRound[] = [];226for (let i = this.props.promptContext.toolCallRounds.length - 1; i >= 0; i--) {227const toolCallRound = this.props.promptContext.toolCallRounds[i];228if (toolCallRound.summary) {229// This tool call round was summarized230summaryForCurrentTurn = toolCallRound.summary;231thinkingForFirstRoundAfterSummarization = toolCallRound.thinking;232break;233}234toolCallRounds.push(toolCallRound);235}236237// Reverse the tool call rounds so they are in chronological order238toolCallRounds.reverse();239240// For Anthropic models with thinking enabled, set the thinking on the first round241// so it gets rendered as the first thinking block after summarization242if (isAnthropicFamily(this.props.endpoint) && thinkingForFirstRoundAfterSummarization && toolCallRounds.length > 0 && !toolCallRounds[0].thinking) {243toolCallRounds[0].thinking = thinkingForFirstRoundAfterSummarization;244}245246history.push(<ChatToolCalls priority={899} flexGrow={2} promptContext={this.props.promptContext} toolCallRounds={toolCallRounds} toolCallResults={this.props.promptContext.toolCallResults} enableCacheBreakpoints={this.props.enableCacheBreakpoints} truncateAt={this.props.maxToolResultLength} />);247}248249if (summaryForCurrentTurn) {250history.push(<SummaryMessageElement endpoint={this.props.endpoint} summaryText={summaryForCurrentTurn} />);251252return (<PrioritizedList priority={this.props.priority} descending={false} passPriority={true}>253{history.reverse()}254</PrioritizedList>);255}256257// Render the original user message:258// - Always render for non-continuation (normal first iteration)259// - Also render for stop hook continuation (the original message is needed, frozen content will provide it)260if 
(!this.props.promptContext.isContinuation || this.props.promptContext.hasStopHookQuery) {261history.push(<AgentUserMessage flexGrow={2} priority={900} {...getUserMessagePropsFromAgentProps(this.props, {262userQueryTagName: this.props.userQueryTagName,263ReminderInstructionsClass: this.props.ReminderInstructionsClass,264ToolReferencesHintClass: this.props.ToolReferencesHintClass,265})} />);266}267268// We may have a summary from earlier in the conversation, but skip history if we have a new summary269for (const [i, turn] of [...this.props.promptContext.history.entries()].reverse()) {270const metadata = turn.resultMetadata;271272// Build this list in chronological order273const turnComponents: PromptElement[] = [];274275// Turn anatomy276// ______________277// | |278// | USER |279// | |280// | ASSISTANT |281// | |282// | TOOL | <-- { summary: ..., toolCallRoundId: ... }283// | ASSISTANT |284// |____________|285286let summaryForTurn: SummarizedConversationHistoryMetadata | undefined;287// If a tool call limit is exceeded, the tool call from this turn will288// have been aborted and any result should be found in the next turn.289const toolCallResultInNextTurn = metadata?.maxToolCallsExceeded;290let toolCallResults = metadata?.toolCallResults;291if (toolCallResultInNextTurn) {292const nextMetadata = this.props.promptContext.history.at(i + 1)?.responseChatResult?.metadata as IResultMetadata | undefined;293const mergeFrom = i === this.props.promptContext.history.length - 1 ? this.props.promptContext.toolCallResults : nextMetadata?.toolCallResults;294toolCallResults = { ...toolCallResults, ...mergeFrom };295}296297// Find the latest tool call round that was summarized298const toolCallRounds: IToolCallRound[] = [];299for (let i = turn.rounds.length - 1; i >= 0; i--) {300const round = turn.rounds[i];301summaryForTurn = round.summary ? 
new SummarizedConversationHistoryMetadata(round.id, round.summary) : undefined;302if (summaryForTurn) {303break;304}305toolCallRounds.push(round);306}307308if (summaryForTurn) {309// We have a summary for a tool call round that was part of this turn310turnComponents.push(<SummaryMessageElement endpoint={this.props.endpoint} summaryText={summaryForTurn.text} />);311} else if (!turn.isContinuation) {312turnComponents.push(<AgentUserMessage flexGrow={1} {...getUserMessagePropsFromTurn(turn, this.props.endpoint, {313userQueryTagName: this.props.userQueryTagName,314ReminderInstructionsClass: this.props.ReminderInstructionsClass,315ToolReferencesHintClass: this.props.ToolReferencesHintClass,316})} />);317}318319// Reverse the tool call rounds so they are in chronological order320toolCallRounds.reverse();321turnComponents.push(<ChatToolCalls322flexGrow={1}323promptContext={this.props.promptContext}324toolCallRounds={toolCallRounds}325toolCallResults={toolCallResults}326isHistorical={!(toolCallResultInNextTurn && i === this.props.promptContext.history.length - 1)}327truncateAt={this.props.maxToolResultLength}328/>);329330history.push(...turnComponents.reverse());331if (summaryForTurn) {332// All preceding turns are covered by the summary and shouldn't be included verbatim333break;334}335}336337return (<PrioritizedList priority={this.props.priority} descending={false} passPriority={true}>338{history.reverse()}339</PrioritizedList>);340}341}342343export interface ISummarizedConversationHistoryMetadataOptions {344readonly thinking?: ThinkingData;345readonly usage?: APIUsage;346readonly promptTokenDetails?: readonly ChatResultPromptTokenDetail[];347readonly model?: string;348readonly summarizationMode?: string;349readonly numRounds?: number;350readonly numRoundsSinceLastSummarization?: number;351readonly durationMs?: number;352readonly source?: 'foreground' | 'background';353readonly outcome?: string;354readonly contextLengthBefore?: number;355}356357export class 
SummarizedConversationHistoryMetadata extends PromptMetadata {358public readonly toolCallRoundId: string;359public readonly text: string;360public readonly thinking?: ThinkingData;361public readonly usage?: APIUsage;362public readonly promptTokenDetails?: readonly ChatResultPromptTokenDetail[];363public readonly model?: string;364public readonly summarizationMode?: string;365public readonly numRounds?: number;366public readonly numRoundsSinceLastSummarization?: number;367public readonly durationMs?: number;368public readonly source?: 'foreground' | 'background';369public readonly outcome?: string;370public readonly contextLengthBefore?: number;371372constructor(373toolCallRoundId: string,374text: string,375options?: ISummarizedConversationHistoryMetadataOptions,376) {377super();378this.toolCallRoundId = toolCallRoundId;379this.text = text;380this.thinking = options?.thinking;381this.usage = options?.usage;382this.promptTokenDetails = options?.promptTokenDetails;383this.model = options?.model;384this.summarizationMode = options?.summarizationMode;385this.numRounds = options?.numRounds;386this.numRoundsSinceLastSummarization = options?.numRoundsSinceLastSummarization;387this.durationMs = options?.durationMs;388this.source = options?.source;389this.outcome = options?.outcome;390this.contextLengthBefore = options?.contextLengthBefore;391}392}393394export interface SummarizedAgentHistoryProps extends BasePromptElementProps, AgentUserMessageCustomizations {395readonly priority: number;396readonly endpoint: IChatEndpoint;397readonly location: ChatLocation;398readonly promptContext: IBuildPromptContext;399readonly triggerSummarize?: boolean;400readonly tools?: ReadonlyArray<LanguageModelToolInformation> | undefined;401readonly enableCacheBreakpoints?: boolean;402readonly workingNotebook?: NotebookDocument;403readonly maxToolResultLength: number;404/** Optional hard cap on summary tokens; effective budget = min(prompt sizing tokenBudget, this value) */405readonly 
maxSummaryTokens?: number;406/** Optional custom instructions to include in the summarization prompt */407readonly summarizationInstructions?: string;408/** Skip Full mode and go straight to Simple mode for foreground budget-exceeded recovery. */409readonly forceSimpleSummary?: boolean;410}411412/**413* Renders conversation history with tool calls and summaries, triggering summarization while rendering if necessary.414*/415export class SummarizedConversationHistory extends PromptElement<SummarizedAgentHistoryProps> {416constructor(417props: SummarizedAgentHistoryProps,418@IInstantiationService private readonly instantiationService: IInstantiationService,419@ISessionTranscriptService private readonly sessionTranscriptService: ISessionTranscriptService,420) {421super(props);422}423424override async render(state: void, sizing: PromptSizing, progress: Progress<ChatResponsePart> | undefined, token: CancellationToken | undefined) {425const promptContext = { ...this.props.promptContext };426let historyMetadata: SummarizedConversationHistoryMetadata | undefined;427const sessionId = this.props.promptContext.conversation?.sessionId;428if (sessionId) {429// Lazily start the transcript session now (before summarization) so it430// captures the full pre-compaction conversation. 
startSession is431// idempotent — if hooks already started it, this is a no-op.432await this.ensureTranscriptSession();433434if (this.sessionTranscriptService.getTranscriptPath(sessionId)) {435await this.sessionTranscriptService.flush(sessionId);436}437}438439if (this.props.triggerSummarize) {440441const summarizer = this.instantiationService.createInstance(ConversationHistorySummarizer, this.props, sizing, progress, token);442const summResult = await summarizer.summarizeHistory();443if (summResult) {444historyMetadata = new SummarizedConversationHistoryMetadata(summResult.toolCallRoundId, summResult.summary, {445thinking: summResult.thinking,446usage: summResult.usage,447promptTokenDetails: summResult.promptTokenDetails,448model: summResult.model,449summarizationMode: summResult.summarizationMode,450numRounds: summResult.numRounds,451numRoundsSinceLastSummarization: summResult.numRoundsSinceLastSummarization,452durationMs: summResult.durationMs,453});454this.addSummaryToHistory(summResult.summary, summResult.toolCallRoundId, summResult.thinking);455}456}457458return <>459{historyMetadata && <meta value={historyMetadata} />}460<ConversationHistory461{...this.props}462promptContext={promptContext}463enableCacheBreakpoints={this.props.enableCacheBreakpoints} />464</>;465}466467/**468* Lazily starts a transcript session with the full conversation history.469* This is called just before summarization so that the transcript file470* contains the complete pre-compaction conversation. If a session was471* already started (e.g. 
by hooks), this is a no-op.472*/473private async ensureTranscriptSession(): Promise<void> {474const sessionId = this.props.promptContext.conversation?.sessionId;475if (!sessionId) {476return;477}478479// Short-circuit if session already exists — avoids rebuilding480// the full IHistoricalTurn[] array on every render.481if (this.sessionTranscriptService.getTranscriptPath(sessionId)) {482return;483}484485// Build IHistoricalTurn[] from the prompt context's Turn[] history486const history: IHistoricalTurn[] = this.props.promptContext.history.map(turn => ({487userMessage: turn.request.message,488timestamp: turn.startTime,489rounds: turn.rounds.map(round => ({490response: round.response,491toolCalls: round.toolCalls.map(tc => ({492name: tc.name,493arguments: tc.arguments,494id: tc.id,495})),496reasoningText: round.thinking497? (Array.isArray(round.thinking.text) ? round.thinking.text.join('') : round.thinking.text)498: undefined,499timestamp: round.timestamp,500})),501}));502503await this.sessionTranscriptService.startSession(sessionId, undefined, history.length > 0 ? 
history : undefined);504}505506private addSummaryToHistory(summary: string, toolCallRoundId: string, thinking?: ThinkingData): void {507const round = this.props.promptContext.toolCallRounds?.find(round => round.id === toolCallRoundId);508if (round) {509round.summary = summary;510round.thinking = thinking;511return;512}513514// Adding summaries to rounds in previous turns will only be persisted during the current session.515// For the next turn, need to restore them from metadata (see normalizeSummariesOnRounds).516for (const turn of [...this.props.promptContext.history].reverse()) {517const round = turn.rounds.find(round => round.id === toolCallRoundId);518if (round) {519round.summary = summary;520round.thinking = thinking;521break;522}523}524}525}526527enum SummaryMode {528Simple = 'simple',529Full = 'full'530}531532interface SummarizationResult {533result: FetchSuccess<string>;534promptTokenDetails?: readonly ChatResultPromptTokenDetail[];535model?: string;536summarizationMode?: string;537numRounds?: number;538numRoundsSinceLastSummarization?: number;539durationMs?: number;540}541542class ConversationHistorySummarizer {543private readonly summarizationId = generateUuid();544545constructor(546private readonly props: SummarizedAgentHistoryProps,547private readonly sizing: PromptSizing,548private readonly progress: Progress<ChatResponsePart> | undefined,549private readonly token: CancellationToken | undefined,550@ITelemetryService private readonly telemetryService: ITelemetryService,551@ILogService private readonly logService: ILogService,552@IInstantiationService private readonly instantiationService: IInstantiationService,553@IConfigurationService private readonly configurationService: IConfigurationService,554@IChatHookService private readonly chatHookService: IChatHookService,555@ISessionTranscriptService private readonly sessionTranscriptService: ISessionTranscriptService,556) { }557558async summarizeHistory(): Promise<{ summary: string; toolCallRoundId: 
string; thinking?: ThinkingData; usage?: APIUsage; promptTokenDetails?: readonly ChatResultPromptTokenDetail[]; model?: string; summarizationMode?: string; numRounds?: number; numRoundsSinceLastSummarization?: number; durationMs?: number }> {559// Execute pre-compact hook before summarization to allow hooks to archive transcripts or perform cleanup560await this.executePreCompactHook();561562// Just a function for test to create props and call this563const propsInfo = this.instantiationService.createInstance(SummarizedConversationHistoryPropsBuilder).getProps(this.props);564565const summaryPromise = this.getSummaryWithFallback(propsInfo);566this.progress?.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {567try {568await summaryPromise;569} catch { }570return l10n.t('Compacted conversation');571}));572573const summary = await summaryPromise;574const { numRounds, numRoundsSinceLastSummarization } = computeSummarizationRoundCounts(this.props.promptContext.history, this.props.promptContext.toolCallRounds);575return {576summary: this.appendTranscriptHint(summary.result.value),577toolCallRoundId: propsInfo.summarizedToolCallRoundId,578thinking: propsInfo.summarizedThinking,579usage: summary.result.usage,580promptTokenDetails: summary.promptTokenDetails,581model: summary.model,582summarizationMode: summary.summarizationMode,583numRounds,584numRoundsSinceLastSummarization,585durationMs: summary.durationMs,586};587}588589private appendTranscriptHint(summary: string): string {590const sessionId = this.props.promptContext.conversation?.sessionId;591if (!sessionId) {592return summary;593}594return appendTranscriptHintToSummary(summary, sessionId, this.sessionTranscriptService);595}596597private async getSummaryWithFallback(propsInfo: ISummarizedConversationHistoryInfo): Promise<SummarizationResult> {598const forceMode = this.configurationService.getConfig<string | undefined>(ConfigKey.Advanced.AgentHistorySummarizationMode);599if 
(this.props.forceSimpleSummary && forceMode !== SummaryMode.Full) {
			// Foreground budget-exceeded recovery — go straight to Simple.
			return await this.getSummary(SummaryMode.Simple, propsInfo);
		}
		if (forceMode === SummaryMode.Simple) {
			return await this.getSummary(SummaryMode.Simple, propsInfo);
		} else {
			// Prefer the richer Full summary; on any non-cancellation failure
			// (render error, request error, oversized summary) fall back to Simple.
			try {
				return await this.getSummary(SummaryMode.Full, propsInfo);
			} catch (e) {
				if (isCancellationError(e)) {
					throw e;
				}

				return await this.getSummary(SummaryMode.Simple, propsInfo);
			}
		}
	}

	/** Logs an info-level message prefixed with the summarizer tag and the active mode. */
	private logInfo(message: string, mode: SummaryMode): void {
		this.logService.info(`[ConversationHistorySummarizer] [${mode}] ${message}`);
	}

	/**
	 * Executes the PreCompact hook before summarization starts.
	 * This gives hook scripts a chance to archive the transcript or perform cleanup
	 * before the conversation is compacted.
	 *
	 * Hook failures are logged but never block summarization.
	 */
	private async executePreCompactHook(): Promise<void> {
		const hooks = this.props.promptContext.request?.hooks;
		if (!hooks) {
			// No hooks configured on this request — nothing to run.
			return;
		}

		try {
			const results = await this.chatHookService.executeHook('PreCompact', hooks, {
				trigger: 'auto',
			} satisfies PreCompactHookInput, this.props.promptContext.conversation?.sessionId, this.token ?? CancellationToken.None);

			// Surface individual hook errors in the log; do not throw.
			for (const result of results) {
				if (result.resultKind === 'error') {
					const errorMessage = typeof result.output === 'string' ? result.output : 'Unknown error';
					this.logService.error(`[ConversationHistorySummarizer] PreCompact hook error: ${errorMessage}`);
				}
			}
		} catch (error) {
			this.logService.error('[ConversationHistorySummarizer] Error executing PreCompact hook', error);
		}
	}

	/**
	 * Renders the summarization prompt for the given mode, sends it to the model,
	 * and validates the resulting summary.
	 *
	 * @param mode Whether to produce a Full (tools attached, tool_choice 'none') or Simple summary.
	 * @param propsInfo The prepared props describing which rounds are being summarized.
	 * @returns The summarization result including the response, token details, model, mode and duration.
	 * @throws Rethrows render/request errors (after sending telemetry); see also
	 *         handleSummarizationResponse for failure/too-large cases.
	 */
	private async getSummary(mode: SummaryMode, propsInfo: ISummarizedConversationHistoryInfo): Promise<SummarizationResult> {
		const stopwatch = new StopWatch(false);

		// In Full mode, tools are sent alongside the summarization prompt with
		// tool_choice: 'none'. Reserve budget for them so the rendered messages
		// plus tools don't exceed the model's context window.
		const tools = this.props.tools;
		const toolTokens = mode === SummaryMode.Full && tools?.length
			? await this.props.endpoint.acquireTokenizer().countToolTokens(tools)
			: 0;
		// Clone the endpoint with a reduced token budget (90% of what remains after
		// the tool tokens) when tools consume part of the window.
		const endpoint = toolTokens > 0
			? this.props.endpoint.cloneWithTokenOverride(
				Math.max(1, Math.floor((this.props.endpoint.modelMaxPromptTokens - toolTokens) * 0.9)))
			: this.props.endpoint;

		let summarizationPrompt: ChatMessage[];
		const associatedRequestId = this.props.promptContext.conversation?.getLatestTurn().id;
		try {
			// Cache breakpoints are disabled: this prompt is one-shot and should not
			// participate in prompt caching on its own.
			summarizationPrompt = (await renderPromptElement(this.instantiationService, endpoint, ConversationHistorySummarizationPrompt, { ...propsInfo.props, enableCacheBreakpoints: false, simpleMode: mode === SummaryMode.Simple }, undefined, this.token)).messages;
			this.logInfo(`summarization prompt rendered in ${stopwatch.elapsed()}ms.`, mode);
		} catch (e) {
			// Distinguish budget overflows from other render failures for telemetry.
			const budgetExceeded = e instanceof BudgetExceededError;
			const outcome = budgetExceeded ? 'budget_exceeded' : 'renderError';
			this.logInfo(`Error rendering summarization prompt in mode: ${mode}. ${e.stack}`, mode);
			this.sendSummarizationTelemetry(outcome, '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e));
			throw e;
		}

		let summaryResponse: ChatResponse;
		let promptTypes: string | undefined;
		try {
			// Full mode sends the tool schemas (normalized per endpoint family) so the
			// prompt-cache prefix matches the agent loop's requests; invalid tools are
			// logged and dropped by normalizeToolSchema's callback.
			const normalizedTools = mode === SummaryMode.Full ? normalizeToolSchema(
				endpoint.family,
				this.props.tools?.map(tool => ({
					function:
					{
						name: tool.name,
						description: tool.description,
						parameters: tool.inputSchema && Object.keys(tool.inputSchema).length ? tool.inputSchema : undefined
					}, type: 'function'
				})),
				(tool, rule) => {
					this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`);
				},
			) : undefined;
			const toolOpts = normalizedTools?.length ? {
				tool_choice: 'none' as const,
				tools: normalizedTools,
			} : undefined;

			stripCacheBreakpoints(summarizationPrompt);
			replaceImageContentWithPlaceholders(summarizationPrompt);

			let messages = ToolCallingLoop.stripInternalToolCallIds(summarizationPrompt);

			// Strip custom client-side tool search (tool_search) tool_use/tool_result
			// pairs. The summarization call uses ChatLocation.Other but
			// createMessagesRequestBody still converts tool_search results to
			// tool_reference blocks (customToolSearchEnabled isn't gated by location).
			// Without tool search enabled in the request, Anthropic rejects them.
			if (isAnthropicFamily(endpoint)) {
				messages = stripToolSearchMessages(messages);
			}

			// Gemini strictly requires every function_call to have a matching function_response.
			// When prompt-tsx prunes tool result messages due to token budget, orphaned tool_calls
			// can remain, causing a 400 INVALID_ARGUMENT error. Strip them for Gemini models.
			if (isGeminiFamily(endpoint)) {
				const validationResult = ToolCallingLoop.validateToolMessagesCore(messages, { stripOrphanedToolCalls: true });
				messages = validationResult.messages;
				if (validationResult.strippedToolCallCount > 0) {
					this.logInfo(`Stripped ${validationResult.strippedToolCallCount} orphaned tool calls from summarization prompt`, mode);
					/* __GDPR__
						"summarization.strippedOrphanedToolCalls" : {
							"owner": "vijayu",
							"comment": "Tracks when orphaned tool calls are stripped from the summarization prompt for Gemini models",
							"strippedToolCallCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Number of orphaned tool_calls stripped from the summarization prompt." },
							"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
							"mode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode (simple or full)." }
						}
					*/
					this.telemetryService.sendMSFTTelemetryEvent('summarization.strippedOrphanedToolCalls', {
						model: endpoint.model,
						mode,
					}, {
						strippedToolCallCount: validationResult.strippedToolCallCount,
					});
				}
			}

			// Compact per-message fingerprint (role[-name]:charCount,...) used as a
			// proxy for prompt-cache hit rate in telemetry.
			promptTypes = messages.map(msg => `${msg.role}${'name' in msg && msg.name ? `-${msg.name}` : ''}:${getTextPart(msg.content).length}`).join(',');
			summaryResponse = await endpoint.makeChatRequest2({
				debugName: `summarizeConversationHistory-${mode}`,
				messages,
				finishedCb: undefined,
				location: ChatLocation.Other,
				requestOptions: {
					// Deterministic, non-streamed output for the summary.
					temperature: 0,
					stream: false,
					...toolOpts
				},
				telemetryProperties: associatedRequestId ? { associatedRequestId } : undefined,
				enableRetryOnFilter: true
			}, this.token ?? CancellationToken.None);
		} catch (e) {
			this.logInfo(`Error from summarization request. ${e.message}`, mode);
			this.sendSummarizationTelemetry('requestThrow', '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e));
			throw e;
		}

		// Reconcile client-side token accounting against the server-reported usage.
		const tokenizer = endpoint.acquireTokenizer();
		const promptTokenDetails = await computePromptTokenDetails({
			messages: summarizationPrompt,
			tokenizer,
			tools: this.props.tools ?? undefined,
			totalPromptTokens: summaryResponse.type === ChatFetchResponseType.Success ? summaryResponse.usage?.prompt_tokens : undefined,
		});

		const durationMs = stopwatch.elapsed();
		return {
			result: await this.handleSummarizationResponse(summaryResponse, mode, durationMs, promptTypes),
			promptTokenDetails,
			model: endpoint.model,
			summarizationMode: mode,
			durationMs,
		};
	}

	/**
	 * Validates the summarization response: sends telemetry, rejects failures and
	 * over-budget summaries, and returns the successful fetch result.
	 *
	 * @throws CancellationError when the request was canceled; Error for other
	 *         failures or when the summary exceeds the effective token budget.
	 */
	private async handleSummarizationResponse(response: ChatResponse, mode: SummaryMode, elapsedTime: number, promptTypes?: string): Promise<FetchSuccess<string>> {
		if (response.type !== ChatFetchResponseType.Success) {
			const outcome = response.type;
			this.sendSummarizationTelemetry(outcome, response.requestId, this.props.endpoint.model, mode, elapsedTime, undefined, response.reason ?? response.type);
			this.logInfo(`Summarization request failed. ${response.type} ${response.reason ?? response.type}`, mode);
			if (response.type === ChatFetchResponseType.Canceled) {
				throw new CancellationError();
			}

			throw new Error('Summarization request failed');
		}

		// The summary must itself fit in the (optionally capped) sizing budget.
		const summarySize = await this.sizing.countTokens(response.value);
		const effectiveBudget =
			!!this.props.maxSummaryTokens
				? Math.min(this.sizing.tokenBudget, this.props.maxSummaryTokens)
				: this.sizing.tokenBudget;
		if (summarySize > effectiveBudget) {
			this.sendSummarizationTelemetry('too_large', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage, `${summarySize} tokens exceeds budget ${effectiveBudget}`);
			this.logInfo(`Summary too large: ${summarySize} tokens (effective budget ${effectiveBudget})`, mode);
			throw new Error('Summary too large');
		}

		this.sendSummarizationTelemetry('success', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage, undefined, promptTypes);
		this.logInfo(`Summarization usage: prompt=${response.usage?.prompt_tokens ?? '?'}, cached=${response.usage?.prompt_tokens_details?.cached_tokens ?? '?'}, completion=${response.usage?.completion_tokens ??
'?'}`, mode);
		return response;
	}

	/**
	 * Send telemetry for conversation summarization.
	 * @param outcome High-level result of the summarization (for example, 'success', 'too_large', or the ChatFetchResponseType value)
	 * @param requestId Unique identifier of the underlying chat request used for summarization
	 * @param model Identifier of the language model used to generate the summary
	 * @param mode Summarization mode indicating how the conversation was summarized
	 * @param elapsedTime Total time in milliseconds taken for the summarization request
	 * @param usage Token usage information for the summarization request, if available
	 * @param detailedOutcome Optional detailed reason for non-success outcomes (for example, error or cancellation reason)
	 * @param promptTypes Optional pre-computed promptTypes string for the summarization request
	 */
	private sendSummarizationTelemetry(outcome: string, requestId: string, model: string, mode: SummaryMode, elapsedTime: number, usage: APIUsage | undefined, detailedOutcome?: string, promptTypes?: string): void {
		const { numRounds, numRoundsSinceLastSummarization } = computeSummarizationRoundCounts(this.props.promptContext.history, this.props.promptContext.toolCallRounds);

		const turnIndex = this.props.promptContext.history.length;
		const curTurnRoundIndex = this.props.promptContext.toolCallRounds?.length ?? 0;

		// Prefer the last tool call of the current turn; fall back to the last
		// tool call in history, then to 'none'.
		const lastUsedTool = this.props.promptContext.toolCallRounds?.at(-1)?.toolCalls?.at(-1)?.name ??
			this.props.promptContext.history?.at(-1)?.rounds.at(-1)?.toolCalls?.at(-1)?.name ?? 'none';

		// Numeric flags (0/1) because these fields are GDPR measurements.
		const isDuringToolCalling = !!this.props.promptContext.toolCallRounds?.length ? 1 : 0;
		const conversationId = this.props.promptContext.conversation?.sessionId;
		const hasWorkingNotebook = this.props.workingNotebook ? 1 : 0;

		/* __GDPR__
			"summarizedConversationHistory" : {
				"owner": "roblourens",
				"comment": "Tracks when summarization happens and what the outcome was",
				"summarizationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "An ID to join all attempts of this summarization task." },
				"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state or failure reason of the summarization." },
				"detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "A more detailed error message." },
				"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID used for the summarization." },
				"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
				"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID that this summarization ran during." },
				"promptTypes": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Role and character count of each prompt message in order, as a proxy for cache hit rate (e.g. system:1234,user:567)." },
				"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The number of tool call rounds before this summarization was triggered." },
				"numRoundsSinceLastSummarization": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The number of tool call rounds since the last summarization." },
				"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
				"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn" },
				"lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The name of the last tool used before summarization." },
				"isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this summarization was triggered during a tool calling loop." },
				"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Id for the current chat conversation." },
				"hasWorkingNotebook": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether the conversation summary includes a working notebook." },
				"mode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The mode of the conversation summary." },
				"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The mode of the conversation summary." },
				"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The duration of the summarization attempt in ms." },
				"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, server side counted", "isMeasurement": true },
				"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens hitting cache as reported by server", "isMeasurement": true },
				"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of generated tokens", "isMeasurement": true }
			}
		*/
		this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
			summarizationId: this.summarizationId,
			outcome,
			detailedOutcome,
			requestId,
			chatRequestId: this.props.promptContext.conversation?.getLatestTurn().id,
			model,
			lastUsedTool,
			conversationId,
			mode,
			summarizationMode: mode, // Try to unstick GDPR
			promptTypes,
		}, {
			numRounds,
			numRoundsSinceLastSummarization,
			turnIndex,
			curTurnRoundIndex,
			isDuringToolCalling,
			hasWorkingNotebook,
			duration: elapsedTime,
			promptTokenCount: usage?.prompt_tokens,
			promptCacheTokenCount: usage?.prompt_tokens_details?.cached_tokens,
			responseTokenCount: usage?.completion_tokens,
		});
	}
}

/** Removes cache-breakpoint content parts from every message, mutating the array in place. */
function stripCacheBreakpoints(messages: ChatMessage[]): void {
	messages.forEach(message => {
		message.content = message.content.filter(part => {
			return part.type !== Raw.ChatCompletionContentPartKind.CacheBreakpoint;
		});
	});
}

/** Replaces image content parts with a text placeholder, mutating messages in place. */
function replaceImageContentWithPlaceholders(messages: ChatMessage[]): void {
	messages.forEach(message => {
		message.content = message.content.map(part => {
			if (part.type === Raw.ChatCompletionContentPartKind.Image) {
				return { type: Raw.ChatCompletionContentPartKind.Text, text: '[Image was attached]' };
			}
			return part;
		});
	});
}

/**
 * Bake a stable transcript pointer into a freshly-produced summary text.
 *
 * Shared by both the full/simple summarization path
 * ({@link
ConversationHistorySummarizer}) and the inline background
 * summarization path in `agentIntent.ts`. The hint is appended exactly once,
 * at summary creation time, so the resulting string is frozen from then on
 * and replayed verbatim — preserving Anthropic prompt cache hits across
 * subsequent renders.
 *
 * Returns the input unchanged when there is no transcript on disk for the
 * session.
 */
export function appendTranscriptHintToSummary(summary: string, sessionId: string, sessionTranscriptService: ISessionTranscriptService): string {
	const transcriptUri = sessionTranscriptService.getTranscriptPath(sessionId);
	if (!transcriptUri) {
		// No transcript recorded for this session — nothing to point at.
		return summary;
	}
	const transcriptPath = transcriptUri.fsPath;
	const lineCount = sessionTranscriptService.getLineCount(sessionId);
	let out = summary;
	out += `\nIf you need specific details from before compaction (such as exact code snippets, error messages, tool results, or content you previously generated), use the ${ToolName.ReadFile} tool to look up the full uncompacted conversation transcript at: "${transcriptPath}"`;
	if (lineCount !== undefined) {
		// The line count is a snapshot; the transcript may grow after this point.
		out += `\nAt the time this summary was created, the transcript had ${lineCount} lines.`;
	}
	out += `\nExample usage: ${ToolName.ReadFile}(filePath: "${transcriptPath}")`;
	return out;
}

/**
 * Computes (a) the total number of tool-call rounds across history plus the
 * current turn, and (b) how many rounds have elapsed since the most recent
 * round that carries a summary.
 */
export function computeSummarizationRoundCounts(
	history: IBuildPromptContext['history'],
	currentRounds: readonly IToolCallRound[] | undefined,
): { numRounds: number; numRoundsSinceLastSummarization: number } {
	const numRoundsInHistory = history.reduce((sum, turn) => sum + turn.rounds.length, 0);
	const numRoundsInCurrentTurn = currentRounds?.length ?? 0;
	const numRounds = numRoundsInHistory + numRoundsInCurrentTurn;

	// First look within the current turn, newest round first.
	const reversedCurrentRounds = [...(currentRounds ?? [])].reverse();
	let numRoundsSinceLastSummarization = reversedCurrentRounds.findIndex(round => round.summary);
	if (numRoundsSinceLastSummarization === -1) {
		// Not in the current turn: walk history backwards, counting rounds until
		// a summarized round is found. NOTE(review): when no round anywhere has a
		// summary, the -1 sentinel from findIndex is returned as-is — confirm
		// downstream telemetry treats -1 as "never summarized".
		let count = numRoundsInCurrentTurn;
		outer: for (const turn of Iterable.reverse(Array.from(history))) {
			for (const round of Iterable.reverse(Array.from(turn.rounds ?? []))) {
				if (round.summary) {
					numRoundsSinceLastSummarization = count;
					break outer;
				}
				count++;
			}
		}
	}
	return { numRounds, numRoundsSinceLastSummarization };
}

/**
 * Strip custom client-side tool search (tool_search) tool_use and tool_result
 * messages from the conversation. The summarization call uses ChatLocation.Other
 * but createMessagesRequestBody still converts tool_search results to
 * tool_reference blocks (customToolSearchEnabled isn't gated by location).
 * Without tool search enabled in the request, Anthropic rejects tool_reference
 * content blocks with: "Input tag 'tool_reference' found using 'type' does not
 * match any of the expected tags".
 */
export function stripToolSearchMessages(messages: ChatMessage[]): ChatMessage[] {
	// First pass: collect the call ids of every tool_search invocation.
	const toolSearchIds = new Set<string>();
	for (const message of messages) {
		if (message.role === Raw.ChatRole.Assistant && message.toolCalls) {
			for (const tc of message.toolCalls) {
				if (tc.function.name === CUSTOM_TOOL_SEARCH_NAME) {
					toolSearchIds.add(tc.id);
				}
			}
		}
	}

	if (toolSearchIds.size === 0) {
		// Fast path: nothing to strip, return the input unmodified.
		return messages;
	}

	// Second pass: drop the matching tool calls from assistant messages and
	// drop their paired tool-result messages entirely.
	return messages.map(message => {
		if (message.role === Raw.ChatRole.Assistant && message.toolCalls) {
			const filteredToolCalls = message.toolCalls.filter(tc => !toolSearchIds.has(tc.id));
			if (filteredToolCalls.length !== message.toolCalls.length) {
				return { ...message, toolCalls: filteredToolCalls.length > 0 ? filteredToolCalls : undefined };
			}
		} else if (message.role === Raw.ChatRole.Tool && message.toolCallId && toolSearchIds.has(message.toolCallId)) {
			return undefined;
		}
		return message;
	}).filter((m): m is ChatMessage => m !== undefined);
}

/** Output of {@link SummarizedConversationHistoryPropsBuilder.getProps}. */
export interface ISummarizedConversationHistoryInfo {
	readonly props: SummarizedAgentHistoryProps;
	// Id of the last tool-call round that is included in the summary.
	readonly summarizedToolCallRoundId: string;
	// Thinking data to replay as the first thinking block after summarization (Anthropic only).
	readonly summarizedThinking?: ThinkingData;
}

/**
 * Exported for test
 */
export class SummarizedConversationHistoryPropsBuilder {
	constructor(
		@IPromptPathRepresentationService private readonly _promptPathRepresentationService: IPromptPathRepresentationService,
		@IWorkspaceService private readonly _workspaceService: IWorkspaceService,
	) { }

	/**
	 * Derives the props for summarization: decides which tool-call rounds are
	 * included, which round id the summary will attach to, and any thinking data
	 * to carry over.
	 * @throws Error('Nothing to summarize') when there are no rounds and no history.
	 */
	getProps(
		props: SummarizedAgentHistoryProps
	): ISummarizedConversationHistoryInfo {
		let toolCallRounds = props.promptContext.toolCallRounds;
		let isContinuation = props.promptContext.isContinuation;
		let summarizedToolCallRoundId = '';
		if (toolCallRounds && toolCallRounds.length > 1) {
			// If there are multiple tool call rounds, exclude the last one, because it must have put us over the limit.
			// Summarize from the previous round in this turn.
			toolCallRounds = toolCallRounds.slice(0, -1);
			summarizedToolCallRoundId = toolCallRounds.at(-1)!.id;
		} else if (props.promptContext.history.length > 0) {
			// If there is only one tool call round, then summarize from the last round of the last turn.
			// Or if there are no tool call rounds, then the new user message put us over the limit.
// (or the last assistant message?)
			// This flag excludes the last user message from the summary.
			isContinuation = true;
			toolCallRounds = [];
			summarizedToolCallRoundId = props.promptContext.history.at(-1)!.rounds.at(-1)!.id;
		} else {
			throw new Error('Nothing to summarize');
		}

		// For Anthropic models with thinking enabled, find the last assistant message with thinking
		// from all rounds being summarized (both current toolCallRounds and history).
		// This thinking will be used as the first thinking block after summarization.
		const summarizedThinking = isAnthropicFamily(props.endpoint) ? this.findLastThinking(props) : undefined;
		const promptContext = {
			...props.promptContext,
			toolCallRounds,
			isContinuation,
		};
		return {
			props: {
				...props,
				workingNotebook: this.getWorkingNotebook(props),
				promptContext
			},
			summarizedToolCallRoundId,
			summarizedThinking
		};
	}

	/**
	 * Returns the thinking data of the most recent current-turn round that has any.
	 * NOTE(review): only the current turn's toolCallRounds are scanned here, even
	 * though the caller's comment mentions history as well — confirm intent.
	 */
	private findLastThinking(props: SummarizedAgentHistoryProps): ThinkingData | undefined {
		if (props.promptContext.toolCallRounds) {
			for (let i = props.promptContext.toolCallRounds.length - 1; i >= 0; i--) {
				const round = props.promptContext.toolCallRounds[i];
				if (round.thinking) {
					return round.thinking;
				}
			}
		}
		return undefined;
	}

	/**
	 * Resolves the open notebook document targeted by the most recent
	 * RunNotebookCell tool call in the current turn, if any.
	 */
	private getWorkingNotebook(props: SummarizedAgentHistoryProps): NotebookDocument | undefined {
		// Search newest-first for a round that invoked RunNotebookCell.
		const toolCallRound = props.promptContext.toolCallRounds && [...props.promptContext.toolCallRounds].reverse().find(round => round.toolCalls.some(call => call.name === ToolName.RunNotebookCell));
		const toolCall = toolCallRound?.toolCalls.find(call => call.name === ToolName.RunNotebookCell);
		if (toolCall && toolCall.arguments) {
			try {
				const args = JSON.parse(toolCall.arguments);
				if (typeof args.filePath === 'string') {
					const uri = this._promptPathRepresentationService.resolveFilePath(args.filePath);
					if (!uri) {
						return undefined;
					}
					// Match against open notebook documents by canonical URI string.
					return this._workspaceService.notebookDocuments.find(doc => doc.uri.toString() === uri.toString());
				}
			} catch (e) {
				// Ignore parsing errors
			}
		}

		return undefined;
	}
}

interface SummaryMessageProps extends BasePromptElementProps {
	readonly summaryText: string;
	readonly endpoint: IChatEndpoint;
}

/** Renders a stored conversation summary as a user message wrapped in a conversation-summary tag. */
class SummaryMessageElement extends PromptElement<SummaryMessageProps> {
	override async render(state: void, sizing: PromptSizing) {
		return <UserMessage>
			<Tag name='conversation-summary'>
				{this.props.summaryText}
			</Tag>
			{this.props.endpoint.family === 'gpt-4.1' && <Tag name='reminderInstructions'>
				<DefaultOpenAIKeepGoingReminder />
			</Tag>}
		</UserMessage>;
	}
}

export interface InlineSummarizationUserMessageProps extends BasePromptElementProps {
	readonly endpoint: IChatEndpoint;
}

/**
 * User message appended to the agent prompt when inline summarization is triggered.
 * Instructs the model to output ONLY a summary wrapped in `<summary>` tags, with
 * no tool calls. The summary is extracted from the response and stored on the round
 * for the next iteration.
 */
export class InlineSummarizationUserMessage extends PromptElement<InlineSummarizationUserMessageProps> {
	override async render(state: void, sizing: PromptSizing) {
		// Opus models get an extra, repeated no-tools instruction.
		const isOpus = this.props.endpoint.model.startsWith('claude-opus');
		return <UserMessage priority={1000}>
			The conversation has grown too large for the context window and must be compacted now.<br />
			<br />
			{SummaryPrompt}
			<br />
			<br />
			IMPORTANT: Output your summary wrapped in {'<summary>'} and {'</summary>'} tags. Do NOT call any tools. Your ONLY task right now is to produce a comprehensive summary of the conversation so far.<br />
			{isOpus && <>
				<br />
				IMPORTANT: Do NOT call any tools. Your only task is to generate a text summary of the conversation. Do not attempt to execute any actions or make any tool calls.<br />
			</>}
		</UserMessage>;
	}
}

/**
 * Extracts an inline summary from the model's response text.
 *
 * Parsing strategy (multi-level fallback):
 * 1. Clean `<summary>...</summary>` tags → extracts content between them
 * 2. `<summary>` found but no closing tag → takes everything after `<summary>`
 * 3. No tags found → returns undefined (caller falls back to separate-call summarization)
 *
 * @returns The extracted summary text, or `undefined` if no summary could be found.
 */
export function extractInlineSummary(responseText: string): string | undefined {
	// 1. Try clean <summary>...</summary> extraction
	const openTag = '<summary>';
	const closeTag = '</summary>';
	const openIdx = responseText.indexOf(openTag);
	if (openIdx !== -1) {
		const contentStart = openIdx + openTag.length;
		const closeIdx = responseText.indexOf(closeTag, contentStart);
		if (closeIdx !== -1) {
			// Clean extraction
			return responseText.substring(contentStart, closeIdx).trim();
		}
		// 2. Open tag but no closing tag — take everything after <summary>
		return responseText.substring(contentStart).trim();
	}

	// 3. No tags found — cannot extract
	return undefined;
}