Path: blob/main/extensions/copilot/src/extension/intents/node/cacheBreakpoints.ts
13399 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { Raw } from '@vscode/prompt-tsx';
import { CacheType } from '../../../platform/endpoint/common/endpointTypes';

/** Total number of cache breakpoints allowed in a prompt, counting ones that are already present. */
const MaxCacheBreakpoints = 4;

/**
 * Prompt cache breakpoint strategy:
 *
 * The prompt is structured like
 * - System message
 * - Custom instructions
 * - Global context message (has prompt-tsx cache breakpoint)
 * - History
 * - Current user message with extra context
 * - Current tool call rounds
 *
 * Below the current user message, we add cache breakpoints to the last tool result in each round.
 * We add one to the current user message.
 * And above the current user message, we add breakpoints to an assistant message with no tool calls (so the terminal response in a turn).
 *
 * There will always be a cache miss when a new turn starts because the previous messages move from below the current user message with extra context to above it.
 * For turns with no tool calling, we will have a hit on the previous assistant message in history.
 * During the agentic loop, each request will have a hit on the previous tool result message.
 *
 * The messages are mutated in place: a cache breakpoint content part is appended to each selected message.
 * Nothing is added when the prompt already contains `MaxCacheBreakpoints` or more breakpoints.
 *
 * @param messages The rendered prompt messages, in prompt order (oldest first).
 */
export function addCacheBreakpoints(messages: Raw.ChatMessage[]) {
	// One or two cache breakpoints are already added via the prompt, assign the rest here.
	let count = MaxCacheBreakpoints - countCacheBreakpoints(messages);
	if (count <= 0) {
		// The budget is already exhausted; adding even one more would exceed the maximum.
		return;
	}

	let isBelowCurrentUserMessage = true;
	const newestFirst = [...messages].reverse();
	for (const [idx, msg] of newestFirst.entries()) {
		// NOTE(review): a user message that already has a breakpoint is skipped here *before*
		// `isBelowCurrentUserMessage` is cleared, so it does not end the "below the current user
		// message" region. Confirm this is intended.
		if (hasCacheBreakpoint(msg)) {
			continue;
		}

		// The message that comes right after `msg` in prompt order. The newest message has no
		// successor; do not use `.at(idx - 1)` here, since `.at(-1)` would wrap around to the
		// oldest message.
		const followingMsg = idx > 0 ? newestFirst[idx - 1] : undefined;

		const isLastToolResultInRound = msg.role === Raw.ChatRole.Tool && followingMsg?.role !== Raw.ChatRole.Tool;
		const isAsstMsgWithNoTools = msg.role === Raw.ChatRole.Assistant && !msg.toolCalls?.length;
		const isEligibleBelowUserMessage = isBelowCurrentUserMessage && (isLastToolResultInRound || msg.role === Raw.ChatRole.User);
		if (isEligibleBelowUserMessage || isAsstMsgWithNoTools) {
			count--;
			pushCacheBreakpoint(msg);

			if (count <= 0) {
				break;
			}
		}

		if (msg.role === Raw.ChatRole.User) {
			isBelowCurrentUserMessage = false;
		}
	}

	// If we still have cache breakpoints to allocate, add them from the system and custom instructions messages, if applicable.
	for (const msg of messages) {
		if (count <= 0) {
			break;
		}

		const isUserOrSystem = msg.role === Raw.ChatRole.User || msg.role === Raw.ChatRole.System;
		if (!isUserOrSystem) {
			// Only the leading run of system/user messages is considered.
			break;
		}

		if (!hasCacheBreakpoint(msg)) {
			count--;
			pushCacheBreakpoint(msg);
		}
	}
}

/** Returns true when the message already contains a cache breakpoint content part. */
function hasCacheBreakpoint(msg: Raw.ChatMessage): boolean {
	return msg.content.some(part => part.type === Raw.ChatCompletionContentPartKind.CacheBreakpoint);
}

/** Appends a cache breakpoint content part to the message (mutates `msg.content`). */
function pushCacheBreakpoint(msg: Raw.ChatMessage): void {
	msg.content.push({
		type: Raw.ChatCompletionContentPartKind.CacheBreakpoint,
		cacheType: CacheType
	});
}

/** Counts the cache breakpoint content parts across all messages. */
function countCacheBreakpoints(messages: Raw.ChatMessage[]) {
	let count = 0;
	for (const msg of messages) {
		count += msg.content.filter(part => part.type === Raw.ChatCompletionContentPartKind.CacheBreakpoint).length;
	}
	return count;
}