Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/intents/node/cacheBreakpoints.ts
13399 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { Raw } from '@vscode/prompt-tsx';
7
import { CacheType } from '../../../platform/endpoint/common/endpointTypes';
8
9
// Maximum total number of cache breakpoints to distribute across the prompt
// (including any breakpoints already inserted via prompt-tsx).
const MaxCacheBreakpoints = 4;
10
11
/**
 * Prompt cache breakpoint strategy:
 *
 * The prompt is structured like
 * - System message
 * - Custom instructions
 * - Global context message (has prompt-tsx cache breakpoint)
 * - History
 * - Current user message with extra context
 * - Current tool call rounds
 *
 * Below the current user message, we add cache breakpoints to the last tool result in each round.
 * We add one to the current user message.
 * And above the current user message, we add breakpoints to an assistant message with no tool calls (so the terminal response in a turn).
 *
 * There will always be a cache miss when a new turn starts because the previous messages move from below the current user message with extra context to above it.
 * For turns with no tool calling, we will have a hit on the previous assistant message in history.
 * During the agentic loop, each request will have a hit on the previous tool result message.
 */
30
export function addCacheBreakpoints(messages: Raw.ChatMessage[]) {
31
// One or two cache breakpoints are already added via the prompt, assign the rest here.
32
let count = MaxCacheBreakpoints - countCacheBreakpoints(messages);
33
let isBelowCurrentUserMessage = true;
34
const reversedMsgs = [...messages].reverse();
35
for (const [idx, msg] of reversedMsgs.entries()) {
36
const prevMsg = reversedMsgs.at(idx - 1);
37
const hasCacheBreakpoint = msg.content.some(part => part.type === Raw.ChatCompletionContentPartKind.CacheBreakpoint);
38
if (hasCacheBreakpoint) {
39
continue;
40
}
41
42
const isLastToolResultInRound = msg.role === Raw.ChatRole.Tool && prevMsg?.role !== Raw.ChatRole.Tool;
43
const isAsstMsgWithNoTools = msg.role === Raw.ChatRole.Assistant && !msg.toolCalls?.length;
44
if (isBelowCurrentUserMessage && (isLastToolResultInRound || msg.role === Raw.ChatRole.User) || isAsstMsgWithNoTools) {
45
count--;
46
msg.content.push({
47
type: Raw.ChatCompletionContentPartKind.CacheBreakpoint,
48
cacheType: CacheType
49
});
50
51
if (count <= 0) {
52
break;
53
}
54
}
55
56
if (msg.role === Raw.ChatRole.User) {
57
isBelowCurrentUserMessage = false;
58
}
59
}
60
61
// If we still have cache breakpoints to allocate, add them from the system and custom instructions messages, if applicable.
62
for (const msg of messages) {
63
if (count <= 0) {
64
break;
65
}
66
67
const hasCacheBreakpoint = msg.content.some(part => part.type === Raw.ChatCompletionContentPartKind.CacheBreakpoint);
68
if ((msg.role === Raw.ChatRole.User || msg.role === Raw.ChatRole.System) && !hasCacheBreakpoint) {
69
count--;
70
msg.content.push({
71
type: Raw.ChatCompletionContentPartKind.CacheBreakpoint,
72
cacheType: CacheType
73
});
74
}
75
76
if (msg.role !== Raw.ChatRole.User && msg.role !== Raw.ChatRole.System) {
77
break;
78
}
79
}
80
}
81
82
/**
 * Returns the total number of cache-breakpoint content parts present across
 * all of the given messages.
 */
function countCacheBreakpoints(messages: Raw.ChatMessage[]) {
	return messages.reduce(
		(total, msg) => total + msg.content.filter(part => part.type === Raw.ChatCompletionContentPartKind.CacheBreakpoint).length,
		0
	);
}
89
90