Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/chronicle/node/sessionReindexer.ts
13399 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import * as l10n from '@vscode/l10n';
7
import type { IChatDebugFileLoggerService, IDebugLogEntry } from '../../../platform/chat/common/chatDebugFileLoggerService';
8
import type { ISessionStore, SessionRow, TurnRow, FileRow, RefRow } from '../../../platform/chronicle/common/sessionStore';
9
import type { CancellationToken } from '../../../util/vs/base/common/cancellation';
10
import {
11
MAX_ASSISTANT_RESPONSE_LENGTH,
12
MAX_SUMMARY_LENGTH,
13
MAX_USER_MESSAGE_LENGTH,
14
extractAssistantResponse,
15
extractFilePath,
16
extractRefsFromMcpTool,
17
extractRefsFromTerminal,
18
extractRepoFromMcpTool,
19
isGitHubMcpTool,
20
isTerminalTool,
21
truncateForStore,
22
} from '../common/sessionStoreTracking';
23
24
/**
 * Outcome reported by {@link reindexSessions} when it returns.
 */
export interface ReindexResult {
	/** How many sessions were re-read from their debug log and written to the store. */
	processed: number;
	/** How many sessions were left alone: already present in the store, or failed to reindex. */
	skipped: number;
	/** `true` when cancellation was observed before every session had been visited. */
	cancelled: boolean;
}
35
36
/**
 * Rows collected for one session while its log is streamed.
 * A fresh buffer is created for each session and written to the store in one
 * transaction, so its size is bounded by the events of that single session.
 */
interface PerSessionWriteBuffer {
	/** Session row; stays `undefined` until an entry supplies session-level data. */
	session: SessionRow | undefined;
	/** Conversation turns, in the order they were paired. */
	turns: TurnRow[];
	/** Files referenced by tool calls. */
	files: FileRow[];
	/** Refs extracted from GitHub MCP and terminal tool calls. */
	refs: RefRow[];
}
46
47
/**
 * Parse a JSON-encoded attribute value without ever throwing.
 *
 * @param raw Attribute value as logged; only strings are candidates for parsing.
 * @returns The parsed JSON value, or `undefined` when `raw` is not a string or is not valid JSON.
 */
function tryParseArgs(raw: string | number | boolean | undefined): unknown {
	let parsed: unknown = undefined;
	if (typeof raw === 'string') {
		try {
			parsed = JSON.parse(raw);
		} catch {
			// Malformed JSON is reported the same way as "no arguments".
			parsed = undefined;
		}
	}
	return parsed;
}
60
61
/**
 * Rebuild the local Chronicle session store by re-reading JSONL debug logs from disk.
 *
 * Sessions are handled one at a time. A session that fails to reindex is counted
 * as skipped and does not abort the run.
 *
 * @param store Session store that receives the rebuilt rows.
 * @param debugLogService Provides the session ids and their logged entries.
 * @param reportProgress Receives a localized status message before each session that is actually reindexed.
 * @param token Checked at the start of every iteration; once cancellation is requested the partial counts are returned.
 * @param force When `true`, sessions already present in the store are reindexed as well.
 * @returns Processed/skipped counts and whether the run was cancelled.
 */
export async function reindexSessions(
	store: ISessionStore,
	debugLogService: IChatDebugFileLoggerService,
	reportProgress: (message: string) => void,
	token: CancellationToken,
	force: boolean = false,
): Promise<ReindexResult> {
	const sessionIds = await debugLogService.listSessionIds();
	const total = sessionIds.length;
	const result: ReindexResult = { processed: 0, skipped: 0, cancelled: false };

	for (let index = 0; index < total; index++) {
		if (token.isCancellationRequested) {
			result.cancelled = true;
			return result;
		}

		const sessionId = sessionIds[index];

		// Without force mode, a session that is already stored is not touched again.
		const alreadyIndexed = !force && store.getSession(sessionId);
		if (alreadyIndexed) {
			result.skipped++;
			continue;
		}

		reportProgress(l10n.t('Reindexing session {0} of {1}...', index + 1, total));

		let succeeded = true;
		try {
			await reindexOneSession(store, debugLogService, sessionId);
		} catch {
			// Corrupt or unreadable sessions are non-fatal; they are only counted as skipped.
			succeeded = false;
		}

		if (succeeded) {
			result.processed++;
		} else {
			result.skipped++;
		}

		// Hand control back to the event loop so the extension host is not blocked.
		await new Promise<void>(resolve => setTimeout(resolve, 0));
	}

	return result;
}
105
106
/**
 * Reindex a single session from its JSONL debug log.
 *
 * Entries are streamed one at a time into a per-session buffer, which is then
 * written to the store inside a single `runInTransaction` call.
 *
 * @param store Session store that receives the rebuilt rows.
 * @param debugLogService Streams the logged entries of the session.
 * @param sessionId Id of the session to reindex.
 */
async function reindexOneSession(
	store: ISessionStore,
	debugLogService: IChatDebugFileLoggerService,
	sessionId: string,
): Promise<void> {
	const buffer: PerSessionWriteBuffer = {
		session: undefined,
		turns: [],
		files: [],
		refs: [],
	};

	// Turn-pairing state: the pending user message waits here until the next
	// assistant response arrives. One object is shared by every entry instead of
	// allocating a new accessor object per streamed line.
	const state: {
		pendingUserMessage: string | undefined;
		pendingUserTimestamp: string | undefined;
		turnIndex: number;
	} = {
		pendingUserMessage: undefined,
		pendingUserTimestamp: undefined,
		turnIndex: 0,
	};

	await debugLogService.streamEntries(sessionId, (entry: IDebugLogEntry) => {
		processEntry(entry, sessionId, buffer, state);
	});

	// A trailing user message that never received an assistant response still becomes a turn.
	if (state.pendingUserMessage) {
		buffer.turns.push({
			session_id: sessionId,
			turn_index: state.turnIndex,
			user_message: truncateForStore(state.pendingUserMessage, MAX_USER_MESSAGE_LENGTH),
			timestamp: state.pendingUserTimestamp,
		});
	}

	// Always write a session row, even when the log contained no session_start entry.
	if (!buffer.session) {
		buffer.session = { id: sessionId, host_type: 'vscode' };
	}
	const sessionRow = buffer.session;

	// Flush everything that was buffered inside one transaction.
	store.runInTransaction(() => {
		store.upsertSession(sessionRow);

		for (const turn of buffer.turns) {
			store.insertTurn(turn);
		}
		for (const file of buffer.files) {
			store.insertFile(file);
		}
		for (const ref of buffer.refs) {
			store.insertRef(ref);
		}
	});

	// The buffer is local to this call and becomes unreachable on return, so no
	// explicit clearing is needed.
}
174
175
/**
 * Mutable state used to pair a user message with the assistant response that follows it.
 */
interface TurnPairingState {
	/** Most recent user message not yet written as a turn, if any. */
	pendingUserMessage: string | undefined;
	/** ISO timestamp recorded together with the pending user message. */
	pendingUserTimestamp: string | undefined;
	/** Index assigned to the next turn; also stamped on file and ref rows. */
	turnIndex: number;
}
180
181
/**
 * Route one JSONL entry to the handler for its type.
 * Invoked once per streamed entry; entry types without a handler are ignored.
 *
 * @param entry The log entry to process.
 * @param sessionId Id of the session the entry belongs to.
 * @param buffer Per-session buffer the handlers append to.
 * @param state Turn-pairing state shared across the entries of the session.
 */
function processEntry(
	entry: IDebugLogEntry,
	sessionId: string,
	buffer: PerSessionWriteBuffer,
	state: TurnPairingState,
): void {
	const kind = entry.type;

	if (kind === 'session_start') {
		processSessionStart(entry, sessionId, buffer);
		return;
	}

	// Both entry types are handled as a user message.
	if (kind === 'user_message' || kind === 'turn_start') {
		processUserMessage(entry, state);
		return;
	}

	if (kind === 'agent_response') {
		processAssistantResponse(entry, sessionId, buffer, state);
		return;
	}

	if (kind === 'tool_call') {
		processToolCall(entry, sessionId, buffer, state);
	}
}
207
208
/**
 * Build the session row from a `session_start` entry, replacing any row already in the buffer.
 *
 * Attributes that are not strings are stored as `undefined`. An entry whose
 * timestamp cannot be parsed leaves `created_at` undefined instead of throwing.
 *
 * @param entry The `session_start` log entry.
 * @param sessionId Id of the session being reindexed.
 * @param buffer Per-session buffer whose `session` field is overwritten.
 */
function processSessionStart(
	entry: IDebugLogEntry,
	sessionId: string,
	buffer: PerSessionWriteBuffer,
): void {
	const { cwd, repository, branch } = entry.attrs;

	// Date#toISOString throws a RangeError on an invalid date, so check first.
	const startedAt = new Date(entry.ts);
	const createdAt = Number.isNaN(startedAt.getTime()) ? undefined : startedAt.toISOString();

	buffer.session = {
		id: sessionId,
		host_type: 'vscode',
		cwd: typeof cwd === 'string' ? cwd : undefined,
		repository: typeof repository === 'string' ? repository : undefined,
		branch: typeof branch === 'string' ? branch : undefined,
		created_at: createdAt,
	};
}
223
224
/**
 * Record the user message carried by an entry as the pending message for the next turn.
 *
 * The text is taken from `attrs.content`, falling back to `attrs.userRequest`;
 * the first non-empty string wins. When neither holds a non-empty string the
 * state is left untouched. An unparseable entry timestamp leaves the pending
 * timestamp undefined instead of throwing.
 *
 * @param entry The log entry that may carry a user message.
 * @param state Turn-pairing state that receives the pending message and timestamp.
 */
function processUserMessage(
	entry: IDebugLogEntry,
	state: TurnPairingState,
): void {
	const { content, userRequest } = entry.attrs;

	let message: string | undefined;
	if (typeof content === 'string' && content.length > 0) {
		message = content;
	} else if (typeof userRequest === 'string' && userRequest.length > 0) {
		// An empty `content` must not hide a usable `userRequest`.
		message = userRequest;
	}

	if (message === undefined) {
		return;
	}

	// Date#toISOString throws a RangeError on an invalid date, so check first.
	const sentAt = new Date(entry.ts);
	state.pendingUserMessage = message;
	state.pendingUserTimestamp = Number.isNaN(sentAt.getTime()) ? undefined : sentAt.toISOString();
}
238
239
/**
 * Close the current turn: pair the pending user message (if any) with the
 * assistant response carried by an `agent_response` entry and buffer the turn.
 *
 * Nothing is buffered when there is neither a pending user message nor an
 * extracted response. After a turn is buffered, the turn index advances and
 * the pending user message is cleared.
 *
 * @param entry The `agent_response` log entry.
 * @param sessionId Id of the session being reindexed.
 * @param buffer Per-session buffer that receives the turn and, when unset, the session summary.
 * @param state Turn-pairing state; read for the pending message and advanced on success.
 */
function processAssistantResponse(
	entry: IDebugLogEntry,
	sessionId: string,
	buffer: PerSessionWriteBuffer,
	state: TurnPairingState,
): void {
	// Only a string `response` attribute is handed to the extractor; other
	// value types are treated as "no response" rather than cast to string.
	const responseAttr = entry.attrs.response;
	const responseRaw = typeof responseAttr === 'string' ? responseAttr : undefined;
	const assistantResponse = extractAssistantResponse(responseRaw);

	// A turn needs at least one side of the exchange.
	if (!state.pendingUserMessage && !assistantResponse) {
		return;
	}

	// Prefer the user message's timestamp; otherwise use this entry's own, when
	// it parses (Date#toISOString throws a RangeError on an invalid date).
	let timestamp = state.pendingUserTimestamp;
	if (timestamp === undefined) {
		const respondedAt = new Date(entry.ts);
		timestamp = Number.isNaN(respondedAt.getTime()) ? undefined : respondedAt.toISOString();
	}

	buffer.turns.push({
		session_id: sessionId,
		turn_index: state.turnIndex,
		user_message: truncateForStore(state.pendingUserMessage, MAX_USER_MESSAGE_LENGTH),
		assistant_response: truncateForStore(assistantResponse, MAX_ASSISTANT_RESPONSE_LENGTH),
		timestamp,
	});

	// The first user message that completes a turn becomes the session summary.
	if (!buffer.session?.summary && state.pendingUserMessage) {
		const summary = truncateForStore(state.pendingUserMessage, MAX_SUMMARY_LENGTH);
		if (!buffer.session) {
			buffer.session = { id: sessionId, host_type: 'vscode' };
		}
		buffer.session.summary = summary;
	}

	state.turnIndex++;
	state.pendingUserMessage = undefined;
	state.pendingUserTimestamp = undefined;
}
275
276
/**
 * Derive file and ref rows from a `tool_call` entry and append them to the buffer.
 *
 * Every row is stamped with the current turn index. For GitHub MCP tools, a
 * repository extracted from the arguments is also written to the session row,
 * which is created if it does not exist yet.
 *
 * @param entry The `tool_call` log entry; `entry.name` is the tool name.
 * @param sessionId Id of the session being reindexed.
 * @param buffer Per-session buffer that receives the file and ref rows.
 * @param state Turn-pairing state; only its `turnIndex` is read.
 */
function processToolCall(
	entry: IDebugLogEntry,
	sessionId: string,
	buffer: PerSessionWriteBuffer,
	state: TurnPairingState,
): void {
	const toolName = entry.name;
	const parsedArgs = tryParseArgs(entry.attrs.args);
	const toolOutput = typeof entry.attrs.result === 'string' ? entry.attrs.result : undefined;
	const currentTurn = state.turnIndex;

	// File row, when the tool call names a file path.
	const touchedPath = extractFilePath(toolName, parsedArgs);
	if (touchedPath) {
		buffer.files.push({
			session_id: sessionId,
			file_path: touchedPath,
			tool_name: toolName,
			turn_index: currentTurn,
		});
	}

	// GitHub MCP tools: refs plus the repository they target.
	if (isGitHubMcpTool(toolName)) {
		for (const mcpRef of extractRefsFromMcpTool(toolName, parsedArgs)) {
			buffer.refs.push({ session_id: sessionId, ...mcpRef, turn_index: currentTurn });
		}

		const repo = extractRepoFromMcpTool(parsedArgs);
		if (repo) {
			if (buffer.session === undefined) {
				buffer.session = { id: sessionId, host_type: 'vscode' };
			}
			buffer.session.repository = repo;
		}
	}

	// Terminal/shell tools: refs found in the arguments or the textual result.
	if (isTerminalTool(toolName)) {
		for (const terminalRef of extractRefsFromTerminal(parsedArgs, toolOutput)) {
			buffer.refs.push({ session_id: sessionId, ...terminalRef, turn_index: currentTurn });
		}
	}
}
321
322