Path: blob/main/extensions/copilot/src/extension/chronicle/node/sessionReindexer.ts
13399 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import * as l10n from '@vscode/l10n';
import type { IChatDebugFileLoggerService, IDebugLogEntry } from '../../../platform/chat/common/chatDebugFileLoggerService';
import type { ISessionStore, SessionRow, TurnRow, FileRow, RefRow } from '../../../platform/chronicle/common/sessionStore';
import type { CancellationToken } from '../../../util/vs/base/common/cancellation';
import {
	MAX_ASSISTANT_RESPONSE_LENGTH,
	MAX_SUMMARY_LENGTH,
	MAX_USER_MESSAGE_LENGTH,
	extractAssistantResponse,
	extractFilePath,
	extractRefsFromMcpTool,
	extractRefsFromTerminal,
	extractRepoFromMcpTool,
	isGitHubMcpTool,
	isTerminalTool,
	truncateForStore,
} from '../common/sessionStoreTracking';

/**
 * Result of a reindex operation.
 */
export interface ReindexResult {
	/** Number of sessions successfully processed. */
	processed: number;
	/** Number of sessions skipped (already indexed or errors). */
	skipped: number;
	/** Whether the operation was cancelled. */
	cancelled: boolean;
}

/**
 * Per-session write buffer. Allocated per-session and flushed to the store in a
 * single transaction. Bounded by the number of events in a single session.
 */
interface PerSessionWriteBuffer {
	session: SessionRow | undefined;
	turns: TurnRow[];
	files: FileRow[];
	refs: RefRow[];
}

/**
 * Mutable state used to pair a user message with the assistant response that follows it.
 */
interface TurnPairingState {
	/** Most recent user message that has not yet been written as a turn. */
	pendingUserMessage: string | undefined;
	/** ISO timestamp of {@link pendingUserMessage}, when it could be derived. */
	pendingUserTimestamp: string | undefined;
	/** Index assigned to the next turn that gets written. */
	turnIndex: number;
}

/**
 * Safely parse JSON from a string attribute.
 *
 * @param raw Attribute value as read from a log entry.
 * @returns The parsed value, or `undefined` when `raw` is not a string or is not valid JSON.
 */
function tryParseArgs(raw: string | number | boolean | undefined): unknown {
	if (typeof raw !== 'string') {
		return undefined;
	}
	try {
		return JSON.parse(raw);
	} catch {
		return undefined;
	}
}

/**
 * Convert a log entry timestamp to an ISO-8601 string.
 *
 * `Date.prototype.toISOString` throws a `RangeError` for an invalid date; a single
 * malformed timestamp should not cause the whole session to be discarded, so this
 * returns `undefined` instead.
 *
 * @param ts Timestamp taken from a log entry.
 * @returns The ISO string, or `undefined` when `ts` does not describe a valid date.
 */
function toIsoTimestamp(ts: IDebugLogEntry['ts']): string | undefined {
	const date = new Date(ts);
	return Number.isNaN(date.getTime()) ? undefined : date.toISOString();
}

/**
 * Rebuild the local Chronicle session store by re-reading JSONL debug logs from disk.
 *
 * @param store Session store that receives the rebuilt rows.
 * @param debugLogService Source of session ids and their log entries.
 * @param reportProgress Called with a localized message before each session is reindexed.
 * @param token Checked before each session; when cancellation is requested the
 *   function returns immediately with `cancelled: true`.
 * @param force When `true`, sessions already present in the store are reindexed too.
 * @returns Counts of processed and skipped sessions and whether the run was cancelled.
 */
export async function reindexSessions(
	store: ISessionStore,
	debugLogService: IChatDebugFileLoggerService,
	reportProgress: (message: string) => void,
	token: CancellationToken,
	force: boolean = false,
): Promise<ReindexResult> {
	const sessionIds = await debugLogService.listSessionIds();

	let processed = 0;
	let skipped = 0;

	for (let i = 0; i < sessionIds.length; i++) {
		if (token.isCancellationRequested) {
			return { processed, skipped, cancelled: true };
		}

		const sessionId = sessionIds[i];

		// Fast-path: skip sessions already in the store unless force mode
		if (!force && store.getSession(sessionId)) {
			skipped++;
			continue;
		}

		reportProgress(l10n.t('Reindexing session {0} of {1}...', i + 1, sessionIds.length));

		try {
			await reindexOneSession(store, debugLogService, sessionId);
			processed++;
		} catch {
			// Non-fatal — a corrupt or unreadable session is counted as skipped
			// so the remaining sessions still get reindexed.
			skipped++;
		}

		// Yield to event loop between sessions to avoid blocking the extension host
		await new Promise<void>(resolve => setTimeout(resolve, 0));
	}

	return { processed, skipped, cancelled: false };
}

/**
 * Reindex a single session from its JSONL debug log.
 * Streams events into a per-session buffer, then writes the buffer in one transaction.
 *
 * @param store Session store that receives the rows.
 * @param debugLogService Service used to stream the session's log entries.
 * @param sessionId Id of the session to reindex.
 */
async function reindexOneSession(
	store: ISessionStore,
	debugLogService: IChatDebugFileLoggerService,
	sessionId: string,
): Promise<void> {
	const buffer: PerSessionWriteBuffer = {
		session: undefined,
		turns: [],
		files: [],
		refs: [],
	};

	// One state object for the whole session; the entry handlers mutate it in place.
	const state: TurnPairingState = {
		pendingUserMessage: undefined,
		pendingUserTimestamp: undefined,
		turnIndex: 0,
	};

	await debugLogService.streamEntries(sessionId, (entry: IDebugLogEntry) => {
		processEntry(entry, sessionId, buffer, state);
	});

	// If there's a trailing user message without a paired assistant response, flush it
	if (state.pendingUserMessage) {
		buffer.turns.push({
			session_id: sessionId,
			turn_index: state.turnIndex,
			user_message: truncateForStore(state.pendingUserMessage, MAX_USER_MESSAGE_LENGTH),
			timestamp: state.pendingUserTimestamp,
		});
	}

	// Ensure we always have a session row (even if no session_start event was found)
	const session: SessionRow = buffer.session ?? { id: sessionId, host_type: 'vscode' };

	// Flush all buffered data in a single transaction
	store.runInTransaction(() => {
		store.upsertSession(session);

		for (const turn of buffer.turns) {
			store.insertTurn(turn);
		}
		for (const file of buffer.files) {
			store.insertFile(file);
		}
		for (const ref of buffer.refs) {
			store.insertRef(ref);
		}
	});
}

/**
 * Process a single JSONL entry and update the per-session buffer.
 * This is the streaming callback — called once per entry. Entry types other than
 * the ones listed in the switch are ignored.
 *
 * @param entry Log entry to process.
 * @param sessionId Id of the session the entry belongs to.
 * @param buffer Per-session buffer that collects rows to write.
 * @param state Turn pairing state shared across entries of the same session.
 */
function processEntry(
	entry: IDebugLogEntry,
	sessionId: string,
	buffer: PerSessionWriteBuffer,
	state: TurnPairingState,
): void {
	switch (entry.type) {
		case 'session_start':
			processSessionStart(entry, sessionId, buffer);
			break;
		case 'user_message':
		case 'turn_start':
			processUserMessage(entry, state);
			break;
		case 'agent_response':
			processAssistantResponse(entry, sessionId, buffer, state);
			break;
		case 'tool_call':
			processToolCall(entry, sessionId, buffer, state);
			break;
	}
}

/**
 * Build the session row from a `session_start` entry.
 *
 * A summary or repository that earlier entries already put on the buffered session
 * row is carried over, so a `session_start` that appears after other entries does
 * not erase them.
 *
 * @param entry The `session_start` entry.
 * @param sessionId Id of the session being reindexed.
 * @param buffer Per-session buffer whose `session` row is replaced.
 */
function processSessionStart(
	entry: IDebugLogEntry,
	sessionId: string,
	buffer: PerSessionWriteBuffer,
): void {
	const attrs = entry.attrs;
	const previous = buffer.session;

	const session: SessionRow = {
		id: sessionId,
		host_type: 'vscode',
		cwd: typeof attrs.cwd === 'string' ? attrs.cwd : undefined,
		repository: typeof attrs.repository === 'string' ? attrs.repository : previous?.repository,
		branch: typeof attrs.branch === 'string' ? attrs.branch : undefined,
		created_at: toIsoTimestamp(entry.ts),
	};
	if (previous?.summary) {
		session.summary = previous.summary;
	}

	buffer.session = session;
}

/**
 * Record a user message as pending until the next assistant response arrives.
 * The text is read from the `content` attribute, falling back to `userRequest`;
 * entries with neither (or with an empty string) leave the state untouched.
 *
 * @param entry A `user_message` or `turn_start` entry.
 * @param state Turn pairing state to update.
 */
function processUserMessage(
	entry: IDebugLogEntry,
	state: TurnPairingState,
): void {
	const content = typeof entry.attrs.content === 'string'
		? entry.attrs.content
		: typeof entry.attrs.userRequest === 'string'
			? entry.attrs.userRequest
			: undefined;
	if (content) {
		state.pendingUserMessage = content;
		state.pendingUserTimestamp = toIsoTimestamp(entry.ts);
	}
}

/**
 * Turn an `agent_response` entry into a turn row, paired with the pending user
 * message if there is one, and advance the turn index.
 *
 * @param entry The `agent_response` entry.
 * @param sessionId Id of the session being reindexed.
 * @param buffer Per-session buffer that receives the turn (and possibly the summary).
 * @param state Turn pairing state; its pending message is consumed.
 */
function processAssistantResponse(
	entry: IDebugLogEntry,
	sessionId: string,
	buffer: PerSessionWriteBuffer,
	state: TurnPairingState,
): void {
	// Extract assistant response from the 'response' attribute (as written by chatDebugFileLoggerService).
	// Attributes may also hold numbers or booleans, so only a string is passed on.
	const responseRaw = typeof entry.attrs.response === 'string' ? entry.attrs.response : undefined;
	const assistantResponse = extractAssistantResponse(responseRaw);

	// Only create a turn if we have at least a user message or assistant response
	if (!state.pendingUserMessage && !assistantResponse) {
		return;
	}

	buffer.turns.push({
		session_id: sessionId,
		turn_index: state.turnIndex,
		user_message: truncateForStore(state.pendingUserMessage, MAX_USER_MESSAGE_LENGTH),
		assistant_response: truncateForStore(assistantResponse, MAX_ASSISTANT_RESPONSE_LENGTH),
		timestamp: state.pendingUserTimestamp ?? toIsoTimestamp(entry.ts),
	});

	// Use first user message as summary if not yet set
	if (!buffer.session?.summary && state.pendingUserMessage) {
		const summary = truncateForStore(state.pendingUserMessage, MAX_SUMMARY_LENGTH);
		if (!buffer.session) {
			buffer.session = { id: sessionId, host_type: 'vscode' };
		}
		buffer.session.summary = summary;
	}

	state.turnIndex++;
	state.pendingUserMessage = undefined;
	state.pendingUserTimestamp = undefined;
}

/**
 * Extract file and ref rows from a `tool_call` entry and attribute them to the
 * current turn index. GitHub MCP tool calls may also set the session's repository.
 *
 * @param entry The `tool_call` entry.
 * @param sessionId Id of the session being reindexed.
 * @param buffer Per-session buffer that receives the file and ref rows.
 * @param state Turn pairing state; only `turnIndex` is read.
 */
function processToolCall(
	entry: IDebugLogEntry,
	sessionId: string,
	buffer: PerSessionWriteBuffer,
	state: TurnPairingState,
): void {
	const toolName = entry.name;
	const toolArgs = tryParseArgs(entry.attrs.args);
	const resultText = typeof entry.attrs.result === 'string' ? entry.attrs.result : undefined;

	// Extract file path
	const filePath = extractFilePath(toolName, toolArgs);
	if (filePath) {
		buffer.files.push({
			session_id: sessionId,
			file_path: filePath,
			tool_name: toolName,
			turn_index: state.turnIndex,
		});
	}

	// Extract refs from GitHub MCP tools
	if (isGitHubMcpTool(toolName)) {
		const refs = extractRefsFromMcpTool(toolName, toolArgs);
		for (const ref of refs) {
			buffer.refs.push({ session_id: sessionId, ...ref, turn_index: state.turnIndex });
		}

		const repo = extractRepoFromMcpTool(toolArgs);
		if (repo) {
			if (!buffer.session) {
				buffer.session = { id: sessionId, host_type: 'vscode' };
			}
			buffer.session.repository = repo;
		}
	}

	// Extract refs from terminal/shell tools
	if (isTerminalTool(toolName)) {
		const refs = extractRefsFromTerminal(toolArgs, resultText);
		for (const ref of refs) {
			buffer.refs.push({ session_id: sessionId, ...ref, turn_index: state.turnIndex });
		}
	}
}