CoCalc -- sessionStoreTracking.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/chronicle/common/sessionStoreTracking.ts
¹³³⁹⁹ views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import { GenAiAttr } from '../../../platform/otel/common/genAiAttributes';
7
import type { ICompletedSpanData } from '../../../platform/otel/common/otelService';
8

9
/**
10
 * Helpers for extracting file paths and refs from tool calls,
11
 * plus shared constants for session store truncation limits.
12
 */
13

14
// ── Truncation limits (shared by sessionStoreTracker and sessionReindexer) ──
15

16
/** Maximum characters stored for user_message. */
17
export const MAX_USER_MESSAGE_LENGTH = 100;
18

19
/** Maximum characters stored for assistant_response. */
20
export const MAX_ASSISTANT_RESPONSE_LENGTH = 1000;
21

22
/** Maximum characters stored for session summary. */
23
export const MAX_SUMMARY_LENGTH = 100;
24

25
/**
 * Truncate a string to at most `maxLength` stored characters (UTF-16 code units),
 * appending '...' when anything was cut off.
 *
 * The returned value, including the truncation suffix, never exceeds `maxLength`.
 * When `maxLength` is too small to hold the suffix, as much of the suffix as fits
 * is returned; a negative `maxLength` yields an empty string.
 *
 * @param value Text to store; falsy input (undefined or '') yields `undefined`.
 * @param maxLength Upper bound on the length of the returned string.
 * @returns The original string if it fits, the shortened string otherwise.
 */
export function truncateForStore(value: string | undefined, maxLength: number): string | undefined {
	if (!value) {
		return undefined;
	}
	if (value.length <= maxLength) {
		return value;
	}
	const ellipsis = '...';
	if (maxLength <= ellipsis.length) {
		// Clamp to zero: a negative end index would slice relative to the end of
		// the suffix and return more characters than allowed.
		return ellipsis.slice(0, Math.max(0, maxLength));
	}
	let cut = maxLength - ellipsis.length;
	// Do not cut between the two halves of a surrogate pair; that would leave a
	// lone high surrogate (invalid UTF-16) at the end of the stored text.
	const lastKept = value.charCodeAt(cut - 1);
	const firstDropped = value.charCodeAt(cut);
	const splitsPair = lastKept >= 0xD800 && lastKept <= 0xDBFF
		&& firstDropped >= 0xDC00 && firstDropped <= 0xDFFF;
	if (splitsPair) {
		cut--;
	}
	return value.slice(0, cut).trimEnd() + ellipsis;
}
43

44
/**
 * Whether `toolName` is one of the terminal/shell tools that may produce refs.
 * Both the camelCase and the snake_case spelling are recognised; the match is exact.
 */
export function isTerminalTool(toolName: string): boolean {
	switch (toolName) {
		case 'runInTerminal':
		case 'run_in_terminal':
			return true;
		default:
			return false;
	}
}
48

49
/**
 * Extract tool arguments from an OTel span.
 *
 * Reads the `GenAiAttr.TOOL_CALL_ARGUMENTS` span attribute and, when it is a
 * string, parses it as JSON. Only a JSON object is accepted as an argument
 * bag; a missing or non-string attribute, malformed JSON, or JSON that decodes
 * to `null`, an array or a primitive all yield an empty object, so callers can
 * always index the result safely.
 *
 * @param span Completed span whose attributes are inspected.
 * @returns The parsed argument object, or `{}` when none is available.
 * @internal Exported for testing.
 */
export function extractToolArgs(span: ICompletedSpanData): Record<string, unknown> {
	const serialized = span.attributes[GenAiAttr.TOOL_CALL_ARGUMENTS];
	if (typeof serialized !== 'string') {
		return {};
	}
	try {
		const parsed: unknown = JSON.parse(serialized);
		// JSON.parse happily returns null, arrays and primitives; none of those
		// honour the Record<string, unknown> return type.
		if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
			return parsed as Record<string, unknown>;
		}
	} catch {
		// Best effort: malformed JSON is treated as "no arguments".
	}
	return {};
}
65

66
/**
 * Tools whose arguments contain a file path being modified or read.
 * Lookups are exact, case-sensitive matches on the tool name.
 */
const FILE_TRACKING_TOOLS: ReadonlySet<string> = new Set<string>([
	// VS Code model-facing tool names (from ToolName enum)
	'replace_string_in_file',
	'multi_replace_string_in_file',
	'insert_edit_into_file',
	'create_file',
	'create_directory',
	'edit_notebook_file',
	'apply_patch',
	'read_file',
	'view_image',
	'list_dir',
	// CLI-agent tool names (backward compat)
	'str_replace_editor',
	'create',
]);
83

84
/** Tool-name prefixes that identify GitHub MCP server tools. */
const GH_MCP_PREFIXES: readonly string[] = [
	'mcp_github_',
	'github-mcp-server-',
];
86

87
/**
 * Pull the file path a tool call refers to out of its arguments, if any.
 *
 * Only tools listed in `FILE_TRACKING_TOOLS` are considered. The path is looked
 * up, in order, in the top-level `filePath`, `path` and `dirPath` arguments;
 * failing that, in the first entry of `replacements` (for
 * `multi_replace_string_in_file`) or in the patch text passed as `input`
 * (for `apply_patch`). The string is returned exactly as supplied.
 *
 * @param toolName Name of the invoked tool.
 * @param toolArgs Parsed tool arguments; anything that is not a non-null object yields `undefined`.
 * @returns The path string, or `undefined` when none can be determined.
 * @internal Exported for testing.
 */
export function extractFilePath(toolName: string, toolArgs: unknown): string | undefined {
	const isObject = typeof toolArgs === 'object' && toolArgs !== null;
	if (!isObject || !FILE_TRACKING_TOOLS.has(toolName)) { return undefined; }
	const record = toolArgs as Record<string, unknown>;

	// First non-nullish of the three known path keys; it only counts if it is a string.
	const direct = record.filePath ?? record.path ?? record.dirPath;
	if (typeof direct === 'string') { return direct; }

	switch (toolName) {
		case 'multi_replace_string_in_file': {
			// Each replacement item carries its own filePath; only the first item is consulted.
			if (!Array.isArray(record.replacements)) { return undefined; }
			const head: unknown = record.replacements[0];
			if (typeof head !== 'object' || head === null) { return undefined; }
			const nested = (head as Record<string, unknown>).filePath;
			return typeof nested === 'string' ? nested : undefined;
		}
		case 'apply_patch':
			// The path is encoded in the patch text itself.
			return typeof record.input === 'string' ? extractFirstFileFromPatch(record.input) : undefined;
		default:
			return undefined;
	}
}
119

120
/**
 * Find the first file header in an apply_patch input and return its path.
 * A header is a line of the form `*** Update File: <path>`, `*** Add File: <path>`
 * or `*** Delete File: <path>`; the path is returned with surrounding whitespace removed.
 *
 * @returns The path from the first matching header, or `undefined` if there is none.
 */
function extractFirstFileFromPatch(input: string): string | undefined {
	const headerPattern = /^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$/m;
	const found = headerPattern.exec(input);
	if (found === null) {
		return undefined;
	}
	return found[1]?.trim();
}
128

129
/**
 * Read `field` from a value of unknown shape, returning it only when the value
 * is a non-null object and the field holds a string.
 */
function getStringField(obj: unknown, field: string): string | undefined {
	if (obj !== null && typeof obj === 'object') {
		const candidate = (obj as Record<string, unknown>)[field];
		if (typeof candidate === 'string') {
			return candidate;
		}
	}
	return undefined;
}
137

138
/**
 * Read `field` from a value of unknown shape, returning it only when the value
 * is a non-null object and the field holds a number.
 */
function getNumberField(obj: unknown, field: string): number | undefined {
	if (obj !== null && typeof obj === 'object') {
		const candidate = (obj as Record<string, unknown>)[field];
		if (typeof candidate === 'number') {
			return candidate;
		}
	}
	return undefined;
}
146

147
/**
 * Derive PR / issue / commit refs from a GitHub MCP server tool call.
 *
 * The tool name selects which structured argument is consulted:
 * names containing `pull_request` read the numeric `pullNumber`,
 * names containing `issue` read the numeric `issue_number`,
 * and names containing `commit` read the string `sha`.
 * Missing, wrongly typed, zero or empty values produce no ref.
 *
 * @returns Refs in the order pr, issue, commit; empty when nothing applies.
 * @internal Exported for testing.
 */
export function extractRefsFromMcpTool(
	toolName: string,
	toolArgs: unknown,
): Array<{ ref_type: 'pr' | 'issue' | 'commit'; ref_value: string }> {
	const collected: Array<{ ref_type: 'pr' | 'issue' | 'commit'; ref_value: string }> = [];

	// PR tools, e.g. pull_request_read, list_pull_requests, search_pull_requests
	const prNumber = toolName.includes('pull_request') ? getNumberField(toolArgs, 'pullNumber') : undefined;
	if (prNumber) {
		collected.push({ ref_type: 'pr', ref_value: String(prNumber) });
	}

	// Issue tools, e.g. issue_read, list_issues, search_issues
	const issueNumber = toolName.includes('issue') ? getNumberField(toolArgs, 'issue_number') : undefined;
	if (issueNumber) {
		collected.push({ ref_type: 'issue', ref_value: String(issueNumber) });
	}

	// Commit tools, e.g. get_commit, list_commits
	const commitSha = toolName.includes('commit') ? getStringField(toolArgs, 'sha') : undefined;
	if (commitSha) {
		collected.push({ ref_type: 'commit', ref_value: commitSha });
	}

	return collected;
}
184

185
/**
 * Detect git/gh commands in terminal tool arguments and extract refs from the
 * command output.
 *
 * - `gh pr create|checkout|view|merge`: the first GitHub pull-request URL in the
 *   output yields a `pr` ref.
 * - `gh issue create`: the first GitHub issue URL in the output yields an `issue` ref.
 * - `git commit`: the abbreviated or full SHA from git's `[branch sha]` summary
 *   line yields a `commit` ref. Summaries with extra words before the SHA, such
 *   as `[main (root-commit) sha]` or `[detached HEAD sha]`, are recognised too.
 *
 * @param toolArgs Terminal tool arguments; the string field `command` is inspected.
 * @param resultText Output produced by the command; without it no refs can be found.
 * @returns Refs in the order pr, issue, commit; empty when nothing was detected.
 * @internal Exported for testing.
 */
export function extractRefsFromTerminal(
	toolArgs: unknown,
	resultText: string | undefined,
): Array<{ ref_type: 'pr' | 'issue' | 'commit'; ref_value: string }> {
	const command = getStringField(toolArgs, 'command');
	// Every ref is read from the output, so an empty command or output means no refs.
	if (!command || !resultText) { return []; }

	const refs: Array<{ ref_type: 'pr' | 'issue' | 'commit'; ref_value: string }> = [];

	// Detect PR creation/checkout/view/merge — look for PR URL in result
	if (/\bgh\s+pr\s+(create|checkout|view|merge)\b/.test(command)) {
		const prMatch = resultText.match(/https:\/\/github\.com\/[^/]+\/[^/]+\/pull\/(\d+)/);
		if (prMatch?.[1]) {
			refs.push({ ref_type: 'pr', ref_value: prMatch[1] });
		}
	}

	// Detect issue creation — look for issue URL in result. Matched the same way
	// as the PR and commit commands: word-bounded and tolerant of extra whitespace.
	if (/\bgh\s+issue\s+create\b/.test(command)) {
		const issueMatch = resultText.match(/https:\/\/github\.com\/[^/]+\/[^/]+\/issues\/(\d+)/);
		if (issueMatch?.[1]) {
			refs.push({ ref_type: 'issue', ref_value: issueMatch[1] });
		}
	}

	// Detect git commit — the SHA is the last whitespace-separated token inside
	// the bracketed summary, whatever precedes it (branch name, "(root-commit)",
	// "detached HEAD").
	if (/\bgit\s+commit\b/.test(command)) {
		const commitMatch = resultText.match(/\[[^\[\]\r\n]+?\s([0-9a-f]{7,40})\]/);
		if (commitMatch?.[1]) {
			refs.push({ ref_type: 'commit', ref_value: commitMatch[1] });
		}
	}

	return refs;
}
224

225
/**
 * Build an `owner/repo` identifier from GitHub MCP tool arguments.
 * Both `owner` and `repo` must be present as non-empty strings.
 *
 * @returns `"<owner>/<repo>"`, or `undefined` when either part is missing.
 * @internal Exported for testing.
 */
export function extractRepoFromMcpTool(toolArgs: unknown): string | undefined {
	const repoOwner = getStringField(toolArgs, 'owner');
	const repoName = getStringField(toolArgs, 'repo');
	return repoOwner && repoName ? `${repoOwner}/${repoName}` : undefined;
}
235

236
/**
 * Tell whether a tool name belongs to the GitHub MCP server, i.e. whether it
 * starts with one of the prefixes in `GH_MCP_PREFIXES`.
 */
export function isGitHubMcpTool(toolName: string): boolean {
	for (const prefix of GH_MCP_PREFIXES) {
		if (toolName.startsWith(prefix)) {
			return true;
		}
	}
	return false;
}
243

244
/** Truncation suffix appended by truncateForOTel. */
const OTEL_TRUNCATION_MARKER = '...[truncated';

/**
 * Collect the text of every assistant text part from an already-parsed
 * messages value. Anything that does not have the expected shape is skipped.
 */
function collectAssistantTexts(parsed: unknown): string[] {
	if (!Array.isArray(parsed)) {
		return [];
	}
	const texts: string[] = [];
	for (const message of parsed as unknown[]) {
		if (typeof message !== 'object' || message === null) { continue; }
		const { role, parts } = message as { role?: unknown; parts?: unknown };
		if (role !== 'assistant' || !Array.isArray(parts)) { continue; }
		for (const part of parts as unknown[]) {
			if (typeof part !== 'object' || part === null) { continue; }
			const { type, content } = part as { type?: unknown; content?: unknown };
			if (type === 'text' && typeof content === 'string') {
				texts.push(content);
			}
		}
	}
	return texts;
}

/**
 * Decode a JSON-escaped string fragment (the inside of a string literal).
 * If the fragment ends in a dangling escape (`\` or a partial `\uXXXX`), the
 * dangling escape is dropped and decoding retried; if that also fails the raw
 * fragment is returned.
 */
function unescapeJsonStringFragment(fragment: string): string {
	try {
		return JSON.parse(`"${fragment}"`) as string;
	} catch {
		// fall through and try to repair a cut-off escape sequence
	}
	const repaired = fragment.replace(/\\(?:u[0-9a-fA-F]{0,3})?$/, '');
	if (repaired !== fragment) {
		try {
			return JSON.parse(`"${repaired}"`) as string;
		} catch {
			// still not decodable — give back the raw text below
		}
	}
	return fragment;
}

/**
 * Recover assistant text from serialized messages that were cut off and
 * suffixed with OTEL_TRUNCATION_MARKER. Every text part located before the
 * marker is read: completed parts up to their closing quote, the cut-off part
 * up to the marker.
 */
function extractTextFromTruncatedMessages(raw: string): string | undefined {
	// The marker is appended at the end, so the last occurrence is the real one
	// even if the response text itself happens to contain the same characters.
	const truncationIdx = raw.lastIndexOf(OTEL_TRUNCATION_MARKER);
	if (truncationIdx === -1) {
		return undefined;
	}
	// Inside string content quotes are escaped, so these prefixes can only match
	// the JSON structure, never response text.
	const contentPrefix = '"type":"text","content":"';
	const rolePrefix = '"role":"';

	const texts: string[] = [];
	let searchFrom = 0;
	while (searchFrom < truncationIdx) {
		const prefixStart = raw.indexOf(contentPrefix, searchFrom);
		if (prefixStart === -1 || prefixStart + contentPrefix.length > truncationIdx) {
			break;
		}
		const textStart = prefixStart + contentPrefix.length;

		// Walk the string literal: stop at its closing quote or at the marker.
		let end = textStart;
		let closed = false;
		while (end < truncationIdx) {
			const ch = raw[end];
			if (ch === '\\') {
				end += 2; // skip the escaped character
				continue;
			}
			if (ch === '"') {
				closed = true;
				break;
			}
			end++;
		}
		end = Math.min(end, truncationIdx);
		const fragment = raw.slice(textStart, end);

		// Mirror the role filter of the non-truncated path: the nearest preceding
		// role decides. With no role in sight the part is accepted.
		const roleStart = raw.lastIndexOf(rolePrefix, prefixStart);
		const isAssistant = roleStart === -1 || raw.startsWith('assistant"', roleStart + rolePrefix.length);

		if (isAssistant && (closed || fragment.length > 0)) {
			texts.push(unescapeJsonStringFragment(fragment));
		}
		if (!closed) {
			break; // reached the truncation point
		}
		searchFrom = end + 1;
	}
	return texts.length > 0 ? texts.join('\n') : undefined;
}

/**
 * Extract assistant response text from the gen_ai.output.messages span attribute.
 * Handles both valid JSON and truncated JSON (where truncateForOTel cut the
 * JSON structure mid-string and appended a suffix).
 *
 * Expected format: [{"role":"assistant","parts":[{"type":"text","content":"..."}]}]
 *
 * Text parts of assistant messages are joined with a newline. Input that
 * parses as JSON but does not have the expected shape yields `undefined`.
 *
 * @param outputMessagesRaw Raw attribute value; falsy input yields `undefined`.
 * @returns The assistant text, or `undefined` when none can be recovered.
 * @internal Exported for testing.
 */
export function extractAssistantResponse(outputMessagesRaw: string | undefined): string | undefined {
	if (!outputMessagesRaw) {
		return undefined;
	}

	let parsed: unknown;
	try {
		parsed = JSON.parse(outputMessagesRaw);
	} catch {
		// JSON parse failed — likely truncated by truncateForOTel
		return extractTextFromTruncatedMessages(outputMessagesRaw);
	}

	const texts = collectAssistantTexts(parsed);
	return texts.length > 0 ? texts.join('\n') : undefined;
}
302

303
Product

Resources

Company