CoCalc -- claudeLanguageModelServer.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/chatSessions/claude/node/claudeLanguageModelServer.ts
¹³⁴⁰⁵ views
1
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
5

6
import { MessageParam } from '@anthropic-ai/sdk/resources';
7
import { RequestMetadata, RequestType } from '@vscode/copilot-api';
8
import { Raw } from '@vscode/prompt-tsx';
9
import * as http from 'http';
10
import { IChatMLFetcher, Source } from '../../../../platform/chat/common/chatMLFetcher';
11
import { ChatLocation, ChatResponse } from '../../../../platform/chat/common/commonTypes';
12
import { CustomModel, EndpointEditToolName } from '../../../../platform/endpoint/common/endpointProvider';
13
import { AnthropicMessagesProcessor } from '../../../../platform/endpoint/node/messagesApi';
14
import { ILogService } from '../../../../platform/log/common/logService';
15
import { IOTelService } from '../../../../platform/otel/common/otelService';
16
import { FinishedCallback, getRequestId, OptionalChatRequestParams } from '../../../../platform/networking/common/fetch';
17
import { Response } from '../../../../platform/networking/common/fetcherService';
18
import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody, IEndpointFetchOptions, IMakeChatRequestOptions } from '../../../../platform/networking/common/networking';
19
import { ChatCompletion } from '../../../../platform/networking/common/openai';
20
import { IRequestLogger } from '../../../../platform/requestLogger/common/requestLogger';
21
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry';
22
import { TelemetryData } from '../../../../platform/telemetry/common/telemetryData';
23
import { ITokenizer, TokenizerType } from '../../../../util/common/tokenizer';
24
import { AsyncIterableObject } from '../../../../util/vs/base/common/async';
25
import { CancellationToken, CancellationTokenSource } from '../../../../util/vs/base/common/cancellation';
26
import { Disposable, toDisposable } from '../../../../util/vs/base/common/lifecycle';
27
import { SSEParser } from '../../../../util/vs/base/common/sseParser';
28
import { generateUuid } from '../../../../util/vs/base/common/uuid';
29
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
30
import { IClaudeCodeModels } from './claudeCodeModels';
31
import { IClaudeSessionStateService } from '../common/claudeSessionStateService';
32

33
/**
 * A list of known Anthropic betas supported by CAPI. Used to filter incoming `anthropic-beta` header values
 * to prevent unsupported betas from being sent to CAPI.
 *
 * Each entry is a beta name without its trailing version/date suffix; `filterSupportedBetas`
 * accepts a header value when it starts with one of these entries followed by `-`.
 */
const SUPPORTED_ANTHROPIC_BETAS = [
	'interleaved-thinking',
	'context-management',
	'advanced-tool-use',
];
42

43
/**
 * Connection details for a running {@link ClaudeLanguageModelServer}.
 */
export interface IClaudeLanguageModelServerConfig {
	/** TCP port the server listens on; `0` until the server has been started. */
	readonly port: number;
	/** Secret the client must present as (the prefix of) its `Authorization: Bearer` token. */
	readonly nonce: string;
}
47

48
/**
 * Minimal shape of an Anthropic Messages API request body as received by the proxy.
 * Only the fields this file reads are typed; everything else is carried through
 * via the index signature.
 */
interface AnthropicMessagesRequest {
	model: string;
	messages: MessageParam[];
	system?: string | Array<{ type: 'text'; text: string }>;
	max_tokens?: number;
	stream?: boolean;
	tools?: unknown[];
	[key: string]: unknown;
}
57

58
/**
 * JSON error envelope returned to the client for non-streaming failures.
 */
interface AnthropicErrorResponse {
	type: 'error';
	error: {
		type: 'invalid_request_error' | 'authentication_error' | 'permission_error' | 'not_found_error' | 'rate_limit_error' | 'api_error';
		message: string;
	};
}
65

66
/** Total context window assumed for proxied requests (prompt + output). */
const DEFAULT_MAX_TOKENS = 200_000;
/** Output token budget assumed for proxied requests; subtracted from the window to get the prompt budget. */
const DEFAULT_MAX_OUTPUT_TOKENS = 64_000;
68

69
/**
 * HTTP server that provides an Anthropic Messages API compatible endpoint.
 * Acts as a pure pass-through proxy to the underlying model endpoint.
 *
 * The server binds to 127.0.0.1 on a random port (see {@link start}) and only
 * serves `POST` requests to the messages route whose `Authorization: Bearer`
 * token carries the nonce generated in the constructor.
 */
export class ClaudeLanguageModelServer extends Disposable {
	private server: http.Server;
	private config: IClaudeLanguageModelServerConfig;
	/** Pending user-initiated message credits, keyed by model id. */
	private readonly _userInitiatedMessageCounts = new Map<string, number>();

	constructor(
		@ILogService private readonly logService: ILogService,
		@IClaudeSessionStateService private readonly sessionStateService: IClaudeSessionStateService,
		@IRequestLogger private readonly requestLogger: IRequestLogger,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@IClaudeCodeModels private readonly claudeCodeModels: IClaudeCodeModels,
		@IOTelService private readonly _otelService: IOTelService,
	) {
		super();
		this.config = {
			port: 0, // Will be set to random available port
			nonce: 'vscode-lm-' + generateUuid()
		};

		this.server = this.createServer();
		this._register(toDisposable(() => this.stop()));
	}

	/**
	 * Builds the underlying HTTP server and its request router.
	 * The server is created but not bound to a port; see {@link start}.
	 */
	private createServer(): http.Server {
		return http.createServer(async (req, res) => {
			this.trace(`Received request: ${req.method} ${req.url}`);

			if (req.method === 'OPTIONS') {
				res.writeHead(200);
				res.end();
				return;
			}

			// Handle /v1/messages endpoint (also //messages if base URL ends in /)
			// Use URL to properly parse and extract pathname, ignoring query string
			// NOTE(review): with a WHATWG URL base, a request target starting with `//` is parsed as
			// scheme-relative (its first segment becomes the host), so `//v1/messages` yields the
			// pathname `/messages` and the literal `'//messages'` comparison looks unreachable — confirm.
			const pathname = new URL(req.url ?? '/', 'http://localhost').pathname;
			if (req.method === 'POST' && (pathname === '/v1/messages' || pathname === '/messages' || pathname === '//messages')) {
				await this.handleMessagesRequest(req, res);
				return;
			}

			if (req.method === 'GET' && req.url === '/') {
				res.writeHead(200);
				res.end('Hello from ClaudeLanguageModelServer');
				return;
			}

			this.sendErrorResponse(res, 404, 'not_found_error', 'Not found');
		});
	}

	/**
	 * Reads the request body, validates the bearer token and forwards the request.
	 * Any failure is reported to the client as a 500 `api_error`
	 * (or a 401 `authentication_error` when the token is invalid).
	 */
	private async handleMessagesRequest(req: http.IncomingMessage, res: http.ServerResponse) {
		try {
			const body = await this.readRequestBody(req);
			const auth = extractSessionId(req.headers, this.config.nonce);
			if (!auth.valid) {
				this.error('Invalid auth key');
				this.sendErrorResponse(res, 401, 'authentication_error', 'Invalid authentication');
				return;
			}

			await this.handleAuthedMessagesRequest(body, req.headers, res, auth.sessionId);
		} catch (error) {
			this.sendErrorResponse(res, 500, 'api_error', error instanceof Error ? error.message : String(error));
		}
	}

	/**
	 * Collects the complete request body and decodes it as UTF-8.
	 *
	 * Chunks are buffered and decoded once at the end so that a multi-byte
	 * character split across two chunks is not corrupted.
	 */
	private async readRequestBody(req: http.IncomingMessage): Promise<string> {
		return new Promise((resolve, reject) => {
			const chunks: Buffer[] = [];
			req.on('data', (chunk: Buffer | string) => {
				chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
			});
			req.on('end', () => {
				resolve(Buffer.concat(chunks).toString('utf8'));
			});
			req.on('error', reject);
		});
	}

	/**
	 * Proxies an authenticated Messages API request to the resolved model endpoint,
	 * streaming the upstream response back to the client as server-sent events.
	 *
	 * @param bodyString Raw JSON request body.
	 * @param headers Incoming request headers (forwarded selectively by the pass-through endpoint).
	 * @param res Response to stream into.
	 * @param sessionId Session id extracted from the bearer token, if any.
	 */
	private async handleAuthedMessagesRequest(bodyString: string, headers: http.IncomingHttpHeaders, res: http.ServerResponse, sessionId: string | undefined): Promise<void> {
		// Create cancellation token for the request
		const tokenSource = new CancellationTokenSource();

		try {
			const requestBody: AnthropicMessagesRequest = JSON.parse(bodyString);

			const fallbackModelId = sessionId ? this.sessionStateService.getModelIdForSession(sessionId) : undefined;
			const selectedEndpoint = await this.claudeCodeModels.resolveEndpoint(requestBody.model, fallbackModelId);
			if (!selectedEndpoint) {
				this.error('No model found matching criteria');
				this.sendErrorResponse(res, 404, 'not_found_error', 'No model found matching criteria');
				return;
			}
			this.trace(`Session ${sessionId}: model=${selectedEndpoint.model}`);
			requestBody.model = selectedEndpoint.model;
			// Determine if this is a user-initiated message using counter-based approach:
			// each pending credit for this model marks exactly one request as user-initiated.
			const count = this._userInitiatedMessageCounts.get(selectedEndpoint.model) ?? 0;
			const isUserInitiatedMessage = count > 0;
			if (isUserInitiatedMessage) {
				this._userInitiatedMessageCounts.set(selectedEndpoint.model, count - 1);
			}

			// Set up streaming response
			res.writeHead(200, {
				'Content-Type': 'text/event-stream',
				'Cache-Control': 'no-cache',
				'Connection': 'keep-alive',
			});

			// Handle client disconnect
			let requestComplete = false;
			res.on('close', () => {
				if (!requestComplete) {
					this.info('Client disconnected before request complete');
				}

				tokenSource.cancel();
			});

			const endpointRequestBody = requestBody as IEndpointBody;
			const streamingEndpoint = this.instantiationService.createInstance(
				ClaudeStreamingPassThroughEndpoint,
				selectedEndpoint,
				res,
				endpointRequestBody,
				headers,
				'vscode_claude_code',
				{
					modelMaxPromptTokens: DEFAULT_MAX_TOKENS - DEFAULT_MAX_OUTPUT_TOKENS,
					maxOutputTokens: DEFAULT_MAX_OUTPUT_TOKENS
				},
				sessionId
			);

			let messagesForLogging: Raw.ChatMessage[] = [];
			try {
				// Don't fail based on any assumptions about the shape of the request
				messagesForLogging = Array.isArray(requestBody.messages) ?
					messagesApiInputToRawMessagesForLogging(requestBody) :
					[];
			} catch (e) {
				this.exception(e as Error, `Failed to parse messages for logging`);
			}

			const capturingToken = sessionId ? this.sessionStateService.getCapturingTokenForSession(sessionId) : undefined;
			const sessionReasoningEffort = sessionId ? this.sessionStateService.getReasoningEffortForSession(sessionId) : undefined;
			// Only forward a reasoning effort the selected endpoint declares support for.
			const reasoningEffort = sessionReasoningEffort && selectedEndpoint.supportsReasoningEffort?.includes(sessionReasoningEffort)
				? sessionReasoningEffort
				: undefined;

			const doRequest = () => streamingEndpoint.makeChatRequest2({
				debugName: 'Claude Copilot Proxy',
				messages: messagesForLogging,
				finishedCb: async () => undefined,
				location: ChatLocation.MessagesProxy,
				modelCapabilities: { enableThinking: true, reasoningEffort },
				userInitiatedRequest: isUserInitiatedMessage
			}, tokenSource.token);

			// Wrap in trace context so chat spans are parented to the invoke_agent span
			const traceContext = sessionId ? this.sessionStateService.getTraceContextForSession(sessionId) : undefined;
			const doRequestInContext = traceContext
				? () => this._otelService.runWithTraceContext(traceContext, doRequest)
				: doRequest;

			if (capturingToken) {
				await this.requestLogger.captureInvocation(capturingToken, doRequestInContext);
			} else {
				await doRequestInContext();
			}

			requestComplete = true;

			res.end();
		} catch (error) {
			this.sendErrorResponse(res, 500, 'api_error', error instanceof Error ? error.message : String(error));
		} finally {
			tokenSource.dispose();
		}
	}

	/**
	 * Sends an Anthropic-style JSON error envelope with the given status code.
	 *
	 * If the response headers have already been sent (e.g. the failure happened
	 * after the event-stream response was started), the status and body can no
	 * longer be changed: the error is logged and the response is simply ended.
	 */
	private sendErrorResponse(
		res: http.ServerResponse,
		statusCode: number,
		errorType: AnthropicErrorResponse['error']['type'],
		message: string
	): void {
		if (res.headersSent) {
			// Calling writeHead() again would throw ERR_HTTP_HEADERS_SENT from inside a catch block.
			this.error(`Unable to send ${statusCode} ${errorType} response, headers already sent: ${message}`);
			if (!res.writableEnded) {
				res.end();
			}
			return;
		}

		const errorResponse: AnthropicErrorResponse = {
			type: 'error',
			error: {
				type: errorType,
				message
			}
		};
		res.writeHead(statusCode, { 'Content-Type': 'application/json' });
		res.end(JSON.stringify(errorResponse));
	}

	/**
	 * Starts listening on a random free port on 127.0.0.1.
	 * Resolves once the port is known; rejects if the server fails to bind.
	 * Does nothing when a port has already been assigned.
	 */
	public async start(): Promise<void> {
		if (this.config.port !== 0) {
			// Already started
			return;
		}

		return new Promise<void>((resolve, reject) => {
			// Without this listener a bind failure would leave the promise pending forever.
			const onListenError = (err: Error) => reject(err);
			this.server.once('error', onListenError);

			this.server.listen(0, '127.0.0.1', () => {
				this.server.off('error', onListenError);

				const address = this.server.address();
				if (address && typeof address === 'object') {
					this.config = {
						...this.config,
						port: address.port
					};
					this.info(`Claude Language Model Server started on http://localhost:${this.config.port}`);
					resolve();
					return;
				}

				reject(new Error('Failed to start server'));
			});
		});
	}

	/** Stops accepting new connections. Also invoked when this object is disposed. */
	public stop(): void {
		this.server.close();
	}

	/** Returns a copy of the current port/nonce configuration. */
	public getConfig(): IClaudeLanguageModelServerConfig {
		return { ...this.config };
	}

	/**
	 * Increments the user-initiated message count for a given model.
	 * Called when a user sends a new message in a Claude session.
	 */
	public incrementUserInitiatedMessageCount(modelId: string): void {
		const current = this._userInitiatedMessageCounts.get(modelId) ?? 0;
		this._userInitiatedMessageCounts.set(modelId, current + 1);
	}

	private info(message: string): void {
		const messageWithClassName = `[ClaudeLanguageModelServer] ${message}`;
		this.logService.info(messageWithClassName);
	}

	private error(message: string): void {
		const messageWithClassName = `[ClaudeLanguageModelServer] ${message}`;
		this.logService.error(messageWithClassName);
	}

	private exception(err: Error, message?: string): void {
		this.logService.error(err, message);
	}

	private trace(message: string): void {
		const messageWithClassName = `[ClaudeLanguageModelServer] ${message}`;
		this.logService.trace(messageWithClassName);
	}
}
333

334
/**
 * Outcome of validating the bearer token of an incoming proxy request.
 */
export interface ExtractSessionIdResult {
	/** Whether the auth nonce is valid. */
	readonly valid: boolean;
	/** The session ID, if present in the `nonce.sessionId` format. `undefined` for legacy (nonce-only) format. */
	readonly sessionId: string | undefined;
}
340

341
/**
 * Extracts and validates the session ID from HTTP request headers.
 * Reads the `Authorization: Bearer <nonce>.<sessionId>` header set via `ANTHROPIC_AUTH_TOKEN`.
 *
 * The `x-api-key` header is intentionally ignored to prevent the user's personal
 * `ANTHROPIC_API_KEY` environment variable from interfering with authentication.
 *
 * @param headers Incoming request headers.
 * @param expectedNonce Nonce the token must carry (everything before the first `.`).
 * @returns `valid` tells whether the nonce matched; `sessionId` is everything after the
 * first `.` of a valid token, or `undefined` for an invalid or nonce-only token.
 */
export function extractSessionId(headers: http.IncomingHttpHeaders, expectedNonce: string): ExtractSessionIdResult {
	const bearerPrefix = 'Bearer ';

	// Only the Authorization header with a Bearer prefix is honoured (set via ANTHROPIC_AUTH_TOKEN)
	const authHeader = headers['authorization'];
	const token = typeof authHeader === 'string' && authHeader.startsWith(bearerPrefix)
		? authHeader.slice(bearerPrefix.length)
		: undefined;

	if (!token) {
		return { valid: false, sessionId: undefined };
	}

	// Parse `nonce.sessionId` format; only the first dot separates the two parts
	const dotIndex = token.indexOf('.');
	if (dotIndex === -1) {
		// Legacy format without session ID — validate nonce only
		return { valid: token === expectedNonce, sessionId: undefined };
	}

	const valid = token.slice(0, dotIndex) === expectedNonce;
	return { valid, sessionId: valid ? token.slice(dotIndex + 1) : undefined };
}
373

374
/**
 * Filters a comma-separated `anthropic-beta` header value to only include
 * betas that match {@link SUPPORTED_ANTHROPIC_BETAS}. Entries are matched by
 * prefix so that e.g. `'context-management'` allows `'context-management-2025-06-27'`.
 *
 * Whitespace around each entry is trimmed and empty entries are dropped.
 *
 * Returns the filtered comma-separated string, or `undefined` if no betas matched.
 */
export function filterSupportedBetas(headerValue: string): string | undefined {
	// A beta is accepted only when it is a supported name followed by a `-suffix`.
	const isSupported = (beta: string): boolean =>
		SUPPORTED_ANTHROPIC_BETAS.some(supported => beta.startsWith(supported + '-'));

	const filtered = headerValue
		.split(',')
		.map(b => b.trim())
		.filter(b => b && isSupported(b));

	return filtered.length > 0 ? filtered.join(',') : undefined;
}
389

390
/**
 * Converts Anthropic Messages API input to Raw.ChatMessage[] for logging purposes.
 *
 * The system prompt (if any) becomes a leading system message. Text blocks are kept verbatim;
 * image, tool_use and tool_result blocks are replaced by short placeholders; any other
 * block type is omitted.
 */
function messagesApiInputToRawMessagesForLogging(request: AnthropicMessagesRequest): Raw.ChatMessage[] {
	const textPart = (text: string): Raw.ChatCompletionContentPart => ({ type: Raw.ChatCompletionContentPartKind.Text, text });
	const messages: Raw.ChatMessage[] = [];

	// Add system message if present
	if (request.system) {
		const systemText = typeof request.system === 'string'
			? request.system
			: request.system.map(block => block.text).join('\n');
		messages.push({
			role: Raw.ChatRole.System,
			content: [textPart(systemText)]
		});
	}

	// Convert each message
	for (const msg of request.messages ?? []) {
		// Anything that is not a user message is logged as an assistant message
		const role = msg.role === 'user' ? Raw.ChatRole.User : Raw.ChatRole.Assistant;
		const content: Raw.ChatCompletionContentPart[] = [];

		if (typeof msg.content === 'string') {
			content.push(textPart(msg.content));
		} else if (Array.isArray(msg.content)) {
			for (const block of msg.content) {
				switch (block.type) {
					case 'text':
						content.push(textPart(block.text));
						break;
					case 'image':
						// Handle image blocks if needed for logging
						content.push(textPart('[image]'));
						break;
					case 'tool_use':
						content.push(textPart(`[tool_use: ${block.name}]`));
						break;
					case 'tool_result':
						content.push(textPart(`[tool_result: ${block.tool_use_id}]`));
						break;
					default:
						// Other block types are not represented in the log
						break;
				}
			}
		}

		messages.push({ role, content });
	}

	return messages;
}
434

435
/**
 * Endpoint wrapper that sends the client's original Messages API request body to the
 * wrapped endpoint and writes the upstream response stream straight into the client's
 * HTTP response, while parsing the same stream to produce completions for logging and
 * usage reporting. Most properties simply delegate to the wrapped `base` endpoint.
 */
class ClaudeStreamingPassThroughEndpoint implements IChatEndpoint {
	constructor(
		private readonly base: IChatEndpoint,
		private readonly responseStream: http.ServerResponse,
		private readonly requestBody: IEndpointBody,
		private readonly requestHeaders: http.IncomingHttpHeaders,
		private readonly userAgentPrefix: string,
		private readonly contextWindowOverride: { modelMaxPromptTokens?: number; maxOutputTokens?: number },
		private readonly sessionId: string | undefined,
		@IChatMLFetcher private readonly chatMLFetcher: IChatMLFetcher,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@IClaudeSessionStateService private readonly sessionStateService: IClaudeSessionStateService
	) { }

	public get urlOrRequestMetadata(): string | RequestMetadata {
		// Force Messages API endpoint - we need this regardless of the useMessagesApi setting
		// since we're proxying Messages API format requests from Claude Code
		const baseUrl = this.base.urlOrRequestMetadata;
		if (typeof baseUrl === 'string') {
			return baseUrl;
		}
		return { type: RequestType.ChatMessages };
	}

	/**
	 * Returns the base endpoint's extra headers plus a rewritten `User-Agent` and the
	 * supported subset of the client's `anthropic-beta` header.
	 */
	public getExtraHeaders(): Record<string, string> {
		// Copy so that the object returned by the base endpoint is never mutated.
		const headers: Record<string, string> = { ...(this.base.getExtraHeaders?.(ChatLocation.MessagesProxy) ?? {}) };
		if (this.requestHeaders['user-agent']) {
			headers['User-Agent'] = this.getUserAgent(this.requestHeaders['user-agent']);
		}
		if (typeof this.requestHeaders['anthropic-beta'] === 'string') {
			const filtered = filterSupportedBetas(this.requestHeaders['anthropic-beta']);
			if (filtered) {
				headers['anthropic-beta'] = filtered;
			}
		}
		return headers;
	}

	getEndpointFetchOptions(): IEndpointFetchOptions {
		return {
			suppressIntegrationId: true
		};
	}

	/**
	 * Replaces the product name (everything before the first `/`) of the incoming
	 * user agent with `userAgentPrefix`; when there is no `/`, the incoming value
	 * is appended after `userAgentPrefix/`.
	 */
	private getUserAgent(incomingUserAgent: string): string {
		const slashIndex = incomingUserAgent.indexOf('/');
		if (slashIndex === -1) {
			return `${this.userAgentPrefix}/${incomingUserAgent}`;
		}

		return `${this.userAgentPrefix}${incomingUserAgent.substring(slashIndex)}`;
	}

	public interceptBody(body: IEndpointBody | undefined): void {
		this.base.interceptBody?.(body);
	}

	public acquireTokenizer(): ITokenizer {
		return this.base.acquireTokenizer();
	}

	public get modelMaxPromptTokens(): number {
		return this.contextWindowOverride.modelMaxPromptTokens ?? this.base.modelMaxPromptTokens;
	}

	public get maxOutputTokens(): number {
		return this.contextWindowOverride.maxOutputTokens ?? this.base.maxOutputTokens;
	}

	public get model(): string {
		return this.base.model;
	}

	public get modelProvider(): string {
		return this.base.modelProvider;
	}

	public get name(): string {
		return this.base.name;
	}

	public get version(): string {
		return this.base.version;
	}

	public get family(): string {
		return this.base.family;
	}

	public get tokenizer(): TokenizerType {
		return this.base.tokenizer;
	}

	public get showInModelPicker(): boolean {
		return this.base.showInModelPicker;
	}

	public get isPremium(): boolean | undefined {
		return this.base.isPremium;
	}

	public get degradationReason(): string | undefined {
		return this.base.degradationReason;
	}

	public get multiplier(): number | undefined {
		return this.base.multiplier;
	}

	public get tokenPricing() {
		return this.base.tokenPricing;
	}

	public get restrictedToSkus(): string[] | undefined {
		return this.base.restrictedToSkus;
	}

	public get isFallback(): boolean {
		return this.base.isFallback;
	}

	public get customModel(): CustomModel | undefined {
		return this.base.customModel;
	}

	public get isExtensionContributed(): boolean | undefined {
		return this.base.isExtensionContributed;
	}

	// Always reported as the Messages API, independent of the base endpoint.
	public get apiType(): string | undefined {
		return 'messages';
	}

	public get supportsThinkingContentInHistory(): boolean | undefined {
		return this.base.supportsThinkingContentInHistory;
	}

	public get supportsAdaptiveThinking(): boolean | undefined {
		return this.base.supportsAdaptiveThinking;
	}

	public get minThinkingBudget(): number | undefined {
		return this.base.minThinkingBudget;
	}

	public get maxThinkingBudget(): number | undefined {
		return this.base.maxThinkingBudget;
	}

	public get supportsReasoningEffort(): string[] | undefined {
		return this.base.supportsReasoningEffort;
	}

	public get supportsToolCalls(): boolean {
		return this.base.supportsToolCalls;
	}

	public get supportsVision(): boolean {
		return this.base.supportsVision;
	}

	public get supportsPrediction(): boolean {
		return this.base.supportsPrediction;
	}

	public get supportedEditTools(): readonly EndpointEditToolName[] | undefined {
		return this.base.supportedEditTools;
	}

	/**
	 * Forwards every upstream chunk unchanged to the client response and, in parallel,
	 * feeds it to an SSE parser whose events are turned into `ChatCompletion`s.
	 * Token usage from a completion is reported to the session's usage handler when
	 * a session id is known. Stops forwarding once cancellation is requested and
	 * always destroys the upstream body afterwards.
	 */
	public async processResponseFromChatEndpoint(
		telemetryService: ITelemetryService,
		logService: ILogService,
		response: Response,
		expectedNumChoices: number,
		finishCallback: FinishedCallback,
		telemetryData: TelemetryData,
		cancellationToken?: CancellationToken
	): Promise<AsyncIterableObject<ChatCompletion>> {
		const body = response.body;
		return new AsyncIterableObject<ChatCompletion>(async feed => {
			// We parse the stream just to return a correct ChatCompletion for logging the response and token usage details.
			const requestId = response.headers.get('X-Request-ID') ?? generateUuid();
			const ghRequestId = response.headers.get('x-github-request-id') ?? '';
			const { serverExperiments } = getRequestId(response.headers);
			const processor = this.instantiationService.createInstance(AnthropicMessagesProcessor, telemetryData, requestId, ghRequestId, serverExperiments);
			const parser = new SSEParser((ev) => {
				try {
					const trimmed = ev.data?.trim();
					if (!trimmed || trimmed === '[DONE]') {
						return;
					}

					logService.trace(`[ClaudeStreamingPassThroughEndpoint] SSE: ${ev.data}`);
					const parsed = JSON.parse(trimmed);
					// Prefer the type carried in the payload, falling back to the SSE event name.
					const type = parsed.type ?? ev.type;
					if (!type) {
						return;
					}
					const completion = processor.push({ ...parsed, type }, finishCallback);
					if (completion) {
						feed.emitOne(completion);

						// Report usage to the usage handler if available
						if (completion.usage && this.sessionId) {
							const usageHandler = this.sessionStateService.getUsageHandlerForSession(this.sessionId);
							if (usageHandler) {
								usageHandler({
									// Could we bucketize these token counts somehow for the details?
									promptTokens: completion.usage.prompt_tokens,
									completionTokens: completion.usage.completion_tokens
								});
							}
						}
					}
				} catch (e) {
					feed.reject(e);
				}
			});

			try {
				for await (const chunk of body) {
					if (cancellationToken?.isCancellationRequested) {
						break;
					}

					this.responseStream.write(chunk);
					parser.feed(chunk);
				}
			} finally {
				await body.destroy();
			}
		});
	}

	public makeChatRequest(
		debugName: string,
		messages: Raw.ChatMessage[],
		finishedCb: FinishedCallback | undefined,
		token: CancellationToken,
		location: ChatLocation,
		source?: Source,
		requestOptions?: Omit<OptionalChatRequestParams, 'n'>,
		userInitiatedRequest?: boolean
	): Promise<ChatResponse> {
		throw new Error('not implemented');
	}

	/** Issues the request through the chat fetcher with this object as the endpoint. */
	public makeChatRequest2(
		options: IMakeChatRequestOptions,
		token: CancellationToken
	): Promise<ChatResponse> {
		return this.chatMLFetcher.fetchOne({
			requestOptions: {},
			...options,
			endpoint: this,
		}, token);
	}

	/**
	 * Builds the outgoing body: the base endpoint's body overlaid with the client's
	 * original request body, so the client's values win on every shared key.
	 */
	public createRequestBody(
		options: ICreateEndpointBodyOptions
	): IEndpointBody {
		const base = this.base.createRequestBody(options);

		// Claude models don't support both temperature and top_p simultaneously.
		// If the SDK request specifies either, clear both from base to avoid conflicts.
		if (this.requestBody.temperature !== undefined || this.requestBody.top_p !== undefined) {
			delete base.temperature;
			delete base.top_p;
		}

		// Merge with original request body to preserve any additional properties
		// i.e. default thinking budget.
		return {
			...base,
			...this.requestBody
		};
	}

	public cloneWithTokenOverride(modelMaxPromptTokens: number): IChatEndpoint {
		throw new Error('not implemented');
	}
}
717

718
Product

Resources

Company