// Source: microsoft/vscode — extensions/copilot/src/extension/chatSessions/claude/node/claudeLanguageModelServer.ts
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { MessageParam } from '@anthropic-ai/sdk/resources';
7
import { RequestMetadata, RequestType } from '@vscode/copilot-api';
8
import { Raw } from '@vscode/prompt-tsx';
9
import * as http from 'http';
10
import { IChatMLFetcher, Source } from '../../../../platform/chat/common/chatMLFetcher';
11
import { ChatLocation, ChatResponse } from '../../../../platform/chat/common/commonTypes';
12
import { CustomModel, EndpointEditToolName } from '../../../../platform/endpoint/common/endpointProvider';
13
import { AnthropicMessagesProcessor } from '../../../../platform/endpoint/node/messagesApi';
14
import { ILogService } from '../../../../platform/log/common/logService';
15
import { IOTelService } from '../../../../platform/otel/common/otelService';
16
import { FinishedCallback, getRequestId, OptionalChatRequestParams } from '../../../../platform/networking/common/fetch';
17
import { Response } from '../../../../platform/networking/common/fetcherService';
18
import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody, IEndpointFetchOptions, IMakeChatRequestOptions } from '../../../../platform/networking/common/networking';
19
import { ChatCompletion } from '../../../../platform/networking/common/openai';
20
import { IRequestLogger } from '../../../../platform/requestLogger/common/requestLogger';
21
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry';
22
import { TelemetryData } from '../../../../platform/telemetry/common/telemetryData';
23
import { ITokenizer, TokenizerType } from '../../../../util/common/tokenizer';
24
import { AsyncIterableObject } from '../../../../util/vs/base/common/async';
25
import { CancellationToken, CancellationTokenSource } from '../../../../util/vs/base/common/cancellation';
26
import { Disposable, toDisposable } from '../../../../util/vs/base/common/lifecycle';
27
import { SSEParser } from '../../../../util/vs/base/common/sseParser';
28
import { generateUuid } from '../../../../util/vs/base/common/uuid';
29
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
30
import { IClaudeCodeModels } from './claudeCodeModels';
31
import { IClaudeSessionStateService } from '../common/claudeSessionStateService';
32
33
/**
 * A list of known Anthropic betas supported by CAPI. Used to filter incoming `anthropic-beta` header values
 * to prevent unsupported betas from being sent to CAPI.
 *
 * Entries are beta-name prefixes: dated variants such as
 * `context-management-2025-06-27` are accepted by {@link filterSupportedBetas}.
 */
const SUPPORTED_ANTHROPIC_BETAS = [
	'interleaved-thinking',
	'context-management',
	'advanced-tool-use',
];
42
43
/**
 * Connection details for the local Claude proxy server.
 */
export interface IClaudeLanguageModelServerConfig {
	/** Loopback TCP port the server listens on; 0 until `start()` assigns a random port. */
	readonly port: number;
	/** Per-instance secret (`vscode-lm-<uuid>`) clients must present as a Bearer token. */
	readonly nonce: string;
}
47
48
/**
 * Minimal shape of an incoming Anthropic Messages API request body.
 * Only the fields this proxy inspects are typed explicitly; all other
 * properties are preserved and passed through via the index signature.
 */
interface AnthropicMessagesRequest {
	model: string;
	messages: MessageParam[];
	// System prompt may be a plain string or an array of text blocks.
	system?: string | Array<{ type: 'text'; text: string }>;
	max_tokens?: number;
	stream?: boolean;
	tools?: unknown[];
	[key: string]: unknown;
}

/**
 * Anthropic Messages API error envelope, as written back to the proxy client.
 */
interface AnthropicErrorResponse {
	type: 'error';
	error: {
		type: 'invalid_request_error' | 'authentication_error' | 'permission_error' | 'not_found_error' | 'rate_limit_error' | 'api_error';
		message: string;
	};
}

// Context-window defaults passed to the pass-through endpoint as overrides.
const DEFAULT_MAX_TOKENS = 200_000;
const DEFAULT_MAX_OUTPUT_TOKENS = 64_000;
68
69
/**
70
* HTTP server that provides an Anthropic Messages API compatible endpoint.
71
* Acts as a pure pass-through proxy to the underlying model endpoint.
72
*/
73
export class ClaudeLanguageModelServer extends Disposable {
74
private server: http.Server;
75
private config: IClaudeLanguageModelServerConfig;
76
private readonly _userInitiatedMessageCounts = new Map<string, number>();
77
78
constructor(
79
@ILogService private readonly logService: ILogService,
80
@IClaudeSessionStateService private readonly sessionStateService: IClaudeSessionStateService,
81
@IRequestLogger private readonly requestLogger: IRequestLogger,
82
@IInstantiationService private readonly instantiationService: IInstantiationService,
83
@IClaudeCodeModels private readonly claudeCodeModels: IClaudeCodeModels,
84
@IOTelService private readonly _otelService: IOTelService,
85
) {
86
super();
87
this.config = {
88
port: 0, // Will be set to random available port
89
nonce: 'vscode-lm-' + generateUuid()
90
};
91
92
this.server = this.createServer();
93
this._register(toDisposable(() => this.stop()));
94
}
95
96
private createServer(): http.Server {
97
return http.createServer(async (req, res) => {
98
this.trace(`Received request: ${req.method} ${req.url}`);
99
100
if (req.method === 'OPTIONS') {
101
res.writeHead(200);
102
res.end();
103
return;
104
}
105
106
// Handle /v1/messages endpoint (also //messages if base URL ends in /)
107
// Use URL to properly parse and extract pathname, ignoring query string
108
const pathname = new URL(req.url ?? '/', 'http://localhost').pathname;
109
if (req.method === 'POST' && (pathname === '/v1/messages' || pathname === '/messages' || pathname === '//messages')) {
110
await this.handleMessagesRequest(req, res);
111
return;
112
}
113
114
if (req.method === 'GET' && req.url === '/') {
115
res.writeHead(200);
116
res.end('Hello from ClaudeLanguageModelServer');
117
return;
118
}
119
120
this.sendErrorResponse(res, 404, 'not_found_error', 'Not found');
121
});
122
}
123
124
private async handleMessagesRequest(req: http.IncomingMessage, res: http.ServerResponse) {
125
try {
126
const body = await this.readRequestBody(req);
127
const auth = extractSessionId(req.headers, this.config.nonce);
128
if (!auth.valid) {
129
this.error('Invalid auth key');
130
this.sendErrorResponse(res, 401, 'authentication_error', 'Invalid authentication');
131
return;
132
}
133
134
await this.handleAuthedMessagesRequest(body, req.headers, res, auth.sessionId);
135
} catch (error) {
136
this.sendErrorResponse(res, 500, 'api_error', error instanceof Error ? error.message : String(error));
137
}
138
return;
139
}
140
141
private async readRequestBody(req: http.IncomingMessage): Promise<string> {
142
return new Promise((resolve, reject) => {
143
let body = '';
144
req.on('data', chunk => {
145
body += chunk.toString();
146
});
147
req.on('end', () => {
148
resolve(body);
149
});
150
req.on('error', reject);
151
});
152
}
153
154
private async handleAuthedMessagesRequest(bodyString: string, headers: http.IncomingHttpHeaders, res: http.ServerResponse, sessionId: string | undefined): Promise<void> {
155
// Create cancellation token for the request
156
const tokenSource = new CancellationTokenSource();
157
158
try {
159
const requestBody: AnthropicMessagesRequest = JSON.parse(bodyString);
160
161
const fallbackModelId = sessionId ? this.sessionStateService.getModelIdForSession(sessionId) : undefined;
162
const selectedEndpoint = await this.claudeCodeModels.resolveEndpoint(requestBody.model, fallbackModelId);
163
if (!selectedEndpoint) {
164
this.error('No model found matching criteria');
165
this.sendErrorResponse(res, 404, 'not_found_error', 'No model found matching criteria');
166
return;
167
}
168
this.trace(`Session ${sessionId}: model=${selectedEndpoint.model}`);
169
requestBody.model = selectedEndpoint.model;
170
// Determine if this is a user-initiated message using counter-based approach
171
const count = this._userInitiatedMessageCounts.get(selectedEndpoint.model) ?? 0;
172
const isUserInitiatedMessage = count > 0;
173
if (isUserInitiatedMessage) {
174
this._userInitiatedMessageCounts.set(selectedEndpoint.model, count - 1);
175
}
176
177
// Set up streaming response
178
res.writeHead(200, {
179
'Content-Type': 'text/event-stream',
180
'Cache-Control': 'no-cache',
181
'Connection': 'keep-alive',
182
});
183
184
// Handle client disconnect
185
let requestComplete = false;
186
res.on('close', () => {
187
if (!requestComplete) {
188
this.info('Client disconnected before request complete');
189
}
190
191
tokenSource.cancel();
192
});
193
194
const endpointRequestBody = requestBody as IEndpointBody;
195
const streamingEndpoint = this.instantiationService.createInstance(
196
ClaudeStreamingPassThroughEndpoint,
197
selectedEndpoint,
198
res,
199
endpointRequestBody,
200
headers,
201
'vscode_claude_code',
202
{
203
modelMaxPromptTokens: DEFAULT_MAX_TOKENS - DEFAULT_MAX_OUTPUT_TOKENS,
204
maxOutputTokens: DEFAULT_MAX_OUTPUT_TOKENS
205
},
206
sessionId
207
);
208
209
let messagesForLogging: Raw.ChatMessage[] = [];
210
try {
211
// Don't fail based on any assumptions about the shape of the request
212
messagesForLogging = Array.isArray(requestBody.messages) ?
213
messagesApiInputToRawMessagesForLogging(requestBody) :
214
[];
215
} catch (e) {
216
this.exception(e as Error, `Failed to parse messages for logging`);
217
}
218
219
const capturingToken = sessionId ? this.sessionStateService.getCapturingTokenForSession(sessionId) : undefined;
220
const sessionReasoningEffort = sessionId ? this.sessionStateService.getReasoningEffortForSession(sessionId) : undefined;
221
const reasoningEffort = sessionReasoningEffort && selectedEndpoint.supportsReasoningEffort?.includes(sessionReasoningEffort)
222
? sessionReasoningEffort
223
: undefined;
224
225
const doRequest = () => streamingEndpoint.makeChatRequest2({
226
debugName: 'Claude Copilot Proxy',
227
messages: messagesForLogging,
228
finishedCb: async () => undefined,
229
location: ChatLocation.MessagesProxy,
230
modelCapabilities: { enableThinking: true, reasoningEffort },
231
userInitiatedRequest: isUserInitiatedMessage
232
}, tokenSource.token);
233
234
// Wrap in trace context so chat spans are parented to the invoke_agent span
235
const traceContext = sessionId ? this.sessionStateService.getTraceContextForSession(sessionId) : undefined;
236
const doRequestInContext = traceContext
237
? () => this._otelService.runWithTraceContext(traceContext, doRequest)
238
: doRequest;
239
240
if (capturingToken) {
241
await this.requestLogger.captureInvocation(capturingToken, doRequestInContext);
242
} else {
243
await doRequestInContext();
244
}
245
246
requestComplete = true;
247
248
res.end();
249
} catch (error) {
250
this.sendErrorResponse(res, 500, 'api_error', error instanceof Error ? error.message : String(error));
251
} finally {
252
tokenSource.dispose();
253
}
254
}
255
256
private sendErrorResponse(
257
res: http.ServerResponse,
258
statusCode: number,
259
errorType: AnthropicErrorResponse['error']['type'],
260
message: string
261
): void {
262
const errorResponse: AnthropicErrorResponse = {
263
type: 'error',
264
error: {
265
type: errorType,
266
message
267
}
268
};
269
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
270
res.end(JSON.stringify(errorResponse));
271
}
272
273
public async start(): Promise<void> {
274
if (this.config.port !== 0) {
275
// Already started
276
return;
277
}
278
279
return new Promise((resolve, reject) => {
280
this.server.listen(0, '127.0.0.1', () => {
281
const address = this.server.address();
282
if (address && typeof address === 'object') {
283
this.config = {
284
...this.config,
285
port: address.port
286
};
287
this.info(`Claude Language Model Server started on http://localhost:${this.config.port}`);
288
resolve();
289
return;
290
}
291
292
reject(new Error('Failed to start server'));
293
});
294
});
295
}
296
297
public stop(): void {
298
this.server.close();
299
}
300
301
public getConfig(): IClaudeLanguageModelServerConfig {
302
return { ...this.config };
303
}
304
305
/**
306
* Increments the user-initiated message count for a given model.
307
* Called when a user sends a new message in a Claude session.
308
*/
309
public incrementUserInitiatedMessageCount(modelId: string): void {
310
const current = this._userInitiatedMessageCounts.get(modelId) ?? 0;
311
this._userInitiatedMessageCounts.set(modelId, current + 1);
312
}
313
314
private info(message: string): void {
315
const messageWithClassName = `[ClaudeLanguageModelServer] ${message}`;
316
this.logService.info(messageWithClassName);
317
}
318
319
private error(message: string): void {
320
const messageWithClassName = `[ClaudeLanguageModelServer] ${message}`;
321
this.logService.error(messageWithClassName);
322
}
323
324
private exception(err: Error, message?: string): void {
325
this.logService.error(err, message);
326
}
327
328
private trace(message: string): void {
329
const messageWithClassName = `[ClaudeLanguageModelServer] ${message}`;
330
this.logService.trace(messageWithClassName);
331
}
332
}
333
334
/** Result of {@link extractSessionId}: nonce validity plus the optional session ID. */
export interface ExtractSessionIdResult {
	/** Whether the auth nonce is valid. */
	readonly valid: boolean;
	/** The session ID, if present in the `nonce.sessionId` format. `undefined` for legacy (nonce-only) format. */
	readonly sessionId: string | undefined;
}
340
341
/**
 * Extracts and validates the session ID from HTTP request headers.
 * Reads the `Authorization: Bearer <nonce>.<sessionId>` header set via `ANTHROPIC_AUTH_TOKEN`.
 *
 * The `x-api-key` header is intentionally ignored to prevent the user's personal
 * `ANTHROPIC_API_KEY` environment variable from interfering with authentication.
 */
export function extractSessionId(headers: http.IncomingHttpHeaders, expectedNonce: string): ExtractSessionIdResult {
	const bearerPrefix = 'Bearer ';
	const authHeader = headers['authorization'];

	// Only the Authorization header (set via ANTHROPIC_AUTH_TOKEN) is honored.
	const token = typeof authHeader === 'string' && authHeader.startsWith(bearerPrefix)
		? authHeader.slice(bearerPrefix.length)
		: undefined;
	if (!token) {
		return { valid: false, sessionId: undefined };
	}

	const separator = token.indexOf('.');
	if (separator === -1) {
		// Legacy format without session ID — validate nonce only
		return { valid: token === expectedNonce, sessionId: undefined };
	}

	// `nonce.sessionId` format: everything before the first dot is the nonce,
	// the remainder (which may itself contain dots) is the session ID.
	const isValid = token.slice(0, separator) === expectedNonce;
	return { valid: isValid, sessionId: isValid ? token.slice(separator + 1) : undefined };
}
373
374
/**
 * Filters a comma-separated `anthropic-beta` header value to only include
 * betas that match {@link SUPPORTED_ANTHROPIC_BETAS}. Entries are matched by
 * prefix so that e.g. `'context-management'` allows `'context-management-2025-06-27'`.
 *
 * Returns the filtered comma-separated string, or `undefined` if no betas matched.
 */
export function filterSupportedBetas(headerValue: string): string | undefined {
	const supportedEntries: string[] = [];
	for (const rawEntry of headerValue.split(',')) {
		const entry = rawEntry.trim();
		// Skip empty entries (e.g. trailing commas or double commas).
		if (!entry) {
			continue;
		}
		// An entry is kept when it is a dated variant of a supported beta prefix.
		if (SUPPORTED_ANTHROPIC_BETAS.some(prefix => entry.startsWith(prefix + '-'))) {
			supportedEntries.push(entry);
		}
	}
	return supportedEntries.length > 0 ? supportedEntries.join(',') : undefined;
}
389
390
/**
391
* Converts Anthropic Messages API input to Raw.ChatMessage[] for logging purposes.
392
*/
393
function messagesApiInputToRawMessagesForLogging(request: AnthropicMessagesRequest): Raw.ChatMessage[] {
394
const messages: Raw.ChatMessage[] = [];
395
396
// Add system message if present
397
if (request.system) {
398
const systemText = typeof request.system === 'string'
399
? request.system
400
: request.system.map(block => block.text).join('\n');
401
messages.push({
402
role: Raw.ChatRole.System,
403
content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: systemText }]
404
});
405
}
406
407
// Convert each message
408
for (const msg of request.messages ?? []) {
409
const role = msg.role === 'user' ? Raw.ChatRole.User : Raw.ChatRole.Assistant;
410
const content: Raw.ChatCompletionContentPart[] = [];
411
412
if (typeof msg.content === 'string') {
413
content.push({ type: Raw.ChatCompletionContentPartKind.Text, text: msg.content });
414
} else if (Array.isArray(msg.content)) {
415
for (const block of msg.content) {
416
if (block.type === 'text') {
417
content.push({ type: Raw.ChatCompletionContentPartKind.Text, text: block.text });
418
} else if (block.type === 'image') {
419
// Handle image blocks if needed for logging
420
content.push({ type: Raw.ChatCompletionContentPartKind.Text, text: '[image]' });
421
} else if (block.type === 'tool_use') {
422
content.push({ type: Raw.ChatCompletionContentPartKind.Text, text: `[tool_use: ${block.name}]` });
423
} else if (block.type === 'tool_result') {
424
content.push({ type: Raw.ChatCompletionContentPartKind.Text, text: `[tool_result: ${block.tool_use_id}]` });
425
}
426
}
427
}
428
429
messages.push({ role, content });
430
}
431
432
return messages;
433
}
434
435
/**
 * IChatEndpoint wrapper used by the Claude proxy: it forwards the original
 * Messages API request body to the wrapped endpoint and streams the raw SSE
 * response bytes straight back to the proxy client, while also parsing the
 * stream to produce ChatCompletion objects for logging and token-usage
 * reporting. Most capability/metadata getters delegate to the wrapped base
 * endpoint.
 */
class ClaudeStreamingPassThroughEndpoint implements IChatEndpoint {
	constructor(
		// The resolved endpoint that actually serves the model.
		private readonly base: IChatEndpoint,
		// The proxy client's HTTP response; SSE chunks are written here verbatim.
		private readonly responseStream: http.ServerResponse,
		// The original (Messages API) request body, merged over the base body.
		private readonly requestBody: IEndpointBody,
		private readonly requestHeaders: http.IncomingHttpHeaders,
		private readonly userAgentPrefix: string,
		private readonly contextWindowOverride: { modelMaxPromptTokens?: number; maxOutputTokens?: number },
		private readonly sessionId: string | undefined,
		@IChatMLFetcher private readonly chatMLFetcher: IChatMLFetcher,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@IClaudeSessionStateService private readonly sessionStateService: IClaudeSessionStateService
	) { }

	public get urlOrRequestMetadata(): string | RequestMetadata {
		// Force Messages API endpoint - we need this regardless of the useMessagesApi setting
		// since we're proxying Messages API format requests from Claude Code
		const baseUrl = this.base.urlOrRequestMetadata;
		if (typeof baseUrl === 'string') {
			return baseUrl;
		}
		return { type: RequestType.ChatMessages };
	}

	/**
	 * Base headers plus: the client's User-Agent re-prefixed with
	 * {@link userAgentPrefix}, and the incoming `anthropic-beta` header
	 * filtered down to supported betas.
	 */
	public getExtraHeaders(): Record<string, string> {
		const headers = this.base.getExtraHeaders?.(ChatLocation.MessagesProxy) ?? {};
		if (this.requestHeaders['user-agent']) {
			headers['User-Agent'] = this.getUserAgent(this.requestHeaders['user-agent']);
		}
		if (typeof this.requestHeaders['anthropic-beta'] === 'string') {
			const filtered = filterSupportedBetas(this.requestHeaders['anthropic-beta']);
			if (filtered) {
				headers['anthropic-beta'] = filtered;
			}
		}
		return headers;
	}

	getEndpointFetchOptions(): IEndpointFetchOptions {
		return {
			suppressIntegrationId: true
		};
	}

	/**
	 * Replaces the product name in a `name/version` User-Agent with
	 * {@link userAgentPrefix}, keeping the version part; if there is no
	 * slash the whole incoming value is treated as the version.
	 */
	private getUserAgent(incomingUserAgent: string): string {
		const slashIndex = incomingUserAgent.indexOf('/');
		if (slashIndex === -1) {
			return `${this.userAgentPrefix}/${incomingUserAgent}`;
		}

		return `${this.userAgentPrefix}${incomingUserAgent.substring(slashIndex)}`;
	}

	public interceptBody(body: IEndpointBody | undefined): void {
		this.base.interceptBody?.(body);
	}

	public acquireTokenizer(): ITokenizer {
		return this.base.acquireTokenizer();
	}

	// Context-window sizes prefer the override supplied by the proxy server,
	// falling back to the base endpoint's values.
	public get modelMaxPromptTokens(): number {
		return this.contextWindowOverride.modelMaxPromptTokens ?? this.base.modelMaxPromptTokens;
	}

	public get maxOutputTokens(): number {
		return this.contextWindowOverride.maxOutputTokens ?? this.base.maxOutputTokens;
	}

	// ---- Delegating capability/metadata getters ----

	public get model(): string {
		return this.base.model;
	}

	public get modelProvider(): string {
		return this.base.modelProvider;
	}

	public get name(): string {
		return this.base.name;
	}

	public get version(): string {
		return this.base.version;
	}

	public get family(): string {
		return this.base.family;
	}

	public get tokenizer(): TokenizerType {
		return this.base.tokenizer;
	}

	public get showInModelPicker(): boolean {
		return this.base.showInModelPicker;
	}

	public get isPremium(): boolean | undefined {
		return this.base.isPremium;
	}

	public get degradationReason(): string | undefined {
		return this.base.degradationReason;
	}

	public get multiplier(): number | undefined {
		return this.base.multiplier;
	}

	public get tokenPricing() {
		return this.base.tokenPricing;
	}

	public get restrictedToSkus(): string[] | undefined {
		return this.base.restrictedToSkus;
	}

	public get isFallback(): boolean {
		return this.base.isFallback;
	}

	public get customModel(): CustomModel | undefined {
		return this.base.customModel;
	}

	public get isExtensionContributed(): boolean | undefined {
		return this.base.isExtensionContributed;
	}

	// Always the Messages API, regardless of the base endpoint's own type.
	public get apiType(): string | undefined {
		return 'messages';
	}

	public get supportsThinkingContentInHistory(): boolean | undefined {
		return this.base.supportsThinkingContentInHistory;
	}

	public get supportsAdaptiveThinking(): boolean | undefined {
		return this.base.supportsAdaptiveThinking;
	}

	public get minThinkingBudget(): number | undefined {
		return this.base.minThinkingBudget;
	}

	public get maxThinkingBudget(): number | undefined {
		return this.base.maxThinkingBudget;
	}

	public get supportsReasoningEffort(): string[] | undefined {
		return this.base.supportsReasoningEffort;
	}

	public get supportsToolCalls(): boolean {
		return this.base.supportsToolCalls;
	}

	public get supportsVision(): boolean {
		return this.base.supportsVision;
	}

	public get supportsPrediction(): boolean {
		return this.base.supportsPrediction;
	}

	public get supportedEditTools(): readonly EndpointEditToolName[] | undefined {
		return this.base.supportedEditTools;
	}

	/**
	 * Streams the upstream response: each chunk is written verbatim to the
	 * proxy client's response AND fed to an SSE parser whose events are pushed
	 * through AnthropicMessagesProcessor to produce ChatCompletions for
	 * logging and usage reporting.
	 */
	public async processResponseFromChatEndpoint(
		telemetryService: ITelemetryService,
		logService: ILogService,
		response: Response,
		expectedNumChoices: number,
		finishCallback: FinishedCallback,
		telemetryData: TelemetryData,
		cancellationToken?: CancellationToken
	): Promise<AsyncIterableObject<ChatCompletion>> {
		const body = response.body;
		return new AsyncIterableObject<ChatCompletion>(async feed => {
			// We parse the stream just to return a correct ChatCompletion for logging the response and token usage details.
			const requestId = response.headers.get('X-Request-ID') ?? generateUuid();
			const ghRequestId = response.headers.get('x-github-request-id') ?? '';
			const { serverExperiments } = getRequestId(response.headers);
			const processor = this.instantiationService.createInstance(AnthropicMessagesProcessor, telemetryData, requestId, ghRequestId, serverExperiments);
			const parser = new SSEParser((ev) => {
				try {
					const trimmed = ev.data?.trim();
					if (!trimmed || trimmed === '[DONE]') {
						return;
					}

					logService.trace(`[ClaudeStreamingPassThroughEndpoint] SSE: ${ev.data}`);
					const parsed = JSON.parse(trimmed);
					// Prefer the type embedded in the JSON payload; fall back to the SSE event name.
					const type = parsed.type ?? ev.type;
					if (!type) {
						return;
					}
					const completion = processor.push({ ...parsed, type }, finishCallback);
					if (completion) {
						feed.emitOne(completion);

						// Report usage to the usage handler if available
						if (completion.usage && this.sessionId) {
							const usageHandler = this.sessionStateService.getUsageHandlerForSession(this.sessionId);
							if (usageHandler) {
								usageHandler({
									// Could we bucketize these token counts somehow for the details?
									promptTokens: completion.usage.prompt_tokens,
									completionTokens: completion.usage.completion_tokens
								});
							}
						}
					}
				} catch (e) {
					feed.reject(e);
				}
			});

			try {
				for await (const chunk of body) {
					if (cancellationToken?.isCancellationRequested) {
						break;
					}

					// Pass-through: forward the raw bytes to the proxy client and
					// feed the same bytes to the SSE parser.
					this.responseStream.write(chunk);
					parser.feed(chunk);
				}
			} finally {
				await body.destroy();
			}
		});
	}

	/** Not supported on this endpoint; the proxy uses {@link makeChatRequest2}. */
	public makeChatRequest(
		debugName: string,
		messages: Raw.ChatMessage[],
		finishedCb: FinishedCallback | undefined,
		token: CancellationToken,
		location: ChatLocation,
		source?: Source,
		requestOptions?: Omit<OptionalChatRequestParams, 'n'>,
		userInitiatedRequest?: boolean
	): Promise<ChatResponse> {
		throw new Error('not implemented');
	}

	/** Issues the request through the ChatML fetcher with this endpoint as target. */
	public makeChatRequest2(
		options: IMakeChatRequestOptions,
		token: CancellationToken
	): Promise<ChatResponse> {
		return this.chatMLFetcher.fetchOne({
			requestOptions: {},
			...options,
			endpoint: this,
		}, token);
	}

	/**
	 * Builds the outgoing body: the base endpoint's body overlaid with the
	 * original client request body so client-specified properties win.
	 */
	public createRequestBody(
		options: ICreateEndpointBodyOptions
	): IEndpointBody {
		const base = this.base.createRequestBody(options);

		// Claude models don't support both temperature and top_p simultaneously.
		// If the SDK request specifies either, clear both from base to avoid conflicts.
		if (this.requestBody.temperature !== undefined || this.requestBody.top_p !== undefined) {
			delete base.temperature;
			delete base.top_p;
		}

		// Merge with original request body to preserve any additional properties
		// i.e. default thinking budget.
		return {
			...base,
			...this.requestBody
		};
	}

	/** Not supported for the pass-through endpoint. */
	public cloneWithTokenOverride(modelMaxPromptTokens: number): IChatEndpoint {
		throw new Error('not implemented');
	}
}
717
718