GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/externalAgents/node/oaiLanguageModelServer.ts

/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { RequestMetadata } from '@vscode/copilot-api';
import { Raw } from '@vscode/prompt-tsx';
import * as http from 'http';
import type OpenAI from 'openai';
import { IChatMLFetcher, Source } from '../../../platform/chat/common/chatMLFetcher';
import { ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes';
import { CustomModel, EndpointEditToolName, IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
import { getResponsesApiCompactionThresholdFromBody, OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging } from '../../../platform/endpoint/node/responsesApi';
import { ILogService } from '../../../platform/log/common/logService';
import { FinishedCallback, getRequestId, OptionalChatRequestParams } from '../../../platform/networking/common/fetch';
import { Response } from '../../../platform/networking/common/fetcherService';
import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody, IEndpointFetchOptions, IMakeChatRequestOptions } from '../../../platform/networking/common/networking';
import { ChatCompletion } from '../../../platform/networking/common/openai';
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
import { TelemetryData } from '../../../platform/telemetry/common/telemetryData';
import { ITokenizer, TokenizerType } from '../../../util/common/tokenizer';
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
import { CancellationToken, CancellationTokenSource } from '../../../util/vs/base/common/cancellation';
import { Disposable, toDisposable } from '../../../util/vs/base/common/lifecycle';
import { SSEParser } from '../../../util/vs/base/common/sseParser';
import { generateUuid } from '../../../util/vs/base/common/uuid';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';

export interface ILanguageModelServerConfig {
	readonly port: number;
	readonly nonce: string;
}
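
// Illustrative client sketch (not part of the original file): a request against the
// proxy, authenticated with the nonce. The endpoint paths and bearer scheme come from
// createServer/isAuthTokenValid below; the model name is a hypothetical example and
// Node's global fetch is assumed.
async function exampleResponsesRequest(config: ILanguageModelServerConfig): Promise<void> {
	const res = await fetch(`http://127.0.0.1:${config.port}/v1/responses`, {
		method: 'POST',
		headers: {
			'Authorization': `Bearer ${config.nonce}`,
			'Content-Type': 'application/json',
		},
		// The payload shape follows OpenAI.Responses.ResponseCreateParams
		body: JSON.stringify({ model: 'gpt-5-codex', input: 'Hello', stream: true }),
	});
	console.log(res.status); // 200, followed by a text/event-stream body
}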

/**
 * HTTP server that provides an OpenAI Responses API compatible endpoint.
 * Acts as a pure pass-through proxy to the underlying model endpoint.
 */
export class OpenAILanguageModelServer extends Disposable {
	private server: http.Server;
	private config: ILanguageModelServerConfig;

	constructor(
		@ILogService private readonly logService: ILogService,
		@IEndpointProvider private readonly endpointProvider: IEndpointProvider,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
	) {
		super();
		this.config = {
			port: 0, // Will be set to a random available port when the server starts
			nonce: 'vscode-lm-' + generateUuid()
		};

		this.server = this.createServer();
		this._register(toDisposable(() => this.stop()));
	}

	private createServer(): http.Server {
		return http.createServer(async (req, res) => {
			this.trace(`Received request: ${req.method} ${req.url}`);

			if (req.method === 'OPTIONS') {
				res.writeHead(200);
				res.end();
				return;
			}

			// Clients send //responses when their OPENAI_BASE_URL ends in a trailing
			// slash, so accept that form as well
			if (req.method === 'POST' && (req.url === '/v1/responses' || req.url === '/responses' || req.url === '//responses')) {
				await this.handleResponsesRequest(req, res);
				return;
			}

			if (req.method === 'GET' && req.url === '/') {
				res.writeHead(200);
				res.end('Hello from LanguageModelServer');
				return;
			}

			res.writeHead(404, { 'Content-Type': 'application/json' });
			res.end(JSON.stringify({ error: 'Not found' }));
		});
	}

	private async handleResponsesRequest(req: http.IncomingMessage, res: http.ServerResponse) {
		try {
			const body = await this.readRequestBody(req);
			if (!(await this.isAuthTokenValid(req))) {
				this.error('Invalid auth key');
				res.writeHead(401, { 'Content-Type': 'application/json' });
				res.end(JSON.stringify({ error: 'Invalid authentication' }));
				return;
			}

			await this.handleAuthedResponsesRequest(body, req.headers, res);
		} catch (error) {
			res.writeHead(500, { 'Content-Type': 'application/json' });
			res.end(JSON.stringify({
				error: 'Internal server error',
				details: error instanceof Error ? error.message : String(error)
			}));
		}
	}

	/**
	 * Verify the request's bearer token against the server's nonce.
	 */
	private async isAuthTokenValid(req: http.IncomingMessage): Promise<boolean> {
		const authHeader = req.headers.authorization;
		const bearerSpace = 'Bearer ';
		const authKey = authHeader?.startsWith(bearerSpace) ? authHeader.substring(bearerSpace.length) : undefined;
		return authKey === this.config.nonce;
	}
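
	// Illustrative note (not in the original file): a valid request carries the nonce
	// generated in the constructor, e.g.
	//   Authorization: Bearer vscode-lm-<uuid>
	// Any other value, or a missing header, yields a 401 from handleResponsesRequest.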

	private async readRequestBody(req: http.IncomingMessage): Promise<string> {
		return new Promise((resolve, reject) => {
			let body = '';
			req.on('data', chunk => {
				body += chunk.toString();
			});
			req.on('end', () => {
				resolve(body);
			});
			req.on('error', reject);
		});
	}

	private async handleAuthedResponsesRequest(bodyString: string, headers: http.IncomingHttpHeaders, res: http.ServerResponse): Promise<void> {
		// Create a cancellation token for the request
		const tokenSource = new CancellationTokenSource();

		try {
			const requestBody: OpenAI.Responses.ResponseCreateParams = JSON.parse(bodyString);
			if (Array.isArray(requestBody.tools)) {
				requestBody.tools = requestBody.tools.filter(tool => {
					if (typeof tool?.type === 'string' && tool.type.startsWith('web_search')) {
						this.warn(`Filtering out unsupported tool type: ${JSON.stringify(tool)}`);
						return false;
					}

					return true;
				});
			}
			const lastMessage = requestBody.input?.at(-1);
			const isUserInitiatedMessage = typeof lastMessage === 'string' ||
				(lastMessage?.type === 'message' && lastMessage.role === 'user');

			const endpoints = await this.endpointProvider.getAllChatEndpoints();
			if (endpoints.length === 0) {
				this.error('No language models available');
				res.writeHead(404, { 'Content-Type': 'application/json' });
				res.end(JSON.stringify({ error: 'No language models available' }));
				return;
			}

			const selectedEndpoint = this.selectEndpoint(endpoints, requestBody.model);
			if (!selectedEndpoint) {
				this.error('No model found matching criteria');
				res.writeHead(404, { 'Content-Type': 'application/json' });
				res.end(JSON.stringify({
					error: 'No model found matching criteria'
				}));
				return;
			}

			// Set up the streaming response
			res.writeHead(200, {
				'Content-Type': 'text/event-stream',
				'Cache-Control': 'no-cache',
				'Connection': 'keep-alive',
			});

			// Handle client disconnect
			let requestComplete = false;
			res.on('close', () => {
				if (!requestComplete) {
					this.info('Client disconnected before request complete');
				}

				tokenSource.cancel();
			});

			const endpointRequestBody = requestBody as IEndpointBody;
			const streamingEndpoint = this.instantiationService.createInstance(
				StreamingPassThroughEndpoint,
				selectedEndpoint,
				res,
				endpointRequestBody,
				headers,
				'vscode_codex'
			);

			let messagesForLogging: Raw.ChatMessage[] = [];
			try {
				// Don't fail based on any assumptions about the shape of the request
				messagesForLogging = Array.isArray(requestBody.input) ?
					responseApiInputToRawMessagesForLogging(requestBody) :
					[];
			} catch (e) {
				this.exception(e, `Failed to parse messages for logging`);
			}

			await streamingEndpoint.makeChatRequest2({
				debugName: 'oaiLMServer',
				messages: messagesForLogging,
				finishedCb: async () => undefined,
				location: ChatLocation.ResponsesProxy,
				modelCapabilities: { enableThinking: true },
				userInitiatedRequest: isUserInitiatedMessage
			}, tokenSource.token);

			requestComplete = true;

			res.end();
		} catch (error) {
			res.writeHead(500, { 'Content-Type': 'application/json' });
			res.end(JSON.stringify({
				error: 'Failed to process chat request',
				details: error instanceof Error ? error.message : String(error)
			}));
		} finally {
			tokenSource.dispose();
		}
	}
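
	// Illustrative sketch (not in the original file): on success the client sees the
	// upstream Responses API SSE stream verbatim, since StreamingPassThroughEndpoint
	// writes each raw chunk to `res`. The event names below are typical of the OpenAI
	// Responses API and are shown only as an example of the expected shape:
	//
	//   event: response.created
	//   data: {"type":"response.created","response":{...}}
	//
	//   event: response.output_text.delta
	//   data: {"type":"response.output_text.delta","delta":"Hel"}
	//
	//   event: response.completed
	//   data: {"type":"response.completed","response":{...}}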

	private selectEndpoint(endpoints: readonly IChatEndpoint[], requestedModel?: string): IChatEndpoint | undefined {
		if (requestedModel) {
			// Try to find an exact match on the model family
			const selectedEndpoint = endpoints.find(e => e.family === requestedModel);
			return selectedEndpoint;
		}

		// Use the first available model if no criteria were specified
		return endpoints[0];
	}
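
	// Example (hypothetical model names, not in the original file):
	//   selectEndpoint(endpoints, 'gpt-5-codex') returns the endpoint whose `family`
	//   is exactly 'gpt-5-codex', or undefined if none matches (the caller then
	//   answers 404); selectEndpoint(endpoints) returns endpoints[0].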

	public async start(): Promise<void> {
		if (this.config.port !== 0) {
			// Already started
			return;
		}

		return new Promise((resolve, reject) => {
			this.server.listen(0, '127.0.0.1', () => {
				const address = this.server.address();
				if (address && typeof address === 'object') {
					this.config = {
						...this.config,
						port: address.port
					};
					this.info(`Language Model Server started on http://localhost:${this.config.port}`);
					resolve();
					return;
				}

				reject(new Error('Failed to start server'));
			});
		});
	}
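
	// Minimal usage sketch (not in the original file; the spawn wiring is an assumption
	// for illustration). An external OpenAI-compatible agent can be pointed at this
	// server through the standard OPENAI_* environment variables:
	//
	//   const server = instantiationService.createInstance(OpenAILanguageModelServer);
	//   await server.start();
	//   const { port, nonce } = server.getConfig();
	//   spawn('some-agent-cli', [], {
	//       env: {
	//           ...process.env,
	//           OPENAI_BASE_URL: `http://localhost:${port}/v1`,
	//           OPENAI_API_KEY: nonce,
	//       }
	//   });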

	public stop(): void {
		this.server.close();
	}

	public getConfig(): ILanguageModelServerConfig {
		return { ...this.config };
	}

	private info(message: string): void {
		const messageWithClassName = `[OpenAILanguageModelServer] ${message}`;
		this.logService.info(messageWithClassName);
	}

	private error(message: string): void {
		const messageWithClassName = `[OpenAILanguageModelServer] ${message}`;
		this.logService.error(messageWithClassName);
	}

	private exception(err: Error, message?: string): void {
		this.logService.error(err, message);
	}

	private trace(message: string): void {
		const messageWithClassName = `[OpenAILanguageModelServer] ${message}`;
		this.logService.trace(messageWithClassName);
	}

	private warn(message: string): void {
		const messageWithClassName = `[OpenAILanguageModelServer] ${message}`;
		this.logService.warn(messageWithClassName);
	}
}

class StreamingPassThroughEndpoint implements IChatEndpoint {
	constructor(
		private readonly base: IChatEndpoint,
		private readonly responseStream: http.ServerResponse,
		private readonly requestBody: IEndpointBody,
		private readonly requestHeaders: http.IncomingHttpHeaders,
		private readonly userAgentPrefix: string,
		@IChatMLFetcher private readonly chatMLFetcher: IChatMLFetcher,
		@IInstantiationService private readonly instantiationService: IInstantiationService
	) { }
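
	// Descriptive note (added for clarity, not in the original file): this wrapper
	// delegates all model metadata and capability getters to `base`, but overrides
	// createRequestBody to replay the client's original Responses API body and
	// processResponseFromChatEndpoint to copy the upstream SSE bytes straight onto
	// `responseStream` while parsing them in parallel for telemetry.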

	public get urlOrRequestMetadata(): string | RequestMetadata {
		return this.base.urlOrRequestMetadata;
	}

	public getExtraHeaders(): Record<string, string> {
		const headers = this.base.getExtraHeaders?.() ?? {};
		if (this.requestHeaders['user-agent']) {
			headers['User-Agent'] = this.getUserAgent(this.requestHeaders['user-agent']);
		}
		return headers;
	}

	getEndpointFetchOptions(): IEndpointFetchOptions {
		return {
			suppressIntegrationId: true
		};
	}

	private getUserAgent(incomingUserAgent: string): string {
		const slashIndex = incomingUserAgent.indexOf('/');
		if (slashIndex === -1) {
			return `${this.userAgentPrefix}/${incomingUserAgent}`;
		}

		return `${this.userAgentPrefix}${incomingUserAgent.substring(slashIndex)}`;
	}
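
	// Example (hypothetical user agents, not in the original file):
	//   getUserAgent('codex/1.2.3') → 'vscode_codex/1.2.3'
	//   getUserAgent('codex')       → 'vscode_codex/codex'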

	public interceptBody(body: IEndpointBody | undefined): void {
		this.base.interceptBody?.(body);
	}

	public acquireTokenizer(): ITokenizer {
		return this.base.acquireTokenizer();
	}

	public get modelProvider(): string {
		return this.base.modelProvider;
	}

	public get modelMaxPromptTokens(): number {
		return this.base.modelMaxPromptTokens;
	}

	public get maxOutputTokens(): number {
		return this.base.maxOutputTokens;
	}

	public get model(): string {
		return this.base.model;
	}

	public get name(): string {
		return this.base.name;
	}

	public get version(): string {
		return this.base.version;
	}

	public get family(): string {
		return this.base.family;
	}

	public get tokenizer(): TokenizerType {
		return this.base.tokenizer;
	}

	public get showInModelPicker(): boolean {
		return this.base.showInModelPicker;
	}

	public get isPremium(): boolean | undefined {
		return this.base.isPremium;
	}

	public get degradationReason(): string | undefined {
		return this.base.degradationReason;
	}

	public get multiplier(): number | undefined {
		return this.base.multiplier;
	}

	public get tokenPricing() {
		return this.base.tokenPricing;
	}

	public get restrictedToSkus(): string[] | undefined {
		return this.base.restrictedToSkus;
	}

	public get isFallback(): boolean {
		return this.base.isFallback;
	}

	public get customModel(): CustomModel | undefined {
		return this.base.customModel;
	}

	public get isExtensionContributed(): boolean | undefined {
		return this.base.isExtensionContributed;
	}

	public get apiType(): string | undefined {
		return this.base.apiType;
	}

	public get supportsThinkingContentInHistory(): boolean | undefined {
		return this.base.supportsThinkingContentInHistory;
	}

	public get supportsAdaptiveThinking(): boolean | undefined {
		return this.base.supportsAdaptiveThinking;
	}

	public get minThinkingBudget(): number | undefined {
		return this.base.minThinkingBudget;
	}

	public get maxThinkingBudget(): number | undefined {
		return this.base.maxThinkingBudget;
	}

	public get supportsReasoningEffort(): string[] | undefined {
		return this.base.supportsReasoningEffort;
	}

	public get supportsToolCalls(): boolean {
		return this.base.supportsToolCalls;
	}

	public get supportsVision(): boolean {
		return this.base.supportsVision;
	}

	public get supportsPrediction(): boolean {
		return this.base.supportsPrediction;
	}

	public get supportedEditTools(): readonly EndpointEditToolName[] | undefined {
		return this.base.supportedEditTools;
	}

	public async processResponseFromChatEndpoint(
		telemetryService: ITelemetryService,
		logService: ILogService,
		response: Response,
		expectedNumChoices: number,
		finishCallback: FinishedCallback,
		telemetryData: TelemetryData,
		cancellationToken?: CancellationToken
	): Promise<AsyncIterableObject<ChatCompletion>> {
		const body = response.body;
		return new AsyncIterableObject<ChatCompletion>(async feed => {
			// We parse the stream only to produce a correct ChatCompletion for logging the response and token usage details.
			const requestId = response.headers.get('X-Request-ID') ?? generateUuid();
			const ghRequestId = response.headers.get('x-github-request-id') ?? '';
			const { serverExperiments } = getRequestId(response.headers);
			const processor = this.instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, telemetryService, requestId, ghRequestId, serverExperiments, getResponsesApiCompactionThresholdFromBody(this.requestBody));
			const parser = new SSEParser((ev) => {
				try {
					logService.trace(`[StreamingPassThroughEndpoint] SSE: ${ev.data}`);
					const completion = processor.push({ type: ev.type, ...JSON.parse(ev.data) }, finishCallback);
					if (completion) {
						feed.emitOne(completion);
					}
				} catch (e) {
					feed.reject(e);
				}
			});

			try {
				for await (const chunk of body) {
					if (cancellationToken?.isCancellationRequested) {
						break;
					}

					// Forward the raw bytes to the client untouched, then feed the same
					// chunk to the SSE parser for telemetry
					this.responseStream.write(chunk);
					parser.feed(chunk);
				}
			} finally {
				await body.destroy();
			}
		});
	}

	public makeChatRequest(
		debugName: string,
		messages: Raw.ChatMessage[],
		finishedCb: FinishedCallback | undefined,
		token: CancellationToken,
		location: ChatLocation,
		source?: Source,
		requestOptions?: Omit<OptionalChatRequestParams, 'n'>,
		userInitiatedRequest?: boolean
	): Promise<ChatResponse> {
		throw new Error('not implemented');
	}

	public makeChatRequest2(
		options: IMakeChatRequestOptions,
		token: CancellationToken
	): Promise<ChatResponse> {
		return this.chatMLFetcher.fetchOne({
			requestOptions: {},
			...options,
			endpoint: this,
		}, token);
	}

	public createRequestBody(
		options: ICreateEndpointBodyOptions
	): IEndpointBody {
		// Pass-through: replay the client's original request body rather than
		// building one from the options
		return this.requestBody;
	}

	public cloneWithTokenOverride(modelMaxPromptTokens: number): IChatEndpoint {
		throw new Error('not implemented');
	}
}