GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/externalAgents/node/oaiLanguageModelServer.ts

/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { RequestMetadata } from '@vscode/copilot-api';
import { Raw } from '@vscode/prompt-tsx';
import * as http from 'http';
import type OpenAI from 'openai';
import { IChatMLFetcher, Source } from '../../../platform/chat/common/chatMLFetcher';
import { ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes';
import { CustomModel, EndpointEditToolName, IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
import { getResponsesApiCompactionThresholdFromBody, OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging } from '../../../platform/endpoint/node/responsesApi';
import { ILogService } from '../../../platform/log/common/logService';
import { FinishedCallback, getRequestId, OptionalChatRequestParams } from '../../../platform/networking/common/fetch';
import { Response } from '../../../platform/networking/common/fetcherService';
import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody, IEndpointFetchOptions, IMakeChatRequestOptions } from '../../../platform/networking/common/networking';
import { ChatCompletion } from '../../../platform/networking/common/openai';
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
import { TelemetryData } from '../../../platform/telemetry/common/telemetryData';
import { ITokenizer, TokenizerType } from '../../../util/common/tokenizer';
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
import { CancellationToken, CancellationTokenSource } from '../../../util/vs/base/common/cancellation';
import { Disposable, toDisposable } from '../../../util/vs/base/common/lifecycle';
import { SSEParser } from '../../../util/vs/base/common/sseParser';
import { generateUuid } from '../../../util/vs/base/common/uuid';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';

export interface ILanguageModelServerConfig {
	readonly port: number;
	readonly nonce: string;
}
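
// Illustrative client sketch (not part of the original file): a request against the
// proxy, authenticated with the nonce. The endpoint paths and bearer scheme come from
// createServer/isAuthTokenValid below; the model name is a hypothetical example and
// Node's global fetch is assumed.
async function exampleResponsesRequest(config: ILanguageModelServerConfig): Promise<void> {
	const res = await fetch(`http://127.0.0.1:${config.port}/v1/responses`, {
		method: 'POST',
		headers: {
			'Authorization': `Bearer ${config.nonce}`,
			'Content-Type': 'application/json',
		},
		// The payload shape follows OpenAI.Responses.ResponseCreateParams
		body: JSON.stringify({ model: 'gpt-5-codex', input: 'Hello', stream: true }),
	});
	console.log(res.status); // 200, followed by a text/event-stream body
}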

/**
 * HTTP server that provides an OpenAI Responses API compatible endpoint.
 * Acts as a pure pass-through proxy to the underlying model endpoint.
 */
export class OpenAILanguageModelServer extends Disposable {
	private server: http.Server;
	private config: ILanguageModelServerConfig;

	constructor(
		@ILogService private readonly logService: ILogService,
		@IEndpointProvider private readonly endpointProvider: IEndpointProvider,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
	) {
		super();
		this.config = {
			port: 0, // Will be set to a random available port when the server starts
			nonce: 'vscode-lm-' + generateUuid()
		};

		this.server = this.createServer();
		this._register(toDisposable(() => this.stop()));
	}

	private createServer(): http.Server {
		return http.createServer(async (req, res) => {
			this.trace(`Received request: ${req.method} ${req.url}`);

			if (req.method === 'OPTIONS') {
				res.writeHead(200);
				res.end();
				return;
			}

			// Clients send //responses when their OPENAI_BASE_URL ends in a trailing
			// slash, so accept that form as well
			if (req.method === 'POST' && (req.url === '/v1/responses' || req.url === '/responses' || req.url === '//responses')) {
				await this.handleResponsesRequest(req, res);
				return;
			}

			if (req.method === 'GET' && req.url === '/') {
				res.writeHead(200);
				res.end('Hello from LanguageModelServer');
				return;
			}

			res.writeHead(404, { 'Content-Type': 'application/json' });
			res.end(JSON.stringify({ error: 'Not found' }));
		});
	}

	private async handleResponsesRequest(req: http.IncomingMessage, res: http.ServerResponse) {
		try {
			const body = await this.readRequestBody(req);
			if (!(await this.isAuthTokenValid(req))) {
				this.error('Invalid auth key');
				res.writeHead(401, { 'Content-Type': 'application/json' });
				res.end(JSON.stringify({ error: 'Invalid authentication' }));
				return;
			}

			await this.handleAuthedResponsesRequest(body, req.headers, res);
		} catch (error) {
			res.writeHead(500, { 'Content-Type': 'application/json' });
			res.end(JSON.stringify({
				error: 'Internal server error',
				details: error instanceof Error ? error.message : String(error)
			}));
		}
	}

	/**
	 * Verify the request's bearer token against the server's nonce.
	 */
	private async isAuthTokenValid(req: http.IncomingMessage): Promise<boolean> {
		const authHeader = req.headers.authorization;
		const bearerSpace = 'Bearer ';
		const authKey = authHeader?.startsWith(bearerSpace) ? authHeader.substring(bearerSpace.length) : undefined;
		return authKey === this.config.nonce;
	}
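
	// Illustrative note (not in the original file): a valid request carries the nonce
	// generated in the constructor, e.g.
	//   Authorization: Bearer vscode-lm-<uuid>
	// Any other value, or a missing header, yields a 401 from handleResponsesRequest.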

	private async readRequestBody(req: http.IncomingMessage): Promise<string> {
		return new Promise((resolve, reject) => {
			let body = '';
			req.on('data', chunk => {
				body += chunk.toString();
			});
			req.on('end', () => {
				resolve(body);
			});
			req.on('error', reject);
		});
	}

	private async handleAuthedResponsesRequest(bodyString: string, headers: http.IncomingHttpHeaders, res: http.ServerResponse): Promise<void> {
		// Create a cancellation token for the request
		const tokenSource = new CancellationTokenSource();

		try {
			const requestBody: OpenAI.Responses.ResponseCreateParams = JSON.parse(bodyString);
			if (Array.isArray(requestBody.tools)) {
				requestBody.tools = requestBody.tools.filter(tool => {
					if (typeof tool?.type === 'string' && tool.type.startsWith('web_search')) {
						this.warn(`Filtering out unsupported tool type: ${JSON.stringify(tool)}`);
						return false;
					}

					return true;
				});
			}
			const lastMessage = requestBody.input?.at(-1);
			const isUserInitiatedMessage = typeof lastMessage === 'string' ||
				(lastMessage?.type === 'message' && lastMessage.role === 'user');

			const endpoints = await this.endpointProvider.getAllChatEndpoints();
			if (endpoints.length === 0) {
				this.error('No language models available');
				res.writeHead(404, { 'Content-Type': 'application/json' });
				res.end(JSON.stringify({ error: 'No language models available' }));
				return;
			}

			const selectedEndpoint = this.selectEndpoint(endpoints, requestBody.model);
			if (!selectedEndpoint) {
				this.error('No model found matching criteria');
				res.writeHead(404, { 'Content-Type': 'application/json' });
				res.end(JSON.stringify({
					error: 'No model found matching criteria'
				}));
				return;
			}

			// Set up the streaming response
			res.writeHead(200, {
				'Content-Type': 'text/event-stream',
				'Cache-Control': 'no-cache',
				'Connection': 'keep-alive',
			});

			// Handle client disconnect
			let requestComplete = false;
			res.on('close', () => {
				if (!requestComplete) {
					this.info('Client disconnected before request complete');
				}

				tokenSource.cancel();
			});

			const endpointRequestBody = requestBody as IEndpointBody;
			const streamingEndpoint = this.instantiationService.createInstance(
				StreamingPassThroughEndpoint,
				selectedEndpoint,
				res,
				endpointRequestBody,
				headers,
				'vscode_codex'
			);

			let messagesForLogging: Raw.ChatMessage[] = [];
			try {
				// Don't fail based on any assumptions about the shape of the request
				messagesForLogging = Array.isArray(requestBody.input) ?
					responseApiInputToRawMessagesForLogging(requestBody) :
					[];
			} catch (e) {
				this.exception(e, `Failed to parse messages for logging`);
			}

			await streamingEndpoint.makeChatRequest2({
				debugName: 'oaiLMServer',
				messages: messagesForLogging,
				finishedCb: async () => undefined,
				location: ChatLocation.ResponsesProxy,
				modelCapabilities: { enableThinking: true },
				userInitiatedRequest: isUserInitiatedMessage
			}, tokenSource.token);

			requestComplete = true;

			res.end();
		} catch (error) {
			res.writeHead(500, { 'Content-Type': 'application/json' });
			res.end(JSON.stringify({
				error: 'Failed to process chat request',
				details: error instanceof Error ? error.message : String(error)
			}));
		} finally {
			tokenSource.dispose();
		}
	}
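
	// Illustrative sketch (not in the original file): on success the client sees the
	// upstream Responses API SSE stream verbatim, since StreamingPassThroughEndpoint
	// writes each raw chunk to `res`. The event names below are typical of the OpenAI
	// Responses API and are shown only as an example of the expected shape:
	//
	//   event: response.created
	//   data: {"type":"response.created","response":{...}}
	//
	//   event: response.output_text.delta
	//   data: {"type":"response.output_text.delta","delta":"Hel"}
	//
	//   event: response.completed
	//   data: {"type":"response.completed","response":{...}}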

	private selectEndpoint(endpoints: readonly IChatEndpoint[], requestedModel?: string): IChatEndpoint | undefined {
		if (requestedModel) {
			// Try to find an exact match on the model family
			const selectedEndpoint = endpoints.find(e => e.family === requestedModel);
			return selectedEndpoint;
		}

		// Use the first available model if no criteria were specified
		return endpoints[0];
	}
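
	// Example (hypothetical model names, not in the original file):
	//   selectEndpoint(endpoints, 'gpt-5-codex') returns the endpoint whose `family`
	//   is exactly 'gpt-5-codex', or undefined if none matches (the caller then
	//   answers 404); selectEndpoint(endpoints) returns endpoints[0].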

	public async start(): Promise<void> {
		if (this.config.port !== 0) {
			// Already started
			return;
		}

		return new Promise((resolve, reject) => {
			this.server.listen(0, '127.0.0.1', () => {
				const address = this.server.address();
				if (address && typeof address === 'object') {
					this.config = {
						...this.config,
						port: address.port
					};
					this.info(`Language Model Server started on http://localhost:${this.config.port}`);
					resolve();
					return;
				}

				reject(new Error('Failed to start server'));
			});
		});
	}
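
	// Minimal usage sketch (not in the original file; the spawn wiring is an assumption
	// for illustration). An external OpenAI-compatible agent can be pointed at this
	// server through the standard OPENAI_* environment variables:
	//
	//   const server = instantiationService.createInstance(OpenAILanguageModelServer);
	//   await server.start();
	//   const { port, nonce } = server.getConfig();
	//   spawn('some-agent-cli', [], {
	//       env: {
	//           ...process.env,
	//           OPENAI_BASE_URL: `http://localhost:${port}/v1`,
	//           OPENAI_API_KEY: nonce,
	//       }
	//   });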

	public stop(): void {
		this.server.close();
	}

	public getConfig(): ILanguageModelServerConfig {
		return { ...this.config };
	}

	private info(message: string): void {
		const messageWithClassName = `[OpenAILanguageModelServer] ${message}`;
		this.logService.info(messageWithClassName);
	}

	private error(message: string): void {
		const messageWithClassName = `[OpenAILanguageModelServer] ${message}`;
		this.logService.error(messageWithClassName);
	}

	private exception(err: Error, message?: string): void {
		this.logService.error(err, message);
	}

	private trace(message: string): void {
		const messageWithClassName = `[OpenAILanguageModelServer] ${message}`;
		this.logService.trace(messageWithClassName);
	}

	private warn(message: string): void {
		const messageWithClassName = `[OpenAILanguageModelServer] ${message}`;
		this.logService.warn(messageWithClassName);
	}
}

class StreamingPassThroughEndpoint implements IChatEndpoint {
	constructor(
		private readonly base: IChatEndpoint,
		private readonly responseStream: http.ServerResponse,
		private readonly requestBody: IEndpointBody,
		private readonly requestHeaders: http.IncomingHttpHeaders,
		private readonly userAgentPrefix: string,
		@IChatMLFetcher private readonly chatMLFetcher: IChatMLFetcher,
		@IInstantiationService private readonly instantiationService: IInstantiationService
	) { }
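
	// Descriptive note (added for clarity, not in the original file): this wrapper
	// delegates all model metadata and capability getters to `base`, but overrides
	// createRequestBody to replay the client's original Responses API body and
	// processResponseFromChatEndpoint to copy the upstream SSE bytes straight onto
	// `responseStream` while parsing them in parallel for telemetry.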

	public get urlOrRequestMetadata(): string | RequestMetadata {
		return this.base.urlOrRequestMetadata;
	}

	public getExtraHeaders(): Record<string, string> {
		const headers = this.base.getExtraHeaders?.() ?? {};
		if (this.requestHeaders['user-agent']) {
			headers['User-Agent'] = this.getUserAgent(this.requestHeaders['user-agent']);
		}
		return headers;
	}

	getEndpointFetchOptions(): IEndpointFetchOptions {
		return {
			suppressIntegrationId: true
		};
	}

	private getUserAgent(incomingUserAgent: string): string {
		const slashIndex = incomingUserAgent.indexOf('/');
		if (slashIndex === -1) {
			return `${this.userAgentPrefix}/${incomingUserAgent}`;
		}

		return `${this.userAgentPrefix}${incomingUserAgent.substring(slashIndex)}`;
	}
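
	// Example (hypothetical user agents, not in the original file):
	//   getUserAgent('codex/1.2.3') → 'vscode_codex/1.2.3'
	//   getUserAgent('codex')       → 'vscode_codex/codex'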

	public interceptBody(body: IEndpointBody | undefined): void {
		this.base.interceptBody?.(body);
	}

	public acquireTokenizer(): ITokenizer {
		return this.base.acquireTokenizer();
	}

	public get modelProvider(): string {
		return this.base.modelProvider;
	}

	public get modelMaxPromptTokens(): number {
		return this.base.modelMaxPromptTokens;
	}

	public get maxOutputTokens(): number {
		return this.base.maxOutputTokens;
	}

	public get model(): string {
		return this.base.model;
	}

	public get name(): string {
		return this.base.name;
	}

	public get version(): string {
		return this.base.version;
	}

	public get family(): string {
		return this.base.family;
	}

	public get tokenizer(): TokenizerType {
		return this.base.tokenizer;
	}

	public get showInModelPicker(): boolean {
		return this.base.showInModelPicker;
	}

	public get isPremium(): boolean | undefined {
		return this.base.isPremium;
	}

	public get degradationReason(): string | undefined {
		return this.base.degradationReason;
	}

	public get multiplier(): number | undefined {
		return this.base.multiplier;
	}

	public get tokenPricing() {
		return this.base.tokenPricing;
	}

	public get restrictedToSkus(): string[] | undefined {
		return this.base.restrictedToSkus;
	}

	public get isFallback(): boolean {
		return this.base.isFallback;
	}

	public get customModel(): CustomModel | undefined {
		return this.base.customModel;
	}

	public get isExtensionContributed(): boolean | undefined {
		return this.base.isExtensionContributed;
	}

	public get apiType(): string | undefined {
		return this.base.apiType;
	}

	public get supportsThinkingContentInHistory(): boolean | undefined {
		return this.base.supportsThinkingContentInHistory;
	}

	public get supportsAdaptiveThinking(): boolean | undefined {
		return this.base.supportsAdaptiveThinking;
	}

	public get minThinkingBudget(): number | undefined {
		return this.base.minThinkingBudget;
	}

	public get maxThinkingBudget(): number | undefined {
		return this.base.maxThinkingBudget;
	}

	public get supportsReasoningEffort(): string[] | undefined {
		return this.base.supportsReasoningEffort;
	}

	public get supportsToolCalls(): boolean {
		return this.base.supportsToolCalls;
	}

	public get supportsVision(): boolean {
		return this.base.supportsVision;
	}

	public get supportsPrediction(): boolean {
		return this.base.supportsPrediction;
	}

	public get supportedEditTools(): readonly EndpointEditToolName[] | undefined {
		return this.base.supportedEditTools;
	}

	public async processResponseFromChatEndpoint(
		telemetryService: ITelemetryService,
		logService: ILogService,
		response: Response,
		expectedNumChoices: number,
		finishCallback: FinishedCallback,
		telemetryData: TelemetryData,
		cancellationToken?: CancellationToken
	): Promise<AsyncIterableObject<ChatCompletion>> {
		const body = response.body;
		return new AsyncIterableObject<ChatCompletion>(async feed => {
			// We parse the stream only to produce a correct ChatCompletion for logging the response and token usage details.
			const requestId = response.headers.get('X-Request-ID') ?? generateUuid();
			const ghRequestId = response.headers.get('x-github-request-id') ?? '';
			const { serverExperiments } = getRequestId(response.headers);
			const processor = this.instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, telemetryService, requestId, ghRequestId, serverExperiments, getResponsesApiCompactionThresholdFromBody(this.requestBody));
			const parser = new SSEParser((ev) => {
				try {
					logService.trace(`[StreamingPassThroughEndpoint] SSE: ${ev.data}`);
					const completion = processor.push({ type: ev.type, ...JSON.parse(ev.data) }, finishCallback);
					if (completion) {
						feed.emitOne(completion);
					}
				} catch (e) {
					feed.reject(e);
				}
			});

			try {
				for await (const chunk of body) {
					if (cancellationToken?.isCancellationRequested) {
						break;
					}

					// Forward the raw bytes to the client untouched, then feed the same
					// chunk to the SSE parser for telemetry
					this.responseStream.write(chunk);
					parser.feed(chunk);
				}
			} finally {
				await body.destroy();
			}
		});
	}

	public makeChatRequest(
		debugName: string,
		messages: Raw.ChatMessage[],
		finishedCb: FinishedCallback | undefined,
		token: CancellationToken,
		location: ChatLocation,
		source?: Source,
		requestOptions?: Omit<OptionalChatRequestParams, 'n'>,
		userInitiatedRequest?: boolean
	): Promise<ChatResponse> {
		throw new Error('not implemented');
	}

	public makeChatRequest2(
		options: IMakeChatRequestOptions,
		token: CancellationToken
	): Promise<ChatResponse> {
		return this.chatMLFetcher.fetchOne({
			requestOptions: {},
			...options,
			endpoint: this,
		}, token);
	}

	public createRequestBody(
		options: ICreateEndpointBodyOptions
	): IEndpointBody {
		// Pass-through: replay the client's original request body rather than
		// building one from the options
		return this.requestBody;
	}

	public cloneWithTokenOverride(modelMaxPromptTokens: number): IChatEndpoint {
		throw new Error('not implemented');
	}
}