// Path: extensions/copilot/src/extension/externalAgents/node/oaiLanguageModelServer.ts
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { RequestMetadata } from '@vscode/copilot-api';6import { Raw } from '@vscode/prompt-tsx';7import * as http from 'http';8import type OpenAI from 'openai';9import { IChatMLFetcher, Source } from '../../../platform/chat/common/chatMLFetcher';10import { ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes';11import { CustomModel, EndpointEditToolName, IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';12import { getResponsesApiCompactionThresholdFromBody, OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging } from '../../../platform/endpoint/node/responsesApi';13import { ILogService } from '../../../platform/log/common/logService';14import { FinishedCallback, getRequestId, OptionalChatRequestParams } from '../../../platform/networking/common/fetch';15import { Response } from '../../../platform/networking/common/fetcherService';16import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody, IEndpointFetchOptions, IMakeChatRequestOptions } from '../../../platform/networking/common/networking';17import { ChatCompletion } from '../../../platform/networking/common/openai';18import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';19import { TelemetryData } from '../../../platform/telemetry/common/telemetryData';20import { ITokenizer, TokenizerType } from '../../../util/common/tokenizer';21import { AsyncIterableObject } from '../../../util/vs/base/common/async';22import { CancellationToken, CancellationTokenSource } from '../../../util/vs/base/common/cancellation';23import { Disposable, toDisposable } from 
'../../../util/vs/base/common/lifecycle';24import { SSEParser } from '../../../util/vs/base/common/sseParser';25import { generateUuid } from '../../../util/vs/base/common/uuid';26import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';2728export interface ILanguageModelServerConfig {29readonly port: number;30readonly nonce: string;31}3233/**34* HTTP server that provides an OpenAI Responses API compatible endpoint.35* Acts as a pure pass-through proxy to the underlying model endpoint.36*/37export class OpenAILanguageModelServer extends Disposable {38private server: http.Server;39private config: ILanguageModelServerConfig;4041constructor(42@ILogService private readonly logService: ILogService,43@IEndpointProvider private readonly endpointProvider: IEndpointProvider,44@IInstantiationService private readonly instantiationService: IInstantiationService,45) {46super();47this.config = {48port: 0, // Will be set to random available port49nonce: 'vscode-lm-' + generateUuid()50};5152this.server = this.createServer();53this._register(toDisposable(() => this.stop()));54}5556private createServer(): http.Server {57return http.createServer(async (req, res) => {58this.trace(`Received request: ${req.method} ${req.url}`);5960if (req.method === 'OPTIONS') {61res.writeHead(200);62res.end();63return;64}6566// It sends //responses if OPENAI_BASE_URL ends in /67if (req.method === 'POST' && (req.url === '/v1/responses' || req.url === '/responses' || req.url === '//responses')) {68await this.handleResponsesRequest(req, res);69return;70}7172if (req.method === 'GET' && req.url === '/') {73res.writeHead(200);74res.end('Hello from LanguageModelServer');75return;76}7778res.writeHead(404, { 'Content-Type': 'application/json' });79res.end(JSON.stringify({ error: 'Not found' }));80});81}8283private async handleResponsesRequest(req: http.IncomingMessage, res: http.ServerResponse) {84try {85const body = await this.readRequestBody(req);86if (!(await 
this.isAuthTokenValid(req))) {87this.error('Invalid auth key');88res.writeHead(401, { 'Content-Type': 'application/json' });89res.end(JSON.stringify({ error: 'Invalid authentication' }));90return;91}9293await this.handleAuthedResponsesRequest(body, req.headers, res);94} catch (error) {95res.writeHead(500, { 'Content-Type': 'application/json' });96res.end(JSON.stringify({97error: 'Internal server error',98details: error instanceof Error ? error.message : String(error)99}));100}101return;102}103104/**105* Verify nonce106*/107private async isAuthTokenValid(req: http.IncomingMessage): Promise<boolean> {108const authHeader = req.headers.authorization;109const bearerSpace = 'Bearer ';110const authKey = authHeader?.startsWith(bearerSpace) ? authHeader.substring(bearerSpace.length) : undefined;111return authKey === this.config.nonce;112}113114private async readRequestBody(req: http.IncomingMessage): Promise<string> {115return new Promise((resolve, reject) => {116let body = '';117req.on('data', chunk => {118body += chunk.toString();119});120req.on('end', () => {121resolve(body);122});123req.on('error', reject);124});125}126127private async handleAuthedResponsesRequest(bodyString: string, headers: http.IncomingHttpHeaders, res: http.ServerResponse): Promise<void> {128// Create cancellation token for the request129const tokenSource = new CancellationTokenSource();130131try {132const requestBody: OpenAI.Responses.ResponseCreateParams = JSON.parse(bodyString);133if (Array.isArray(requestBody.tools)) {134requestBody.tools = requestBody.tools.filter(tool => {135if (typeof tool?.type === 'string' && tool.type.startsWith('web_search')) {136this.warn(`Filtering out unsupported tool type: ${JSON.stringify(tool)}`);137return false;138}139140return true;141});142}143const lastMessage = requestBody.input?.at(-1);144const isUserInitiatedMessage = typeof lastMessage === 'string' ||145lastMessage?.type === 'message' && lastMessage.role === 'user';146147const endpoints = await 
this.endpointProvider.getAllChatEndpoints();148if (endpoints.length === 0) {149this.error('No language models available');150res.writeHead(404, { 'Content-Type': 'application/json' });151res.end(JSON.stringify({ error: 'No language models available' }));152return;153}154155const selectedEndpoint = this.selectEndpoint(endpoints, requestBody.model);156if (!selectedEndpoint) {157this.error('No model found matching criteria');158res.writeHead(404, { 'Content-Type': 'application/json' });159res.end(JSON.stringify({160error: 'No model found matching criteria'161}));162return;163}164165// Set up streaming response166res.writeHead(200, {167'Content-Type': 'text/event-stream',168'Cache-Control': 'no-cache',169'Connection': 'keep-alive',170});171172// Handle client disconnect173let requestComplete = false;174res.on('close', () => {175if (!requestComplete) {176this.info('Client disconnected before request complete');177}178179tokenSource.cancel();180});181182const endpointRequestBody = requestBody as IEndpointBody;183const streamingEndpoint = this.instantiationService.createInstance(184StreamingPassThroughEndpoint,185selectedEndpoint,186res,187endpointRequestBody,188headers,189'vscode_codex'190);191192let messagesForLogging: Raw.ChatMessage[] = [];193try {194// Don't fail based on any assumptions about the shape of the request195messagesForLogging = Array.isArray(requestBody.input) ?196responseApiInputToRawMessagesForLogging(requestBody) :197[];198} catch (e) {199this.exception(e, `Failed to parse messages for logging`);200}201202await streamingEndpoint.makeChatRequest2({203debugName: 'oaiLMServer',204messages: messagesForLogging,205finishedCb: async () => undefined,206location: ChatLocation.ResponsesProxy,207modelCapabilities: { enableThinking: true },208userInitiatedRequest: isUserInitiatedMessage209}, tokenSource.token);210211requestComplete = true;212213res.end();214} catch (error) {215res.writeHead(500, { 'Content-Type': 'application/json' 
});216res.end(JSON.stringify({217error: 'Failed to process chat request',218details: error instanceof Error ? error.message : String(error)219}));220} finally {221tokenSource.dispose();222}223}224225private selectEndpoint(endpoints: readonly IChatEndpoint[], requestedModel?: string): IChatEndpoint | undefined {226if (requestedModel) {227// Try to find exact match first228const selectedEndpoint = endpoints.find(e => e.family === requestedModel);229return selectedEndpoint;230}231232// Use first available model if no criteria specified233return endpoints[0];234}235236public async start(): Promise<void> {237if (this.config.port !== 0) {238// Already started239return;240}241242return new Promise((resolve, reject) => {243this.server.listen(0, '127.0.0.1', () => {244const address = this.server.address();245if (address && typeof address === 'object') {246this.config = {247...this.config,248port: address.port249};250this.info(`Language Model Server started on http://localhost:${this.config.port}`);251resolve();252return;253}254255reject(new Error('Failed to start server'));256});257});258}259260public stop(): void {261this.server.close();262}263264public getConfig(): ILanguageModelServerConfig {265return { ...this.config };266}267268private info(message: string): void {269const messageWithClassName = `[OpenAILanguageModelServer] ${message}`;270this.logService.info(messageWithClassName);271}272273private error(message: string): void {274const messageWithClassName = `[OpenAILanguageModelServer] ${message}`;275this.logService.error(messageWithClassName);276}277278private exception(err: Error, message?: string): void {279this.logService.error(err, message);280}281282private trace(message: string): void {283const messageWithClassName = `[OpenAILanguageModelServer] ${message}`;284this.logService.trace(messageWithClassName);285}286287private warn(message: string): void {288const messageWithClassName = `[OpenAILanguageModelServer] 
${message}`;289this.logService.warn(messageWithClassName);290}291}292293class StreamingPassThroughEndpoint implements IChatEndpoint {294constructor(295private readonly base: IChatEndpoint,296private readonly responseStream: http.ServerResponse,297private readonly requestBody: IEndpointBody,298private readonly requestHeaders: http.IncomingHttpHeaders,299private readonly userAgentPrefix: string,300@IChatMLFetcher private readonly chatMLFetcher: IChatMLFetcher,301@IInstantiationService private readonly instantiationService: IInstantiationService302) { }303304public get urlOrRequestMetadata(): string | RequestMetadata {305return this.base.urlOrRequestMetadata;306}307308public getExtraHeaders(): Record<string, string> {309const headers = this.base.getExtraHeaders?.() ?? {};310if (this.requestHeaders['user-agent']) {311headers['User-Agent'] = this.getUserAgent(this.requestHeaders['user-agent']);312}313return headers;314}315316getEndpointFetchOptions(): IEndpointFetchOptions {317return {318suppressIntegrationId: true319};320}321322private getUserAgent(incomingUserAgent: string): string {323const slashIndex = incomingUserAgent.indexOf('/');324if (slashIndex === -1) {325return `${this.userAgentPrefix}/${incomingUserAgent}`;326}327328return `${this.userAgentPrefix}${incomingUserAgent.substring(slashIndex)}`;329}330331public interceptBody(body: IEndpointBody | undefined): void {332this.base.interceptBody?.(body);333}334335public acquireTokenizer(): ITokenizer {336return this.base.acquireTokenizer();337}338339public get modelProvider(): string {340return this.base.modelProvider;341}342343public get modelMaxPromptTokens(): number {344return this.base.modelMaxPromptTokens;345}346347public get maxOutputTokens(): number {348return this.base.maxOutputTokens;349}350351public get model(): string {352return this.base.model;353}354355public get name(): string {356return this.base.name;357}358359public get version(): string {360return this.base.version;361}362363public get family(): 
string {364return this.base.family;365}366367public get tokenizer(): TokenizerType {368return this.base.tokenizer;369}370371public get showInModelPicker(): boolean {372return this.base.showInModelPicker;373}374375public get isPremium(): boolean | undefined {376return this.base.isPremium;377}378379public get degradationReason(): string | undefined {380return this.base.degradationReason;381}382383public get multiplier(): number | undefined {384return this.base.multiplier;385}386387public get tokenPricing() {388return this.base.tokenPricing;389}390391public get restrictedToSkus(): string[] | undefined {392return this.base.restrictedToSkus;393}394395public get isFallback(): boolean {396return this.base.isFallback;397}398399public get customModel(): CustomModel | undefined {400return this.base.customModel;401}402403public get isExtensionContributed(): boolean | undefined {404return this.base.isExtensionContributed;405}406407public get apiType(): string | undefined {408return this.base.apiType;409}410411public get supportsThinkingContentInHistory(): boolean | undefined {412return this.base.supportsThinkingContentInHistory;413}414415public get supportsAdaptiveThinking(): boolean | undefined {416return this.base.supportsAdaptiveThinking;417}418419public get minThinkingBudget(): number | undefined {420return this.base.minThinkingBudget;421}422423public get maxThinkingBudget(): number | undefined {424return this.base.maxThinkingBudget;425}426427public get supportsReasoningEffort(): string[] | undefined {428return this.base.supportsReasoningEffort;429}430431public get supportsToolCalls(): boolean {432return this.base.supportsToolCalls;433}434435public get supportsVision(): boolean {436return this.base.supportsVision;437}438439public get supportsPrediction(): boolean {440return this.base.supportsPrediction;441}442443public get supportedEditTools(): readonly EndpointEditToolName[] | undefined {444return this.base.supportedEditTools;445}446447public async 
processResponseFromChatEndpoint(448telemetryService: ITelemetryService,449logService: ILogService,450response: Response,451expectedNumChoices: number,452finishCallback: FinishedCallback,453telemetryData: TelemetryData,454cancellationToken?: CancellationToken455): Promise<AsyncIterableObject<ChatCompletion>> {456const body = response.body;457return new AsyncIterableObject<ChatCompletion>(async feed => {458// We parse the stream just to return a correct ChatCompletion for logging the response and token usage details.459const requestId = response.headers.get('X-Request-ID') ?? generateUuid();460const ghRequestId = response.headers.get('x-github-request-id') ?? '';461const { serverExperiments } = getRequestId(response.headers);462const processor = this.instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, telemetryService, requestId, ghRequestId, serverExperiments, getResponsesApiCompactionThresholdFromBody(this.requestBody));463const parser = new SSEParser((ev) => {464try {465logService.trace(`[StreamingPassThroughEndpoint] SSE: ${ev.data}`);466const completion = processor.push({ type: ev.type, ...JSON.parse(ev.data) }, finishCallback);467if (completion) {468feed.emitOne(completion);469}470} catch (e) {471feed.reject(e);472}473});474475try {476for await (const chunk of body) {477if (cancellationToken?.isCancellationRequested) {478break;479}480481this.responseStream.write(chunk);482parser.feed(chunk);483}484} finally {485await body.destroy();486}487});488}489490public makeChatRequest(491debugName: string,492messages: Raw.ChatMessage[],493finishedCb: FinishedCallback | undefined,494token: CancellationToken,495location: ChatLocation,496source?: Source,497requestOptions?: Omit<OptionalChatRequestParams, 'n'>,498userInitiatedRequest?: boolean499): Promise<ChatResponse> {500throw new Error('not implemented');501}502503public makeChatRequest2(504options: IMakeChatRequestOptions,505token: CancellationToken506): Promise<ChatResponse> {507return 
this.chatMLFetcher.fetchOne({508requestOptions: {},509...options,510endpoint: this,511}, token);512}513514public createRequestBody(515options: ICreateEndpointBodyOptions516): IEndpointBody {517return this.requestBody;518}519520public cloneWithTokenOverride(modelMaxPromptTokens: number): IChatEndpoint {521throw new Error('not implemented');522}523}524525526