Path: blob/main/src/vs/platform/agentHost/node/shared/copilotApiService.ts
13399 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import type Anthropic from '@anthropic-ai/sdk';
import { CAPIClient, RequestType, type CCAModel, type IExtensionInformation } from '@vscode/copilot-api';
import { generateUuid } from '../../../../base/common/uuid.js';
import { getDevDeviceId, getMachineId } from '../../../../base/node/id.js';
import { createDecorator } from '../../../instantiation/common/instantiation.js';
import { ILogService } from '../../../log/common/log.js';
import { IProductService } from '../../../product/common/productService.js';

// #region Types

/**
 * Per-call transport options for all {@link ICopilotApiService} methods.
 *
 * `headers` are merged into the outgoing CAPI request before security-
 * sensitive headers (`Authorization`, `Content-Type`, `X-Request-Id`,
 * `OpenAI-Intent`), so callers cannot override those. Header names are
 * compared case-insensitively: a caller-supplied `authorization` is dropped
 * just like `Authorization`.
 *
 * `signal` propagates to the outgoing API request but **not** to the
 * shared token mint. The mint is deduped across concurrent callers, so
 * a single caller's abort must not cancel it for everyone.
 */
export interface ICopilotApiServiceRequestOptions {
	readonly headers?: Readonly<Record<string, string>>;
	readonly signal?: AbortSignal;
}

/**
 * Envelope returned by the GitHub `copilot_internal/v2/token` endpoint.
 * @see https://docs.github.com/en/rest/copilot
 */
interface ICopilotTokenEnvelope {
	readonly token: string;
	readonly expires_at: number;
	readonly refresh_in: number;
	readonly endpoints?: { readonly api?: string };
	readonly sku?: string;
}

/** The single cached Copilot token, keyed by the GitHub token that minted it. */
interface ICachedToken {
	readonly githubToken: string;
	readonly copilotToken: string;
	/** Cache validity deadline, in seconds since the epoch. */
	readonly expiresAt: number;
}

/** Result of the one-time lazy initialization (see `_buildCapiInit`). */
interface ICapiInit {
	readonly capiClient: CAPIClient;
	readonly tokenUrl: string;
}

// #endregion

// #region Constants

/**
 * Refresh the cached Copilot token this many seconds before its real expiry,
 * so an in-flight request never hits a token that expires mid-request.
 */
const TOKEN_REFRESH_BUFFER_SECONDS = 5 * 60;

const TOKEN_API_VERSION = '2025-04-01';

/**
 * SSE field prefix for payload lines. The space after the colon is optional
 * in the event-stream format, so only `data:` is matched and the remainder
 * is trimmed.
 */
const SSE_DATA_FIELD = 'data:';

/** Stream event types that are forwarded to consumers; anything else is skipped. */
const KNOWN_SSE_EVENT_TYPES: ReadonlySet<string> = new Set([
	'message_start', 'message_delta', 'message_stop',
	'content_block_start', 'content_block_delta', 'content_block_stop',
]);

// #endregion

// #region Helpers

/**
 * Combine caller-supplied headers with the headers this service must control.
 *
 * HTTP header names are case-insensitive, so a plain object spread would let
 * a caller smuggle e.g. `authorization` alongside our `Authorization`. Any
 * caller header whose lower-cased name collides with a protected header is
 * dropped; the protected values always win.
 *
 * @param callerHeaders optional headers from {@link ICopilotApiServiceRequestOptions}.
 * @param protectedHeaders headers owned by the service for this request.
 * @returns a fresh record: surviving caller headers first, then the protected ones.
 */
function mergeRequestHeaders(
	callerHeaders: Readonly<Record<string, string>> | undefined,
	protectedHeaders: Readonly<Record<string, string>>,
): Record<string, string> {
	const protectedNames = new Set(Object.keys(protectedHeaders).map(name => name.toLowerCase()));
	const merged: Record<string, string> = {};
	for (const [name, value] of Object.entries(callerHeaders ?? {})) {
		if (!protectedNames.has(name.toLowerCase())) {
			merged[name] = value;
		}
	}
	return Object.assign(merged, protectedHeaders);
}

// #endregion

export type FetchFunction = typeof globalThis.fetch;

export const ICopilotApiService = createDecorator<ICopilotApiService>('copilotApiService');

/**
 * Foundational gateway between the agent host and GitHub Copilot's CAPI proxy
 * for Anthropic-style chat completions and model discovery.
 *
 * ## Goals
 *
 * 1. **Single source of truth for CAPI auth.** Callers pass a raw GitHub token
 *    and never deal with Copilot session token minting, expiry, refresh, or
 *    invalidation themselves.
 * 2. **Stable surface for chat agents.** A small, typed API that abstracts the
 *    underlying `CAPIClient`, SSE framing, and Anthropic event taxonomy so
 *    feature code can focus on prompting.
 * 3. **Resource-safe streaming.** Async-generator output that fully releases
 *    the underlying HTTP connection regardless of how the consumer terminates
 *    iteration (early `break`, thrown error, abort, or natural end-of-stream).
 * 4. **Skew- and revocation-tolerant token cache.** Tokens stay cached as long
 *    as they're usable, are re-minted when the server tells us they're stale
 *    (`refresh_in`), and are invalidated immediately on `401`/`403` so callers
 *    self-heal without restarting the host.
 *
 * ## Non-goals
 *
 * - Per-conversation history, retry/backoff, or rate-limit handling. Callers
 *   own request orchestration.
 * - GitHub Enterprise auth host derivation. The mint URL comes from
 *   `IProductService.defaultChatAgent.tokenEntitlementUrl`. See the TODO in
 *   `_buildCapiInit` for what GHE support would require.
 *
 * ## Concurrency model
 *
 * - Multiple in-flight requests for the same GitHub token share a single
 *   token mint via an in-flight de-dup map (no thundering herd on cold
 *   start).
 * - The token cache holds **one** entry. Callers that alternate between two
 *   GitHub tokens will pay a mint round-trip on every alternation; this is
 *   intentional — the agent host is single-tenant in practice.
 * - `AbortSignal` is forwarded to the outgoing API request (messages, models)
 *   but **not** to the shared token mint, so cancellation propagates to the
 *   caller's own request without affecting concurrent callers sharing the mint.
 *
 * ## Error semantics
 *
 * - Network/transport errors propagate as raw `fetch` rejections.
 * - Non-2xx responses throw an `Error` whose message includes the HTTP status,
 *   status text, and response body. **Tokens are never embedded in error
 *   messages.**
 * - Streaming `error` SSE events throw with the server-supplied message.
 * - Malformed JSON in an SSE `data:` line is logged and skipped, not thrown.
 */
export interface ICopilotApiService {

	readonly _serviceBrand: undefined;

	/**
	 * Stream a chat completion as raw Anthropic stream events.
	 *
	 * Yields every recognized `Anthropic.MessageStreamEvent` in the order the
	 * server emits them, **including `message_stop` as the last event** before
	 * the generator returns. Phase 2 proxy relies on receiving a complete,
	 * replayable event stream.
	 *
	 * @throws on non-2xx status or SSE `error` event.
	 */
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent>;

	/**
	 * Send a chat completion and return the full aggregated response.
	 * @throws on non-2xx status.
	 */
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsNonStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.Message>;

	/**
	 * Count tokens for a hypothetical request.
	 *
	 * @throws always — `countTokens` is not supported by CAPI in Phase 1.5.
	 * Phase 2 proxy maps this to HTTP 501.
	 */
	countTokens(
		githubToken: string,
		req: Anthropic.MessageCountTokensParams,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.MessageTokensCount>;

	/**
	 * List models available to the GitHub user.
	 *
	 * Each {@link CCAModel} carries a `vendor` (e.g. `'Anthropic'`) and
	 * `supported_endpoints` (e.g. `['/v1/messages']`). Callers filtering for
	 * Anthropic-format models should match on both fields.
	 *
	 * Known CAPI values as of 2026-04-30:
	 * - `vendor`: `'Anthropic'` (capitalized)
	 * - `supported_endpoints`: `'/v1/messages'` for Anthropic chat models
	 */
	models(githubToken: string, options?: ICopilotApiServiceRequestOptions): Promise<CCAModel[]>;
}

export class CopilotApiService implements ICopilotApiService {

	declare readonly _serviceBrand: undefined;

	/** Memoized lazy init; reset to `null` when initialization fails so it can be retried. */
	private _capiInitPromise: Promise<ICapiInit> | null = null;
	/** Single-entry token cache (see "Concurrency model" on the interface). */
	private _cachedToken: ICachedToken | null = null;
	/** In-flight token mints keyed by GitHub token, used to de-dup concurrent callers. */
	private readonly _pendingTokenMints = new Map<string, Promise<string>>();
	private readonly _fetch: FetchFunction;

	/**
	 * @param fetchFn optional `fetch` replacement; falls back to `globalThis.fetch`.
	 */
	constructor(
		fetchFn: FetchFunction | undefined,
		@ILogService private readonly _logService: ILogService,
		@IProductService private readonly _productService: IProductService,
	) {
		this._fetch = fetchFn ?? globalThis.fetch;
	}

	// #region Public API

	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent>;
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsNonStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.Message>;
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent> | Promise<Anthropic.Message> {
		// Dispatch on the request's own `stream` flag.
		if (request.stream) {
			return this._messagesStreaming(githubToken, request, options);
		}
		return this._messagesNonStreaming(githubToken, request, options);
	}

	async countTokens(
		_githubToken: string,
		_req: Anthropic.MessageCountTokensParams,
		_options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.MessageTokensCount> {
		throw new Error('countTokens not supported by CAPI');
	}

	async models(githubToken: string, options?: ICopilotApiServiceRequestOptions): Promise<CCAModel[]> {
		const { capiClient, tokenUrl } = await this._getCapiInit();
		const copilotToken = await this._getCopilotToken(githubToken, capiClient, tokenUrl);

		this._logService.debug('[CopilotApiService] GET models');

		const response = await capiClient.makeRequest<Response>(
			{
				method: 'GET',
				headers: mergeRequestHeaders(options?.headers, {
					'Authorization': `Bearer ${copilotToken}`,
				}),
				signal: options?.signal,
			},
			{ type: RequestType.Models },
		);

		if (!response.ok) {
			// An auth failure means the cached Copilot token is no longer
			// usable; drop it so the next call re-mints.
			if (response.status === 401 || response.status === 403) {
				this._invalidateCachedToken(githubToken);
			}
			const text = await response.text().catch(() => '');
			throw new Error(`CAPI models request failed: ${response.status} ${response.statusText} — ${text}`);
		}

		const json = await response.json() as { data?: CCAModel[] } | null;
		return json?.data ?? [];
	}

	// #endregion

	// #region Lazy Init

	/**
	 * Return the memoized init promise, building it on first use. A failed
	 * build clears the memo (and the token cache) so a later call retries.
	 */
	private _getCapiInit(): Promise<ICapiInit> {
		if (!this._capiInitPromise) {
			this._capiInitPromise = this._buildCapiInit().catch(err => {
				this._capiInitPromise = null;
				this._cachedToken = null;
				throw err;
			});
		}
		return this._capiInitPromise;
	}

	/**
	 * Construct the `CAPIClient` (wired to the injected `fetch`) and resolve
	 * the token mint URL from the product configuration.
	 */
	private async _buildCapiInit(): Promise<ICapiInit> {
		const [machineId, deviceId] = await Promise.all([
			getMachineId(err => this._logService.warn('[CopilotApiService] getMachineId failed', err)),
			getDevDeviceId(err => this._logService.warn('[CopilotApiService] getDevDeviceId failed', err)),
		]);

		const extensionInfo: IExtensionInformation = {
			name: 'agent-host',
			sessionId: generateUuid(),
			machineId,
			deviceId,
			vscodeVersion: this._productService.version,
			version: this._productService.version,
			buildType: this._productService.quality === 'stable' ? 'prod' : 'dev',
		};

		const fetch = this._fetch;
		const capiClient = new CAPIClient(extensionInfo, undefined, {
			fetch: (url, options) => fetch(url, {
				method: options.method ?? 'GET',
				headers: options.headers,
				body: options.body,
				signal: options.signal as AbortSignal | undefined,
			}),
		});

		// TODO(GHE): For GitHub Enterprise users the mint URL must point to
		// `api.<enterprise-host>/copilot_internal/v2/token` instead. This
		// requires threading the enterprise host URL through `ICopilotApiService`
		// (e.g. as an extra parameter on `messages`/`models`, or as a separate
		// `create(enterpriseHost?)` factory) and deriving the URL the same way
		// `defaultAccount.ts` does for the main workbench auth path.
		const tokenUrl = this._productService.defaultChatAgent.tokenEntitlementUrl;

		return { capiClient, tokenUrl };
	}

	// #endregion

	// #region Streaming

	/**
	 * Send a streaming request and re-yield the parsed SSE events.
	 * @throws if the response is non-2xx or has no body.
	 */
	private async *_messagesStreaming(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent> {
		const response = await this._sendRequest(githubToken, request, true, options);

		if (!response.body) {
			throw new Error('CAPI response has no body');
		}

		yield* this._readSSE(response.body);
	}

	// #endregion

	// #region Non-Streaming

	/** Send a non-streaming request and return the parsed JSON body. */
	private async _messagesNonStreaming(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.Message> {
		const response = await this._sendRequest(githubToken, request, false, options);
		return response.json() as Promise<Anthropic.Message>;
	}

	// #endregion

	// #region Shared Request

	/**
	 * POST a messages request to CAPI and return the successful response.
	 *
	 * @param stream value written to the body's `stream` field, overriding
	 * whatever `request.stream` carried.
	 * @throws on non-2xx status; `401`/`403` additionally invalidate the
	 * cached Copilot token for `githubToken`.
	 */
	private async _sendRequest(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		stream: boolean,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Response> {
		const { capiClient, tokenUrl } = await this._getCapiInit();
		const copilotToken = await this._getCopilotToken(githubToken, capiClient, tokenUrl);
		const requestId = generateUuid();

		this._logService.debug('[CopilotApiService] POST messages', `model=${request.model} stream=${stream} requestId=${requestId}`);

		const { system, ...rest } = request;
		const body = JSON.stringify({
			...rest,
			stream,
			// CAPI requires system as a text-block array, not a raw string
			...(system !== undefined
				? { system: typeof system === 'string' ? [{ type: 'text', text: system }] : system }
				: {}),
		});

		const response = await capiClient.makeRequest<Response>(
			{
				method: 'POST',
				// Caller headers are filtered case-insensitively so none of
				// the four headers below can be overridden or duplicated.
				headers: mergeRequestHeaders(options?.headers, {
					'Content-Type': 'application/json',
					'Authorization': `Bearer ${copilotToken}`,
					'X-Request-Id': requestId,
					'OpenAI-Intent': 'conversation',
				}),
				body,
				signal: options?.signal,
			},
			{ type: RequestType.ChatMessages },
		);

		if (!response.ok) {
			if (response.status === 401 || response.status === 403) {
				this._invalidateCachedToken(githubToken);
			}
			const text = await response.text().catch(() => '');
			throw new Error(`CAPI request failed: ${response.status} ${response.statusText} — ${text}`);
		}

		return response;
	}

	// #endregion

	// #region Token Minting

	/**
	 * Return a usable Copilot token for `githubToken`: the cached one when it
	 * is still inside its validity window, otherwise the result of a (shared)
	 * mint.
	 */
	private async _getCopilotToken(githubToken: string, capiClient: CAPIClient, tokenUrl: string): Promise<string> {
		const now = Date.now() / 1000;
		const cached = this._cachedToken;
		if (
			cached &&
			cached.githubToken === githubToken &&
			cached.expiresAt - now > TOKEN_REFRESH_BUFFER_SECONDS
		) {
			return cached.copilotToken;
		}

		let pendingMint = this._pendingTokenMints.get(githubToken);
		if (!pendingMint) {
			// Omit the caller's signal here: a deduped mint is shared across
			// concurrent callers, so aborting one must not cancel the mint for
			// the others. Each caller still forwards its signal to the API call.
			pendingMint = this._mintToken(githubToken, capiClient, tokenUrl)
				.finally(() => { this._pendingTokenMints.delete(githubToken); });
			this._pendingTokenMints.set(githubToken, pendingMint);
		}
		return pendingMint;
	}

	/** Drop the cached token, but only if it was minted for `githubToken`. */
	private _invalidateCachedToken(githubToken: string): void {
		if (this._cachedToken?.githubToken === githubToken) {
			this._cachedToken = null;
		}
	}

	/**
	 * Exchange a GitHub token for a Copilot token, update the client's
	 * domains from the envelope, and store the result in the cache.
	 *
	 * @throws on non-2xx status, or when the envelope carries no token.
	 */
	private async _mintToken(githubToken: string, capiClient: CAPIClient, tokenUrl: string): Promise<string> {
		this._logService.debug('[CopilotApiService] Minting new Copilot token');

		const response = await this._fetch(tokenUrl, {
			method: 'GET',
			headers: {
				'Authorization': `token ${githubToken}`,
				'X-GitHub-Api-Version': TOKEN_API_VERSION,
			},
		});

		if (!response.ok) {
			const text = await response.text().catch(() => '');
			throw new Error(`Copilot token minting failed: ${response.status} ${response.statusText} — ${text}`);
		}

		const envelope: ICopilotTokenEnvelope | null = await response.json();

		// The envelope is untrusted JSON. Without this guard a 2xx response
		// lacking a token would be cached and sent as `Bearer undefined`.
		// The body is deliberately not included in the message.
		if (typeof envelope?.token !== 'string' || envelope.token.length === 0) {
			throw new Error('Copilot token minting failed: response did not contain a token');
		}

		capiClient.updateDomains(
			{ endpoints: envelope.endpoints ?? {}, sku: envelope.sku ?? '' },
			undefined,
		);

		// Prefer `refresh_in` over `expires_at` so clients with skewed clocks
		// don't end up re-minting on every request. Mirrors the behavior in
		// extensions/copilot/.../copilotTokenManager.ts.
		// The buffer is added here because `_getCopilotToken` subtracts it
		// again, leaving the entry valid for exactly `refresh_in` seconds.
		const nowSeconds = Date.now() / 1000;
		const expiresAt = typeof envelope.refresh_in === 'number'
			? nowSeconds + envelope.refresh_in + TOKEN_REFRESH_BUFFER_SECONDS
			: envelope.expires_at;

		this._cachedToken = {
			githubToken,
			copilotToken: envelope.token,
			expiresAt,
		};

		this._logService.debug('[CopilotApiService] Token minted, cacheValidUntil:', expiresAt, 'serverExpiresAt:', envelope.expires_at);

		return envelope.token;
	}

	// #endregion

	// #region SSE Parsing

	/**
	 * Decode an SSE byte stream line by line and yield each recognized event.
	 * Stops after yielding `message_stop`.
	 *
	 * @throws on `error` events from the server (via `_parseDataLine`).
	 */
	private async *_readSSE(body: ReadableStream<Uint8Array>): AsyncGenerator<Anthropic.MessageStreamEvent> {
		const reader = body.getReader();
		const decoder = new TextDecoder();
		let buffer = '';

		try {
			while (true) {
				const { done, value } = await reader.read();
				if (done) {
					break;
				}

				buffer += decoder.decode(value, { stream: true });
				const lines = buffer.split('\n');
				// The last element is an incomplete line (or ''); keep it
				// for the next chunk.
				buffer = lines.pop() ?? '';

				for (const line of lines) {
					const event = this._parseDataLine(line);
					if (event !== undefined) {
						yield event;
						if (event.type === 'message_stop') {
							return;
						}
					}
				}
			}

			// Flush bytes the streaming decoder is still holding (an
			// incomplete trailing multi-byte sequence) so they are not
			// silently discarded.
			buffer += decoder.decode();

			// The stream may end without a trailing newline.
			if (buffer.trim()) {
				const event = this._parseDataLine(buffer);
				if (event !== undefined) {
					yield event;
				}
			}
		} finally {
			// Cancel the underlying stream so the HTTP connection is released
			// even when the consumer abandons the generator early (break, throw,
			// abort) or the stream ended on `message_stop` with bytes still in
			// flight. `releaseLock` alone leaves the body half-read.
			try {
				await reader.cancel();
			} catch {
				// ignore — cancellation is best-effort cleanup
			}
			reader.releaseLock();
		}
	}

	/**
	 * Parse one SSE line. Both `data: {...}` and `data:{...}` are accepted,
	 * since the space after the colon is optional in the event-stream format.
	 *
	 * @returns the parsed stream event, or `undefined` to skip the line
	 * (non-`data` field, empty payload, malformed JSON, or unknown event type).
	 * @throws on `error` events from the server.
	 */
	private _parseDataLine(line: string): Anthropic.MessageStreamEvent | undefined {
		if (!line.startsWith(SSE_DATA_FIELD)) {
			return undefined;
		}

		// `trim` removes the optional leading space and any trailing `\r`
		// left over from CRLF line endings.
		const data = line.slice(SSE_DATA_FIELD.length).trim();
		if (data.length === 0) {
			return undefined;
		}

		let parsed: unknown;
		try {
			parsed = JSON.parse(data);
		} catch {
			this._logService.warn('[CopilotApiService] Failed to parse SSE data:', data);
			return undefined;
		}

		if (typeof parsed !== 'object' || parsed === null) {
			return undefined;
		}

		const record = parsed as Record<string, unknown>;
		const type = record.type;
		if (typeof type !== 'string') {
			return undefined;
		}

		if (type === 'error') {
			const error = (parsed as { error?: { message?: string } }).error;
			throw new Error(error?.message ?? 'Unknown streaming error');
		}

		if (!KNOWN_SSE_EVENT_TYPES.has(type)) {
			return undefined;
		}

		return parsed as Anthropic.MessageStreamEvent;
	}

	// #endregion
}