CoCalc -- copilotApiService.ts

GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/platform/agentHost/node/shared/copilotApiService.ts
¹³³⁹⁹ views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import type Anthropic from '@anthropic-ai/sdk';
7
import { CAPIClient, RequestType, type CCAModel, type IExtensionInformation } from '@vscode/copilot-api';
8
import { generateUuid } from '../../../../base/common/uuid.js';
9
import { getDevDeviceId, getMachineId } from '../../../../base/node/id.js';
10
import { createDecorator } from '../../../instantiation/common/instantiation.js';
11
import { ILogService } from '../../../log/common/log.js';
12
import { IProductService } from '../../../product/common/productService.js';
13

14
// #region Types
15

16
/**
 * Per-call transport options for all {@link ICopilotApiService} methods.
 *
 * `headers` are merged into the outgoing CAPI request before security-
 * sensitive headers (`Authorization`, `Content-Type`, `X-Request-Id`,
 * `OpenAI-Intent`), so callers cannot override those.
 *
 * `signal` propagates to the outgoing API request but **not** to the
 * shared token mint. The mint is deduped across concurrent callers, so
 * a single caller's abort must not cancel it for everyone.
 */
export interface ICopilotApiServiceRequestOptions {
	/** Extra headers to send with the API request. */
	readonly headers?: Readonly<Record<string, string>>;
	/** Abort signal for the API request only (never for the token mint). */
	readonly signal?: AbortSignal;
}
31

32
/**
 * Envelope returned by the GitHub `copilot_internal/v2/token` endpoint.
 * @see https://docs.github.com/en/rest/copilot
 */
interface ICopilotTokenEnvelope {
	/** The Copilot session token sent as the `Bearer` credential to CAPI. */
	readonly token: string;
	/** Server-side expiry; compared against `Date.now() / 1000`, i.e. epoch seconds. */
	readonly expires_at: number;
	/** Seconds from now after which the token should be re-minted. */
	readonly refresh_in: number;
	/** Endpoint overrides forwarded to `CAPIClient.updateDomains`. */
	readonly endpoints?: { readonly api?: string };
	/** SKU forwarded to `CAPIClient.updateDomains`. */
	readonly sku?: string;
}
43

44
/**
 * The single cached Copilot token, remembered together with the GitHub token
 * it was minted for so a lookup with a different GitHub token is a miss.
 */
interface ICachedToken {
	/** GitHub token the cached Copilot token was minted from. */
	readonly githubToken: string;
	/** The minted Copilot token. */
	readonly copilotToken: string;
	/** Cache validity deadline, in seconds on the `Date.now() / 1000` clock. */
	readonly expiresAt: number;
}
49

50
/**
 * Result of the service's one-time lazy initialization.
 */
interface ICapiInit {
	/** Client used for all CAPI requests (models, chat messages). */
	readonly capiClient: CAPIClient;
	/** URL the Copilot token is minted from. */
	readonly tokenUrl: string;
}
54

55
// #endregion
56

57
// #region Constants
58

59
/**
 * Refresh the cached Copilot token this many seconds before its real expiry,
 * so an in-flight request never hits a token that expires mid-request.
 */
const TOKEN_REFRESH_BUFFER_SECONDS = 5 * 60;

/** Value sent in the `X-GitHub-Api-Version` header of the token mint request. */
const TOKEN_API_VERSION = '2025-04-01';
66

67
// #endregion
68

69
/** Signature of the global `fetch`; lets an alternative implementation be typed against it. */
export type FetchFunction = typeof globalThis.fetch;
70

71
export const ICopilotApiService = createDecorator<ICopilotApiService>('copilotApiService');

/**
 * Foundational gateway between the agent host and GitHub Copilot's CAPI proxy
 * for Anthropic-style chat completions and model discovery.
 *
 * ## Goals
 *
 * 1. **Single source of truth for CAPI auth.** Callers pass a raw GitHub token
 *    and never deal with Copilot session token minting, expiry, refresh, or
 *    invalidation themselves.
 * 2. **Stable surface for chat agents.** A small, typed API that abstracts the
 *    underlying `CAPIClient`, SSE framing, and Anthropic event taxonomy so
 *    feature code can focus on prompting.
 * 3. **Resource-safe streaming.** Async-generator output that fully releases
 *    the underlying HTTP connection regardless of how the consumer terminates
 *    iteration (early `break`, thrown error, abort, or natural end-of-stream).
 * 4. **Skew- and revocation-tolerant token cache.** Tokens stay cached as long
 *    as they're usable, are re-minted when the server tells us they're stale
 *    (`refresh_in`), and are invalidated immediately on `401`/`403` so callers
 *    self-heal without restarting the host.
 *
 * ## Non-goals
 *
 * - Per-conversation history, retry/backoff, or rate-limit handling. Callers
 *   own request orchestration.
 * - GitHub Enterprise auth host derivation. The mint URL comes from
 *   `IProductService.defaultChatAgent.tokenEntitlementUrl`. See the TODO in
 *   `_buildCapiInit` for what GHE support would require.
 *
 * ## Concurrency model
 *
 * - Multiple in-flight requests for the same GitHub token share a single
 *   token mint via an in-flight de-dup map (no thundering herd on cold
 *   start).
 * - The token cache holds **one** entry. Callers that alternate between two
 *   GitHub tokens will pay a mint round-trip on every alternation; this is
 *   intentional — the agent host is single-tenant in practice.
 * - `AbortSignal` is forwarded to the outgoing API request (messages, models)
 *   but **not** to the shared token mint, so cancellation propagates to the
 *   caller's own request without affecting concurrent callers sharing the mint.
 *
 * ## Error semantics
 *
 * - Network/transport errors propagate as raw `fetch` rejections.
 * - Non-2xx responses throw an `Error` whose message includes the HTTP status,
 *   status text, and response body. **Tokens are never embedded in error
 *   messages.**
 * - Streaming `error` SSE events throw with the server-supplied message.
 * - Malformed JSON in an SSE `data:` line is logged and skipped, not thrown.
 */
export interface ICopilotApiService {

	readonly _serviceBrand: undefined;

	/**
	 * Stream a chat completion as raw Anthropic stream events.
	 *
	 * Yields every `Anthropic.MessageStreamEvent` in the order the server
	 * emits them, **including `message_stop` as the last event** before the
	 * generator returns. Phase 2 proxy relies on receiving a complete,
	 * replayable event stream.
	 *
	 * @throws on non-2xx status or SSE `error` event.
	 */
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent>;

	/**
	 * Send a chat completion and return the full aggregated response.
	 * @throws on non-2xx status.
	 */
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsNonStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.Message>;

	/**
	 * Count tokens for a hypothetical request.
	 *
	 * @throws always — `countTokens` is not supported by CAPI in Phase 1.5.
	 * Phase 2 proxy maps this to HTTP 501.
	 */
	countTokens(
		githubToken: string,
		req: Anthropic.MessageCountTokensParams,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.MessageTokensCount>;

	/**
	 * List models available to the GitHub user.
	 *
	 * Each {@link CCAModel} carries a `vendor` (e.g. `'Anthropic'`) and
	 * `supported_endpoints` (e.g. `['/v1/messages']`). Callers filtering for
	 * Anthropic-format models should match on both fields.
	 *
	 * Known CAPI values as of 2026-04-30:
	 * - `vendor`: `'Anthropic'` (capitalized)
	 * - `supported_endpoints`: `'/v1/messages'` for Anthropic chat models
	 */
	models(githubToken: string, options?: ICopilotApiServiceRequestOptions): Promise<CCAModel[]>;
}
177

178
/**
 * {@link ICopilotApiService} implementation backed by `CAPIClient`.
 *
 * The CAPI client is built lazily on first use, one Copilot token is cached
 * per instance, and concurrent mints for the same GitHub token share a single
 * in-flight promise.
 */
export class CopilotApiService implements ICopilotApiService {

	declare readonly _serviceBrand: undefined;

	/** Memoized `_buildCapiInit` result; reset to `null` when the build rejects so a later call retries. */
	private _capiInitPromise: Promise<ICapiInit> | null = null;
	/** The single cached Copilot token, or `null` when nothing usable is cached. */
	private _cachedToken: ICachedToken | null = null;
	/** In-flight token mints keyed by GitHub token, used to de-duplicate concurrent mints. */
	private readonly _pendingTokenMints = new Map<string, Promise<string>>();
	/** `fetch` implementation used for the token mint and handed to `CAPIClient`. */
	private readonly _fetch: FetchFunction;

	/**
	 * @param fetchFn `fetch` implementation to use; falls back to `globalThis.fetch` when `undefined`.
	 */
	constructor(
		fetchFn: FetchFunction | undefined,
		@ILogService private readonly _logService: ILogService,
		@IProductService private readonly _productService: IProductService,
	) {
		this._fetch = fetchFn ?? globalThis.fetch;
	}

	// #region Public API

	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent>;
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsNonStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.Message>;
	/**
	 * Dispatches on `request.stream`: a truthy value returns the streaming
	 * generator, anything else returns the aggregated-response promise.
	 */
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent> | Promise<Anthropic.Message> {
		if (request.stream) {
			return this._messagesStreaming(githubToken, request, options);
		}
		return this._messagesNonStreaming(githubToken, request, options);
	}

	/**
	 * Always rejects: token counting is not supported by CAPI.
	 *
	 * @throws always, with the message `countTokens not supported by CAPI`.
	 */
	async countTokens(
		_githubToken: string,
		_req: Anthropic.MessageCountTokensParams,
		_options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.MessageTokensCount> {
		throw new Error('countTokens not supported by CAPI');
	}

	/**
	 * Fetches the model list from CAPI.
	 *
	 * @returns the `data` array of the response body, or `[]` when it is absent.
	 * @throws on a non-2xx response; a `401`/`403` also drops the cached token
	 * for `githubToken` so the next call re-mints.
	 */
	async models(githubToken: string, options?: ICopilotApiServiceRequestOptions): Promise<CCAModel[]> {
		const { capiClient, tokenUrl } = await this._getCapiInit();
		const copilotToken = await this._getCopilotToken(githubToken, capiClient, tokenUrl);

		this._logService.debug('[CopilotApiService] GET models');

		const response = await capiClient.makeRequest<Response>(
			{
				method: 'GET',
				headers: this._mergeHeaders(options?.headers, {
					'Authorization': `Bearer ${copilotToken}`,
				}),
				signal: options?.signal,
			},
			{ type: RequestType.Models },
		);

		if (!response.ok) {
			if (response.status === 401 || response.status === 403) {
				this._invalidateCachedToken(githubToken);
			}
			const text = await response.text().catch(() => '');
			throw new Error(`CAPI models request failed: ${response.status} ${response.statusText} — ${text}`);
		}

		const json = await response.json() as { data?: CCAModel[] };
		return json.data ?? [];
	}

	// #endregion

	// #region Lazy Init

	/**
	 * Returns the memoized CAPI initialization, starting it on first use.
	 * A failed build clears the memo (and the cached token) before rethrowing,
	 * so the next caller starts a fresh attempt.
	 */
	private _getCapiInit(): Promise<ICapiInit> {
		if (!this._capiInitPromise) {
			this._capiInitPromise = this._buildCapiInit().catch(err => {
				this._capiInitPromise = null;
				this._cachedToken = null;
				throw err;
			});
		}
		return this._capiInitPromise;
	}

	/**
	 * Builds the `CAPIClient` (identified by machine id, device id and product
	 * version) and resolves the token mint URL from the product configuration.
	 */
	private async _buildCapiInit(): Promise<ICapiInit> {
		const [machineId, deviceId] = await Promise.all([
			getMachineId(err => this._logService.warn('[CopilotApiService] getMachineId failed', err)),
			getDevDeviceId(err => this._logService.warn('[CopilotApiService] getDevDeviceId failed', err)),
		]);

		const extensionInfo: IExtensionInformation = {
			name: 'agent-host',
			sessionId: generateUuid(),
			machineId,
			deviceId,
			vscodeVersion: this._productService.version,
			version: this._productService.version,
			buildType: this._productService.quality === 'stable' ? 'prod' : 'dev',
		};

		// Route the client's traffic through the injected fetch, forwarding
		// only the method, headers, body and signal.
		const fetch = this._fetch;
		const capiClient = new CAPIClient(extensionInfo, undefined, {
			fetch: (url, options) => fetch(url, {
				method: options.method ?? 'GET',
				headers: options.headers,
				body: options.body,
				signal: options.signal as AbortSignal | undefined,
			}),
		});

		// TODO(GHE): For GitHub Enterprise users the mint URL must point to
		// `api.<enterprise-host>/copilot_internal/v2/token` instead. This
		// requires threading the enterprise host URL through `ICopilotApiService`
		// (e.g. as an extra parameter on `messages`/`models`, or as a separate
		// `create(enterpriseHost?)` factory) and deriving the URL the same way
		// `defaultAccount.ts` does for the main workbench auth path.
		const tokenUrl = this._productService.defaultChatAgent.tokenEntitlementUrl;

		return { capiClient, tokenUrl };
	}

	// #endregion

	// #region Streaming

	/**
	 * Sends the request with `stream: true` and yields the parsed SSE events.
	 *
	 * @throws when the response carries no body, on non-2xx status, or on an
	 * SSE `error` event.
	 */
	private async *_messagesStreaming(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent> {
		const response = await this._sendRequest(githubToken, request, true, options);

		if (!response.body) {
			throw new Error('CAPI response has no body');
		}

		yield* this._readSSE(response.body);
	}

	// #endregion

	// #region Non-Streaming

	/**
	 * Sends the request with `stream: false` and returns the JSON response body.
	 */
	private async _messagesNonStreaming(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.Message> {
		const response = await this._sendRequest(githubToken, request, false, options);
		return response.json() as Promise<Anthropic.Message>;
	}

	// #endregion

	// #region Shared Request

	/**
	 * Combines caller-supplied headers with the headers this service sets.
	 *
	 * HTTP header names are case-insensitive, so a plain object spread would
	 * let a caller's `authorization` survive next to the service's
	 * `Authorization`. Any caller header whose name matches a service header,
	 * ignoring case, is therefore dropped before the service headers are added.
	 */
	private _mergeHeaders(
		callerHeaders: Readonly<Record<string, string>> | undefined,
		serviceHeaders: Readonly<Record<string, string>>,
	): Record<string, string> {
		const reserved = new Set(Object.keys(serviceHeaders).map(name => name.toLowerCase()));
		const merged: Record<string, string> = {};
		for (const [name, value] of Object.entries(callerHeaders ?? {})) {
			if (!reserved.has(name.toLowerCase())) {
				merged[name] = value;
			}
		}
		return { ...merged, ...serviceHeaders };
	}

	/**
	 * Obtains a Copilot token and POSTs the chat request to CAPI.
	 *
	 * @param stream value written to the body's `stream` field, overriding
	 * whatever `request.stream` holds.
	 * @returns the successful (2xx) response, body unread.
	 * @throws on a non-2xx response; a `401`/`403` also drops the cached token
	 * for `githubToken` so the next call re-mints.
	 */
	private async _sendRequest(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		stream: boolean,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Response> {
		const { capiClient, tokenUrl } = await this._getCapiInit();
		const copilotToken = await this._getCopilotToken(githubToken, capiClient, tokenUrl);
		const requestId = generateUuid();

		this._logService.debug('[CopilotApiService] POST messages', `model=${request.model} stream=${stream} requestId=${requestId}`);

		const { system, ...rest } = request;
		const body = JSON.stringify({
			...rest,
			stream,
			// CAPI requires system as a text-block array, not a raw string
			...(system !== undefined
				? { system: typeof system === 'string' ? [{ type: 'text', text: system }] : system }
				: {}),
		});

		const response = await capiClient.makeRequest<Response>(
			{
				method: 'POST',
				headers: this._mergeHeaders(options?.headers, {
					'Content-Type': 'application/json',
					'Authorization': `Bearer ${copilotToken}`,
					'X-Request-Id': requestId,
					'OpenAI-Intent': 'conversation',
				}),
				body,
				signal: options?.signal,
			},
			{ type: RequestType.ChatMessages },
		);
		if (!response.ok) {
			if (response.status === 401 || response.status === 403) {
				this._invalidateCachedToken(githubToken);
			}
			const text = await response.text().catch(() => '');
			throw new Error(`CAPI request failed: ${response.status} ${response.statusText} — ${text}`);
		}

		return response;
	}

	// #endregion

	// #region Token Minting

	/**
	 * Returns a Copilot token for `githubToken`: the cached one when it was
	 * minted for the same GitHub token and is more than
	 * `TOKEN_REFRESH_BUFFER_SECONDS` away from its deadline, otherwise the
	 * result of a (possibly already in-flight) mint.
	 */
	private async _getCopilotToken(githubToken: string, capiClient: CAPIClient, tokenUrl: string): Promise<string> {
		const now = Date.now() / 1000;
		const cached = this._cachedToken;
		if (
			cached &&
			cached.githubToken === githubToken &&
			cached.expiresAt - now > TOKEN_REFRESH_BUFFER_SECONDS
		) {
			return cached.copilotToken;
		}

		let pendingMint = this._pendingTokenMints.get(githubToken);
		if (!pendingMint) {
			// Omit the caller's signal here: a deduped mint is shared across
			// concurrent callers, so aborting one must not cancel the mint for
			// the others. Each caller still forwards its signal to the API call.
			pendingMint = this._mintToken(githubToken, capiClient, tokenUrl)
				.finally(() => { this._pendingTokenMints.delete(githubToken); });
			this._pendingTokenMints.set(githubToken, pendingMint);
		}
		return pendingMint;
	}

	/** Drops the cached token, but only if it was minted for `githubToken`. */
	private _invalidateCachedToken(githubToken: string): void {
		if (this._cachedToken?.githubToken === githubToken) {
			this._cachedToken = null;
		}
	}

	/**
	 * Mints a Copilot token from `tokenUrl`, updates the client's domains from
	 * the returned envelope and stores the token as the single cache entry.
	 *
	 * @throws on a non-2xx response, or when the response body carries no
	 * usable `token`.
	 */
	private async _mintToken(githubToken: string, capiClient: CAPIClient, tokenUrl: string): Promise<string> {
		this._logService.debug('[CopilotApiService] Minting new Copilot token');

		const response = await this._fetch(tokenUrl, {
			method: 'GET',
			headers: {
				'Authorization': `token ${githubToken}`,
				'X-GitHub-Api-Version': TOKEN_API_VERSION,
			},
		});

		if (!response.ok) {
			const text = await response.text().catch(() => '');
			throw new Error(`Copilot token minting failed: ${response.status} ${response.statusText} — ${text}`);
		}

		const envelope: ICopilotTokenEnvelope = await response.json();

		// The envelope type is only a compile-time promise; the JSON itself is
		// unchecked. Fail here rather than cache and send `Bearer undefined`.
		if (typeof envelope.token !== 'string' || envelope.token.length === 0) {
			throw new Error('Copilot token minting failed: response did not contain a token');
		}

		capiClient.updateDomains(
			{ endpoints: envelope.endpoints ?? {}, sku: envelope.sku ?? '' },
			undefined,
		);

		// Prefer `refresh_in` over `expires_at` so clients with skewed clocks
		// don't end up re-minting on every request. Mirrors the behavior in
		// extensions/copilot/.../copilotTokenManager.ts.
		// The buffer added here cancels the one subtracted by the cache check,
		// so the token is treated as stale exactly `refresh_in` seconds from now.
		const nowSeconds = Date.now() / 1000;
		const expiresAt = typeof envelope.refresh_in === 'number'
			? nowSeconds + envelope.refresh_in + TOKEN_REFRESH_BUFFER_SECONDS
			: envelope.expires_at;

		this._cachedToken = {
			githubToken,
			copilotToken: envelope.token,
			expiresAt,
		};

		this._logService.debug('[CopilotApiService] Token minted, cacheValidUntil:', expiresAt, 'serverExpiresAt:', envelope.expires_at);

		return envelope.token;
	}

	// #endregion

	// #region SSE Parsing

	/**
	 * Decodes the response body as newline-delimited SSE and yields each
	 * recognized event, returning right after `message_stop`.
	 *
	 * @throws on an SSE `error` event (via `_parseDataLine`).
	 */
	private async *_readSSE(body: ReadableStream<Uint8Array>): AsyncGenerator<Anthropic.MessageStreamEvent> {
		const reader = body.getReader();
		const decoder = new TextDecoder();
		let buffer = '';

		try {
			while (true) {
				const { done, value } = await reader.read();
				if (done) {
					// Flush whatever the streaming decoder is still holding
					// (e.g. a multi-byte character cut off at end of stream).
					buffer += decoder.decode();
					break;
				}

				buffer += decoder.decode(value, { stream: true });
				// The last element is an incomplete line (or ''); keep it for
				// the next chunk.
				const lines = buffer.split('\n');
				buffer = lines.pop() ?? '';

				for (const line of lines) {
					const event = this._parseDataLine(line);
					if (event !== undefined) {
						yield event;
						if (event.type === 'message_stop') {
							return;
						}
					}
				}
			}

			// A final line that was not newline-terminated.
			if (buffer.trim()) {
				const event = this._parseDataLine(buffer);
				if (event !== undefined) {
					yield event;
				}
			}
		} finally {
			// Cancel the underlying stream so the HTTP connection is released
			// even when the consumer abandons the generator early (break, throw,
			// abort) or the stream ended on `message_stop` with bytes still in
			// flight. `releaseLock` alone leaves the body half-read.
			try {
				await reader.cancel();
			} catch {
				// ignore — cancellation is best-effort cleanup
			}
			reader.releaseLock();
		}
	}

	/**
	 * Parses one SSE line. Only `data:` lines are considered; the single
	 * space after the colon is optional, as the SSE format allows.
	 *
	 * @returns the parsed stream event, or `undefined` to skip the line.
	 * @throws on `error` events from the server.
	 */
	private _parseDataLine(line: string): Anthropic.MessageStreamEvent | undefined {
		const prefix = 'data:';
		if (!line.startsWith(prefix)) {
			return undefined;
		}

		// `trim` removes the optional leading space and any trailing `\r`.
		const data = line.slice(prefix.length).trim();

		let parsed: unknown;
		try {
			parsed = JSON.parse(data);
		} catch {
			this._logService.warn('[CopilotApiService] Failed to parse SSE data:', data);
			return undefined;
		}

		if (typeof parsed !== 'object' || parsed === null) {
			return undefined;
		}

		const record = parsed as Record<string, unknown>;
		const type = record.type;
		if (typeof type !== 'string') {
			return undefined;
		}

		if (type === 'error') {
			const error = (parsed as { error?: { message?: string } }).error;
			throw new Error(error?.message ?? 'Unknown streaming error');
		}

		// Anything outside the known event taxonomy is skipped.
		if (!KNOWN_SSE_EVENT_TYPES.has(type)) {
			return undefined;
		}

		return parsed as Anthropic.MessageStreamEvent;
	}

	// #endregion
}
561

562
/**
 * SSE event `type` values that are forwarded to consumers. Parsed events with
 * any other `type` are skipped (`error` is handled separately and throws).
 */
const KNOWN_SSE_EVENT_TYPES: ReadonlySet<string> = new Set([
	'message_start', 'message_delta', 'message_stop',
	'content_block_start', 'content_block_delta', 'content_block_stop',
]);
566

567
Product

Resources

Company