Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/platform/agentHost/node/shared/copilotApiService.ts
13399 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import type Anthropic from '@anthropic-ai/sdk';
7
import { CAPIClient, RequestType, type CCAModel, type IExtensionInformation } from '@vscode/copilot-api';
8
import { generateUuid } from '../../../../base/common/uuid.js';
9
import { getDevDeviceId, getMachineId } from '../../../../base/node/id.js';
10
import { createDecorator } from '../../../instantiation/common/instantiation.js';
11
import { ILogService } from '../../../log/common/log.js';
12
import { IProductService } from '../../../product/common/productService.js';
13
14
// #region Types
15
16
/**
 * Transport-level options accepted by every {@link ICopilotApiService} method.
 *
 * - `headers` are added to the outgoing CAPI request first; the service then
 *   layers its own security-sensitive headers (`Authorization`,
 *   `Content-Type`, `X-Request-Id`, `OpenAI-Intent`) on top, so callers cannot
 *   override those.
 * - `signal` is forwarded to the outgoing API request but **not** to the
 *   shared token mint. The mint is deduped across concurrent callers, so one
 *   caller's abort must not cancel it for everyone else.
 */
export interface ICopilotApiServiceRequestOptions {
	/** Aborts this caller's API request; never the shared token mint. */
	readonly signal?: AbortSignal;
	/** Extra request headers; service-owned headers take precedence. */
	readonly headers?: Readonly<Record<string, string>>;
}
31
32
/**
 * JSON body returned by the GitHub `copilot_internal/v2/token` endpoint, as
 * consumed by the token mint.
 * @see https://docs.github.com/en/rest/copilot
 */
interface ICopilotTokenEnvelope {
	/** The minted Copilot token, sent as the `Bearer` credential on CAPI calls. */
	readonly token: string;
	/** Delay before the token should be refreshed; added to the local clock in seconds. */
	readonly refresh_in: number;
	/** Server-side expiry; compared against `Date.now() / 1000`, so presumably epoch seconds. */
	readonly expires_at: number;
	/** Forwarded to `CAPIClient.updateDomains` (empty string when absent). */
	readonly sku?: string;
	/** Forwarded to `CAPIClient.updateDomains` (empty object when absent). */
	readonly endpoints?: { readonly api?: string };
}
43
44
/**
 * The single cached Copilot token held by the service, together with the
 * GitHub token it was minted for.
 */
interface ICachedToken {
	/** Copilot token returned by the mint endpoint. */
	readonly copilotToken: string;
	/** GitHub token this entry belongs to; a different token is a cache miss. */
	readonly githubToken: string;
	/** Cache validity deadline, on the same scale as `Date.now() / 1000`. */
	readonly expiresAt: number;
}
49
50
/**
 * Result of the one-time lazy initialisation: the shared CAPI client and the
 * URL used to mint Copilot tokens.
 */
interface ICapiInit {
	/** URL the Copilot token is minted from. */
	readonly tokenUrl: string;
	/** Client through which all CAPI requests are issued. */
	readonly capiClient: CAPIClient;
}
54
55
// #endregion
56
57
// #region Constants
58
59
/** Value sent in the `X-GitHub-Api-Version` header of the token-mint request. */
const TOKEN_API_VERSION = '2025-04-01';

/**
 * Safety margin in seconds (five minutes). The cached Copilot token is
 * refreshed this long before its recorded expiry so that an in-flight request
 * never ends up using a token that expires mid-request.
 */
const TOKEN_REFRESH_BUFFER_SECONDS = 300;
66
67
// #endregion
68
69
/**
 * Signature of the `fetch` implementation used by {@link CopilotApiService}.
 * It is identical to the global `fetch`, so an alternative implementation can
 * be supplied through the service constructor.
 */
export type FetchFunction = typeof globalThis.fetch;
70
71
/** Service identifier (decorator) for {@link ICopilotApiService}, registered under the id `'copilotApiService'`. */
export const ICopilotApiService = createDecorator<ICopilotApiService>('copilotApiService');
72
73
/**
 * Gateway between the agent host and GitHub Copilot's CAPI proxy, covering
 * Anthropic-style chat completions and model discovery.
 *
 * ## Goals
 *
 * 1. **One place that owns CAPI auth.** Callers hand over a raw GitHub token;
 *    minting, expiry, refresh and invalidation of the Copilot session token
 *    are handled internally.
 * 2. **A small, stable, typed surface for chat agents.** `CAPIClient`, SSE
 *    framing and the Anthropic event taxonomy are hidden so feature code can
 *    concentrate on prompting.
 * 3. **Resource-safe streaming.** Results are exposed as an async generator
 *    that releases the underlying HTTP connection however the consumer stops
 *    iterating (early `break`, thrown error, abort, or natural end-of-stream).
 * 4. **A token cache that tolerates clock skew and revocation.** A token stays
 *    cached while it is usable, is re-minted once the server-provided
 *    `refresh_in` says it is stale, and is dropped immediately on `401`/`403`
 *    so callers recover without restarting the host.
 *
 * ## Non-goals
 *
 * - Conversation history, retry/backoff and rate-limit handling. Request
 *   orchestration belongs to the caller.
 * - Deriving the auth host for GitHub Enterprise. The mint URL is taken from
 *   `IProductService.defaultChatAgent.tokenEntitlementUrl`; the TODO in
 *   `_buildCapiInit` describes what GHE support would need.
 *
 * ## Concurrency model
 *
 * - Concurrent requests for the same GitHub token share one token mint through
 *   an in-flight de-dup map, so a cold start does not cause a thundering herd.
 * - The cache holds exactly **one** entry. Alternating between two GitHub
 *   tokens costs a mint round-trip per alternation; this is intentional — the
 *   agent host is single-tenant in practice.
 * - An `AbortSignal` is forwarded to the outgoing API request (messages,
 *   models) but **not** to the shared token mint, so cancelling one caller
 *   does not affect others that share the mint.
 *
 * ## Error semantics
 *
 * - Network/transport failures surface as the raw `fetch` rejection.
 * - A non-2xx response throws an `Error` whose message carries the HTTP
 *   status, status text and response body. **Tokens are never embedded in
 *   error messages.**
 * - A streaming `error` SSE event throws with the server-supplied message.
 * - An SSE `data:` line containing malformed JSON is logged and skipped rather
 *   than thrown.
 */
export interface ICopilotApiService {

	readonly _serviceBrand: undefined;

	/**
	 * Streams a chat completion as raw Anthropic stream events.
	 *
	 * Every `Anthropic.MessageStreamEvent` is yielded in server order,
	 * **with `message_stop` as the final event** before the generator
	 * returns. The Phase 2 proxy depends on receiving a complete, replayable
	 * event stream.
	 *
	 * @throws on non-2xx status or SSE `error` event.
	 */
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent>;

	/**
	 * Sends a chat completion and resolves with the full aggregated response.
	 * @throws on non-2xx status.
	 */
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsNonStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.Message>;

	/**
	 * Counts tokens for a hypothetical request.
	 *
	 * @throws always — CAPI does not support `countTokens` in Phase 1.5.
	 * The Phase 2 proxy maps this to HTTP 501.
	 */
	countTokens(
		githubToken: string,
		req: Anthropic.MessageCountTokensParams,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.MessageTokensCount>;

	/**
	 * Lists the models available to the GitHub user.
	 *
	 * Every {@link CCAModel} has a `vendor` (e.g. `'Anthropic'`) and a list of
	 * `supported_endpoints` (e.g. `['/v1/messages']`). To select
	 * Anthropic-format models, match on both fields.
	 *
	 * Known CAPI values as of 2026-04-30:
	 * - `vendor`: `'Anthropic'` (capitalized)
	 * - `supported_endpoints`: `'/v1/messages'` for Anthropic chat models
	 */
	models(githubToken: string, options?: ICopilotApiServiceRequestOptions): Promise<CCAModel[]>;
}
177
178
/**
 * {@link ICopilotApiService} implementation built on `CAPIClient`.
 *
 * The CAPI client is created lazily on first use. Copilot tokens are minted
 * from the caller's GitHub token on demand, kept in a single-entry cache, and
 * concurrent mints for the same GitHub token are shared.
 */
export class CopilotApiService implements ICopilotApiService {

	declare readonly _serviceBrand: undefined;

	/** Memoized lazy initialisation; reset to `null` when initialisation fails. */
	private _capiInitPromise: Promise<ICapiInit> | null = null;
	/** The one cached Copilot token (see `_getCopilotToken`). */
	private _cachedToken: ICachedToken | null = null;
	/** In-flight mints keyed by GitHub token, used to share a mint between callers. */
	private readonly _pendingTokenMints = new Map<string, Promise<string>>();
	private readonly _fetch: FetchFunction;

	/**
	 * @param fetchFn `fetch` implementation to use; falls back to the global
	 * `fetch` when `undefined`.
	 */
	constructor(
		fetchFn: FetchFunction | undefined,
		@ILogService private readonly _logService: ILogService,
		@IProductService private readonly _productService: IProductService,
	) {
		this._fetch = fetchFn ?? globalThis.fetch;
	}

	// #region Public API

	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent>;
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParamsNonStreaming,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.Message>;
	/**
	 * Dispatches to the streaming or non-streaming implementation depending
	 * on `request.stream`.
	 */
	messages(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent> | Promise<Anthropic.Message> {
		if (request.stream) {
			return this._messagesStreaming(githubToken, request, options);
		}
		return this._messagesNonStreaming(githubToken, request, options);
	}

	/**
	 * Not supported by CAPI.
	 * @throws always.
	 */
	async countTokens(
		_githubToken: string,
		_req: Anthropic.MessageCountTokensParams,
		_options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.MessageTokensCount> {
		throw new Error('countTokens not supported by CAPI');
	}

	/**
	 * Fetches the model list from CAPI.
	 *
	 * @returns the `data` array of the response, or an empty array when the
	 * response carries no such array.
	 * @throws on non-2xx status; a `401`/`403` also drops the cached token.
	 */
	async models(githubToken: string, options?: ICopilotApiServiceRequestOptions): Promise<CCAModel[]> {
		const { capiClient, tokenUrl } = await this._getCapiInit();
		const copilotToken = await this._getCopilotToken(githubToken, capiClient, tokenUrl);

		this._logService.debug('[CopilotApiService] GET models');

		const response = await capiClient.makeRequest<Response>(
			{
				method: 'GET',
				headers: this._withServiceHeaders(options?.headers, {
					'Authorization': `Bearer ${copilotToken}`,
				}),
				signal: options?.signal,
			},
			{ type: RequestType.Models },
		);

		if (!response.ok) {
			throw await this._createHttpError(response, githubToken, 'CAPI models request failed');
		}

		// Guard against a `null` or unexpectedly shaped payload instead of
		// dereferencing it blindly.
		const payload: unknown = await response.json();
		const data = (payload as { data?: unknown } | null)?.data;
		return Array.isArray(data) ? data as CCAModel[] : [];
	}

	// #endregion

	// #region Lazy Init

	/**
	 * Returns the memoized initialisation promise, starting initialisation on
	 * first use. A failed initialisation clears the memo (and the cached
	 * token) so the next call retries.
	 */
	private _getCapiInit(): Promise<ICapiInit> {
		if (!this._capiInitPromise) {
			this._capiInitPromise = this._buildCapiInit().catch(err => {
				this._capiInitPromise = null;
				this._cachedToken = null;
				throw err;
			});
		}
		return this._capiInitPromise;
	}

	/**
	 * Creates the `CAPIClient` (identified by machine/device id and product
	 * version) and resolves the token mint URL from the product configuration.
	 */
	private async _buildCapiInit(): Promise<ICapiInit> {
		const [machineId, deviceId] = await Promise.all([
			getMachineId(err => this._logService.warn('[CopilotApiService] getMachineId failed', err)),
			getDevDeviceId(err => this._logService.warn('[CopilotApiService] getDevDeviceId failed', err)),
		]);

		const extensionInfo: IExtensionInformation = {
			name: 'agent-host',
			sessionId: generateUuid(),
			machineId,
			deviceId,
			vscodeVersion: this._productService.version,
			version: this._productService.version,
			buildType: this._productService.quality === 'stable' ? 'prod' : 'dev',
		};

		const fetchFn = this._fetch;
		const capiClient = new CAPIClient(extensionInfo, undefined, {
			fetch: (url, options) => fetchFn(url, {
				method: options.method ?? 'GET',
				headers: options.headers,
				body: options.body,
				signal: options.signal as AbortSignal | undefined,
			}),
		});

		// TODO(GHE): For GitHub Enterprise users the mint URL must point to
		// `api.<enterprise-host>/copilot_internal/v2/token` instead. This
		// requires threading the enterprise host URL through `ICopilotApiService`
		// (e.g. as an extra parameter on `messages`/`models`, or as a separate
		// `create(enterpriseHost?)` factory) and deriving the URL the same way
		// `defaultAccount.ts` does for the main workbench auth path.
		const tokenUrl = this._productService.defaultChatAgent.tokenEntitlementUrl;

		return { capiClient, tokenUrl };
	}

	// #endregion

	// #region Streaming

	/**
	 * Sends a streaming request and yields the parsed SSE events.
	 * @throws if the request fails or the response has no body.
	 */
	private async *_messagesStreaming(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		options?: ICopilotApiServiceRequestOptions,
	): AsyncGenerator<Anthropic.MessageStreamEvent> {
		const response = await this._sendRequest(githubToken, request, true, options);

		if (!response.body) {
			throw new Error('CAPI response has no body');
		}

		yield* this._readSSE(response.body);
	}

	// #endregion

	// #region Non-Streaming

	/** Sends a non-streaming request and resolves with the parsed JSON body. */
	private async _messagesNonStreaming(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Anthropic.Message> {
		const response = await this._sendRequest(githubToken, request, false, options);
		return response.json() as Promise<Anthropic.Message>;
	}

	// #endregion

	// #region Shared Request

	/**
	 * Issues the chat `POST` shared by the streaming and non-streaming paths.
	 *
	 * @param stream value written to the body's `stream` field (overrides the
	 * one in `request`).
	 * @returns the successful (2xx) response.
	 * @throws on non-2xx status; a `401`/`403` also drops the cached token.
	 */
	private async _sendRequest(
		githubToken: string,
		request: Anthropic.MessageCreateParams,
		stream: boolean,
		options?: ICopilotApiServiceRequestOptions,
	): Promise<Response> {
		const { capiClient, tokenUrl } = await this._getCapiInit();
		const copilotToken = await this._getCopilotToken(githubToken, capiClient, tokenUrl);
		const requestId = generateUuid();

		this._logService.debug('[CopilotApiService] POST messages', `model=${request.model} stream=${stream} requestId=${requestId}`);

		const { system, ...rest } = request;
		const body = JSON.stringify({
			...rest,
			stream,
			// CAPI requires system as a text-block array, not a raw string
			...(system !== undefined
				? { system: typeof system === 'string' ? [{ type: 'text', text: system }] : system }
				: {}),
		});

		const response = await capiClient.makeRequest<Response>(
			{
				method: 'POST',
				headers: this._withServiceHeaders(options?.headers, {
					'Content-Type': 'application/json',
					'Authorization': `Bearer ${copilotToken}`,
					'X-Request-Id': requestId,
					'OpenAI-Intent': 'conversation',
				}),
				body,
				signal: options?.signal,
			},
			{ type: RequestType.ChatMessages },
		);

		if (!response.ok) {
			throw await this._createHttpError(response, githubToken, 'CAPI request failed');
		}

		return response;
	}

	/**
	 * Combines caller-supplied headers with the headers this service owns.
	 *
	 * HTTP header names are case-insensitive, so a plain object spread is not
	 * enough to keep callers from interfering: `authorization` and
	 * `Authorization` would survive as two separate keys. Caller headers whose
	 * name matches a service header in any casing are therefore dropped.
	 *
	 * @param callerHeaders optional headers from the request options.
	 * @param serviceHeaders headers the service sets itself; these always win.
	 * @returns a new header record.
	 */
	private _withServiceHeaders(
		callerHeaders: Readonly<Record<string, string>> | undefined,
		serviceHeaders: Readonly<Record<string, string>>,
	): Record<string, string> {
		const reserved = new Set(Object.keys(serviceHeaders).map(name => name.toLowerCase()));
		const merged: Record<string, string> = {};
		for (const [name, value] of Object.entries(callerHeaders ?? {})) {
			if (!reserved.has(name.toLowerCase())) {
				merged[name] = value;
			}
		}
		return { ...merged, ...serviceHeaders };
	}

	/**
	 * Builds the error for a non-2xx CAPI response. Drops the cached token on
	 * `401`/`403` so the next call mints a fresh one.
	 *
	 * @param label message prefix identifying the failed request.
	 * @returns an `Error` with status, status text and response body (the body
	 * is empty when it cannot be read).
	 */
	private async _createHttpError(response: Response, githubToken: string, label: string): Promise<Error> {
		if (response.status === 401 || response.status === 403) {
			this._invalidateCachedToken(githubToken);
		}
		const text = await response.text().catch(() => '');
		return new Error(`${label}: ${response.status} ${response.statusText} — ${text}`);
	}

	// #endregion

	// #region Token Minting

	/**
	 * Returns a Copilot token for `githubToken`: the cached one when it
	 * belongs to this GitHub token and is more than
	 * `TOKEN_REFRESH_BUFFER_SECONDS` away from its recorded expiry, otherwise
	 * the result of a (possibly shared) mint.
	 */
	private async _getCopilotToken(githubToken: string, capiClient: CAPIClient, tokenUrl: string): Promise<string> {
		const now = Date.now() / 1000;
		const cached = this._cachedToken;
		if (
			cached &&
			cached.githubToken === githubToken &&
			cached.expiresAt - now > TOKEN_REFRESH_BUFFER_SECONDS
		) {
			return cached.copilotToken;
		}

		let pending = this._pendingTokenMints.get(githubToken);
		if (!pending) {
			// Omit the caller's signal here: a deduped mint is shared across
			// concurrent callers, so aborting one must not cancel the mint for
			// the others. Each caller still forwards its signal to the API call.
			pending = this._mintToken(githubToken, capiClient, tokenUrl)
				.finally(() => { this._pendingTokenMints.delete(githubToken); });
			this._pendingTokenMints.set(githubToken, pending);
		}
		return pending;
	}

	/** Drops the cached token if it was minted for `githubToken`. */
	private _invalidateCachedToken(githubToken: string): void {
		if (this._cachedToken?.githubToken === githubToken) {
			this._cachedToken = null;
		}
	}

	/**
	 * Mints a Copilot token from the token endpoint, updates the CAPI client's
	 * domains from the response, and stores the token in the cache.
	 *
	 * @returns the minted Copilot token.
	 * @throws on non-2xx status, or when the response does not contain a
	 * token. The token itself is never placed in an error message.
	 */
	private async _mintToken(githubToken: string, capiClient: CAPIClient, tokenUrl: string): Promise<string> {
		this._logService.debug('[CopilotApiService] Minting new Copilot token');

		// Call through a local, as `_buildCapiInit` does, so the fetch
		// implementation is never invoked with this service as its receiver.
		const fetchFn = this._fetch;
		const response = await fetchFn(tokenUrl, {
			method: 'GET',
			headers: {
				'Authorization': `token ${githubToken}`,
				'X-GitHub-Api-Version': TOKEN_API_VERSION,
			},
		});

		if (!response.ok) {
			const text = await response.text().catch(() => '');
			throw new Error(`Copilot token minting failed: ${response.status} ${response.statusText} — ${text}`);
		}

		// Treat the body as untrusted: a 2xx without a usable token must not
		// be cached or sent on as `Bearer undefined`.
		const envelope = await response.json() as Partial<ICopilotTokenEnvelope> | null;
		const copilotToken = envelope?.token;
		if (!envelope || typeof copilotToken !== 'string' || copilotToken.length === 0) {
			throw new Error('Copilot token minting failed: response did not include a token');
		}

		capiClient.updateDomains(
			{ endpoints: envelope.endpoints ?? {}, sku: envelope.sku ?? '' },
			undefined,
		);

		// Prefer `refresh_in` over `expires_at` so clients with skewed clocks
		// don't end up re-minting on every request. Mirrors the behavior in
		// extensions/copilot/.../copilotTokenManager.ts.
		const nowSeconds = Date.now() / 1000;
		let expiresAt: number;
		if (typeof envelope.refresh_in === 'number') {
			// The buffer added here is subtracted again by the freshness check
			// in `_getCopilotToken`, so the token is reused for `refresh_in`.
			expiresAt = nowSeconds + envelope.refresh_in + TOKEN_REFRESH_BUFFER_SECONDS;
		} else if (typeof envelope.expires_at === 'number') {
			expiresAt = envelope.expires_at;
		} else {
			// No expiry information at all: never treat the entry as fresh.
			expiresAt = 0;
		}

		this._cachedToken = {
			githubToken,
			copilotToken,
			expiresAt,
		};

		this._logService.debug('[CopilotApiService] Token minted, cacheValidUntil:', expiresAt, 'serverExpiresAt:', envelope.expires_at);

		return copilotToken;
	}

	// #endregion

	// #region SSE Parsing

	/**
	 * Decodes an SSE byte stream line by line and yields the recognised
	 * events. Stops after yielding `message_stop`, or when the stream ends.
	 *
	 * @throws when the stream carries an `error` event.
	 */
	private async *_readSSE(body: ReadableStream<Uint8Array>): AsyncGenerator<Anthropic.MessageStreamEvent> {
		const reader = body.getReader();
		const decoder = new TextDecoder();
		let buffer = '';

		try {
			while (true) {
				const { done, value } = await reader.read();
				if (done) {
					break;
				}

				buffer += decoder.decode(value, { stream: true });
				const lines = buffer.split('\n');
				// The last piece is an incomplete line; keep it for the next chunk.
				buffer = lines.pop() ?? '';

				for (const line of lines) {
					const event = this._parseDataLine(line);
					if (event !== undefined) {
						yield event;
						if (event.type === 'message_stop') {
							return;
						}
					}
				}
			}

			// Flush bytes the streaming decoder is still holding, then handle
			// a final line that was not newline-terminated.
			buffer += decoder.decode();
			if (buffer.trim()) {
				const event = this._parseDataLine(buffer);
				if (event !== undefined) {
					yield event;
				}
			}
		} finally {
			// Cancel the underlying stream so the HTTP connection is released
			// even when the consumer abandons the generator early (break, throw,
			// abort) or the stream ended on `message_stop` with bytes still in
			// flight. `releaseLock` alone leaves the body half-read.
			try {
				await reader.cancel();
			} catch {
				// ignore — cancellation is best-effort cleanup
			}
			reader.releaseLock();
		}
	}

	/**
	 * Parses one SSE line.
	 *
	 * Only `data` fields are considered; the single space after the colon is
	 * optional. Payloads that are not JSON objects with a string `type`, or
	 * whose `type` is not a known stream event, are skipped.
	 *
	 * @returns the parsed stream event, or `undefined` to skip the line.
	 * @throws on `error` events from the server.
	 */
	private _parseDataLine(line: string): Anthropic.MessageStreamEvent | undefined {
		const fieldPrefix = 'data:';
		if (!line.startsWith(fieldPrefix)) {
			return undefined;
		}

		// `trim` removes the optional leading space and a trailing `\r`.
		const data = line.slice(fieldPrefix.length).trim();

		let parsed: unknown;
		try {
			parsed = JSON.parse(data);
		} catch {
			this._logService.warn('[CopilotApiService] Failed to parse SSE data:', data);
			return undefined;
		}

		if (typeof parsed !== 'object' || parsed === null) {
			return undefined;
		}

		const record = parsed as Record<string, unknown>;
		const type = record.type;
		if (typeof type !== 'string') {
			return undefined;
		}

		if (type === 'error') {
			const error = (parsed as { error?: { message?: string } }).error;
			throw new Error(error?.message ?? 'Unknown streaming error');
		}

		if (!KNOWN_SSE_EVENT_TYPES.has(type)) {
			return undefined;
		}

		return parsed as Anthropic.MessageStreamEvent;
	}

	// #endregion
}
561
562
/**
 * SSE payload `type` values that are passed through to consumers as Anthropic
 * stream events; payloads with any other `type` are skipped by the parser.
 */
const KNOWN_SSE_EVENT_TYPES = new Set([
	'message_start',
	'message_delta',
	'message_stop',
	'content_block_start',
	'content_block_delta',
	'content_block_stop',
]);
566
567