GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/byok/vscode-node/geminiNativeProvider.ts

/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { ApiError, GenerateContentParameters, GoogleGenAI, Tool, Type } from '@google/genai';
import { CancellationToken, LanguageModelChatInformation, LanguageModelChatMessage, LanguageModelChatMessage2, LanguageModelResponsePart2, LanguageModelTextPart, LanguageModelThinkingPart, LanguageModelToolCallPart, Progress, ProvideLanguageModelChatResponseOptions } from 'vscode';
import { ChatFetchResponseType, ChatLocation } from '../../../platform/chat/common/commonTypes';
import { ILogService } from '../../../platform/log/common/logService';
import { IResponseDelta, OpenAiFunctionTool } from '../../../platform/networking/common/fetch';
import { APIUsage } from '../../../platform/networking/common/openai';
import { CopilotChatAttr, emitInferenceDetailsEvent, GenAiAttr, GenAiMetrics, GenAiOperationName, GenAiProviderName, type OTelModelOptions, StdAttr, toToolDefinitions, truncateForOTel } from '../../../platform/otel/common/index';
import { IOTelService, SpanKind, SpanStatusCode } from '../../../platform/otel/common/otelService';
import { IRequestLogger } from '../../../platform/requestLogger/common/requestLogger';
import { retrieveCapturingTokenByCorrelation, runWithCapturingToken } from '../../../platform/requestLogger/node/requestLogger';
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
import { toErrorMessage } from '../../../util/common/errorMessage';
import { RecordedProgress } from '../../../util/common/progressRecorder';
import { generateUuid } from '../../../util/vs/base/common/uuid';
import { BYOKKnownModels, byokKnownModelsToAPIInfo, BYOKModelCapabilities, LMResponsePart } from '../common/byokProvider';
import { toGeminiFunction as toGeminiFunctionDeclaration, ToolJsonSchema } from '../common/geminiFunctionDeclarationConverter';
import { apiMessageToGeminiMessage, geminiMessagesToRawMessagesForLogging } from '../common/geminiMessageConverter';
import { AbstractLanguageModelChatProvider, ExtendedLanguageModelChatInformation, LanguageModelChatConfiguration } from './abstractLanguageModelChatProvider';
import { IBYOKStorageService } from './byokStorageService';

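/**
 * BYOK (bring-your-own-key) chat provider that talks to the Gemini API natively via the
 * `@google/genai` SDK rather than through an OpenAI-compatible endpoint. It lists the
 * user's available Gemini models, streams chat responses (including thinking parts and
 * tool calls), and records request logging, telemetry, and OpenTelemetry data.
 */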
export class GeminiNativeBYOKLMProvider extends AbstractLanguageModelChatProvider {

	public static readonly providerName = 'Gemini';

	constructor(
		knownModels: BYOKKnownModels | undefined,
		byokStorageService: IBYOKStorageService,
		@ILogService logService: ILogService,
		@IRequestLogger private readonly _requestLogger: IRequestLogger,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IOTelService private readonly _otelService: IOTelService,
	) {
		super(GeminiNativeBYOKLMProvider.providerName.toLowerCase(), GeminiNativeBYOKLMProvider.providerName, knownModels, byokStorageService, logService);
	}

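	/**
	 * Lists the models available for the configured API key via `client.models.list()` and
	 * returns only those present in the known-models registry. When called silently without
	 * an API key, returns an empty list instead of prompting.
	 */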
	protected async getAllModels(silent: boolean, apiKey: string | undefined): Promise<ExtendedLanguageModelChatInformation<LanguageModelChatConfiguration>[]> {
		if (!apiKey && silent) {
			return [];
		}

		try {
			const client = new GoogleGenAI({ apiKey });
			const models = await client.models.list();
			const modelList: Record<string, BYOKModelCapabilities> = {};

			for await (const model of models) {
				const modelId = model.name;
				if (!modelId) {
					continue; // Skip models without names
				}

				// Enable only known models.
				if (this._knownModels && this._knownModels[modelId]) {
					modelList[modelId] = this._knownModels[modelId];
				}
			}
			return byokKnownModelsToAPIInfo(this._name, modelList);
		} catch (e) {
			let error: Error;
			if (e instanceof ApiError) {
				let message = e.message;
				try { message = JSON.parse(message).error?.message; } catch { /* ignore */ }
				error = new Error(message ?? e.message, { cause: e });
			} else {
				error = new Error(toErrorMessage(e, true));
			}
			this._logService.error(error, `Error fetching available ${GeminiNativeBYOKLMProvider.providerName} models`);
			throw error;
		}
	}

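	/**
	 * Handles a chat request against Gemini: converts VS Code messages and tools into the
	 * Gemini request format, streams the response back through `progress`, and records
	 * request logs, OTel spans/metrics, and success/failure telemetry. The work happens in
	 * two nested closures: `doRequest` performs the actual call, and `executeRequest` wraps
	 * it in an OTel span, optionally restoring the parent trace context and CapturingToken.
	 */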
	async provideLanguageModelChatResponse(model: ExtendedLanguageModelChatInformation<LanguageModelChatConfiguration>, messages: Array<LanguageModelChatMessage | LanguageModelChatMessage2>, options: ProvideLanguageModelChatResponseOptions, progress: Progress<LanguageModelResponsePart2>, token: CancellationToken): Promise<any> {
		// Restore CapturingToken context if correlation ID was passed through modelOptions.
		// This handles the case where AsyncLocalStorage context was lost crossing VS Code IPC.
		const correlationId = (options as { modelOptions?: OTelModelOptions }).modelOptions?._capturingTokenCorrelationId;
		const capturingToken = correlationId ? retrieveCapturingTokenByCorrelation(correlationId) : undefined;

		// Restore OTel trace context to link spans back to the agent trace
		const parentTraceContext = (options as { modelOptions?: OTelModelOptions }).modelOptions?._otelTraceContext ?? undefined;

		// OTel span handle — created outside doRequest, enriched inside with usage data
		let otelSpan: ReturnType<typeof this._otelService.startSpan> | undefined;

		const doRequest = async () => {
			const issuedTime = Date.now();
			const apiKey = model.configuration?.apiKey;
			if (!apiKey) {
				throw new Error('API key not found for the model');
			}

			const client = new GoogleGenAI({ apiKey });
			// Convert the messages from the API format into messages that we can use against Gemini
			const { contents, systemInstruction } = apiMessageToGeminiMessage(messages as LanguageModelChatMessage[]);

			const requestId = generateUuid();
			const pendingLoggedChatRequest = this._requestLogger.logChatRequest(
				'GeminiNativeBYOK',
				{
					model: model.id,
					modelMaxPromptTokens: model.maxInputTokens,
					urlOrRequestMetadata: 'https://generativelanguage.googleapis.com',
				},
				{
					model: model.id,
					messages: geminiMessagesToRawMessagesForLogging(contents, systemInstruction),
					ourRequestId: requestId,
					location: ChatLocation.Other,
					body: {
						tools: options.tools?.map((tool): OpenAiFunctionTool => ({
							type: 'function',
							function: {
								name: tool.name,
								description: tool.description,
								parameters: tool.inputSchema
							}
						}))
					}
				});

			// Convert VS Code tools to Gemini function declarations
			const tools: Tool[] = (options.tools ?? []).length > 0 ? [{
				functionDeclarations: (options.tools ?? []).map(tool => {
					if (!tool.inputSchema) {
						return {
							name: tool.name,
							description: tool.description,
							parameters: {
								type: Type.OBJECT,
								properties: {},
								required: []
							}
						};
					}

					// Transform the input schema to match Gemini's expectations
					const finalTool = toGeminiFunctionDeclaration(tool.name, tool.description, tool.inputSchema as ToolJsonSchema);
					finalTool.description = tool.description || finalTool.description;
					return finalTool;
				})
			}] : [];

			// Bridge VS Code cancellation token to Gemini abortSignal for early network termination
			const abortController = new AbortController();
			const cancelSub = token.onCancellationRequested(() => {
				abortController.abort();
				this._logService.trace('Gemini request aborted via VS Code cancellation token');
			});

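			// Assemble the Gemini generateContent request: system instruction, converted tools,
			// the model's output token limit, thought summaries enabled, and the abort signal.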
			const params: GenerateContentParameters = {
				model: model.id,
				contents: contents,
				config: {
					systemInstruction: systemInstruction,
					tools: tools.length > 0 ? tools : undefined,
					maxOutputTokens: model.maxOutputTokens,
					thinkingConfig: {
						includeThoughts: true,
					},
					abortSignal: abortController.signal
				}
			};

			const wrappedProgress = new RecordedProgress(progress);

			try {
				const result = await this._makeRequest(client, wrappedProgress, params, token, issuedTime);
				if (result.ttft) {
					pendingLoggedChatRequest.markTimeToFirstToken(result.ttft);
				}
				pendingLoggedChatRequest.resolve({
					type: ChatFetchResponseType.Success,
					requestId,
					serverRequestId: requestId,
					usage: result.usage,
					resolvedModel: model.id,
					value: ['value'],
				}, wrappedProgress.items.map((i): IResponseDelta => {
					return {
						text: i instanceof LanguageModelTextPart ? i.value : '',
						copilotToolCalls: i instanceof LanguageModelToolCallPart ? [{
							name: i.name,
							arguments: JSON.stringify(i.input),
							id: i.callId
						}] : undefined,
					};
				}));

				// Enrich OTel span with usage data from the Gemini response
				if (otelSpan && result.usage) {
					otelSpan.setAttributes({
						[GenAiAttr.USAGE_INPUT_TOKENS]: result.usage.prompt_tokens ?? 0,
						[GenAiAttr.USAGE_OUTPUT_TOKENS]: result.usage.completion_tokens ?? 0,
						...(result.usage.prompt_tokens_details?.cached_tokens
							? { [GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS]: result.usage.prompt_tokens_details.cached_tokens }
							: {}),
						[GenAiAttr.RESPONSE_MODEL]: model.id,
						[GenAiAttr.RESPONSE_ID]: requestId,
						[GenAiAttr.RESPONSE_FINISH_REASONS]: ['stop'],
						[GenAiAttr.CONVERSATION_ID]: requestId,
						...(result.ttft ? { [CopilotChatAttr.TIME_TO_FIRST_TOKEN]: result.ttft } : {}),
						[GenAiAttr.REQUEST_MAX_TOKENS]: model.maxOutputTokens ?? 0,
					});
					// Opt-in content capture
					if (this._otelService.config.captureContent) {
						const responseText = wrappedProgress.items
							.filter((p): p is LanguageModelTextPart => p instanceof LanguageModelTextPart)
							.map(p => p.value).join('');
						const toolCalls = wrappedProgress.items
							.filter((p): p is LanguageModelToolCallPart => p instanceof LanguageModelToolCallPart)
							.map(tc => ({ type: 'tool_call' as const, id: tc.callId, name: tc.name, arguments: tc.input }));
						const parts: Array<{ type: string; content?: string; id?: string; name?: string; arguments?: unknown }> = [];
						if (responseText) { parts.push({ type: 'text', content: responseText }); }
						parts.push(...toolCalls);
						if (parts.length > 0) {
							otelSpan.setAttribute(GenAiAttr.OUTPUT_MESSAGES, truncateForOTel(JSON.stringify([{ role: 'assistant', parts }])));
						}
					}
				}

				// Record OTel metrics for this Gemini LLM call
				if (result.usage) {
					const durationSec = (Date.now() - issuedTime) / 1000;
					const metricAttrs = { operationName: GenAiOperationName.CHAT, providerName: 'gemini', requestModel: model.id, responseModel: model.id };
					GenAiMetrics.recordOperationDuration(this._otelService, durationSec, metricAttrs);
					if (result.usage.prompt_tokens) { GenAiMetrics.recordTokenUsage(this._otelService, result.usage.prompt_tokens, 'input', metricAttrs); }
					if (result.usage.completion_tokens) { GenAiMetrics.recordTokenUsage(this._otelService, result.usage.completion_tokens, 'output', metricAttrs); }
					if (result.ttft) { GenAiMetrics.recordTimeToFirstToken(this._otelService, model.id, result.ttft / 1000); }
				}

				// Emit OTel inference details event
				emitInferenceDetailsEvent(
					this._otelService,
					{ model: model.id, maxTokens: model.maxOutputTokens },
					result.usage ? {
						id: requestId,
						model: model.id,
						finishReasons: ['stop'],
						inputTokens: result.usage.prompt_tokens,
						outputTokens: result.usage.completion_tokens,
					} : undefined,
				);

				// Send success telemetry matching response.success format
				/* __GDPR__
					"response.success" : {
						"owner": "digitarald",
						"comment": "Report quality details for a successful service response.",
						"reason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response finished" },
						"filterReason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response was filtered" },
						"source": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Source of the initial request" },
						"initiatorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was initiated by a user or an agent" },
						"model": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Model selection for the response" },
						"modelInvoked": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Actual model invoked for the response" },
						"apiType": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "API type for the response - chat completions or responses" },
						"requestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Id of the current turn request" },
						"gitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id if available" },
						"associatedRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Another request ID that this request is associated with (eg, the originating request of a summarization request)." },
						"reasoningEffort": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reasoning effort level" },
						"reasoningSummary": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reasoning summary level" },
						"fetcher": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The fetcher used for the request" },
						"transport": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The transport used for the request (http or websocket)" },
						"totalTokenMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum total token window", "isMeasurement": true },
						"clientPromptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, locally counted", "isMeasurement": true },
						"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, server side counted", "isMeasurement": true },
						"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens hitting cache as reported by server", "isMeasurement": true },
						"tokenCountMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum generated tokens", "isMeasurement": true },
						"tokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of generated tokens", "isMeasurement": true },
						"reasoningTokens": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of reasoning tokens", "isMeasurement": true },
						"acceptedPredictionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the prediction that appeared in the completion", "isMeasurement": true },
						"rejectedPredictionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the prediction that did not appear in the completion", "isMeasurement": true },
						"completionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the output", "isMeasurement": true },
						"timeToFirstToken": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to first token", "isMeasurement": true },
						"timeToFirstTokenEmitted": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to first token emitted (visible text)", "isMeasurement": true },
						"timeToComplete": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to complete the request", "isMeasurement": true },
						"issuedTime": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Timestamp when the request was issued", "isMeasurement": true },
						"isVisionRequest": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether the request was for a vision model", "isMeasurement": true },
						"isBYOK": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was for a BYOK model", "isMeasurement": true },
						"isAuto": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was for an Auto model", "isMeasurement": true },
						"bytesReceived": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of bytes received in the response", "isMeasurement": true },
						"retryAfterError": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Error of the original request." },
						"retryAfterErrorGitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id of the original request if available" },
						"connectivityTestError": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Error of the connectivity test." },
						"connectivityTestErrorGitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id of the connectivity test request if available" },
						"retryAfterFilterCategory": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "If the response was filtered and this is a retry attempt, this contains the original filtered content category." },
						"suspendEventSeen": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether a system suspend event was seen during the request", "isMeasurement": true },
						"resumeEventSeen": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether a system resume event was seen during the request", "isMeasurement": true }
					}
				*/
				this._telemetryService.sendTelemetryEvent('response.success', { github: true, microsoft: true }, {
					source: 'byok.gemini',
					model: model.id,
					requestId,
				}, {
					totalTokenMax: model.maxInputTokens ?? -1,
					tokenCountMax: model.maxOutputTokens ?? -1,
					promptTokenCount: result.usage?.prompt_tokens,
					promptCacheTokenCount: result.usage?.prompt_tokens_details?.cached_tokens,
					tokenCount: result.usage?.total_tokens,
					completionTokens: result.usage?.completion_tokens,
					timeToFirstToken: result.ttft,
					timeToFirstTokenEmitted: result.ttfte,
					timeToComplete: Date.now() - issuedTime,
					issuedTime,
					isBYOK: 1,
				});
			} catch (err) {
				this._logService.error(`BYOK GeminiNative error: ${toErrorMessage(err, true)}`);
				pendingLoggedChatRequest.resolve({
					type: token.isCancellationRequested ? ChatFetchResponseType.Canceled : ChatFetchResponseType.Unknown,
					requestId,
					serverRequestId: requestId,
					reason: token.isCancellationRequested ? 'cancelled' : toErrorMessage(err)
				}, wrappedProgress.items.map((i): IResponseDelta => {
					return {
						text: i instanceof LanguageModelTextPart ? i.value : '',
						copilotToolCalls: i instanceof LanguageModelToolCallPart ? [{
							name: i.name,
							arguments: JSON.stringify(i.input),
							id: i.callId
						}] : undefined,
					};
				}));
				throw err;
			} finally {
				cancelSub.dispose();
			}
		};

		// Create OTel span and execute with trace context + CapturingToken
		const executeRequest = async () => {
			otelSpan = this._otelService.startSpan(`chat ${model.id}`, {
				kind: SpanKind.CLIENT,
				attributes: {
					[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,
					[GenAiAttr.PROVIDER_NAME]: GenAiProviderName.GEMINI,
					[GenAiAttr.REQUEST_MODEL]: model.id,
					[GenAiAttr.AGENT_NAME]: 'GeminiBYOK',
					[CopilotChatAttr.MAX_PROMPT_TOKENS]: model.maxInputTokens,
					[StdAttr.SERVER_ADDRESS]: 'generativelanguage.googleapis.com',
				},
			});
			// Opt-in: capture input messages in OTel GenAI format
			if (this._otelService.config.captureContent) {
				// Tool definitions on the chat span (issue #299934) with `parameters`
				// per OTel GenAI semantic conventions (issue #300318).
				const toolDefs = toToolDefinitions(options.tools);
				if (toolDefs) {
					otelSpan.setAttribute(GenAiAttr.TOOL_DEFINITIONS, truncateForOTel(JSON.stringify(toolDefs)));
				}
				try {
					const roleNames: Record<number, string> = { 1: 'user', 2: 'assistant', 3: 'system' };
					const inputMsgs = messages.map(m => {
						const msg = m as LanguageModelChatMessage;
						const role = roleNames[msg.role] ?? String(msg.role);
						const parts: Array<{ type: string; content?: string; id?: string; name?: string; arguments?: unknown }> = [];
						if (Array.isArray(msg.content)) {
							for (const p of msg.content) {
								if (p instanceof LanguageModelTextPart) {
									parts.push({ type: 'text', content: p.value });
								} else if (p instanceof LanguageModelToolCallPart) {
									parts.push({ type: 'tool_call', id: p.callId, name: p.name, arguments: p.input });
								}
							}
						}
						if (parts.length === 0) {
							parts.push({ type: 'text', content: '[non-text content]' });
						}
						return { role, parts };
					});
					otelSpan.setAttribute(GenAiAttr.INPUT_MESSAGES, truncateForOTel(JSON.stringify(inputMsgs)));
				} catch { /* swallow */ }
			}
			try {
				const result = capturingToken
					? await runWithCapturingToken(capturingToken, doRequest)
					: await doRequest();
				otelSpan.setStatus(SpanStatusCode.OK);
				return result;
			} catch (err) {
				otelSpan.setStatus(SpanStatusCode.ERROR, err instanceof Error ? err.message : String(err));
				throw err;
			} finally {
				otelSpan.end();
			}
		};

		if (parentTraceContext) {
			return this._otelService.runWithTraceContext(parentTraceContext, executeRequest);
		}
		return executeRequest();
	}

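	/**
	 * Rough token count estimate (~4 characters per token). Note that for message objects the
	 * default `toString()` is used, so this is a coarse heuristic rather than an exact count.
	 */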
	async provideTokenCount(model: LanguageModelChatInformation, text: string | LanguageModelChatMessage | LanguageModelChatMessage2, token: CancellationToken): Promise<number> {
		// Simple estimation for approximate token count - actual token count would require Gemini's tokenizer
		return Math.ceil(text.toString().length / 4);
	}

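	/**
	 * Streams a generateContent request and reports parts to `progress` as they arrive:
	 * thought summaries as LanguageModelThinkingPart, text as LanguageModelTextPart, and
	 * function calls as LanguageModelToolCallPart (emitting any pending thought signature
	 * first). Returns time-to-first-token, time-to-first-emitted-token, and accumulated usage.
	 */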
	private async _makeRequest(client: GoogleGenAI, progress: Progress<LMResponsePart>, params: GenerateContentParameters, token: CancellationToken, issuedTime: number): Promise<{ ttft: number | undefined; ttfte: number | undefined; usage: APIUsage | undefined }> {
		const start = Date.now();
		let ttft: number | undefined;
		let ttfte: number | undefined;
		let usage: APIUsage | undefined;

		try {
			const stream = await client.models.generateContentStream(params);

			let pendingThinkingSignature: string | undefined;

			for await (const chunk of stream) {
				if (token.isCancellationRequested) {
					break;
				}

				if (ttft === undefined) {
					ttft = Date.now() - start;
				}

				this._logService.trace(`Gemini chunk: ${JSON.stringify(chunk)}`);

				// Process the streaming response chunks
				if (chunk.candidates && chunk.candidates.length > 0) {
					// choose the primary candidate
					const candidate = chunk.candidates[0];

					if (candidate.content && candidate.content.parts) {
						for (const part of candidate.content.parts) {
							// First, capture thought signature from this part (if present)
							if ('thoughtSignature' in part && part.thoughtSignature) {
								pendingThinkingSignature = part.thoughtSignature as string;
							}
							// Now handle the actual content parts
							if ('thought' in part && part.thought === true && part.text) {
								// Handle thinking/reasoning content from Gemini API
								if (ttfte === undefined) {
									ttfte = Date.now() - issuedTime;
								}
								progress.report(new LanguageModelThinkingPart(part.text));
							} else if (part.text) {
								if (ttfte === undefined) {
									ttfte = Date.now() - issuedTime;
								}
								progress.report(new LanguageModelTextPart(part.text));
							} else if (part.functionCall && part.functionCall.name) {
								// Gemini 3 includes thought signatures for function calling
								// If we have a pending signature, emit it as a thinking part with metadata.signature
								if (pendingThinkingSignature) {
									const thinkingPart = new LanguageModelThinkingPart('', undefined, { signature: pendingThinkingSignature });
									progress.report(thinkingPart);
									pendingThinkingSignature = undefined;
								}

								if (ttfte === undefined) {
									ttfte = Date.now() - issuedTime;
								}
								progress.report(new LanguageModelToolCallPart(
									generateUuid(),
									part.functionCall.name,
									part.functionCall.args || {}
								));
							}
						}
					}
				}

				// Extract usage information if available in the chunk
				// Initialize on first chunk with usageMetadata, then update incrementally
				// This ensures we capture prompt token info even if stream is cancelled mid-way
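				// Illustrative mapping (hypothetical numbers): usageMetadata { promptTokenCount: 1200,
				// candidatesTokenCount: 350, thoughtsTokenCount: 80, totalTokenCount: 1630 } becomes
				// usage { prompt_tokens: 1200, completion_tokens: 430 (350 + 80), total_tokens: 1630 }.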
				if (chunk.usageMetadata) {
					const promptTokens = chunk.usageMetadata.promptTokenCount;
					// For thinking models (e.g., gemini-3-pro-high), candidatesTokenCount only includes
					// regular output tokens. thoughtsTokenCount contains the thinking/reasoning tokens.
					// We include both in the completion token count.
					const candidateTokens = chunk.usageMetadata.candidatesTokenCount ?? 0;
					const thoughtTokens = chunk.usageMetadata.thoughtsTokenCount ?? 0;
					const completionTokens = candidateTokens + thoughtTokens > 0 ? candidateTokens + thoughtTokens : undefined;
					const cachedTokens = chunk.usageMetadata.cachedContentTokenCount;

					if (!usage) {
						// Initialize usage on first chunk - use -1 as sentinel for unavailable values
						usage = {
							completion_tokens: completionTokens ?? -1,
							prompt_tokens: promptTokens ?? -1,
							total_tokens: chunk.usageMetadata.totalTokenCount ?? -1,
							prompt_tokens_details: {
								cached_tokens: cachedTokens ?? 0,
							}
						};
					} else {
						// Update with latest values, preserving existing non-sentinel values
						if (promptTokens !== undefined) {
							usage.prompt_tokens = promptTokens;
						}
						if (completionTokens !== undefined) {
							usage.completion_tokens = completionTokens;
						}
						if (chunk.usageMetadata.totalTokenCount !== undefined) {
							usage.total_tokens = chunk.usageMetadata.totalTokenCount;
						} else if (usage.prompt_tokens !== -1 && usage.completion_tokens !== -1) {
							usage.total_tokens = usage.prompt_tokens + usage.completion_tokens;
						}
						if (cachedTokens !== undefined) {
							usage.prompt_tokens_details!.cached_tokens = cachedTokens;
						}
					}
				}
			}

			return { ttft, ttfte, usage };
		} catch (error) {
			if ((error as any)?.name === 'AbortError' || token.isCancellationRequested) {
				this._logService.trace('Gemini streaming aborted');
				// Return partial usage data collected before cancellation
				return { ttft, ttfte, usage };
			}
			this._logService.error(`Gemini streaming error: ${toErrorMessage(error, true)}`);
			throw error;
		}
	}
}