Path: blob/main/extensions/copilot/src/extension/byok/vscode-node/geminiNativeProvider.ts
13399 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { ApiError, GenerateContentParameters, GoogleGenAI, Tool, Type } from '@google/genai';6import { CancellationToken, LanguageModelChatInformation, LanguageModelChatMessage, LanguageModelChatMessage2, LanguageModelResponsePart2, LanguageModelTextPart, LanguageModelThinkingPart, LanguageModelToolCallPart, Progress, ProvideLanguageModelChatResponseOptions } from 'vscode';7import { ChatFetchResponseType, ChatLocation } from '../../../platform/chat/common/commonTypes';8import { ILogService } from '../../../platform/log/common/logService';9import { IResponseDelta, OpenAiFunctionTool } from '../../../platform/networking/common/fetch';10import { APIUsage } from '../../../platform/networking/common/openai';11import { CopilotChatAttr, emitInferenceDetailsEvent, GenAiAttr, GenAiMetrics, GenAiOperationName, GenAiProviderName, type OTelModelOptions, StdAttr, toToolDefinitions, truncateForOTel } from '../../../platform/otel/common/index';12import { IOTelService, SpanKind, SpanStatusCode } from '../../../platform/otel/common/otelService';13import { IRequestLogger } from '../../../platform/requestLogger/common/requestLogger';14import { retrieveCapturingTokenByCorrelation, runWithCapturingToken } from '../../../platform/requestLogger/node/requestLogger';15import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';16import { toErrorMessage } from '../../../util/common/errorMessage';17import { RecordedProgress } from '../../../util/common/progressRecorder';18import { generateUuid } from '../../../util/vs/base/common/uuid';19import { BYOKKnownModels, byokKnownModelsToAPIInfo, BYOKModelCapabilities, LMResponsePart } from '../common/byokProvider';20import { toGeminiFunction as toGeminiFunctionDeclaration, ToolJsonSchema } from '../common/geminiFunctionDeclarationConverter';21import { apiMessageToGeminiMessage, geminiMessagesToRawMessagesForLogging } from '../common/geminiMessageConverter';22import { AbstractLanguageModelChatProvider, ExtendedLanguageModelChatInformation, LanguageModelChatConfiguration } from './abstractLanguageModelChatProvider';23import { IBYOKStorageService } from './byokStorageService';2425export class GeminiNativeBYOKLMProvider extends AbstractLanguageModelChatProvider {2627public static readonly providerName = 'Gemini';2829constructor(30knownModels: BYOKKnownModels | undefined,31byokStorageService: IBYOKStorageService,32@ILogService logService: ILogService,33@IRequestLogger private readonly _requestLogger: IRequestLogger,34@ITelemetryService private readonly _telemetryService: ITelemetryService,35@IOTelService private readonly _otelService: IOTelService,36) {37super(GeminiNativeBYOKLMProvider.providerName.toLowerCase(), GeminiNativeBYOKLMProvider.providerName, knownModels, byokStorageService, logService);38}3940protected async getAllModels(silent: boolean, apiKey: string | undefined): Promise<ExtendedLanguageModelChatInformation<LanguageModelChatConfiguration>[]> {41if (!apiKey && silent) {42return [];43}4445try {46const client = new GoogleGenAI({ apiKey });47const models = await client.models.list();48const modelList: Record<string, BYOKModelCapabilities> = {};4950for await (const model of models) {51const modelId = model.name;52if (!modelId) {53continue; // Skip models without names54}5556// Enable only known models.57if (this._knownModels && this._knownModels[modelId]) {58modelList[modelId] = this._knownModels[modelId];59}60}61return byokKnownModelsToAPIInfo(this._name, modelList);62} catch (e) {63let error: Error;64if (e instanceof ApiError) {65let message = e.message;66try { message = JSON.parse(message).error?.message; } catch { /* ignore */ }67error = new Error(message ?? e.message, { cause: e });68} else {69error = new Error(toErrorMessage(e, true));70}71this._logService.error(error, `Error fetching available ${GeminiNativeBYOKLMProvider.providerName} models`);72throw error;73}74}7576async provideLanguageModelChatResponse(model: ExtendedLanguageModelChatInformation<LanguageModelChatConfiguration>, messages: Array<LanguageModelChatMessage | LanguageModelChatMessage2>, options: ProvideLanguageModelChatResponseOptions, progress: Progress<LanguageModelResponsePart2>, token: CancellationToken): Promise<any> {77// Restore CapturingToken context if correlation ID was passed through modelOptions.78// This handles the case where AsyncLocalStorage context was lost crossing VS Code IPC.79const correlationId = (options as { modelOptions?: OTelModelOptions }).modelOptions?._capturingTokenCorrelationId;80const capturingToken = correlationId ? retrieveCapturingTokenByCorrelation(correlationId) : undefined;8182// Restore OTel trace context to link spans back to the agent trace83const parentTraceContext = (options as { modelOptions?: OTelModelOptions }).modelOptions?._otelTraceContext ?? undefined;8485// OTel span handle — created outside doRequest, enriched inside with usage data86let otelSpan: ReturnType<typeof this._otelService.startSpan> | undefined;8788const doRequest = async () => {89const issuedTime = Date.now();90const apiKey = model.configuration?.apiKey;91if (!apiKey) {92throw new Error('API key not found for the model');93}9495const client = new GoogleGenAI({ apiKey });96// Convert the messages from the API format into messages that we can use against Gemini97const { contents, systemInstruction } = apiMessageToGeminiMessage(messages as LanguageModelChatMessage[]);9899const requestId = generateUuid();100const pendingLoggedChatRequest = this._requestLogger.logChatRequest(101'GeminiNativeBYOK',102{103model: model.id,104modelMaxPromptTokens: model.maxInputTokens,105urlOrRequestMetadata: 'https://generativelanguage.googleapis.com',106},107{108model: model.id,109messages: geminiMessagesToRawMessagesForLogging(contents, systemInstruction),110ourRequestId: requestId,111location: ChatLocation.Other,112body: {113tools: options.tools?.map((tool): OpenAiFunctionTool => ({114type: 'function',115function: {116name: tool.name,117description: tool.description,118parameters: tool.inputSchema119}120}))121}122});123124// Convert VS Code tools to Gemini function declarations125const tools: Tool[] = (options.tools ?? []).length > 0 ? [{126functionDeclarations: (options.tools ?? []).map(tool => {127if (!tool.inputSchema) {128return {129name: tool.name,130description: tool.description,131parameters: {132type: Type.OBJECT,133properties: {},134required: []135}136};137}138139// Transform the input schema to match Gemini's expectations140const finalTool = toGeminiFunctionDeclaration(tool.name, tool.description, tool.inputSchema as ToolJsonSchema);141finalTool.description = tool.description || finalTool.description;142return finalTool;143})144}] : [];145146// Bridge VS Code cancellation token to Gemini abortSignal for early network termination147const abortController = new AbortController();148const cancelSub = token.onCancellationRequested(() => {149abortController.abort();150this._logService.trace('Gemini request aborted via VS Code cancellation token');151});152153const params: GenerateContentParameters = {154model: model.id,155contents: contents,156config: {157systemInstruction: systemInstruction,158tools: tools.length > 0 ? tools : undefined,159maxOutputTokens: model.maxOutputTokens,160thinkingConfig: {161includeThoughts: true,162},163abortSignal: abortController.signal164}165};166167const wrappedProgress = new RecordedProgress(progress);168169try {170const result = await this._makeRequest(client, wrappedProgress, params, token, issuedTime);171if (result.ttft) {172pendingLoggedChatRequest.markTimeToFirstToken(result.ttft);173}174pendingLoggedChatRequest.resolve({175type: ChatFetchResponseType.Success,176requestId,177serverRequestId: requestId,178usage: result.usage,179resolvedModel: model.id,180value: ['value'],181}, wrappedProgress.items.map((i): IResponseDelta => {182return {183text: i instanceof LanguageModelTextPart ? i.value : '',184copilotToolCalls: i instanceof LanguageModelToolCallPart ? [{185name: i.name,186arguments: JSON.stringify(i.input),187id: i.callId188}] : undefined,189};190}));191192// Enrich OTel span with usage data from the Gemini response193if (otelSpan && result.usage) {194otelSpan.setAttributes({195[GenAiAttr.USAGE_INPUT_TOKENS]: result.usage.prompt_tokens ?? 0,196[GenAiAttr.USAGE_OUTPUT_TOKENS]: result.usage.completion_tokens ?? 0,197...(result.usage.prompt_tokens_details?.cached_tokens198? { [GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS]: result.usage.prompt_tokens_details.cached_tokens }199: {}),200[GenAiAttr.RESPONSE_MODEL]: model.id,201[GenAiAttr.RESPONSE_ID]: requestId,202[GenAiAttr.RESPONSE_FINISH_REASONS]: ['stop'],203[GenAiAttr.CONVERSATION_ID]: requestId,204...(result.ttft ? { [CopilotChatAttr.TIME_TO_FIRST_TOKEN]: result.ttft } : {}),205[GenAiAttr.REQUEST_MAX_TOKENS]: model.maxOutputTokens ?? 0,206});207// Opt-in content capture208if (this._otelService.config.captureContent) {209const responseText = wrappedProgress.items210.filter((p): p is LanguageModelTextPart => p instanceof LanguageModelTextPart)211.map(p => p.value).join('');212const toolCalls = wrappedProgress.items213.filter((p): p is LanguageModelToolCallPart => p instanceof LanguageModelToolCallPart)214.map(tc => ({ type: 'tool_call' as const, id: tc.callId, name: tc.name, arguments: tc.input }));215const parts: Array<{ type: string; content?: string; id?: string; name?: string; arguments?: unknown }> = [];216if (responseText) { parts.push({ type: 'text', content: responseText }); }217parts.push(...toolCalls);218if (parts.length > 0) {219otelSpan.setAttribute(GenAiAttr.OUTPUT_MESSAGES, truncateForOTel(JSON.stringify([{ role: 'assistant', parts }])));220}221}222}223224// Record OTel metrics for this Gemini LLM call225if (result.usage) {226const durationSec = (Date.now() - issuedTime) / 1000;227const metricAttrs = { operationName: GenAiOperationName.CHAT, providerName: 'gemini', requestModel: model.id, responseModel: model.id };228GenAiMetrics.recordOperationDuration(this._otelService, durationSec, metricAttrs);229if (result.usage.prompt_tokens) { GenAiMetrics.recordTokenUsage(this._otelService, result.usage.prompt_tokens, 'input', metricAttrs); }230if (result.usage.completion_tokens) { GenAiMetrics.recordTokenUsage(this._otelService, result.usage.completion_tokens, 'output', metricAttrs); }231if (result.ttft) { GenAiMetrics.recordTimeToFirstToken(this._otelService, model.id, result.ttft / 1000); }232}233234// Emit OTel inference details event235emitInferenceDetailsEvent(236this._otelService,237{ model: model.id, maxTokens: model.maxOutputTokens },238result.usage ? {239id: requestId,240model: model.id,241finishReasons: ['stop'],242inputTokens: result.usage.prompt_tokens,243outputTokens: result.usage.completion_tokens,244} : undefined,245);246247// Send success telemetry matching response.success format248/* __GDPR__249"response.success" : {250"owner": "digitarald",251"comment": "Report quality details for a successful service response.",252"reason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response finished" },253"filterReason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reason for why a response was filtered" },254"source": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Source of the initial request" },255"initiatorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was initiated by a user or an agent" },256"model": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Model selection for the response" },257"modelInvoked": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Actual model invoked for the response" },258"apiType": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "API type for the response- chat completions or responses" },259"requestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Id of the current turn request" },260"gitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id if available" },261"associatedRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Another request ID that this request is associated with (eg, the originating request of a summarization request)." },262"reasoningEffort": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reasoning effort level" },263"reasoningSummary": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Reasoning summary level" },264"fetcher": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The fetcher used for the request" },265"transport": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The transport used for the request (http or websocket)" },266"totalTokenMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum total token window", "isMeasurement": true },267"clientPromptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, locally counted", "isMeasurement": true },268"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, server side counted", "isMeasurement": true },269"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens hitting cache as reported by server", "isMeasurement": true },270"tokenCountMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum generated tokens", "isMeasurement": true },271"tokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of generated tokens", "isMeasurement": true },272"reasoningTokens": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of reasoning tokens", "isMeasurement": true },273"acceptedPredictionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the prediction that appeared in the completion", "isMeasurement": true },274"rejectedPredictionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the prediction that appeared in the completion", "isMeasurement": true },275"completionTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Number of tokens in the output", "isMeasurement": true },276"timeToFirstToken": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to first token", "isMeasurement": true },277"timeToFirstTokenEmitted": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to first token emitted (visible text)", "isMeasurement": true },278"timeToComplete": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Time to complete the request", "isMeasurement": true },279"issuedTime": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Timestamp when the request was issued", "isMeasurement": true },280"isVisionRequest": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether the request was for a vision model", "isMeasurement": true },281"isBYOK": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was for a BYOK model", "isMeasurement": true },282"isAuto": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the request was for an Auto model", "isMeasurement": true },283"bytesReceived": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of bytes received in the response", "isMeasurement": true },284"retryAfterError": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Error of the original request." },285"retryAfterErrorGitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id of the original request if available" },286"connectivityTestError": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Error of the connectivity test." },287"connectivityTestErrorGitHubRequestId": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "GitHub request id of the connectivity test request if available" },288"retryAfterFilterCategory": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "If the response was filtered and this is a retry attempt, this contains the original filtered content category." },289"suspendEventSeen": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether a system suspend event was seen during the request", "isMeasurement": true },290"resumeEventSeen": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether a system resume event was seen during the request", "isMeasurement": true }291}292*/293this._telemetryService.sendTelemetryEvent('response.success', { github: true, microsoft: true }, {294source: 'byok.gemini',295model: model.id,296requestId,297}, {298totalTokenMax: model.maxInputTokens ?? -1,299tokenCountMax: model.maxOutputTokens ?? -1,300promptTokenCount: result.usage?.prompt_tokens,301promptCacheTokenCount: result.usage?.prompt_tokens_details?.cached_tokens,302tokenCount: result.usage?.total_tokens,303completionTokens: result.usage?.completion_tokens,304timeToFirstToken: result.ttft,305timeToFirstTokenEmitted: result.ttfte,306timeToComplete: Date.now() - issuedTime,307issuedTime,308isBYOK: 1,309});310} catch (err) {311this._logService.error(`BYOK GeminiNative error: ${toErrorMessage(err, true)}`);312pendingLoggedChatRequest.resolve({313type: token.isCancellationRequested ? ChatFetchResponseType.Canceled : ChatFetchResponseType.Unknown,314requestId,315serverRequestId: requestId,316reason: token.isCancellationRequested ? 'cancelled' : toErrorMessage(err)317}, wrappedProgress.items.map((i): IResponseDelta => {318return {319text: i instanceof LanguageModelTextPart ? i.value : '',320copilotToolCalls: i instanceof LanguageModelToolCallPart ? [{321name: i.name,322arguments: JSON.stringify(i.input),323id: i.callId324}] : undefined,325};326}));327throw err;328} finally {329cancelSub.dispose();330}331};332333// Create OTel span and execute with trace context + CapturingToken334const executeRequest = async () => {335otelSpan = this._otelService.startSpan(`chat ${model.id}`, {336kind: SpanKind.CLIENT,337attributes: {338[GenAiAttr.OPERATION_NAME]: GenAiOperationName.CHAT,339[GenAiAttr.PROVIDER_NAME]: GenAiProviderName.GEMINI,340[GenAiAttr.REQUEST_MODEL]: model.id,341[GenAiAttr.AGENT_NAME]: 'GeminiBYOK',342[CopilotChatAttr.MAX_PROMPT_TOKENS]: model.maxInputTokens,343[StdAttr.SERVER_ADDRESS]: 'generativelanguage.googleapis.com',344},345});346// Opt-in: capture input messages in OTel GenAI format347if (this._otelService.config.captureContent) {348// Tool definitions on the chat span (issue #299934) with `parameters`349// per OTel GenAI semantic conventions (issue #300318).350const toolDefs = toToolDefinitions(options.tools);351if (toolDefs) {352otelSpan.setAttribute(GenAiAttr.TOOL_DEFINITIONS, truncateForOTel(JSON.stringify(toolDefs)));353}354try {355const roleNames: Record<number, string> = { 1: 'user', 2: 'assistant', 3: 'system' };356const inputMsgs = messages.map(m => {357const msg = m as LanguageModelChatMessage;358const role = roleNames[msg.role] ?? String(msg.role);359const parts: Array<{ type: string; content?: string; id?: string; name?: string; arguments?: unknown }> = [];360if (Array.isArray(msg.content)) {361for (const p of msg.content) {362if (p instanceof LanguageModelTextPart) {363parts.push({ type: 'text', content: p.value });364} else if (p instanceof LanguageModelToolCallPart) {365parts.push({ type: 'tool_call', id: p.callId, name: p.name, arguments: p.input });366}367}368}369if (parts.length === 0) {370parts.push({ type: 'text', content: '[non-text content]' });371}372return { role, parts };373});374otelSpan.setAttribute(GenAiAttr.INPUT_MESSAGES, truncateForOTel(JSON.stringify(inputMsgs)));375} catch { /* swallow */ }376}377try {378const result = capturingToken379? await runWithCapturingToken(capturingToken, doRequest)380: await doRequest();381otelSpan.setStatus(SpanStatusCode.OK);382return result;383} catch (err) {384otelSpan.setStatus(SpanStatusCode.ERROR, err instanceof Error ? err.message : String(err));385throw err;386} finally {387otelSpan.end();388}389};390391if (parentTraceContext) {392return this._otelService.runWithTraceContext(parentTraceContext, executeRequest);393}394return executeRequest();395}396397async provideTokenCount(model: LanguageModelChatInformation, text: string | LanguageModelChatMessage | LanguageModelChatMessage2, token: CancellationToken): Promise<number> {398// Simple estimation for approximate token count - actual token count would require Gemini's tokenizer399return Math.ceil(text.toString().length / 4);400}401402private async _makeRequest(client: GoogleGenAI, progress: Progress<LMResponsePart>, params: GenerateContentParameters, token: CancellationToken, issuedTime: number): Promise<{ ttft: number | undefined; ttfte: number | undefined; usage: APIUsage | undefined }> {403const start = Date.now();404let ttft: number | undefined;405let ttfte: number | undefined;406let usage: APIUsage | undefined;407408try {409const stream = await client.models.generateContentStream(params);410411let pendingThinkingSignature: string | undefined;412413for await (const chunk of stream) {414if (token.isCancellationRequested) {415break;416}417418if (ttft === undefined) {419ttft = Date.now() - start;420}421422this._logService.trace(`Gemini chunk: ${JSON.stringify(chunk)}`);423424// Process the streaming response chunks425if (chunk.candidates && chunk.candidates.length > 0) {426// choose the primary candidate427const candidate = chunk.candidates[0];428429if (candidate.content && candidate.content.parts) {430for (const part of candidate.content.parts) {431// First, capture thought signature from this part (if present)432if ('thoughtSignature' in part && part.thoughtSignature) {433pendingThinkingSignature = part.thoughtSignature as string;434}435// Now handle the actual content parts436if ('thought' in part && part.thought === true && part.text) {437// Handle thinking/reasoning content from Gemini API438if (ttfte === undefined) {439ttfte = Date.now() - issuedTime;440}441progress.report(new LanguageModelThinkingPart(part.text));442} else if (part.text) {443if (ttfte === undefined) {444ttfte = Date.now() - issuedTime;445}446progress.report(new LanguageModelTextPart(part.text));447} else if (part.functionCall && part.functionCall.name) {448// Gemini 3 includes thought signatures for function calling449// If we have a pending signature, emit it as a thinking part with metadata.signature450if (pendingThinkingSignature) {451const thinkingPart = new LanguageModelThinkingPart('', undefined, { signature: pendingThinkingSignature });452progress.report(thinkingPart);453pendingThinkingSignature = undefined;454}455456if (ttfte === undefined) {457ttfte = Date.now() - issuedTime;458}459progress.report(new LanguageModelToolCallPart(460generateUuid(),461part.functionCall.name,462part.functionCall.args || {}463));464}465}466}467}468469// Extract usage information if available in the chunk470// Initialize on first chunk with usageMetadata, then update incrementally471// This ensures we capture prompt token info even if stream is cancelled mid-way472if (chunk.usageMetadata) {473const promptTokens = chunk.usageMetadata.promptTokenCount;474// For thinking models (e.g., gemini-3-pro-high), candidatesTokenCount only includes475// regular output tokens. thoughtsTokenCount contains the thinking/reasoning tokens.476// We include both in the completion token count.477const candidateTokens = chunk.usageMetadata.candidatesTokenCount ?? 0;478const thoughtTokens = chunk.usageMetadata.thoughtsTokenCount ?? 0;479const completionTokens = candidateTokens + thoughtTokens > 0 ? candidateTokens + thoughtTokens : undefined;480const cachedTokens = chunk.usageMetadata.cachedContentTokenCount;481482if (!usage) {483// Initialize usage on first chunk - use -1 as sentinel for unavailable values484usage = {485completion_tokens: completionTokens ?? -1,486prompt_tokens: promptTokens ?? -1,487total_tokens: chunk.usageMetadata.totalTokenCount ?? -1,488prompt_tokens_details: {489cached_tokens: cachedTokens ?? 0,490}491};492} else {493// Update with latest values, preserving existing non-sentinel values494if (promptTokens !== undefined) {495usage.prompt_tokens = promptTokens;496}497if (completionTokens !== undefined) {498usage.completion_tokens = completionTokens;499}500if (chunk.usageMetadata.totalTokenCount !== undefined) {501usage.total_tokens = chunk.usageMetadata.totalTokenCount;502} else if (usage.prompt_tokens !== -1 && usage.completion_tokens !== -1) {503usage.total_tokens = usage.prompt_tokens + usage.completion_tokens;504}505if (cachedTokens !== undefined) {506usage.prompt_tokens_details!.cached_tokens = cachedTokens;507}508}509}510}511512return { ttft, ttfte, usage };513} catch (error) {514if ((error as any)?.name === 'AbortError' || token.isCancellationRequested) {515this._logService.trace('Gemini streaming aborted');516// Return partial usage data collected before cancellation517return { ttft, ttfte, usage };518}519this._logService.error(`Gemini streaming error: ${toErrorMessage(error, true)}`);520throw error;521}522}523}524525526