// Source path: src/vs/workbench/contrib/chat/common/model/chatStreamStats.ts
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { ILogService } from '../../../../../platform/log/common/log.js';

/** Stream statistics exposed to consumers of the tracker. */
export interface IChatStreamStats {
	/** Estimated rate at which the stream is producing words, in words/second. */
	impliedWordLoadRate: number;
	/** Total word count observed at the most recent update. */
	lastWordCount: number;
}

/** Full internal bookkeeping carried between updates (also returned by `update`). */
export interface IChatStreamStatsInternal extends IChatStreamStats {
	/** Accumulated streaming time in ms, with long pauses capped (see MAX_INTERVAL_TIME). */
	totalTime: number;
	/** `Date.now()` timestamp of the last processed update. */
	lastUpdateTime: number;
	/** `Date.now()` timestamp when words were first seen, or undefined before any words arrived. */
	firstMarkdownTime: number | undefined;
	/** True while still in the bootstrap phase (no stable rate estimate yet). */
	bootstrapActive: boolean;
	/** Word count recorded at the moment the bootstrap phase exited, if it has exited. */
	wordCountAtBootstrapExit: number | undefined;
	/** Number of updates so far that actually carried new words. */
	updatesWithNewWords: number;
}

/** Shape of a single incoming stream update. */
export interface IChatStreamUpdate {
	totalWordCount: number;
}

// Minimum assumed total time (ms) while bootstrapping with a normal-sized first chunk.
const MIN_BOOTSTRAP_TOTAL_TIME = 250;
// Minimum assumed total time (ms) while bootstrapping when the first chunk is large.
const LARGE_BOOTSTRAP_MIN_TOTAL_TIME = 500;
// Cap (ms) on the time counted between two updates, so long pauses are ignored.
const MAX_INTERVAL_TIME = 250;
// Larger inter-update cap (ms) used when an update delivers a large chunk of words.
const LARGE_UPDATE_MAX_INTERVAL_TIME = 1000;
// Word count above which an update/chunk is considered "large".
const WORDS_FOR_LARGE_CHUNK = 10;
// Minimum number of word-bearing updates before the rate estimate is considered stable.
const MIN_UPDATES_FOR_STABLE_RATE = 2;

/**
 * Estimates the loading rate of a chat response stream so that we can try to match the rendering rate to
 * the rate at which text is actually produced by the model. This can only be an estimate for various reasons-
 * reasoning summaries don't represent real generated tokens, we don't have full visibility into tool calls,
 * some model providers send text in large chunks rather than a steady stream, e.g. Gemini, we don't know about
 * latency between agent requests, etc.
 *
 * When the first text is received, we don't know how long it actually took to generate. So we apply an assumed
 * minimum time, until we have received enough data to make a stable estimate. This is the "bootstrap" phase.
 *
 * Since we don't have visibility into when the model started generated tool call args, or when the client was running
 * a tool, we ignore long pauses. 
The ignore period is longer for large chunks, since those naturally take longer
 * to generate anyway.
 *
 * After that, the word load rate is estimated using the words received since the end of the bootstrap phase.
 */
export class ChatStreamStatsTracker {
	// Full internal state; replaced wholesale on each word-bearing update.
	private _data: IChatStreamStatsInternal;
	// Trimmed-down copy exposed via `data`.
	private _publicData: IChatStreamStats;

	constructor(
		@ILogService private readonly logService: ILogService
	) {
		const start = Date.now();
		this._data = {
			totalTime: 0,
			lastUpdateTime: start,
			impliedWordLoadRate: 0,
			lastWordCount: 0,
			firstMarkdownTime: undefined,
			bootstrapActive: true,
			wordCountAtBootstrapExit: undefined,
			updatesWithNewWords: 0
		};
		this._publicData = { impliedWordLoadRate: 0, lastWordCount: 0 };
	}

	/** The public (reduced) statistics for the stream. */
	get data(): IChatStreamStats {
		return this._publicData;
	}

	/** The full internal statistics, including bootstrap bookkeeping. */
	get internalData(): IChatStreamStatsInternal {
		return this._data;
	}

	/**
	 * Process a new cumulative word count for the stream and recompute the implied word load rate.
	 *
	 * @param totals The update carrying the new total word count.
	 * @returns The updated internal stats, or undefined when the word count did not change.
	 */
	update(totals: IChatStreamUpdate): IChatStreamStats | undefined {
		const { totalWordCount: wordCount } = totals;
		if (wordCount === this._data.lastWordCount) {
			this.trace('Update- no new words');
			return undefined;
		}

		const now = Date.now();
		const newWords = wordCount - this._data.lastWordCount;
		const hadNoWordsBeforeUpdate = this._data.lastWordCount === 0;
		let firstMarkdownTime = this._data.firstMarkdownTime;
		let wordCountAtBootstrapExit = this._data.wordCountAtBootstrapExit;
		// Record the moment the very first words arrive.
		if (typeof firstMarkdownTime !== 'number' && wordCount > 0) {
			firstMarkdownTime = now;
		}
		const updatesWithNewWords = this._data.updatesWithNewWords + 1;

		// On the first word-bearing update, rebase the clock to "now" so that the
		// timeDiff below is 0 — we don't count latency that preceded the first words.
		if (hadNoWordsBeforeUpdate) {
			this._data.lastUpdateTime = now;
		}

		// Cap the counted interval so long pauses (tool calls, etc.) are ignored;
		// large chunks are allowed a longer interval since they take longer to generate.
		const intervalCap = newWords > WORDS_FOR_LARGE_CHUNK ? LARGE_UPDATE_MAX_INTERVAL_TIME : MAX_INTERVAL_TIME;
		const timeDiff = Math.min(now - this._data.lastUpdateTime, intervalCap);
		let totalTime = this._data.totalTime + timeDiff;
		// During bootstrap, assume at least this much time has elapsed (larger floor
		// when the first chunk was already large).
		const minBootstrapTotalTime = hadNoWordsBeforeUpdate && wordCount > WORDS_FOR_LARGE_CHUNK ? LARGE_BOOTSTRAP_MIN_TOTAL_TIME : MIN_BOOTSTRAP_TOTAL_TIME;

		let bootstrapActive = this._data.bootstrapActive;
		if (bootstrapActive) {
			const stableStartTime = firstMarkdownTime;
			// Stable once we have seen first words, enough word-bearing updates, and enough words.
			const hasStableData = typeof stableStartTime === 'number'
				&& updatesWithNewWords >= MIN_UPDATES_FOR_STABLE_RATE
				&& wordCount >= WORDS_FOR_LARGE_CHUNK;
			if (hasStableData) {
				bootstrapActive = false;
				// Re-base total time onto the real elapsed time since the first words
				// (never less than the current interval).
				totalTime = Math.max(now - stableStartTime, timeDiff);
				wordCountAtBootstrapExit = this._data.lastWordCount;
				this.trace('Has stable data');
			} else {
				// Still bootstrapping: enforce the assumed minimum total time.
				totalTime = Math.max(totalTime, minBootstrapTotalTime);
			}
		}

		// After bootstrap, the rate is computed only from words received since bootstrap exit.
		const wordsSinceBootstrap = typeof wordCountAtBootstrapExit === 'number' ? Math.max(wordCount - wordCountAtBootstrapExit, 0) : wordCount;
		const effectiveTime = totalTime;
		const effectiveWordCount = bootstrapActive ? wordCount : wordsSinceBootstrap;
		// words/second; guard against division by zero.
		const impliedWordLoadRate = effectiveTime > 0 ? effectiveWordCount / (effectiveTime / 1000) : 0;
		this._data = {
			totalTime,
			lastUpdateTime: now,
			impliedWordLoadRate,
			lastWordCount: wordCount,
			firstMarkdownTime,
			bootstrapActive,
			wordCountAtBootstrapExit,
			updatesWithNewWords
		};
		this._publicData = {
			impliedWordLoadRate,
			lastWordCount: wordCount
		};

		const traceWords = bootstrapActive ? wordCount : wordsSinceBootstrap;
		this.trace(`Update- got ${traceWords} words over last ${totalTime}ms = ${impliedWordLoadRate} words/s`);
		return this._data;
	}

	// Log with a consistent prefix for easy filtering in the trace output.
	private trace(message: string): void {
		this.logService.trace(`ChatStreamStatsTracker#update: ${message}`);
	}
}