Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/workbench/contrib/chat/common/model/chatStreamStats.ts
4780 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
import { ILogService } from '../../../../../platform/log/common/log.js';
7
8
/**
 * The externally visible statistics for a chat response stream.
 */
export interface IChatStreamStats {
	/** Estimated rate at which the response text is being produced, in words per second. */
	impliedWordLoadRate: number;
	/** The cumulative word count observed at the most recent update. */
	lastWordCount: number;
}
/**
 * Full internal tracker state, extending the public stats with the bookkeeping
 * needed for the bootstrap phase and interval measurement.
 */
export interface IChatStreamStatsInternal extends IChatStreamStats {
	/** Accumulated elapsed time (ms) counted toward the rate estimate; each interval is capped to ignore long pauses. */
	totalTime: number;
	/** Timestamp (`Date.now()`) of the last processed word-bearing update. */
	lastUpdateTime: number;
	/** Timestamp when the first words were seen, or `undefined` before any text arrives. */
	firstMarkdownTime: number | undefined;
	/** Whether we are still in the bootstrap phase, i.e. applying an assumed minimum total time. */
	bootstrapActive: boolean;
	/** The word count recorded when bootstrap ended; `undefined` while bootstrap is still active. */
	wordCountAtBootstrapExit: number | undefined;
	/** Number of updates that actually carried new words. */
	updatesWithNewWords: number;
}
/**
 * A single progress report fed into the tracker.
 */
export interface IChatStreamUpdate {
	/** The running total word count of the response so far (cumulative, not a delta). */
	totalWordCount: number;
}
// Floor (ms) applied to totalTime while bootstrapping, so the implied rate isn't wildly high.
const MIN_BOOTSTRAP_TOTAL_TIME = 250;
// Larger floor (ms) when the very first update already contains a large chunk of words.
const LARGE_BOOTSTRAP_MIN_TOTAL_TIME = 500;
// Cap (ms) on the measured interval between normal updates — ignores long pauses (tool calls, latency).
const MAX_INTERVAL_TIME = 250;
// Longer interval cap (ms) for updates delivering a large chunk, which naturally takes longer to generate.
const LARGE_UPDATE_MAX_INTERVAL_TIME = 1000;
// Threshold (in words) above which an update/response counts as a "large chunk".
const WORDS_FOR_LARGE_CHUNK = 10;
// Minimum number of word-bearing updates required before the measured rate is considered stable.
const MIN_UPDATES_FOR_STABLE_RATE = 2;
/**
 * Estimates the loading rate of a chat response stream so that we can try to match the rendering rate to
 * the rate at which text is actually produced by the model. This can only be an estimate for various reasons-
 * reasoning summaries don't represent real generated tokens, we don't have full visibility into tool calls,
 * some model providers send text in large chunks rather than a steady stream, e.g. Gemini, we don't know about
 * latency between agent requests, etc.
 *
 * When the first text is received, we don't know how long it actually took to generate. So we apply an assumed
 * minimum time, until we have received enough data to make a stable estimate. This is the "bootstrap" phase.
 *
 * Since we don't have visibility into when the model started generating tool call args, or when the client was running
 * a tool, we ignore long pauses. The ignore period is longer for large chunks, since those naturally take longer
 * to generate anyway.
 *
 * After that, the word load rate is estimated using the words received since the end of the bootstrap phase.
 */
export class ChatStreamStatsTracker {
	// Full internal state, including bootstrap bookkeeping. Replaced wholesale on each update.
	private _data: IChatStreamStatsInternal;
	// Snapshot of the externally visible subset of _data, refreshed on every word-bearing update.
	private _publicData: IChatStreamStats;

	constructor(
		@ILogService private readonly logService: ILogService
	) {
		// Construction time is the baseline for the first interval measurement.
		const start = Date.now();
		this._data = {
			totalTime: 0,
			lastUpdateTime: start,
			impliedWordLoadRate: 0,
			lastWordCount: 0,
			firstMarkdownTime: undefined,
			bootstrapActive: true,
			wordCountAtBootstrapExit: undefined,
			updatesWithNewWords: 0
		};
		this._publicData = { impliedWordLoadRate: 0, lastWordCount: 0 };
	}

	/** The externally visible stats: implied word load rate and last word count. */
	get data(): IChatStreamStats {
		return this._publicData;
	}

	/** Full internal state — presumably exposed for tests/diagnostics; TODO confirm callers. */
	get internalData(): IChatStreamStatsInternal {
		return this._data;
	}

	/**
	 * Feeds the latest cumulative word count into the tracker and recomputes the
	 * implied word load rate.
	 *
	 * @param totals The running total word count of the response so far.
	 * @returns The updated stats, or `undefined` when the word count has not
	 * changed since the previous update (nothing is recomputed in that case).
	 */
	update(totals: IChatStreamUpdate): IChatStreamStats | undefined {
		const { totalWordCount: wordCount } = totals;
		if (wordCount === this._data.lastWordCount) {
			this.trace('Update- no new words');
			return undefined;
		}

		const now = Date.now();
		const newWords = wordCount - this._data.lastWordCount;
		const hadNoWordsBeforeUpdate = this._data.lastWordCount === 0;
		let firstMarkdownTime = this._data.firstMarkdownTime;
		let wordCountAtBootstrapExit = this._data.wordCountAtBootstrapExit;
		// Record the moment the first words arrive — used later as the start of the stable-rate window.
		if (typeof firstMarkdownTime !== 'number' && wordCount > 0) {
			firstMarkdownTime = now;
		}
		const updatesWithNewWords = this._data.updatesWithNewWords + 1;

		// Time spent waiting before any words arrived shouldn't count, so re-base
		// the interval measurement on the first word-bearing update.
		// NOTE: this mutation must happen before timeDiff is computed below.
		if (hadNoWordsBeforeUpdate) {
			this._data.lastUpdateTime = now;
		}

		// Clamp the measured interval so long pauses (tool calls, request latency) are ignored;
		// large chunks get a longer allowance since they naturally take longer to generate.
		const intervalCap = newWords > WORDS_FOR_LARGE_CHUNK ? LARGE_UPDATE_MAX_INTERVAL_TIME : MAX_INTERVAL_TIME;
		const timeDiff = Math.min(now - this._data.lastUpdateTime, intervalCap);
		let totalTime = this._data.totalTime + timeDiff;
		// A larger assumed minimum when the very first update is already a large chunk.
		const minBootstrapTotalTime = hadNoWordsBeforeUpdate && wordCount > WORDS_FOR_LARGE_CHUNK ? LARGE_BOOTSTRAP_MIN_TOTAL_TIME : MIN_BOOTSTRAP_TOTAL_TIME;

		let bootstrapActive = this._data.bootstrapActive;
		if (bootstrapActive) {
			const stableStartTime = firstMarkdownTime;
			// Bootstrap ends once we've seen markdown, enough word-bearing updates,
			// and enough total words to trust the measured rate.
			const hasStableData = typeof stableStartTime === 'number'
				&& updatesWithNewWords >= MIN_UPDATES_FOR_STABLE_RATE
				&& wordCount >= WORDS_FOR_LARGE_CHUNK;
			if (hasStableData) {
				bootstrapActive = false;
				// Re-base elapsed time on the real first-markdown timestamp (but at least this interval).
				totalTime = Math.max(now - stableStartTime, timeDiff);
				wordCountAtBootstrapExit = this._data.lastWordCount;
				this.trace('Has stable data');
			} else {
				// Still bootstrapping: enforce the assumed minimum generation time.
				totalTime = Math.max(totalTime, minBootstrapTotalTime);
			}
		}

		// After bootstrap, only words received since the bootstrap exit drive the rate estimate.
		const wordsSinceBootstrap = typeof wordCountAtBootstrapExit === 'number' ? Math.max(wordCount - wordCountAtBootstrapExit, 0) : wordCount;
		const effectiveTime = totalTime;
		const effectiveWordCount = bootstrapActive ? wordCount : wordsSinceBootstrap;
		// Words per second; guarded against division by zero when no time has accumulated.
		const impliedWordLoadRate = effectiveTime > 0 ? effectiveWordCount / (effectiveTime / 1000) : 0;
		this._data = {
			totalTime,
			lastUpdateTime: now,
			impliedWordLoadRate,
			lastWordCount: wordCount,
			firstMarkdownTime,
			bootstrapActive,
			wordCountAtBootstrapExit,
			updatesWithNewWords
		};
		this._publicData = {
			impliedWordLoadRate,
			lastWordCount: wordCount
		};

		const traceWords = bootstrapActive ? wordCount : wordsSinceBootstrap;
		this.trace(`Update- got ${traceWords} words over last ${totalTime}ms = ${impliedWordLoadRate} words/s`);
		return this._data;
	}

	/** Logs at trace level with a consistent `ChatStreamStatsTracker#update` prefix. */
	private trace(message: string): void {
		this.logService.trace(`ChatStreamStatsTracker#update: ${message}`);
	}
}