Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/workbench/contrib/chat/common/model/chatStreamStats.ts
4780 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
import { ILogService } from '../../../../../platform/log/common/log.js';
7
8
/**
 * The externally visible statistics for a chat response stream.
 */
export interface IChatStreamStats {
	/** Estimated rate at which the response text is being produced, in words per second. */
	impliedWordLoadRate: number;
	/** The cumulative word count observed at the most recent update. */
	lastWordCount: number;
}
/**
 * Full internal tracker state, extending the public stats with the bookkeeping
 * needed for the bootstrap phase and interval measurement.
 */
export interface IChatStreamStatsInternal extends IChatStreamStats {
	/** Accumulated elapsed time (ms) counted toward the rate estimate; each interval is capped to ignore long pauses. */
	totalTime: number;
	/** Timestamp (`Date.now()`) of the last processed word-bearing update. */
	lastUpdateTime: number;
	/** Timestamp when the first words were seen, or `undefined` before any text arrives. */
	firstMarkdownTime: number | undefined;
	/** Whether we are still in the bootstrap phase, i.e. applying an assumed minimum total time. */
	bootstrapActive: boolean;
	/** The word count recorded when bootstrap ended; `undefined` while bootstrap is still active. */
	wordCountAtBootstrapExit: number | undefined;
	/** Number of updates that actually carried new words. */
	updatesWithNewWords: number;
}
/**
 * A single progress report fed into the tracker.
 */
export interface IChatStreamUpdate {
	/** The running total word count of the response so far (cumulative, not a delta). */
	totalWordCount: number;
}
// Floor (ms) applied to totalTime while bootstrapping, so the implied rate isn't wildly high.
const MIN_BOOTSTRAP_TOTAL_TIME = 250;
// Larger floor (ms) when the very first update already contains a large chunk of words.
const LARGE_BOOTSTRAP_MIN_TOTAL_TIME = 500;
// Cap (ms) on the measured interval between normal updates — ignores long pauses (tool calls, latency).
const MAX_INTERVAL_TIME = 250;
// Longer interval cap (ms) for updates delivering a large chunk, which naturally takes longer to generate.
const LARGE_UPDATE_MAX_INTERVAL_TIME = 1000;
// Threshold (in words) above which an update/response counts as a "large chunk".
const WORDS_FOR_LARGE_CHUNK = 10;
// Minimum number of word-bearing updates required before the measured rate is considered stable.
const MIN_UPDATES_FOR_STABLE_RATE = 2;
/**
 * Estimates the loading rate of a chat response stream so that we can try to match the rendering rate to
 * the rate at which text is actually produced by the model. This can only be an estimate for various reasons-
 * reasoning summaries don't represent real generated tokens, we don't have full visibility into tool calls,
 * some model providers send text in large chunks rather than a steady stream, e.g. Gemini, we don't know about
 * latency between agent requests, etc.
 *
 * When the first text is received, we don't know how long it actually took to generate. So we apply an assumed
 * minimum time, until we have received enough data to make a stable estimate. This is the "bootstrap" phase.
 *
 * Since we don't have visibility into when the model started generating tool call args, or when the client was running
 * a tool, we ignore long pauses. The ignore period is longer for large chunks, since those naturally take longer
 * to generate anyway.
 *
 * After that, the word load rate is estimated using the words received since the end of the bootstrap phase.
 */
export class ChatStreamStatsTracker {
	// Full internal state, including bootstrap bookkeeping. Replaced wholesale on each update.
	private _data: IChatStreamStatsInternal;
	// Snapshot of the externally visible subset of _data, refreshed on every word-bearing update.
	private _publicData: IChatStreamStats;

	constructor(
		@ILogService private readonly logService: ILogService
	) {
		// Construction time is the baseline for the first interval measurement.
		const start = Date.now();
		this._data = {
			totalTime: 0,
			lastUpdateTime: start,
			impliedWordLoadRate: 0,
			lastWordCount: 0,
			firstMarkdownTime: undefined,
			bootstrapActive: true,
			wordCountAtBootstrapExit: undefined,
			updatesWithNewWords: 0
		};
		this._publicData = { impliedWordLoadRate: 0, lastWordCount: 0 };
	}

	/** The externally visible stats: implied word load rate and last word count. */
	get data(): IChatStreamStats {
		return this._publicData;
	}

	/** Full internal state — presumably exposed for tests/diagnostics; TODO confirm callers. */
	get internalData(): IChatStreamStatsInternal {
		return this._data;
	}

	/**
	 * Feeds the latest cumulative word count into the tracker and recomputes the
	 * implied word load rate.
	 *
	 * @param totals The running total word count of the response so far.
	 * @returns The updated stats, or `undefined` when the word count has not
	 * changed since the previous update (nothing is recomputed in that case).
	 */
	update(totals: IChatStreamUpdate): IChatStreamStats | undefined {
		const { totalWordCount: wordCount } = totals;
		if (wordCount === this._data.lastWordCount) {
			this.trace('Update- no new words');
			return undefined;
		}

		const now = Date.now();
		const newWords = wordCount - this._data.lastWordCount;
		const hadNoWordsBeforeUpdate = this._data.lastWordCount === 0;
		let firstMarkdownTime = this._data.firstMarkdownTime;
		let wordCountAtBootstrapExit = this._data.wordCountAtBootstrapExit;
		// Record the moment the first words arrive — used later as the start of the stable-rate window.
		if (typeof firstMarkdownTime !== 'number' && wordCount > 0) {
			firstMarkdownTime = now;
		}
		const updatesWithNewWords = this._data.updatesWithNewWords + 1;

		// Time spent waiting before any words arrived shouldn't count, so re-base
		// the interval measurement on the first word-bearing update.
		// NOTE: this mutation must happen before timeDiff is computed below.
		if (hadNoWordsBeforeUpdate) {
			this._data.lastUpdateTime = now;
		}

		// Clamp the measured interval so long pauses (tool calls, request latency) are ignored;
		// large chunks get a longer allowance since they naturally take longer to generate.
		const intervalCap = newWords > WORDS_FOR_LARGE_CHUNK ? LARGE_UPDATE_MAX_INTERVAL_TIME : MAX_INTERVAL_TIME;
		const timeDiff = Math.min(now - this._data.lastUpdateTime, intervalCap);
		let totalTime = this._data.totalTime + timeDiff;
		// A larger assumed minimum when the very first update is already a large chunk.
		const minBootstrapTotalTime = hadNoWordsBeforeUpdate && wordCount > WORDS_FOR_LARGE_CHUNK ? LARGE_BOOTSTRAP_MIN_TOTAL_TIME : MIN_BOOTSTRAP_TOTAL_TIME;

		let bootstrapActive = this._data.bootstrapActive;
		if (bootstrapActive) {
			const stableStartTime = firstMarkdownTime;
			// Bootstrap ends once we've seen markdown, enough word-bearing updates,
			// and enough total words to trust the measured rate.
			const hasStableData = typeof stableStartTime === 'number'
				&& updatesWithNewWords >= MIN_UPDATES_FOR_STABLE_RATE
				&& wordCount >= WORDS_FOR_LARGE_CHUNK;
			if (hasStableData) {
				bootstrapActive = false;
				// Re-base elapsed time on the real first-markdown timestamp (but at least this interval).
				totalTime = Math.max(now - stableStartTime, timeDiff);
				wordCountAtBootstrapExit = this._data.lastWordCount;
				this.trace('Has stable data');
			} else {
				// Still bootstrapping: enforce the assumed minimum generation time.
				totalTime = Math.max(totalTime, minBootstrapTotalTime);
			}
		}

		// After bootstrap, only words received since the bootstrap exit drive the rate estimate.
		const wordsSinceBootstrap = typeof wordCountAtBootstrapExit === 'number' ? Math.max(wordCount - wordCountAtBootstrapExit, 0) : wordCount;
		const effectiveTime = totalTime;
		const effectiveWordCount = bootstrapActive ? wordCount : wordsSinceBootstrap;
		// Words per second; guarded against division by zero when no time has accumulated.
		const impliedWordLoadRate = effectiveTime > 0 ? effectiveWordCount / (effectiveTime / 1000) : 0;
		this._data = {
			totalTime,
			lastUpdateTime: now,
			impliedWordLoadRate,
			lastWordCount: wordCount,
			firstMarkdownTime,
			bootstrapActive,
			wordCountAtBootstrapExit,
			updatesWithNewWords
		};
		this._publicData = {
			impliedWordLoadRate,
			lastWordCount: wordCount
		};

		const traceWords = bootstrapActive ? wordCount : wordsSinceBootstrap;
		this.trace(`Update- got ${traceWords} words over last ${totalTime}ms = ${impliedWordLoadRate} words/s`);
		return this._data;
	}

	/** Logs at trace level with a consistent `ChatStreamStatsTracker#update` prefix. */
	private trace(message: string): void {
		this.logService.trace(`ChatStreamStatsTracker#update: ${message}`);
	}
}