// sagemathinc/cocalc: src/packages/frontend/admin/llm/admin-llm-test.tsx
import {
  Alert,
  Button,
  Input,
  Progress,
  Select,
  Space,
  Table,
  Tooltip,
} from "antd";

import {
  redux,
  useAsyncEffect,
  useState,
  useTypedRedux,
} from "@cocalc/frontend/app-framework";
import { Icon, Loading, Paragraph, Title } from "@cocalc/frontend/components";
import { LLMModelName } from "@cocalc/frontend/components/llm-name";
import { Markdown } from "@cocalc/frontend/markdown";
import { webapp_client } from "@cocalc/frontend/webapp-client";
import {
  USER_SELECTABLE_LLMS_BY_VENDOR,
  isCoreLanguageModel,
  toCustomOpenAIModel,
  toOllamaModel,
} from "@cocalc/util/db-schema/llm-utils";
import { trunc_middle } from "@cocalc/util/misc";
import { COLORS } from "@cocalc/util/theme";
import { PROMPTS } from "./tests";
import { Value } from "./value";

interface TestResult {
  model: string;
  status: "pending" | "running" | "passed" | "failed";
  output: string;
  error?: string;
  firstResponseTime?: number; // Time in milliseconds until first token
  totalTime?: number; // Total time in milliseconds until completion
}
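
// Admin panel for testing the configured language models: it runs a selected
// prompt against each model, streams the output into a results table, and
// records pass/fail (a regex match on the reply) together with response timings.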
export function TestLLMAdmin() {
  const customize = redux.getStore("customize");
  const globallyEnabledLLMs = customize.getEnabledLLMs();
  const selectableLLMs = useTypedRedux("customize", "selectable_llms");
  const ollama = useTypedRedux("customize", "ollama");
  const custom_openai = useTypedRedux("customize", "custom_openai");
  const [test, setTest] = useState<number | null>(0);
  const [querying, setQuerying] = useState<boolean>(false);
  const [testResults, setTestResults] = useState<TestResult[]>([]);
  const [currentTestIndex, setCurrentTestIndex] = useState<number>(0);

  // Initialize test results on component mount or when the selected test changes
  useAsyncEffect(() => {
    if (test !== null) {
      const allModels = getAllModels();
      const initialResults: TestResult[] = allModels.map((model) => ({
        model,
        status: "pending",
        output: "",
      }));
      setTestResults(initialResults);
    } else {
      setTestResults([]);
    }
  }, [test, custom_openai, ollama, selectableLLMs]);
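
  // Collect the names of all models known to this server: core vendor models
  // plus any configured custom OpenAI and Ollama models.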
  function getAllModels(): string[] {
    const models: string[] = [];

    // Get core models
    Object.entries(USER_SELECTABLE_LLMS_BY_VENDOR).forEach(([vendor, llms]) => {
      if (vendor !== "ollama" && vendor !== "custom_openai") {
        llms.filter(isCoreLanguageModel).forEach((llm) => {
          models.push(llm);
        });
      }
    });

    // Get custom OpenAI models
    Object.entries(custom_openai?.toJS() ?? {}).forEach(([key, _val]) => {
      const model = toCustomOpenAIModel(key);
      models.push(model);
    });

    // Get Ollama models
    Object.entries(ollama?.toJS() ?? {}).forEach(([key, _val]) => {
      const model = toOllamaModel(key);
      models.push(model);
    });

    return models;
  }
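
  // Restrict to models an admin has actually enabled: core models must appear
  // in the selectable LLMs list, while custom OpenAI and Ollama models count
  // as enabled simply by being configured.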
  function getEnabledModels(): string[] {
    return getAllModels().filter((model) => {
      // Check if model is enabled in selectable LLMs
      if (isCoreLanguageModel(model)) {
        return selectableLLMs.includes(model);
      }
      // Custom OpenAI and Ollama models are always considered enabled if configured
      return true;
    });
  }
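
  // Run one test against one model. Tokens stream in from the backend and the
  // table is updated live; a null token signals the end of the stream, at which
  // point the full reply is matched against the test's expected regex. The
  // promise always resolves (never rejects), so one failing model cannot abort
  // a sequential run.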
  async function runTestForModel(
    model: string,
    testConfig: any,
  ): Promise<TestResult> {
    const { prompt, expected, system, history } = testConfig;
    const expectedRegex = new RegExp(expected, "g");

    return new Promise((resolve) => {
      try {
        const startTime = Date.now();
        let firstResponseTime: number | undefined;
        let totalTime: number | undefined;

        const llmStream = webapp_client.openai_client.queryStream({
          input: prompt,
          project_id: null,
          tag: "admin-llm-test",
          model,
          system,
          history,
          maxTokens: 20,
        });

        let reply = "";

        llmStream.on("token", (token) => {
          console.log({ model, system, token });
          if (token != null) {
            // Record first response time if this is the first token
            if (firstResponseTime === undefined) {
              firstResponseTime = Date.now() - startTime;
            }
            reply += token;
            // Update the result in real-time
            setTestResults((prev) =>
              prev.map((r) =>
                r.model === model ? { ...r, output: reply } : r,
              ),
            );
          } else {
            // Stream is complete (token is null)
            totalTime = Date.now() - startTime;
            const passed = expectedRegex.test(reply);
            resolve({
              model,
              status: passed ? "passed" : "failed",
              output: reply,
              firstResponseTime,
              totalTime,
            });
          }
        });

        llmStream.on("error", (err) => {
          totalTime = Date.now() - startTime;
          console.error(`Error in LLM stream for model ${model}:`, err);
          resolve({
            model,
            status: "failed",
            output: reply,
            error: err?.toString(),
            firstResponseTime,
            totalTime,
          });
        });

        // Start the stream
        llmStream.emit("start");
      } catch (err) {
        console.error(`Error running test for model ${model}:`, err);
        resolve({
          model,
          status: "failed",
          output: "",
          error: err?.toString(),
        });
      }
    });
  }
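
  // Re-run the currently selected test for a single model (used by the
  // per-row "Run" button in the results table).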
  async function runSingleTest(model: string) {
    if (test === null) return;

    const testConfig = PROMPTS[test];

    // Find the model in the results and update its status
    const modelIndex = testResults.findIndex((r) => r.model === model);
    if (modelIndex === -1) return;

    setCurrentTestIndex(modelIndex);

    // Update status to running
    setTestResults((prev) =>
      prev.map((r, idx) =>
        idx === modelIndex
          ? { ...r, status: "running", output: "", error: undefined }
          : r,
      ),
    );

    const result = await runTestForModel(model, testConfig);

    // Update with final result
    setTestResults((prev) =>
      prev.map((r, idx) => (idx === modelIndex ? result : r)),
    );
  }
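
  // Run the selected test against every enabled model, one at a time, with a
  // short pause between models to avoid hammering the backends.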
  async function runSequentialTests() {
    if (test === null) return;

    const models = getEnabledModels();
    const testConfig = PROMPTS[test];

    // Initialize results
    const initialResults: TestResult[] = models.map((model) => ({
      model,
      status: "pending",
      output: "",
    }));

    setTestResults(initialResults);
    setQuerying(true);
    setCurrentTestIndex(0);

    // Run tests sequentially
    for (let i = 0; i < models.length; i++) {
      setCurrentTestIndex(i);

      // Update status to running
      setTestResults((prev) =>
        prev.map((r, idx) => (idx === i ? { ...r, status: "running" } : r)),
      );

      const result = await runTestForModel(models[i], testConfig);

      // Update with final result
      setTestResults((prev) => prev.map((r, idx) => (idx === i ? result : r)));

      // Add delay between tests to avoid rate limiting
      if (i < models.length - 1) {
        await new Promise((resolve) => setTimeout(resolve, 100));
      }
    }

    setQuerying(false);
  }
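
  // Status icon for a result row: question mark while pending, a spinner while
  // running, and a pass/fail Value marker once the test has finished.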
  function renderTestResultIcon(status: TestResult["status"]) {
    switch (status) {
      case "pending":
        return <Icon unicode={0x2753} />;
      case "running":
        return <Loading text="" />;
      case "passed":
        return <Value val={true} />;
      case "failed":
        return <Value val={false} />;
      default:
        return <Icon unicode={0x2753} />;
    }
  }
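
  // Render milliseconds as seconds with one decimal, e.g. 1234 -> "1.2s".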
  function formatTiming(timeMs: number | undefined): string {
    if (timeMs === undefined) return "-";
    return `${(timeMs / 1000).toFixed(1)}s`;
  }
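
  // Timing cell: shows "first token / total" times plus a progress bar scaled
  // to the slowest completed model; the slowest ~10% are flagged red.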
  function renderTimingColumn(record: TestResult) {
    const { firstResponseTime, totalTime, status } = record;

    if (status === "pending" || status === "running") {
      return <span style={{ color: COLORS.GRAY_M }}>-</span>;
    }

    if (firstResponseTime === undefined || totalTime === undefined) {
      return <span style={{ color: COLORS.GRAY_M }}>-</span>;
    }

    // Calculate progress bar values (scale to the slowest run, but at least 10s)
    const maxTime = Math.max(
      10000,
      ...testResults.filter((r) => r.totalTime).map((r) => r.totalTime!),
    );
    const totalPercent = Math.min(100, (totalTime / maxTime) * 100);

    // Determine if this is one of the slowest (top 10% quantile)
    const completedResults = testResults.filter(
      (r) => r.totalTime !== undefined,
    );
    const sortedTimes = completedResults
      .map((r) => r.totalTime!)
      .sort((a, b) => b - a);
    const slowThreshold = sortedTimes[Math.floor(sortedTimes.length * 0.1)] || 0;
    const isSlow = totalTime >= slowThreshold && completedResults.length > 1;

    return (
      <div>
        <Tooltip title="First response time / Total completion time">
          <div style={{ marginBottom: 2 }}>
            {formatTiming(firstResponseTime)}/{formatTiming(totalTime)}
          </div>
        </Tooltip>
        <Progress
          percent={totalPercent}
          size="small"
          status={isSlow ? "exception" : "normal"}
          showInfo={false}
        />
      </div>
    );
  }
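
  // Results table: status icon, model name, streamed output (rendered as
  // markdown), error, timing, and a per-row "Run" button.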
  function renderTestResults() {
    if (testResults.length === 0) {
      return (
        <Paragraph>
          Click "Run Tests" to execute the selected test on all enabled models.
        </Paragraph>
      );
    }

    const columns = [
      {
        title: "Status",
        dataIndex: "status",
        key: "status",
        width: 80,
        render: (status: TestResult["status"]) => renderTestResultIcon(status),
      },
      {
        title: "Model",
        dataIndex: "model",
        key: "model",
        width: 180,
        render: (model: string /*, record: TestResult*/) => (
          <Space>
            <LLMModelName model={model} />
            {/* {record.status === "running" && <span>(Running...)</span>} */}
          </Space>
        ),
      },
      {
        title: "Output",
        dataIndex: "output",
        key: "output",
        render: (output: string) =>
          output ? (
            <Markdown value={output} />
          ) : (
            <span style={{ color: COLORS.GRAY_M }}>-</span>
          ),
      },
      {
        title: "Error",
        dataIndex: "error",
        key: "error",
        render: (error: string) =>
          error ? (
            <Alert type="error" banner message={error} style={{ margin: 0 }} />
          ) : (
            <span style={{ color: COLORS.GRAY_M }}>-</span>
          ),
      },
      {
        title: "Timing",
        key: "timing",
        width: 120,
        render: (_, record: TestResult) => renderTimingColumn(record),
      },
      {
        title: "Test",
        key: "test",
        width: 80,
        render: (_, record: TestResult) => {
          const isEnabled = getEnabledModels().includes(record.model);
          const isRunning = record.status === "running";
          const isQuerying = querying && record.status === "running";

          return (
            <Button
              type="primary"
              size="small"
              disabled={test === null || !isEnabled || isQuerying}
              loading={isRunning}
              onClick={() => runSingleTest(record.model)}
              style={{ width: "60px" }}
            >
              {isRunning ? "" : "Run"}
            </Button>
          );
        },
      },
    ];

    // Row highlighting for the currently running test is handled by the
    // rowClassName prop on the Table below.
    const dataSource = testResults.map((result) => ({
      ...result,
      key: result.model,
    }));

    return (
      <div>
        <Title level={4}>Test Results</Title>
        <Table
          columns={columns}
          dataSource={dataSource}
          pagination={false}
          size="small"
          rowClassName={(_, index) =>
            index === currentTestIndex && querying
              ? "admin-llm-test-running-row"
              : ""
          }
          style={{ marginTop: "10px" }}
        />
      </div>
    );
  }
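
  // Top-level layout: summary of globally enabled LLMs, test selector with
  // run/clear controls, the results table, and the raw Ollama / custom OpenAI
  // configuration for reference.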
  return (
    <div>
      <Paragraph>
        Globally enabled LLMs (Admin Settings):
        <Value val={globallyEnabledLLMs} />.
      </Paragraph>
      <Paragraph>
        <Space>
          {/* The prompt input is intentionally always disabled; tests are chosen via the dropdown. */}
          <Input
            value={test != null ? PROMPTS[test].prompt : ""}
            disabled={true || querying}
            onChange={(e) => setTest(parseInt(e.target.value))}
            placeholder="Enter a query..."
            addonAfter={
              <Select
                onSelect={setTest}
                defaultValue={0}
                popupMatchSelectWidth={false}
              >
                {PROMPTS.map((p, i) => (
                  <Select.Option key={i} value={i}>
                    {trunc_middle(p.prompt, 25)}
                  </Select.Option>
                ))}
              </Select>
            }
          />
          <Button
            type="primary"
            onClick={runSequentialTests}
            disabled={test == null || querying}
          >
            Run Tests
          </Button>
          <Button
            onClick={() => {
              setTest(null);
              setTestResults([]);
            }}
          >
            Clear
          </Button>
        </Space>
      </Paragraph>

      {renderTestResults()}

      <Title level={5}>Ollama configuration</Title>
      <Value val={ollama} />
      <Title level={5}>Custom OpenAI API</Title>
      <Value val={custom_openai} />
    </div>
  );
}