Path: blob/master/src/packages/frontend/admin/llm/admin-llm-test.tsx
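// Admin-only panel for testing the configured LLM integrations: it runs a
// selected test prompt against every enabled model (core vendors, Ollama,
// custom OpenAI), streams each reply into a results table, and checks the
// reply against an expected regular expression.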
import {
  Alert,
  Button,
  Input,
  Progress,
  Select,
  Space,
  Table,
  Tooltip,
} from "antd";

import {
  redux,
  useAsyncEffect,
  useState,
  useTypedRedux,
} from "@cocalc/frontend/app-framework";
import { Icon, Loading, Paragraph, Title } from "@cocalc/frontend/components";
import { LLMModelName } from "@cocalc/frontend/components/llm-name";
import { Markdown } from "@cocalc/frontend/markdown";
import { webapp_client } from "@cocalc/frontend/webapp-client";
import {
  USER_SELECTABLE_LLMS_BY_VENDOR,
  isCoreLanguageModel,
  toCustomOpenAIModel,
  toOllamaModel,
} from "@cocalc/util/db-schema/llm-utils";
import { trunc_middle } from "@cocalc/util/misc";
import { COLORS } from "@cocalc/util/theme";
import { PROMPTS } from "./tests";
import { Value } from "./value";

interface TestResult {
  model: string;
  status: "pending" | "running" | "passed" | "failed";
  output: string;
  error?: string;
  firstResponseTime?: number; // Time in milliseconds until first token
  totalTime?: number; // Total time in milliseconds until completion
}

export function TestLLMAdmin() {
  const customize = redux.getStore("customize");
  const globallyEnabledLLMs = customize.getEnabledLLMs();
  const selectableLLMs = useTypedRedux("customize", "selectable_llms");
  const ollama = useTypedRedux("customize", "ollama");
  const custom_openai = useTypedRedux("customize", "custom_openai");
  const [test, setTest] = useState<number | null>(0);
  const [querying, setQuerying] = useState<boolean>(false);
  const [testResults, setTestResults] = useState<TestResult[]>([]);
  const [currentTestIndex, setCurrentTestIndex] = useState<number>(0);

  // Initialize test results on component mount or when test changes
  useAsyncEffect(() => {
    if (test !== null) {
      const allModels = getAllModels();
      const initialResults: TestResult[] = allModels.map((model) => ({
        model,
        status: "pending",
        output: "",
      }));
      setTestResults(initialResults);
    } else {
      setTestResults([]);
    }
  }, [test, custom_openai, ollama, selectableLLMs]);

  function getAllModels(): string[] {
    const models: string[] = [];

    // Get core models
    Object.entries(USER_SELECTABLE_LLMS_BY_VENDOR).forEach(([vendor, llms]) => {
      if (vendor !== "ollama" && vendor !== "custom_openai") {
        llms.filter(isCoreLanguageModel).forEach((llm) => {
          models.push(llm);
        });
      }
    });

    // Get custom OpenAI models
    Object.entries(custom_openai?.toJS() ?? {}).forEach(([key, _val]) => {
      const model = toCustomOpenAIModel(key);
      models.push(model);
    });

    // Get Ollama models
    Object.entries(ollama?.toJS() ?? {}).forEach(([key, _val]) => {
      const model = toOllamaModel(key);
      models.push(model);
    });

    return models;
  }
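  // Illustrative example (hypothetical configuration): with
  // selectable_llms = ["gpt-4o-mini"] and an Ollama entry keyed "llama3",
  // getAllModels() returns every core model plus the derived identifier from
  // toOllamaModel("llama3") (presumably "ollama-llama3"), while
  // getEnabledModels() below keeps a core model only if it appears in
  // selectable_llms; configured Ollama/custom OpenAI entries always pass.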
  function getEnabledModels(): string[] {
    return getAllModels().filter((model) => {
      // Check if model is enabled in selectable LLMs
      if (isCoreLanguageModel(model)) {
        return selectableLLMs.includes(model);
      }
      // Custom OpenAI and Ollama models are always considered enabled if configured
      return true;
    });
  }

  async function runTestForModel(
    model: string,
    testConfig: any,
  ): Promise<TestResult> {
    const { prompt, expected, system, history } = testConfig;
    const expectedRegex = new RegExp(expected, "g");

    return new Promise((resolve) => {
      try {
        const startTime = Date.now();
        let firstResponseTime: number | undefined;
        let totalTime: number | undefined;

        const llmStream = webapp_client.openai_client.queryStream({
          input: prompt,
          project_id: null,
          tag: "admin-llm-test",
          model,
          system,
          history,
          maxTokens: 20,
        });

        let reply = "";

        llmStream.on("token", (token) => {
          console.log({ model, system, token });
          if (token != null) {
            // Record first response time if this is the first token
            if (firstResponseTime === undefined) {
              firstResponseTime = Date.now() - startTime;
            }
            reply += token;
            // Update the result in real-time
            setTestResults((prev) =>
              prev.map((r) =>
                r.model === model ? { ...r, output: reply } : r,
              ),
            );
          } else {
            // Stream is complete (token is null)
            totalTime = Date.now() - startTime;
            const passed = expectedRegex.test(reply);
            resolve({
              model,
              status: passed ? "passed" : "failed",
              output: reply,
              firstResponseTime,
              totalTime,
            });
          }
        });

        llmStream.on("error", (err) => {
          totalTime = Date.now() - startTime;
          console.error(`Error in LLM stream for model ${model}:`, err);
          resolve({
            model,
            status: "failed",
            output: reply,
            error: err?.toString(),
            firstResponseTime,
            totalTime,
          });
        });

        // Start the stream
        llmStream.emit("start");
      } catch (err) {
        console.error(`Error running test for model ${model}:`, err);
        resolve({
          model,
          status: "failed",
          output: "",
          error: err?.toString(),
        });
      }
    });
  }

  async function runSingleTest(model: string) {
    if (test === null) return;

    const testConfig = PROMPTS[test];

    // Find the model in the results and update its status
    const modelIndex = testResults.findIndex((r) => r.model === model);
    if (modelIndex === -1) return;

    setCurrentTestIndex(modelIndex);

    // Update status to running
    setTestResults((prev) =>
      prev.map((r, idx) =>
        idx === modelIndex
          ? { ...r, status: "running", output: "", error: undefined }
          : r,
      ),
    );

    const result = await runTestForModel(model, testConfig);

    // Update with final result
    setTestResults((prev) =>
      prev.map((r, idx) => (idx === modelIndex ? result : r)),
    );
  }
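  // Design note on runTestForModel above: the wrapping promise always
  // resolves and never rejects (stream errors resolve to a "failed"
  // TestResult), so a single broken model cannot abort the sequential run
  // below. A null token on the "token" event is the stream's end-of-output
  // signal, at which point the accumulated reply is matched against the
  // expected regex.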
  async function runSequentialTests() {
    if (test === null) return;

    const models = getEnabledModels();
    const testConfig = PROMPTS[test];

    // Initialize results
    const initialResults: TestResult[] = models.map((model) => ({
      model,
      status: "pending",
      output: "",
    }));

    setTestResults(initialResults);
    setQuerying(true);
    setCurrentTestIndex(0);

    // Run tests sequentially
    for (let i = 0; i < models.length; i++) {
      setCurrentTestIndex(i);

      // Update status to running
      setTestResults((prev) =>
        prev.map((r, idx) => (idx === i ? { ...r, status: "running" } : r)),
      );

      const result = await runTestForModel(models[i], testConfig);

      // Update with final result
      setTestResults((prev) => prev.map((r, idx) => (idx === i ? result : r)));

      // Add delay between tests to avoid rate limiting
      if (i < models.length - 1) {
        await new Promise((resolve) => setTimeout(resolve, 100));
      }
    }

    setQuerying(false);
  }

  function renderTestResultIcon(status: TestResult["status"]) {
    switch (status) {
      case "pending":
        return <Icon unicode={0x2753} />;
      case "running":
        return <Loading text="" />;
      case "passed":
        return <Value val={true} />;
      case "failed":
        return <Value val={false} />;
      default:
        return <Icon unicode={0x2753} />;
    }
  }

  function formatTiming(timeMs: number | undefined): string {
    if (timeMs === undefined) return "-";
    return `${(timeMs / 1000).toFixed(1)}s`;
  }

  function renderTimingColumn(record: TestResult) {
    const { firstResponseTime, totalTime, status } = record;

    if (status === "pending" || status === "running") {
      return <span style={{ color: COLORS.GRAY_M }}>-</span>;
    }

    if (firstResponseTime === undefined || totalTime === undefined) {
      return <span style={{ color: COLORS.GRAY_M }}>-</span>;
    }

    // Calculate progress bar values (normalize to 10 seconds max)
    const maxTime = Math.max(
      10000,
      ...testResults.filter((r) => r.totalTime).map((r) => r.totalTime!),
    );
    const totalPercent = Math.min(100, (totalTime / maxTime) * 100);

    // Determine if this is one of the slowest (top 10% quantile)
    const completedResults = testResults.filter(
      (r) => r.totalTime !== undefined,
    );
    const sortedTimes = completedResults
      .map((r) => r.totalTime!)
      .sort((a, b) => b - a);
    const slowThreshold =
      sortedTimes[Math.floor(sortedTimes.length * 0.1)] || 0;
    const isSlow = totalTime >= slowThreshold && completedResults.length > 1;

    return (
      <div>
        <Tooltip title="First response time / Total completion time">
          <div style={{ marginBottom: 2 }}>
            {formatTiming(firstResponseTime)}/{formatTiming(totalTime)}
          </div>
        </Tooltip>
        <Progress
          percent={totalPercent}
          size="small"
          status={isSlow ? "exception" : "normal"}
          showInfo={false}
        />
      </div>
    );
  }
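  // Worked example for the slow-run flag above: with ten completed runs
  // sorted by totalTime descending, Math.floor(10 * 0.1) = 1 selects the
  // second-slowest time as slowThreshold, so the two slowest runs (those at
  // or above it) render their progress bar in the red "exception" state.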
  function renderTestResults() {
    if (testResults.length === 0) {
      return (
        <Paragraph>
          Click "Run Tests" to execute the selected test on all enabled models.
        </Paragraph>
      );
    }

    const columns = [
      {
        title: "Status",
        dataIndex: "status",
        key: "status",
        width: 80,
        render: (status: TestResult["status"]) => renderTestResultIcon(status),
      },
      {
        title: "Model",
        dataIndex: "model",
        key: "model",
        width: 180,
        render: (model: string /*, record: TestResult*/) => (
          <Space>
            <LLMModelName model={model} />
            {/* {record.status === "running" && <span>(Running...)</span>} */}
          </Space>
        ),
      },
      {
        title: "Output",
        dataIndex: "output",
        key: "output",
        render: (output: string) =>
          output ? (
            <Markdown value={output} />
          ) : (
            <span style={{ color: COLORS.GRAY_M }}>-</span>
          ),
      },
      {
        title: "Error",
        dataIndex: "error",
        key: "error",
        render: (error: string) =>
          error ? (
            <Alert type="error" banner message={error} style={{ margin: 0 }} />
          ) : (
            <span style={{ color: COLORS.GRAY_M }}>-</span>
          ),
      },
      {
        title: "Timing",
        key: "timing",
        width: 120,
        render: (_, record: TestResult) => renderTimingColumn(record),
      },
      {
        title: "Test",
        key: "test",
        width: 80,
        render: (_, record: TestResult) => {
          const isEnabled = getEnabledModels().includes(record.model);
          const isRunning = record.status === "running";
          const isQuerying = querying && record.status === "running";

          return (
            <Button
              type="primary"
              size="small"
              disabled={test === null || !isEnabled || isQuerying}
              loading={isRunning}
              onClick={() => runSingleTest(record.model)}
              style={{ width: "60px" }}
            >
              {isRunning ? "" : "Run"}
            </Button>
          );
        },
      },
    ];

    const dataSource = testResults.map((result, index) => ({
      ...result,
      key: result.model,
      // Add row styling for currently running test
      className:
        index === currentTestIndex && querying ? "running-row" : undefined,
    }));

    return (
      <div>
        <Title level={4}>Test Results</Title>
        <Table
          columns={columns}
          dataSource={dataSource}
          pagination={false}
          size="small"
          rowClassName={(_, index) =>
            index === currentTestIndex && querying
              ? "admin-llm-test-running-row"
              : ""
          }
          style={{ marginTop: "10px" }}
        />
      </div>
    );
  }

  return (
    <div>
      <Paragraph>
        Globally enabled LLMs (Admin Settings):
        <Value val={globallyEnabledLLMs} />.
      </Paragraph>
      <Paragraph>
        <Space>
          <Input
            value={test != null ? PROMPTS[test].prompt : ""}
            disabled={true || querying}
            onChange={(e) => setTest(parseInt(e.target.value))}
            placeholder="Enter a query..."
            addonAfter={
              <Select
                onSelect={setTest}
                defaultValue={0}
                popupMatchSelectWidth={false}
              >
                {PROMPTS.map((p, i) => (
                  <Select.Option key={i} value={i}>
                    {trunc_middle(p.prompt, 25)}
                  </Select.Option>
                ))}
              </Select>
            }
          />
          <Button
            type="primary"
            onClick={runSequentialTests}
            disabled={test == null || querying}
          >
            Run Tests
          </Button>
          <Button
            onClick={() => {
              setTest(null);
              setTestResults([]);
            }}
          >
            Clear
          </Button>
        </Space>
      </Paragraph>

      {renderTestResults()}

      <Title level={5}>Ollama configuration</Title>
      <Value val={ollama} />
      <Title level={5}>Custom OpenAI API</Title>
      <Value val={custom_openai} />
    </div>
  );
}
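// Assumed shape of the PROMPTS entries in ./tests, inferred from the
// destructuring in runTestForModel (not verified against that file):
//   { prompt: string; expected: string; system?: string; history?: unknown }
// where `expected` is a regular-expression source string used to decide
// pass/fail.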