// Path: blob/main/scripts/chat-simulation/common/mock-llm-server.js
// 13383 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

// @ts-check

/**
 * Local mock server that implements the OpenAI Chat Completions streaming API.
 * Used by the chat perf benchmark to replace the real LLM backend with
 * deterministic, zero-latency responses.
 *
 * Supports scenario-based responses: the `messages` array's last user message
 * content is matched against scenario IDs. Unknown scenarios get a default
 * text-only response.
 */

const http = require('http');
const path = require('path');
const { EventEmitter } = require('events');

// Repository root (three levels up from this script).
const ROOT = path.join(__dirname, '..', '..', '..');

// -- Scenario fixtures -------------------------------------------------------

/**
 * @typedef {{ content: string, delayMs: number }} StreamChunk
 */

/**
 * A single turn in a multi-turn scenario.
 *
 * @typedef {{
 *   kind: 'tool-calls',
 *   toolCalls: Array<{ toolNamePattern: RegExp, arguments: Record<string, any> }>,
 * } | {
 *   kind: 'content',
 *   chunks: StreamChunk[],
 * } | {
 *   kind: 'thinking',
 *   thinkingChunks: StreamChunk[],
 *   chunks: StreamChunk[],
 * } | {
 *   kind: 'user',
 *   message: string,
 * }} ScenarioTurn
 */

/**
 * A scenario turn produced by the model.
 *
 * @typedef {{
 *   kind: 'tool-calls',
 *   toolCalls: Array<{ toolNamePattern: RegExp, arguments: Record<string, any> }>,
 * } | {
 *   kind: 'content',
 *   chunks: StreamChunk[],
 * } | {
 *   kind: 'thinking',
 *   thinkingChunks: StreamChunk[],
 *   chunks: StreamChunk[],
 * }} ModelScenarioTurn
 */

/**
 * A model turn that emits content chunks.
 *
 * @typedef {{
 *   kind: 'content',
 *   chunks: StreamChunk[],
 * } | {
 *   kind: 'thinking',
 *   thinkingChunks: StreamChunk[],
 *   chunks: StreamChunk[],
 * }} ContentScenarioTurn
 */

/**
 * A multi-turn scenario — an ordered sequence of turns.
 * The mock server determines which model turn to serve based on the number
 * of assistant→tool round-trips already present in the conversation.
 * User turns are skipped by the server and instead injected by the test
 * harness, which types them into the chat input and presses Enter.
 *
 * @typedef {{
 *   type: 'multi-turn',
 *   turns: ScenarioTurn[],
 * }} MultiTurnScenario
 */

/**
 * Type guard for multi-turn scenario definitions.
 * @param {any} scenario
 * @returns {scenario is MultiTurnScenario}
 */
function isMultiTurnScenario(scenario) {
	return scenario && typeof scenario === 'object' && scenario.type === 'multi-turn';
}

/**
 * Helper for building scenario chunk sequences with timing control.
 */
class ScenarioBuilder {
	constructor() {
		/** @type {StreamChunk[]} */
		this.chunks = [];
	}

	/**
	 * Emit a content chunk immediately (no delay before it).
	 * @param {string} content
	 * @returns {this}
	 */
	emit(content) {
		this.chunks.push({ content, delayMs: 0 });
		return this;
	}

	/**
	 * Wait, then emit a content chunk — simulates network/token generation latency.
	 * @param {number} ms - delay in milliseconds before this chunk
	 * @param {string} content
	 * @returns {this}
	 */
	wait(ms, content) {
		this.chunks.push({ content, delayMs: ms });
		return this;
	}

	/**
	 * Emit multiple chunks with uniform inter-chunk delay.
	 * @param {string[]} contents
	 * @param {number} [delayMs=15] - delay between each chunk (default ~1 frame)
	 * @returns {this}
	 */
	stream(contents, delayMs = 15) {
		for (const content of contents) {
			this.chunks.push({ content, delayMs });
		}
		return this;
	}

	/**
	 * Emit multiple chunks with no delay (burst).
	 * @param {string[]} contents
	 * @returns {this}
	 */
	burst(contents) {
		return this.stream(contents, 0);
	}

	/** @returns {StreamChunk[]} */
	build() {
		return this.chunks;
	}
}

/** @type {Record<string, StreamChunk[] | MultiTurnScenario>} */
const SCENARIOS = /** @type {Record<string, StreamChunk[] | MultiTurnScenario>} */ ({});

const DEFAULT_SCENARIO = 'text-only';

/**
 * Chunks of the default (content-only) scenario.
 * Throws if the default scenario was never registered or is multi-turn,
 * rather than silently returning `undefined`.
 * @returns {StreamChunk[]}
 */
function getDefaultScenarioChunks() {
	const scenario = SCENARIOS[DEFAULT_SCENARIO];
	if (!scenario) {
		throw new Error(`Default scenario '${DEFAULT_SCENARIO}' is not registered`);
	}
	if (isMultiTurnScenario(scenario)) {
		throw new Error(`Default scenario '${DEFAULT_SCENARIO}' must be content-only`);
	}
	return scenario;
}

// -- SSE chunk builder -------------------------------------------------------

const MODEL = 'gpt-4o-2024-08-06';

/**
 * Build a standard content (or finish) completion chunk.
 * @param {string} content - text delta; ignored when `finish` is true
 * @param {boolean} finish - when true, emits an empty delta with finish_reason 'stop'
 */
function makeChunk(content, finish) {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: finish ? {} : { content },
			finish_reason: finish ? 'stop' : null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build the initial role chunk that opens a content stream.
 */
function makeInitialChunk() {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: { role: 'assistant', content: '' },
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a tool-call initial chunk (role only, no content).
 */
function makeToolCallInitialChunk() {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: { role: 'assistant', content: null },
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a tool-call function-start chunk.
 * @param {number} index - tool call index
 * @param {string} callId - unique call ID
 * @param {string} functionName - tool function name
 */
function makeToolCallStartChunk(index, callId, functionName) {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: {
				tool_calls: [{
					index,
					id: callId,
					type: 'function',
					function: { name: functionName, arguments: '' },
				}],
			},
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a tool-call arguments chunk.
 * @param {number} index - tool call index
 * @param {string} argsFragment - partial JSON arguments
 */
function makeToolCallArgsChunk(index, argsFragment) {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: {
				tool_calls: [{
					index,
					function: { arguments: argsFragment },
				}],
			},
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a tool-call finish chunk.
 */
function makeToolCallFinishChunk() {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: {},
			finish_reason: 'tool_calls',
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a thinking (chain-of-thought summary) chunk.
 * Uses the `cot_summary` field in the delta, matching the Copilot API wire format.
 * @param {string} text - thinking text fragment
 */
function makeThinkingChunk(text) {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: { cot_summary: text },
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a thinking ID chunk (sent after thinking text to close the block).
 * @param {string} cotId - unique chain-of-thought ID
 */
function makeThinkingIdChunk(cotId) {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: { cot_id: cotId },
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

// -- Request handler ---------------------------------------------------------

/**
 * Route a single HTTP request to the matching mock endpoint.
 * @param {http.IncomingMessage} req
 * @param {http.ServerResponse} res
 */
function handleRequest(req, res) {
	const contentLength = req.headers['content-length'] || '0';
	const ts = new Date().toISOString().slice(11, -1); // HH:MM:SS.mmm
	console.log(`[mock-llm] ${ts} ${req.method} ${req.url} (${contentLength} bytes)`);

	// CORS
	res.setHeader('Access-Control-Allow-Origin', '*');
	res.setHeader('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
	res.setHeader('Access-Control-Allow-Headers', '*');
	if (req.method === 'OPTIONS') { res.writeHead(204); res.end(); return; }

	const url = new URL(req.url || '/', `http://${req.headers.host}`);
	// NOTE: named `pathname` (not `path`) to avoid shadowing the `path` module.
	const pathname = url.pathname;
	const json = (/** @type {number} */ status, /** @type {any} */ data) => {
		res.writeHead(status, { 'Content-Type': 'application/json' });
		res.end(JSON.stringify(data));
	};
	const readBody = () => new Promise(resolve => {
		let body = '';
		req.on('data', chunk => { body += chunk; });
		req.on('end', () => resolve(body));
	});

	// -- Health -------------------------------------------------------
	if (pathname === '/health') { res.writeHead(200); res.end('ok'); return; }

	// -- Token endpoints (DomainService.tokenURL / tokenNoAuthURL) ----
	// /copilot_internal/v2/token, /copilot_internal/v2/nltoken
	if (pathname.startsWith('/copilot_internal/')) {
		if (pathname.includes('/token') || pathname.includes('/nltoken')) {
			json(200, {
				token: 'perf-benchmark-fake-token',
				expires_at: Math.floor(Date.now() / 1000) + 3600,
				refresh_in: 1800,
				sku: 'free_limited_copilot',
				individual: true,
				copilot_plan: 'free',
				endpoints: {
					api: `http://${req.headers.host}`,
					proxy: `http://${req.headers.host}`,
				},
			});
		} else {
			// /copilot_internal/user, /copilot_internal/content_exclusion, etc.
			json(200, {});
		}
		return;
	}

	// -- Telemetry (DomainService.telemetryURL) ----------------------
	if (pathname === '/telemetry') { json(200, {}); return; }

	// -- Model Router (DomainService.capiModelRouterURL = /models/session/intent) --
	// The automode service POSTs here to get the best model for a request.
	if (pathname === '/models/session/intent' && req.method === 'POST') {
		readBody().then(() => {
			json(200, { model: MODEL });
		});
		return;
	}

	// -- Auto Models / Model Session (DomainService.capiAutoModelURL = /models/session) --
	// Returns AutoModeAPIResponse: { available_models, session_token, expires_at }
	if (pathname === '/models/session' && req.method === 'POST') {
		readBody().then(() => {
			json(200, {
				available_models: [MODEL, 'gpt-4o-mini'],
				session_token: 'perf-session-token-' + Date.now(),
				expires_at: Math.floor(Date.now() / 1000) + 3600,
				discounted_costs: {},
			});
		});
		return;
	}

	// -- Models (DomainService.capiModelsURL = /models) --------------
	if (pathname === '/models' && req.method === 'GET') {
		json(200, {
			data: [
				{
					id: MODEL,
					name: 'GPT-4o (Mock)',
					version: '2024-05-13',
					vendor: 'copilot',
					model_picker_enabled: true,
					is_chat_default: true,
					is_chat_fallback: true,
					billing: { is_premium: false, multiplier: 0 },
					capabilities: {
						type: 'chat',
						family: 'gpt-4o',
						tokenizer: 'o200k_base',
						limits: {
							// Use a very large token limit so the Responses API compaction
							// threshold (90% of max_prompt_tokens) is never reached during
							// perf benchmarks.
							max_prompt_tokens: 10000000,
							max_output_tokens: 131072,
							max_context_window_tokens: 10000000,
						},
						supports: {
							streaming: true,
							tool_calls: true,
							parallel_tool_calls: true,
							vision: false,
						},
					},
					supported_endpoints: ['/chat/completions'],
				},
				{
					id: 'gpt-4o-mini',
					name: 'GPT-4o mini (Mock)',
					version: '2024-07-18',
					vendor: 'copilot',
					model_picker_enabled: false,
					is_chat_default: false,
					is_chat_fallback: false,
					billing: { is_premium: false, multiplier: 0 },
					capabilities: {
						type: 'chat',
						family: 'gpt-4o-mini',
						tokenizer: 'o200k_base',
						limits: {
							max_prompt_tokens: 10000000,
							max_output_tokens: 131072,
							max_context_window_tokens: 10000000,
						},
						supports: {
							streaming: true,
							tool_calls: true,
							parallel_tool_calls: true,
							vision: false,
						},
					},
					supported_endpoints: ['/chat/completions'],
				},
			],
		});
		return;
	}

	// -- Model by ID (DomainService.capiModelsURL/{id}) --------------
	if (pathname.startsWith('/models/') && req.method === 'GET') {
		const modelId = pathname.split('/models/')[1]?.split('/')[0];
		if (pathname.endsWith('/policy')) {
			json(200, { state: 'accepted', terms: '' });
			return;
		}
		json(200, {
			id: modelId || MODEL,
			name: 'GPT-4o (Mock)',
			version: '2024-05-13',
			vendor: 'copilot',
			model_picker_enabled: true,
			is_chat_default: true,
			is_chat_fallback: true,
			capabilities: {
				type: 'chat',
				family: 'gpt-4o',
				tokenizer: 'o200k_base',
				limits: { max_prompt_tokens: 10000000, max_output_tokens: 131072, max_context_window_tokens: 10000000 },
				supports: { streaming: true, tool_calls: true, parallel_tool_calls: true, vision: false },
			},
		});
		return;
	}

	// -- Agents (DomainService.remoteAgentsURL = /agents) -------------
	if (pathname.startsWith('/agents')) {
		// /agents/sessions — CopilotSessions
		if (pathname.includes('/sessions')) {
			json(200, { sessions: [], total_count: 0, page_size: 20, page_number: 1 });
		}
		// /agents/swe/models — CCAModelsList
		else if (pathname.includes('/swe/models')) {
			json(200, {
				data: [{
					id: MODEL, name: 'GPT-4o (Mock)', vendor: 'copilot',
					capabilities: { type: 'chat', family: 'gpt-4o', supports: { streaming: true } }
				}]
			});
		}
		// /agents/swe/... — agent jobs, etc.
		else if (pathname.includes('/swe/')) {
			json(200, {});
		}
		// /agents — list agents
		else {
			json(200, { agents: [] });
		}
		return;
	}

	// -- Chat Completions (DomainService.capiChatURL = /chat/completions) --
	if (pathname === '/chat/completions' && req.method === 'POST') {
		readBody().then((/** @type {string} */ body) => handleChatCompletions(body, res));
		return;
	}

	// -- Responses API (DomainService.capiResponsesURL = /responses) --
	if (pathname === '/responses' && req.method === 'POST') {
		readBody().then((/** @type {string} */ body) => handleChatCompletions(body, res));
		return;
	}

	// -- Messages API (DomainService.capiMessagesURL = /v1/messages) --
	if (pathname === '/v1/messages' && req.method === 'POST') {
		readBody().then((/** @type {string} */ body) => handleChatCompletions(body, res));
		return;
	}

	// -- Proxy completions (/v1/engines/*/completions) ----------------
	if (pathname.includes('/v1/engines/') && req.method === 'POST') {
		readBody().then((/** @type {string} */ body) => handleChatCompletions(body, res));
		return;
	}

	// -- Skills, Search, Embeddings -----------------------------------
	if (pathname === '/skills' || pathname.startsWith('/search/') || pathname.startsWith('/embeddings')) {
		json(200, { data: [] });
		return;
	}

	// -- Catch-all: any remaining POST with messages → chat completions
	if (req.method === 'POST') {
		readBody().then((/** @type {string} */ body) => {
			try {
				const parsed = JSON.parse(/** @type {string} */(body));
				if (parsed.messages && Array.isArray(parsed.messages)) {
					handleChatCompletions(/** @type {string} */(body), res);
					return;
				}
			} catch { }
			json(200, {});
		});
		return;
	}

	// -- Catch-all GET → empty success --------------------------------
	json(200, {});
}

// -- Server lifecycle --------------------------------------------------------

/** Emitted when a scenario chat completion is fully served. */
const serverEvents = new EventEmitter();

/** @param {number} ms */
const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));

/**
 * Count the number of model turns already completed for the CURRENT scenario.
 * Only counts assistant messages that appear after the last user message
 * containing a [scenario:X] tag. This prevents assistant messages from
 * previous scenarios (in the same chat session) from inflating the count.
 *
 * @param {any[]} messages
 * @returns {number}
 */
function countCompletedModelTurns(messages) {
	// Find the index of the last user message with a scenario tag
	let scenarioMsgIdx = -1;
	for (let i = messages.length - 1; i >= 0; i--) {
		const msg = messages[i];
		if (msg.role !== 'user') { continue; }
		const content = typeof msg.content === 'string'
			? msg.content
			: Array.isArray(msg.content)
				? msg.content.map((/** @type {any} */ c) => c.text || '').join('')
				: '';
		if (/\[scenario:[^\]]+\]/.test(content)) {
			scenarioMsgIdx = i;
			break;
		}
	}

	// Count assistant messages after the scenario tag message
	let turns = 0;
	const startIdx = scenarioMsgIdx >= 0 ? scenarioMsgIdx + 1 : 0;
	for (let i = startIdx; i < messages.length; i++) {
		if (messages[i].role === 'assistant') {
			turns++;
		}
	}
	return turns;
}

/**
 * Compute the model-turn index for the current request given the scenario's
 * turn list. User turns are skipped (they're handled by the test harness)
 * and do not consume a model turn index.
 *
 * The algorithm counts completed assistant messages in the conversation
 * history (each one = one served model turn), then maps that to the
 * n-th model turn in the scenario (skipping user turns).
 *
 * @param {ScenarioTurn[]} turns
 * @param {any[]} messages
 * @returns {{ turn: ModelScenarioTurn, turnIndex: number }}
 */
function resolveCurrentTurn(turns, messages) {
	const completedModelTurns = countCompletedModelTurns(messages);
	// Build the model-only turn list (skip user turns)
	const modelTurns = /** @type {ModelScenarioTurn[]} */ (turns.filter(t => t.kind !== 'user'));
	// Clamp to the last model turn so extra round-trips re-serve the final turn.
	const idx = Math.min(completedModelTurns, modelTurns.length - 1);
	return { turn: modelTurns[idx], turnIndex: idx };
}

/**
 * Serve one chat-completions request: pick the scenario from the [scenario:X]
 * tag in the newest tagged user message and stream the matching response.
 * @param {string} body - raw JSON request body
 * @param {http.ServerResponse} res
 */
async function handleChatCompletions(body, res) {
	let scenarioId = DEFAULT_SCENARIO;
	let isScenarioRequest = false;
	/** @type {string[]} */
	let requestToolNames = [];
	/** @type {any[]} */
	let messages = [];
	try {
		const parsed = JSON.parse(body);
		messages = parsed.messages || [];
		// Log user messages for debugging
		const userMsgs = messages.filter((/** @type {any} */ m) => m.role === 'user');
		if (userMsgs.length > 0) {
			const lastContent = typeof userMsgs[userMsgs.length - 1].content === 'string'
				? userMsgs[userMsgs.length - 1].content.substring(0, 100)
				: '(structured)';
			const ts = new Date().toISOString().slice(11, -1);
			console.log(`[mock-llm] ${ts} → ${messages.length} msgs, last user: "${lastContent}"`);
		}
		// Extract available tool names from the request's tools array
		const tools = parsed.tools || [];
		requestToolNames = tools.map((/** @type {any} */ t) => t.function?.name).filter(Boolean);
		if (requestToolNames.length > 0) {
			const ts = new Date().toISOString().slice(11, -1);
			console.log(`[mock-llm] ${ts} → ${requestToolNames.length} tools available: ${requestToolNames.join(', ')}`);
		}

		// Search user messages in reverse order (newest first) for the scenario
		// tag. This ensures the most recent message's tag takes precedence when
		// multiple messages with different tags exist in the same conversation
		// (e.g. in the leak checker which sends many scenarios in one session).
		// Follow-up user messages in multi-turn scenarios won't have a tag, so
		// searching backwards still finds the correct tag from the initial message.
		for (let mi = messages.length - 1; mi >= 0; mi--) {
			const msg = messages[mi];
			if (msg.role !== 'user') { continue; }
			const content = typeof msg.content === 'string'
				? msg.content
				: Array.isArray(msg.content)
					? msg.content.map((/** @type {any} */ c) => c.text || '').join('')
					: '';
			const match = content.match(/\[scenario:([^\]]+)\]/);
			if (match && SCENARIOS[match[1]]) {
				scenarioId = match[1];
				isScenarioRequest = true;
				break;
			}
		}
	} catch { }

	// Fall back to an empty stream (rather than crashing) if neither the
	// tagged scenario nor the default scenario is registered.
	const scenario = SCENARIOS[scenarioId] || SCENARIOS[DEFAULT_SCENARIO] || [];

	res.writeHead(200, {
		'Content-Type': 'text/event-stream',
		'Cache-Control': 'no-cache',
		'Connection': 'keep-alive',
		'X-Request-Id': 'perf-benchmark-' + Date.now(),
	});

	// Handle multi-turn scenarios — only when the request actually has tools.
	// Ancillary requests (title generation, progress messages) also contain the
	// [scenario:...] tag but don't send tools, so they fall through to content.
	if (isMultiTurnScenario(scenario) && requestToolNames.length > 0) {
		const { turn, turnIndex } = resolveCurrentTurn(scenario.turns, messages);
		const modelTurnCount = scenario.turns.filter(t => t.kind !== 'user').length;

		const ts = new Date().toISOString().slice(11, -1);
		console.log(`[mock-llm] ${ts} → multi-turn scenario ${scenarioId}, model turn ${turnIndex + 1}/${modelTurnCount} (${turn.kind}), ${countCompletedModelTurns(messages)} completed turns in history`);

		if (turn.kind === 'tool-calls') {
			await streamToolCalls(res, turn.toolCalls, requestToolNames, scenarioId);
			return;
		}

		if (turn.kind === 'thinking') {
			await streamThinkingThenContent(res, turn.thinkingChunks, turn.chunks, isScenarioRequest);
			return;
		}

		// kind === 'content' — stream the final text response
		await streamContent(res, turn.chunks, isScenarioRequest);
		return;
	}

	// Standard content-only scenario (or multi-turn scenario falling back for
	// ancillary requests like title generation that don't include tools)
	const chunks = isMultiTurnScenario(scenario)
		? getFirstContentTurn(scenario)
		: /** @type {StreamChunk[]} */ (scenario);

	await streamContent(res, chunks, isScenarioRequest);
}

/**
 * Get the chunks from the first content turn of a multi-turn scenario,
 * used as fallback text for ancillary requests (title generation etc).
 * @param {MultiTurnScenario} scenario
 * @returns {StreamChunk[]}
 */
function getFirstContentTurn(scenario) {
	/** @type {ContentScenarioTurn | undefined} */
	let contentTurn;
	for (const turn of scenario.turns) {
		if (turn.kind === 'content') {
			contentTurn = turn;
			break;
		}
		if (turn.kind === 'thinking') {
			contentTurn = turn;
			break;
		}
	}
	return contentTurn?.chunks ?? getDefaultScenarioChunks();
}

/**
 * Stream content chunks as a standard SSE response.
 * @param {http.ServerResponse} res
 * @param {StreamChunk[]} chunks
 * @param {boolean} isScenarioRequest
 */
async function streamContent(res, chunks, isScenarioRequest) {
	res.write(`data: ${JSON.stringify(makeInitialChunk())}\n\n`);

	for (const chunk of chunks) {
		if (chunk.delayMs > 0) { await sleep(chunk.delayMs); }
		res.write(`data: ${JSON.stringify(makeChunk(chunk.content, false))}\n\n`);
	}

	res.write(`data: ${JSON.stringify(makeChunk('', true))}\n\n`);
	res.write('data: [DONE]\n\n');
	res.end();

	if (isScenarioRequest) {
		serverEvents.emit('scenarioCompletion');
	}
}

/**
 * Stream thinking chunks followed by content chunks as an SSE response.
 * Thinking is emitted as `cot_summary` deltas, then a `cot_id` to close the
 * thinking block, followed by standard content deltas.
 * @param {http.ServerResponse} res
 * @param {StreamChunk[]} thinkingChunks
 * @param {StreamChunk[]} contentChunks
 * @param {boolean} isScenarioRequest
 */
async function streamThinkingThenContent(res, thinkingChunks, contentChunks, isScenarioRequest) {
	res.write(`data: ${JSON.stringify(makeInitialChunk())}\n\n`);

	// Stream thinking text
	for (const chunk of thinkingChunks) {
		if (chunk.delayMs > 0) { await sleep(chunk.delayMs); }
		res.write(`data: ${JSON.stringify(makeThinkingChunk(chunk.content))}\n\n`);
	}

	// Close thinking block with ID
	const cotId = `cot_perf_${Date.now()}`;
	res.write(`data: ${JSON.stringify(makeThinkingIdChunk(cotId))}\n\n`);
	await sleep(10);

	// Stream content
	for (const chunk of contentChunks) {
		if (chunk.delayMs > 0) { await sleep(chunk.delayMs); }
		res.write(`data: ${JSON.stringify(makeChunk(chunk.content, false))}\n\n`);
	}

	res.write(`data: ${JSON.stringify(makeChunk('', true))}\n\n`);
	res.write('data: [DONE]\n\n');
	res.end();

	if (isScenarioRequest) {
		serverEvents.emit('scenarioCompletion');
	}
}

/**
 * Stream tool call chunks as an SSE response.
 * @param {http.ServerResponse} res
 * @param {Array<{ toolNamePattern: RegExp, arguments: Record<string, any> }>} toolCalls
 * @param {string[]} requestToolNames
 * @param {string} scenarioId
 */
async function streamToolCalls(res, toolCalls, requestToolNames, scenarioId) {
	res.write(`data: ${JSON.stringify(makeToolCallInitialChunk())}\n\n`);

	for (let i = 0; i < toolCalls.length; i++) {
		const call = toolCalls[i];
		const callId = `call_perf_${scenarioId}_${i}_${Date.now()}`;

		// Find the matching tool name from the request's tools array
		let toolName = requestToolNames.find(name => call.toolNamePattern.test(name));
		if (!toolName) {
			// Fallback: derive a literal name by stripping regex metacharacters.
			toolName = call.toolNamePattern.source.replace(/[\\.|?*+^${}()\[\]]/g, '');
			console.warn(`[mock-llm] No matching tool for pattern ${call.toolNamePattern}, using fallback: ${toolName}`);
		}

		// Stream tool call: start chunk, then arguments in fragments
		res.write(`data: ${JSON.stringify(makeToolCallStartChunk(i, callId, toolName))}\n\n`);
		await sleep(10);

		const argsJson = JSON.stringify(call.arguments);
		const fragmentSize = Math.max(20, Math.ceil(argsJson.length / 4));
		for (let pos = 0; pos < argsJson.length; pos += fragmentSize) {
			const fragment = argsJson.slice(pos, pos + fragmentSize);
			res.write(`data: ${JSON.stringify(makeToolCallArgsChunk(i, fragment))}\n\n`);
			await sleep(5);
		}
	}

	res.write(`data: ${JSON.stringify(makeToolCallFinishChunk())}\n\n`);
	res.write('data: [DONE]\n\n');
	res.end();
}

/**
 * Start the mock server and return a handle.
 * @param {number} port - port to listen on; 0 picks an ephemeral port
 */
function startServer(port = 0) {
	return new Promise((resolve, reject) => {
		let reqCount = 0;
		let completions = 0;
		// Waiter callbacks return true once satisfied and are then dropped.
		/** @type {Array<() => boolean>} */
		let requestWaiters = [];
		/** @type {Array<() => boolean>} */
		let completionWaiters = [];

		const onCompletion = () => {
			completions++;
			completionWaiters = completionWaiters.filter(fn => !fn());
		};
		serverEvents.on('scenarioCompletion', onCompletion);

		const server = http.createServer((req, res) => {
			reqCount++;
			requestWaiters = requestWaiters.filter(fn => !fn());
			handleRequest(req, res);
		});
		server.listen(port, '127.0.0.1', () => {
			const addr = server.address();
			const actualPort = typeof addr === 'object' && addr ? addr.port : port;
			const url = `http://127.0.0.1:${actualPort}`;
			resolve({
				port: actualPort,
				url,
				close: () => /** @type {Promise<void>} */(new Promise((resolve, reject) => {
					serverEvents.removeListener('scenarioCompletion', onCompletion);
					server.close(err => err ? reject(err) : resolve(undefined));
				})),
				/** Return total request count. */
				requestCount: () => reqCount,
				/**
				 * Wait until at least `n` requests have been received.
				 * @param {number} n
				 * @param {number} timeoutMs
				 * @returns {Promise<void>}
				 */
				waitForRequests: (n, timeoutMs) => new Promise((resolve, reject) => {
					if (reqCount >= n) { resolve(); return; }
					const timer = setTimeout(() => reject(new Error(`Timed out waiting for ${n} requests (got ${reqCount})`)), timeoutMs);
					requestWaiters.push(() => {
						if (reqCount >= n) { clearTimeout(timer); resolve(); return true; }
						return false;
					});
				}),
				/** Return total scenario-completion count. */
				completionCount: () => completions,
				/**
				 * Wait until at least `n` scenario chat completions have been served.
				 * @param {number} n
				 * @param {number} timeoutMs
				 * @returns {Promise<void>}
				 */
				waitForCompletion: (n, timeoutMs) => new Promise((resolve, reject) => {
					if (completions >= n) { resolve(); return; }
					const timer = setTimeout(() => reject(new Error(`Timed out waiting for ${n} completions (got ${completions})`)), timeoutMs);
					completionWaiters.push(() => {
						if (completions >= n) { clearTimeout(timer); resolve(); return true; }
						return false;
					});
				}),
			});
		});
		server.on('error', reject);
	});
}

// Allow running standalone for testing: node scripts/mock-llm-server.js
if (require.main === module) {
	const { registerPerfScenarios } = require('./perf-scenarios');
	registerPerfScenarios();
	const port = parseInt(process.argv[2] || '0', 10);
	startServer(port).then((/** @type {any} */ handle) => {
		console.log(`Mock LLM server listening at ${handle.url}`);
		console.log('Scenarios:', Object.keys(SCENARIOS).join(', '));
	});
}

/**
 * Get the user follow-up messages for a scenario, in order.
 * Returns an array of { message, afterModelTurn } objects where afterModelTurn
 * is the 0-based index of the model turn after which this user message should
 * be injected.
 * @param {string} scenarioId
 * @returns {Array<{ message: string, afterModelTurn: number }>}
 */
function getUserTurns(scenarioId) {
	const scenario = SCENARIOS[scenarioId];
	if (!isMultiTurnScenario(scenario)) { return []; }
	const result = [];
	let modelTurnsSeen = 0;
	for (const turn of scenario.turns) {
		if (turn.kind === 'user') {
			result.push({ message: turn.message, afterModelTurn: modelTurnsSeen });
		} else {
			modelTurnsSeen++;
		}
	}
	return result;
}

/**
 * Get the total number of model turns (non-user turns) in a scenario.
 * @param {string} scenarioId
 * @returns {number}
 */
function getModelTurnCount(scenarioId) {
	const scenario = SCENARIOS[scenarioId];
	if (!isMultiTurnScenario(scenario)) { return 1; }
	return scenario.turns.filter(t => t.kind !== 'user').length;
}

/**
 * Register a scenario dynamically. Test files call this to add
 * scenarios that are only relevant to them.
 * @param {string} id - unique scenario identifier
 * @param {StreamChunk[] | MultiTurnScenario} definition - scenario data
 */
function registerScenario(id, definition) {
	SCENARIOS[id] = definition;
}

/**
 * Return the IDs of all currently registered scenarios.
 * @returns {string[]}
 */
function getScenarioIds() {
	return Object.keys(SCENARIOS);
}

module.exports = { startServer, SCENARIOS, ScenarioBuilder, registerScenario, getScenarioIds, getUserTurns, getModelTurnCount };