/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

// @ts-check

/**
 * Local mock server that implements the OpenAI Chat Completions streaming API.
 * Used by the chat perf benchmark to replace the real LLM backend with
 * deterministic, zero-latency responses.
 *
 * Supports scenario-based responses: user message content is scanned (newest
 * first) for a `[scenario:ID]` tag that selects a registered scenario.
 * Requests without a known tag get a default text-only response.
 */

const http = require('http');
const path = require('path');
const { EventEmitter } = require('events');

const ROOT = path.join(__dirname, '..', '..', '..');

// -- Scenario fixtures -------------------------------------------------------

/**
 * @typedef {{ content: string, delayMs: number }} StreamChunk
 */

/**
 * A single turn in a multi-turn scenario.
 *
 * @typedef {{
 * 	kind: 'tool-calls',
 * 	toolCalls: Array<{ toolNamePattern: RegExp, arguments: Record<string, any> }>,
 * } | {
 * 	kind: 'content',
 * 	chunks: StreamChunk[],
 * } | {
 * 	kind: 'thinking',
 * 	thinkingChunks: StreamChunk[],
 * 	chunks: StreamChunk[],
 * } | {
 * 	kind: 'user',
 * 	message: string,
 * }} ScenarioTurn
 */

/**
 * A scenario turn produced by the model (any turn except 'user').
 *
 * @typedef {{
 * 	kind: 'tool-calls',
 * 	toolCalls: Array<{ toolNamePattern: RegExp, arguments: Record<string, any> }>,
 * } | {
 * 	kind: 'content',
 * 	chunks: StreamChunk[],
 * } | {
 * 	kind: 'thinking',
 * 	thinkingChunks: StreamChunk[],
 * 	chunks: StreamChunk[],
 * }} ModelScenarioTurn
 */

/**
 * A model turn that emits content chunks.
 *
 * @typedef {{
 * 	kind: 'content',
 * 	chunks: StreamChunk[],
 * } | {
 * 	kind: 'thinking',
 * 	thinkingChunks: StreamChunk[],
 * 	chunks: StreamChunk[],
 * }} ContentScenarioTurn
 */

/**
 * A multi-turn scenario — an ordered sequence of turns.
 * The mock server determines which model turn to serve based on the number
 * of assistant→tool round-trips already present in the conversation.
 * User turns are skipped by the server and instead injected by the test
 * harness, which types them into the chat input and presses Enter.
 *
 * @typedef {{
 * 	type: 'multi-turn',
 * 	turns: ScenarioTurn[],
 * }} MultiTurnScenario
 */

/**
 * @param {any} scenario
 * @returns {scenario is MultiTurnScenario}
 */
function isMultiTurnScenario(scenario) {
	return scenario && typeof scenario === 'object' && scenario.type === 'multi-turn';
}

/**
 * Helper for building scenario chunk sequences with timing control.
 */
class ScenarioBuilder {
	constructor() {
		/** @type {StreamChunk[]} */
		this.chunks = [];
	}

	/**
	 * Emit a content chunk immediately (no delay before it).
	 * @param {string} content
	 * @returns {this}
	 */
	emit(content) {
		this.chunks.push({ content, delayMs: 0 });
		return this;
	}

	/**
	 * Wait, then emit a content chunk — simulates network/token generation latency.
	 * @param {number} ms - delay in milliseconds before this chunk
	 * @param {string} content
	 * @returns {this}
	 */
	wait(ms, content) {
		this.chunks.push({ content, delayMs: ms });
		return this;
	}

	/**
	 * Emit multiple chunks with uniform inter-chunk delay.
	 * @param {string[]} contents
	 * @param {number} [delayMs=15] - delay between each chunk (default ~1 frame)
	 * @returns {this}
	 */
	stream(contents, delayMs = 15) {
		for (const content of contents) {
			this.chunks.push({ content, delayMs });
		}
		return this;
	}

	/**
	 * Emit multiple chunks with no delay (burst).
	 * @param {string[]} contents
	 * @returns {this}
	 */
	burst(contents) {
		return this.stream(contents, 0);
	}

	/** @returns {StreamChunk[]} */
	build() {
		return this.chunks;
	}
}
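
// Example (illustrative, not one of the real perf fixtures): building a
// streamed response and registering it under a hypothetical ID
// (registerScenario is defined at the bottom of this file).
//
//   registerScenario('hello-world', new ScenarioBuilder()
//       .emit('Hello')                             // first token, immediately
//       .wait(50, ' world.')                       // 50ms generation pause
//       .stream([' How', ' can', ' I', ' help?'])  // ~15ms between tokens
//       .build());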

/** @type {Record<string, StreamChunk[] | MultiTurnScenario>} */
const SCENARIOS = /** @type {Record<string, StreamChunk[] | MultiTurnScenario>} */ ({});

const DEFAULT_SCENARIO = 'text-only';

/**
 * @returns {StreamChunk[]}
 */
function getDefaultScenarioChunks() {
	const scenario = SCENARIOS[DEFAULT_SCENARIO];
	if (!scenario) {
		throw new Error(`Default scenario '${DEFAULT_SCENARIO}' is not registered`);
	}
	if (isMultiTurnScenario(scenario)) {
		throw new Error(`Default scenario '${DEFAULT_SCENARIO}' must be content-only`);
	}
	return scenario;
}

// -- SSE chunk builder -------------------------------------------------------

const MODEL = 'gpt-4o-2024-08-06';

/**
 * Build a content delta chunk, or a final chunk with `finish_reason: 'stop'`
 * when `finish` is set.
 * @param {string} content
 * @param {number} index - unused; the mock always emits a single choice 0
 * @param {boolean} finish
 */
function makeChunk(content, index, finish) {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: finish ? {} : { content },
			finish_reason: finish ? 'stop' : null,
			content_filter_results: {},
		}],
		usage: null,
	};
}
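
// For reference, every chunk built above is serialized as one SSE event line
// (timestamp illustrative), followed by a blank line — see the res.write calls
// in the stream* functions below:
//
//   data: {"id":"chatcmpl-perf-benchmark","object":"chat.completion.chunk","created":1700000000,"model":"gpt-4o-2024-08-06","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null,"content_filter_results":{}}],"usage":null}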

/**
 * Build the initial chunk that opens an assistant message (role + empty content).
 */
function makeInitialChunk() {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: { role: 'assistant', content: '' },
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a tool-call initial chunk (role only, no content).
 */
function makeToolCallInitialChunk() {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: { role: 'assistant', content: null },
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a tool-call function-start chunk.
 * @param {number} index - tool call index
 * @param {string} callId - unique call ID
 * @param {string} functionName - tool function name
 */
function makeToolCallStartChunk(index, callId, functionName) {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: {
				tool_calls: [{
					index,
					id: callId,
					type: 'function',
					function: { name: functionName, arguments: '' },
				}],
			},
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a tool-call arguments chunk.
 * @param {number} index - tool call index
 * @param {string} argsFragment - partial JSON arguments
 */
function makeToolCallArgsChunk(index, argsFragment) {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: {
				tool_calls: [{
					index,
					function: { arguments: argsFragment },
				}],
			},
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a tool-call finish chunk.
 */
function makeToolCallFinishChunk() {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: {},
			finish_reason: 'tool_calls',
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a thinking (chain-of-thought summary) chunk.
 * Uses the `cot_summary` field in the delta, matching the Copilot API wire format.
 * @param {string} text - thinking text fragment
 */
function makeThinkingChunk(text) {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: { cot_summary: text },
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a thinking ID chunk (sent after thinking text to close the block).
 * @param {string} cotId - unique chain-of-thought ID
 */
function makeThinkingIdChunk(cotId) {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta: { cot_id: cotId },
			finish_reason: null,
			content_filter_results: {},
		}],
		usage: null,
	};
}
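
// Taken together, a thinking turn streams as:
//   delta.cot_summary × N → delta.cot_id (closes the block) → delta.content × M
// (see streamThinkingThenContent below), and a tool-call turn streams as:
//   role chunk → function-start chunk → argument fragments → finish_reason 'tool_calls'
// (see streamToolCalls below).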

// -- Request handler ---------------------------------------------------------

/**
 * @param {http.IncomingMessage} req
 * @param {http.ServerResponse} res
 */
function handleRequest(req, res) {
	const contentLength = req.headers['content-length'] || '0';
	const ts = new Date().toISOString().slice(11, -1); // HH:MM:SS.mmm
	console.log(`[mock-llm] ${ts} ${req.method} ${req.url} (${contentLength} bytes)`);

	// CORS
	res.setHeader('Access-Control-Allow-Origin', '*');
	res.setHeader('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
	res.setHeader('Access-Control-Allow-Headers', '*');
	if (req.method === 'OPTIONS') { res.writeHead(204); res.end(); return; }

	const url = new URL(req.url || '/', `http://${req.headers.host}`);
	const pathname = url.pathname;
	const json = (/** @type {number} */ status, /** @type {any} */ data) => {
		res.writeHead(status, { 'Content-Type': 'application/json' });
		res.end(JSON.stringify(data));
	};
	const readBody = () => new Promise(resolve => {
		let body = '';
		req.on('data', chunk => { body += chunk; });
		req.on('end', () => resolve(body));
	});

	// -- Health -------------------------------------------------------
	if (pathname === '/health') { res.writeHead(200); res.end('ok'); return; }

	// -- Token endpoints (DomainService.tokenURL / tokenNoAuthURL) ----
	// /copilot_internal/v2/token, /copilot_internal/v2/nltoken
	if (pathname.startsWith('/copilot_internal/')) {
		if (pathname.includes('/token') || pathname.includes('/nltoken')) {
			json(200, {
				token: 'perf-benchmark-fake-token',
				expires_at: Math.floor(Date.now() / 1000) + 3600,
				refresh_in: 1800,
				sku: 'free_limited_copilot',
				individual: true,
				copilot_plan: 'free',
				endpoints: {
					api: `http://${req.headers.host}`,
					proxy: `http://${req.headers.host}`,
				},
			});
		} else {
			// /copilot_internal/user, /copilot_internal/content_exclusion, etc.
			json(200, {});
		}
		return;
	}

	// -- Telemetry (DomainService.telemetryURL) ----------------------
	if (pathname === '/telemetry') { json(200, {}); return; }

	// -- Model Router (DomainService.capiModelRouterURL = /models/session/intent) --
	// The automode service POSTs here to get the best model for a request.
	if (pathname === '/models/session/intent' && req.method === 'POST') {
		readBody().then(() => {
			json(200, { model: MODEL });
		});
		return;
	}

	// -- Auto Models / Model Session (DomainService.capiAutoModelURL = /models/session) --
	// Returns AutoModeAPIResponse: { available_models, session_token, expires_at }
	if (pathname === '/models/session' && req.method === 'POST') {
		readBody().then(() => {
			json(200, {
				available_models: [MODEL, 'gpt-4o-mini'],
				session_token: 'perf-session-token-' + Date.now(),
				expires_at: Math.floor(Date.now() / 1000) + 3600,
				discounted_costs: {},
			});
		});
		return;
	}

	// -- Models (DomainService.capiModelsURL = /models) --------------
	if (pathname === '/models' && req.method === 'GET') {
		json(200, {
			data: [
				{
					id: MODEL,
					name: 'GPT-4o (Mock)',
					version: '2024-05-13',
					vendor: 'copilot',
					model_picker_enabled: true,
					is_chat_default: true,
					is_chat_fallback: true,
					billing: { is_premium: false, multiplier: 0 },
					capabilities: {
						type: 'chat',
						family: 'gpt-4o',
						tokenizer: 'o200k_base',
						limits: {
							// Use a very large token limit so the Responses API compaction
							// threshold (90% of max_prompt_tokens) is never reached during
							// perf benchmarks.
							max_prompt_tokens: 10000000,
							max_output_tokens: 131072,
							max_context_window_tokens: 10000000,
						},
						supports: {
							streaming: true,
							tool_calls: true,
							parallel_tool_calls: true,
							vision: false,
						},
					},
					supported_endpoints: ['/chat/completions'],
				},
				{
					id: 'gpt-4o-mini',
					name: 'GPT-4o mini (Mock)',
					version: '2024-07-18',
					vendor: 'copilot',
					model_picker_enabled: false,
					is_chat_default: false,
					is_chat_fallback: false,
					billing: { is_premium: false, multiplier: 0 },
					capabilities: {
						type: 'chat',
						family: 'gpt-4o-mini',
						tokenizer: 'o200k_base',
						limits: {
							max_prompt_tokens: 10000000,
							max_output_tokens: 131072,
							max_context_window_tokens: 10000000,
						},
						supports: {
							streaming: true,
							tool_calls: true,
							parallel_tool_calls: true,
							vision: false,
						},
					},
					supported_endpoints: ['/chat/completions'],
				},
			],
		});
		return;
	}

	// -- Model by ID (DomainService.capiModelsURL/{id}) --------------
	if (pathname.startsWith('/models/') && req.method === 'GET') {
		const modelId = pathname.split('/models/')[1]?.split('/')[0];
		if (pathname.endsWith('/policy')) {
			json(200, { state: 'accepted', terms: '' });
			return;
		}
		json(200, {
			id: modelId || MODEL,
			name: 'GPT-4o (Mock)',
			version: '2024-05-13',
			vendor: 'copilot',
			model_picker_enabled: true,
			is_chat_default: true,
			is_chat_fallback: true,
			capabilities: {
				type: 'chat',
				family: 'gpt-4o',
				tokenizer: 'o200k_base',
				limits: { max_prompt_tokens: 10000000, max_output_tokens: 131072, max_context_window_tokens: 10000000 },
				supports: { streaming: true, tool_calls: true, parallel_tool_calls: true, vision: false },
			},
		});
		return;
	}

	// -- Agents (DomainService.remoteAgentsURL = /agents) -------------
	if (pathname.startsWith('/agents')) {
		// /agents/sessions — CopilotSessions
		if (pathname.includes('/sessions')) {
			json(200, { sessions: [], total_count: 0, page_size: 20, page_number: 1 });
		}
		// /agents/swe/models — CCAModelsList
		else if (pathname.includes('/swe/models')) {
			json(200, {
				data: [{
					id: MODEL, name: 'GPT-4o (Mock)', vendor: 'copilot',
					capabilities: { type: 'chat', family: 'gpt-4o', supports: { streaming: true } }
				}]
			});
		}
		// /agents/swe/... — agent jobs, etc.
		else if (pathname.includes('/swe/')) {
			json(200, {});
		}
		// /agents — list agents
		else {
			json(200, { agents: [] });
		}
		return;
	}

	// -- Chat Completions (DomainService.capiChatURL = /chat/completions) --
	if (pathname === '/chat/completions' && req.method === 'POST') {
		readBody().then((/** @type {string} */ body) => handleChatCompletions(body, res));
		return;
	}

	// -- Responses API (DomainService.capiResponsesURL = /responses) --
	if (pathname === '/responses' && req.method === 'POST') {
		readBody().then((/** @type {string} */ body) => handleChatCompletions(body, res));
		return;
	}

	// -- Messages API (DomainService.capiMessagesURL = /v1/messages) --
	if (pathname === '/v1/messages' && req.method === 'POST') {
		readBody().then((/** @type {string} */ body) => handleChatCompletions(body, res));
		return;
	}

	// -- Proxy completions (/v1/engines/*/completions) ----------------
	if (pathname.includes('/v1/engines/') && req.method === 'POST') {
		readBody().then((/** @type {string} */ body) => handleChatCompletions(body, res));
		return;
	}

	// -- Skills, Search, Embeddings -----------------------------------
	if (pathname === '/skills' || pathname.startsWith('/search/') || pathname.startsWith('/embeddings')) {
		json(200, { data: [] });
		return;
	}

	// -- Catch-all: any remaining POST with messages → chat completions
	if (req.method === 'POST') {
		readBody().then((/** @type {string} */ body) => {
			try {
				const parsed = JSON.parse(/** @type {string} */(body));
				if (parsed.messages && Array.isArray(parsed.messages)) {
					handleChatCompletions(/** @type {string} */(body), res);
					return;
				}
			} catch { }
			json(200, {});
		});
		return;
	}

	// -- Catch-all GET → empty success --------------------------------
	json(200, {});
}

// -- Server lifecycle --------------------------------------------------------

/** Emitted when a scenario chat completion is fully served. */
const serverEvents = new EventEmitter();

/** @param {number} ms */
const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));

/**
 * Count the number of model turns already completed for the CURRENT scenario.
 * Only counts assistant messages that appear after the last user message
 * containing a [scenario:X] tag. This prevents assistant messages from
 * previous scenarios (in the same chat session) from inflating the count.
 *
 * @param {any[]} messages
 * @returns {number}
 */
function countCompletedModelTurns(messages) {
	// Find the index of the last user message with a scenario tag
	let scenarioMsgIdx = -1;
	for (let i = messages.length - 1; i >= 0; i--) {
		const msg = messages[i];
		if (msg.role !== 'user') { continue; }
		const content = typeof msg.content === 'string'
			? msg.content
			: Array.isArray(msg.content)
				? msg.content.map((/** @type {any} */ c) => c.text || '').join('')
				: '';
		if (/\[scenario:[^\]]+\]/.test(content)) {
			scenarioMsgIdx = i;
			break;
		}
	}

	// Count assistant messages after the scenario tag message
	let turns = 0;
	const startIdx = scenarioMsgIdx >= 0 ? scenarioMsgIdx + 1 : 0;
	for (let i = startIdx; i < messages.length; i++) {
		if (messages[i].role === 'assistant') {
			turns++;
		}
	}
	return turns;
}

/**
 * Compute the model-turn index for the current request given the scenario's
 * turn list. User turns are skipped (they're handled by the test harness)
 * and do not consume a model turn index.
 *
 * The algorithm counts completed assistant messages in the conversation
 * history (each one = one served model turn), then maps that to the
 * n-th model turn in the scenario (skipping user turns).
 *
 * @param {ScenarioTurn[]} turns
 * @param {any[]} messages
 * @returns {{ turn: ModelScenarioTurn, turnIndex: number }}
 */
function resolveCurrentTurn(turns, messages) {
	const completedModelTurns = countCompletedModelTurns(messages);
	// Build the model-only turn list (skip user turns) and clamp to the last
	// model turn so extra requests re-serve the final turn.
	const modelTurns = /** @type {ModelScenarioTurn[]} */ (turns.filter(t => t.kind !== 'user'));
	const idx = Math.min(completedModelTurns, modelTurns.length - 1);
	return { turn: modelTurns[idx], turnIndex: idx };
}
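
// Worked example: for turns [tool-calls, user, content], the model-only list
// is [tool-calls, content]. With no assistant message after the scenario tag
// the tool-calls turn is served; with one, the content turn; any further
// requests re-serve that final turn because of the Math.min clamp above.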

/**
 * @param {string} body
 * @param {http.ServerResponse} res
 */
async function handleChatCompletions(body, res) {
	let scenarioId = DEFAULT_SCENARIO;
	let isScenarioRequest = false;
	/** @type {string[]} */
	let requestToolNames = [];
	/** @type {any[]} */
	let messages = [];
	try {
		const parsed = JSON.parse(body);
		messages = parsed.messages || [];
		// Log user messages for debugging
		const userMsgs = messages.filter((/** @type {any} */ m) => m.role === 'user');
		if (userMsgs.length > 0) {
			const lastContent = typeof userMsgs[userMsgs.length - 1].content === 'string'
				? userMsgs[userMsgs.length - 1].content.substring(0, 100)
				: '(structured)';
			const ts = new Date().toISOString().slice(11, -1);
			console.log(`[mock-llm] ${ts} → ${messages.length} msgs, last user: "${lastContent}"`);
		}
		// Extract available tool names from the request's tools array
		const tools = parsed.tools || [];
		requestToolNames = tools.map((/** @type {any} */ t) => t.function?.name).filter(Boolean);
		if (requestToolNames.length > 0) {
			const ts = new Date().toISOString().slice(11, -1);
			console.log(`[mock-llm] ${ts} → ${requestToolNames.length} tools available: ${requestToolNames.join(', ')}`);
		}

		// Search user messages in reverse order (newest first) for the scenario
		// tag. This ensures the most recent message's tag takes precedence when
		// multiple messages with different tags exist in the same conversation
		// (e.g. in the leak checker which sends many scenarios in one session).
		// Follow-up user messages in multi-turn scenarios won't have a tag, so
		// searching backwards still finds the correct tag from the initial message.
		for (let mi = messages.length - 1; mi >= 0; mi--) {
			const msg = messages[mi];
			if (msg.role !== 'user') { continue; }
			const content = typeof msg.content === 'string'
				? msg.content
				: Array.isArray(msg.content)
					? msg.content.map((/** @type {any} */ c) => c.text || '').join('')
					: '';
			const match = content.match(/\[scenario:([^\]]+)\]/);
			if (match && SCENARIOS[match[1]]) {
				scenarioId = match[1];
				isScenarioRequest = true;
				break;
			}
		}
	} catch { }

	const scenario = SCENARIOS[scenarioId] || SCENARIOS[DEFAULT_SCENARIO];

	res.writeHead(200, {
		'Content-Type': 'text/event-stream',
		'Cache-Control': 'no-cache',
		'Connection': 'keep-alive',
		'X-Request-Id': 'perf-benchmark-' + Date.now(),
	});

	// Handle multi-turn scenarios — only when the request actually has tools.
	// Ancillary requests (title generation, progress messages) also contain the
	// [scenario:...] tag but don't send tools, so they fall through to content.
	if (isMultiTurnScenario(scenario) && requestToolNames.length > 0) {
		const { turn, turnIndex } = resolveCurrentTurn(scenario.turns, messages);
		const modelTurnCount = scenario.turns.filter(t => t.kind !== 'user').length;

		const ts = new Date().toISOString().slice(11, -1);
		console.log(`[mock-llm] ${ts} → multi-turn scenario ${scenarioId}, model turn ${turnIndex + 1}/${modelTurnCount} (${turn.kind}), ${countCompletedModelTurns(messages)} completed turns in history`);

		if (turn.kind === 'tool-calls') {
			await streamToolCalls(res, turn.toolCalls, requestToolNames, scenarioId);
			return;
		}

		if (turn.kind === 'thinking') {
			await streamThinkingThenContent(res, turn.thinkingChunks, turn.chunks, isScenarioRequest);
			return;
		}

		// kind === 'content' — stream the final text response
		await streamContent(res, turn.chunks, isScenarioRequest);
		return;
	}

	// Standard content-only scenario (or multi-turn scenario falling back for
	// ancillary requests like title generation that don't include tools)
	const chunks = isMultiTurnScenario(scenario)
		? getFirstContentTurn(scenario)
		: /** @type {StreamChunk[]} */ (scenario);

	await streamContent(res, chunks, isScenarioRequest);
}

/**
 * Get the chunks from the first content turn of a multi-turn scenario,
 * used as fallback text for ancillary requests (title generation etc).
 * @param {MultiTurnScenario} scenario
 * @returns {StreamChunk[]}
 */
function getFirstContentTurn(scenario) {
	/** @type {ContentScenarioTurn | undefined} */
	let contentTurn;
	for (const turn of scenario.turns) {
		if (turn.kind === 'content' || turn.kind === 'thinking') {
			contentTurn = turn;
			break;
		}
	}
	return contentTurn?.chunks ?? getDefaultScenarioChunks();
}

/**
 * Stream content chunks as a standard SSE response.
 * @param {http.ServerResponse} res
 * @param {StreamChunk[]} chunks
 * @param {boolean} isScenarioRequest
 */
async function streamContent(res, chunks, isScenarioRequest) {
	res.write(`data: ${JSON.stringify(makeInitialChunk())}\n\n`);

	for (const chunk of chunks) {
		if (chunk.delayMs > 0) { await sleep(chunk.delayMs); }
		res.write(`data: ${JSON.stringify(makeChunk(chunk.content, 0, false))}\n\n`);
	}

	res.write(`data: ${JSON.stringify(makeChunk('', 0, true))}\n\n`);
	res.write('data: [DONE]\n\n');
	res.end();

	if (isScenarioRequest) {
		serverEvents.emit('scenarioCompletion');
	}
}
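
// The full event sequence for a two-chunk content response is therefore:
//
//   data: {...,"delta":{"role":"assistant","content":""},...}    (initial chunk)
//   data: {...,"delta":{"content":"Hello"},...}
//   data: {...,"delta":{" world"},...}
//   data: {...,"delta":{},"finish_reason":"stop",...}
//   data: [DONE]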

/**
 * Stream thinking chunks followed by content chunks as an SSE response.
 * Thinking is emitted as `cot_summary` deltas, then a `cot_id` to close the
 * thinking block, followed by standard content deltas.
 * @param {http.ServerResponse} res
 * @param {StreamChunk[]} thinkingChunks
 * @param {StreamChunk[]} contentChunks
 * @param {boolean} isScenarioRequest
 */
async function streamThinkingThenContent(res, thinkingChunks, contentChunks, isScenarioRequest) {
	res.write(`data: ${JSON.stringify(makeInitialChunk())}\n\n`);

	// Stream thinking text
	for (const chunk of thinkingChunks) {
		if (chunk.delayMs > 0) { await sleep(chunk.delayMs); }
		res.write(`data: ${JSON.stringify(makeThinkingChunk(chunk.content))}\n\n`);
	}

	// Close thinking block with ID
	const cotId = `cot_perf_${Date.now()}`;
	res.write(`data: ${JSON.stringify(makeThinkingIdChunk(cotId))}\n\n`);
	await sleep(10);

	// Stream content
	for (const chunk of contentChunks) {
		if (chunk.delayMs > 0) { await sleep(chunk.delayMs); }
		res.write(`data: ${JSON.stringify(makeChunk(chunk.content, 0, false))}\n\n`);
	}

	res.write(`data: ${JSON.stringify(makeChunk('', 0, true))}\n\n`);
	res.write('data: [DONE]\n\n');
	res.end();

	if (isScenarioRequest) {
		serverEvents.emit('scenarioCompletion');
	}
}

/**
 * Stream tool call chunks as an SSE response.
 * @param {http.ServerResponse} res
 * @param {Array<{ toolNamePattern: RegExp, arguments: Record<string, any> }>} toolCalls
 * @param {string[]} requestToolNames
 * @param {string} scenarioId
 */
async function streamToolCalls(res, toolCalls, requestToolNames, scenarioId) {
	res.write(`data: ${JSON.stringify(makeToolCallInitialChunk())}\n\n`);

	for (let i = 0; i < toolCalls.length; i++) {
		const call = toolCalls[i];
		const callId = `call_perf_${scenarioId}_${i}_${Date.now()}`;

		// Find the matching tool name from the request's tools array
		let toolName = requestToolNames.find(name => call.toolNamePattern.test(name));
		if (!toolName) {
			toolName = call.toolNamePattern.source.replace(/[\\.|?*+^${}()\[\]]/g, '');
			console.warn(`[mock-llm] No matching tool for pattern ${call.toolNamePattern}, using fallback: ${toolName}`);
		}

		// Stream tool call: start chunk, then arguments in fragments
		res.write(`data: ${JSON.stringify(makeToolCallStartChunk(i, callId, toolName))}\n\n`);
		await sleep(10);

		const argsJson = JSON.stringify(call.arguments);
		const fragmentSize = Math.max(20, Math.ceil(argsJson.length / 4));
		for (let pos = 0; pos < argsJson.length; pos += fragmentSize) {
			const fragment = argsJson.slice(pos, pos + fragmentSize);
			res.write(`data: ${JSON.stringify(makeToolCallArgsChunk(i, fragment))}\n\n`);
			await sleep(5);
		}
	}

	res.write(`data: ${JSON.stringify(makeToolCallFinishChunk())}\n\n`);
	res.write('data: [DONE]\n\n');
	res.end();
}
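
// Example (illustrative): hypothetical arguments {"filePath":"/tmp/a.ts","startLine":1}
// serialize to 38 characters of JSON, so fragmentSize = max(20, ceil(38 / 4)) = 20
// and the arguments arrive in two fragments (20 + 18 chars), ~5ms apart.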

/**
 * Start the mock server and return a handle.
 * @param {number} port
 */
function startServer(port = 0) {
	return new Promise((resolve, reject) => {
		let reqCount = 0;
		let completions = 0;
		/** @type {Array<() => boolean>} */
		let requestWaiters = [];
		/** @type {Array<() => boolean>} */
		let completionWaiters = [];

		const onCompletion = () => {
			completions++;
			// Each waiter returns true once satisfied; drop those from the list.
			completionWaiters = completionWaiters.filter(fn => !fn());
		};
		serverEvents.on('scenarioCompletion', onCompletion);

		const server = http.createServer((req, res) => {
			reqCount++;
			requestWaiters = requestWaiters.filter(fn => !fn());
			handleRequest(req, res);
		});
		server.listen(port, '127.0.0.1', () => {
			const addr = server.address();
			const actualPort = typeof addr === 'object' && addr ? addr.port : port;
			const url = `http://127.0.0.1:${actualPort}`;
			resolve({
				port: actualPort,
				url,
				close: () => /** @type {Promise<void>} */(new Promise((resolve, reject) => {
					serverEvents.removeListener('scenarioCompletion', onCompletion);
					server.close(err => err ? reject(err) : resolve(undefined));
				})),
				/** Return total request count. */
				requestCount: () => reqCount,
				/**
				 * Wait until at least `n` requests have been received.
				 * @param {number} n
				 * @param {number} timeoutMs
				 * @returns {Promise<void>}
				 */
				waitForRequests: (n, timeoutMs) => new Promise((resolve, reject) => {
					if (reqCount >= n) { resolve(); return; }
					const timer = setTimeout(() => reject(new Error(`Timed out waiting for ${n} requests (got ${reqCount})`)), timeoutMs);
					requestWaiters.push(() => {
						if (reqCount >= n) { clearTimeout(timer); resolve(); return true; }
						return false;
					});
				}),
				/** Return total scenario-completion count. */
				completionCount: () => completions,
				/**
				 * Wait until at least `n` scenario chat completions have been served.
				 * @param {number} n
				 * @param {number} timeoutMs
				 * @returns {Promise<void>}
				 */
				waitForCompletion: (n, timeoutMs) => new Promise((resolve, reject) => {
					if (completions >= n) { resolve(); return; }
					const timer = setTimeout(() => reject(new Error(`Timed out waiting for ${n} completions (got ${completions})`)), timeoutMs);
					completionWaiters.push(() => {
						if (completions >= n) { clearTimeout(timer); resolve(); return true; }
						return false;
					});
				}),
			});
		});
		server.on('error', reject);
	});
}
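
// Example (illustrative) of driving the server handle from a test:
//
//   const handle = await startServer();        // ephemeral port on 127.0.0.1
//   // ... point the product under test at handle.url and run one scenario ...
//   await handle.waitForCompletion(1, 30000);  // block until the reply is fully served
//   console.log('requests seen:', handle.requestCount());
//   await handle.close();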

// Allow running standalone for testing:
// node scripts/chat-simulation/common/mock-llm-server.js [port]
if (require.main === module) {
	const { registerPerfScenarios } = require('./perf-scenarios');
	registerPerfScenarios();
	const port = parseInt(process.argv[2] || '0', 10);
	startServer(port).then((/** @type {any} */ handle) => {
		console.log(`Mock LLM server listening at ${handle.url}`);
		console.log('Scenarios:', Object.keys(SCENARIOS).join(', '));
	});
}

/**
 * Get the user follow-up messages for a scenario, in order.
 * Returns an array of { message, afterModelTurn } objects where afterModelTurn
 * is the number of model turns that must have completed before this user
 * message is injected.
 * @param {string} scenarioId
 * @returns {Array<{ message: string, afterModelTurn: number }>}
 */
function getUserTurns(scenarioId) {
	const scenario = SCENARIOS[scenarioId];
	if (!isMultiTurnScenario(scenario)) { return []; }
	const result = [];
	let modelTurnsSeen = 0;
	for (const turn of scenario.turns) {
		if (turn.kind === 'user') {
			result.push({ message: turn.message, afterModelTurn: modelTurnsSeen });
		} else {
			modelTurnsSeen++;
		}
	}
	return result;
}
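
// Example (illustrative): for turns [tool-calls, user('continue'), content],
// getUserTurns returns [{ message: 'continue', afterModelTurn: 1 }]: the
// harness injects 'continue' once one model turn has completed.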

/**
 * Get the total number of model turns (non-user turns) in a scenario.
 * Content-only scenarios count as a single model turn.
 * @param {string} scenarioId
 * @returns {number}
 */
function getModelTurnCount(scenarioId) {
	const scenario = SCENARIOS[scenarioId];
	if (!isMultiTurnScenario(scenario)) { return 1; }
	return scenario.turns.filter(t => t.kind !== 'user').length;
}

/**
 * Register a scenario dynamically. Test files call this to add
 * scenarios that are only relevant to them.
 * @param {string} id - unique scenario identifier
 * @param {StreamChunk[] | MultiTurnScenario} definition - scenario data
 */
function registerScenario(id, definition) {
	SCENARIOS[id] = definition;
}
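
// Example (illustrative): registering a hypothetical two-turn scenario.
//
//   registerScenario('edit-then-summarize', {
//       type: 'multi-turn',
//       turns: [
//           { kind: 'tool-calls', toolCalls: [{ toolNamePattern: /edit/i, arguments: { query: 'x' } }] },
//           { kind: 'content', chunks: new ScenarioBuilder().stream(['Done', '.']).build() },
//       ],
//   });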

/**
 * Return the IDs of all currently registered scenarios.
 * @returns {string[]}
 */
function getScenarioIds() {
	return Object.keys(SCENARIOS);
}

module.exports = { startServer, SCENARIOS, ScenarioBuilder, registerScenario, getScenarioIds, getUserTurns, getModelTurnCount };