Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/endpoint/node/test/automodeService.spec.ts
13405 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { RequestType } from '@vscode/copilot-api';
7
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
8
import type { ChatRequest } from 'vscode';
9
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
10
import { ChatLocation } from '../../../../vscodeTypes';
11
import { IAuthenticationService } from '../../../authentication/common/authentication';
12
import { ConfigKey, IConfigurationService } from '../../../configuration/common/configurationService';
13
import { DefaultsOnlyConfigurationService } from '../../../configuration/common/defaultsOnlyConfigurationService';
14
import { InMemoryConfigurationService } from '../../../configuration/test/common/inMemoryConfigurationService';
15
import { NullEnvService } from '../../../env/common/nullEnvService';
16
import { ILogService } from '../../../log/common/logService';
17
import { IChatEndpoint } from '../../../networking/common/networking';
18
import { NullRequestLogger } from '../../../requestLogger/node/nullRequestLogger';
19
import { IExperimentationService, NullExperimentationService } from '../../../telemetry/common/nullExperimentationService';
20
import { ITelemetryService } from '../../../telemetry/common/telemetry';
21
import { ICAPIClientService } from '../../common/capiClient';
22
import { AutomodeService } from '../automodeService';
23
24
/**
 * Builds a minimal, case-insensitive headers object exposing only `get`,
 * mirroring the subset of the Fetch `Headers` API that the code under test
 * reads from mock responses. Missing headers resolve to `null`, like the
 * real API.
 */
function createMockHeaders(entries: Record<string, string> = {}): { get(name: string): string | null } {
	// Normalize all keys up front so lookups are case-insensitive.
	const normalized = new Map<string, string>(
		Object.entries(entries).map(([name, value]) => [name.toLowerCase(), value])
	);
	return {
		get(name: string): string | null {
			return normalized.get(name.toLowerCase()) ?? null;
		},
	};
}
31
32
/**
33
* Creates a mock response with a real stream-backed body so that middleware
34
* cloning (tee) works correctly. Token responses go through the middleware
35
* pipeline where {@link cloneResponse} reads the body stream.
36
*/
37
function makeMockTokenResponse(body: { available_models: string[]; expires_at: number; session_token: string }) {
38
const serialized = JSON.stringify(body);
39
return {
40
status: 200,
41
headers: createMockHeaders(),
42
body: new ReadableStream<Uint8Array>({
43
start(controller) {
44
controller.enqueue(new TextEncoder().encode(serialized));
45
controller.close();
46
},
47
}),
48
async text() { return serialized; },
49
async json() { return JSON.parse(serialized); },
50
};
51
}
52
53
describe('AutomodeService', () => {
54
	// Service under test; recreated per test via createService() after mocks
	// have been adjusted.
	let automodeService: AutomodeService;
	// Mocked collaborators injected into the service's constructor.
	let mockCAPIClientService: ICAPIClientService;
	let mockAuthService: IAuthenticationService;
	let mockLogService: ILogService;
	let mockInstantiationService: IInstantiationService;
	let mockExpService: IExperimentationService;
	let configurationService: IConfigurationService;
	let mockChatEndpoint: IChatEndpoint;
	let envService: NullEnvService;
	// Intersection type lets tests inspect sendMSFTTelemetryEvent call args
	// without casting at each use site.
	let mockTelemetryService: ITelemetryService & { sendMSFTTelemetryEvent: ReturnType<typeof vi.fn> };
64
65
function createEndpoint(model: string, provider: string, overrides?: Partial<IChatEndpoint>): IChatEndpoint {
66
return {
67
model,
68
modelProvider: provider,
69
displayName: model,
70
maxOutputTokens: 4096,
71
supportsToolCalls: true,
72
supportsVision: false,
73
supportsPrediction: false,
74
showInModelPicker: true,
75
isDefault: false,
76
isFallback: false,
77
policy: 'enabled',
78
...overrides,
79
} as unknown as IChatEndpoint;
80
}
81
82
function createService(): AutomodeService {
83
return new AutomodeService(
84
mockCAPIClientService,
85
mockAuthService,
86
mockLogService,
87
mockInstantiationService,
88
mockExpService,
89
configurationService,
90
envService,
91
mockTelemetryService,
92
new NullRequestLogger()
93
);
94
}
95
96
function mockApiResponse(available_models: string[], session_token = 'test-token', expiresInSeconds = 3600): void {
97
(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockResolvedValue(
98
makeMockTokenResponse({
99
available_models,
100
expires_at: Math.floor(Date.now() / 1000) + expiresInSeconds,
101
session_token,
102
})
103
);
104
}
105
106
function enableRouter(): void {
107
(configurationService as InMemoryConfigurationService).setConfig(
108
ConfigKey.TeamInternal.UseAutoModeRouting,
109
true
110
);
111
}
112
113
	beforeEach(() => {
		// Default endpoint handed to most tests; provider matters for the
		// same-provider affinity tests below.
		mockChatEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI');

		// By default every CAPI request resolves to a token response
		// advertising two OpenAI models; individual tests override
		// makeRequest when they need router responses or different models.
		mockCAPIClientService = {
			makeRequest: vi.fn().mockResolvedValue(
				makeMockTokenResponse({
					available_models: ['gpt-4o', 'gpt-4o-mini'],
					expires_at: Math.floor(Date.now() / 1000) + 3600,
					session_token: 'test-token'
				})
			)
		} as unknown as ICAPIClientService;

		mockAuthService = {
			getCopilotToken: vi.fn().mockResolvedValue({ token: 'test-auth-token' }),
			onDidAuthenticationChange: vi.fn().mockReturnValue({ dispose: vi.fn() })
		} as unknown as IAuthenticationService;

		mockLogService = {
			trace: vi.fn(),
			debug: vi.fn(),
			info: vi.fn(),
			warn: vi.fn(),
			error: vi.fn()
		} as unknown as ILogService;

		// createInstance(ctor, endpoint) would normally wrap the endpoint;
		// the mock returns it unchanged so tests can assert on `result.model`
		// directly.
		mockInstantiationService = {
			createInstance: vi.fn().mockImplementation(
				(_ctor: any, wrappedEndpoint: IChatEndpoint) => wrappedEndpoint
			)
		} as unknown as IInstantiationService;

		mockExpService = new NullExperimentationService();

		// Real in-memory config on top of defaults, so enableRouter() can
		// flip the routing flag per test.
		configurationService = new InMemoryConfigurationService(new DefaultsOnlyConfigurationService());
		envService = new NullEnvService();
		mockTelemetryService = {
			sendTelemetryEvent: vi.fn(),
			sendMSFTTelemetryEvent: vi.fn(),
			sendTelemetryErrorEvent: vi.fn(),
			sendMSFTTelemetryErrorEvent: vi.fn(),
			sendSharedTelemetryEvent: vi.fn(),
			sendEnhancedGHTelemetryEvent: vi.fn(),
		} as unknown as ITelemetryService & { sendMSFTTelemetryEvent: ReturnType<typeof vi.fn> };
	});
158
159
	// Safety net: tests that enable fake timers (refresh/timeout tests) rely
	// on this to restore real timers even if an assertion fails mid-test.
	afterEach(() => {
		vi.useRealTimers();
	});
162
163
	// Routing gate: only panel chat with the UseAutoModeRouting flag enabled
	// should consult the model-router API; other locations and disabled
	// config must never hit it.
	describe('resolveAutoModeEndpoint', () => {
		it('should not use router for inline chat', async () => {
			enableRouter();

			automodeService = createService();

			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Editor,
				prompt: 'test prompt',
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint]);

			// Verify that router API was NOT called for inline chat
			expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(
				expect.anything(),
				expect.objectContaining({ type: RequestType.ModelRouter })
			);
		});

		it('should use router for panel chat when enabled', async () => {
			enableRouter();

			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			// Mock makeRequest to handle both auto mode token and router API calls
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					// Successful routing decision choosing gpt-4o.
					return Promise.resolve({
						ok: true,
						status: 200,
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({
							predicted_label: 'needs_reasoning',
							confidence: 0.85,
							latency_ms: 50,
							chosen_model: 'gpt-4o',
							candidate_models: ['gpt-4o', 'gpt-4o-mini'],
							scores: { needs_reasoning: 0.85, no_reasoning: 0.15 },
							sticky_override: false
						}))
					});
				}
				// All other requests mint an auto-mode token.
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['gpt-4o', 'gpt-4o-mini'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token'
					})
				);
			});

			automodeService = createService();

			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-router-panel'
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint, gpt4oEndpoint]);

			// Verify that router API was called for panel chat
			expect(mockCAPIClientService.makeRequest).toHaveBeenCalledWith(
				expect.objectContaining({ method: 'POST' }),
				expect.objectContaining({ type: RequestType.ModelRouter })
			);
			// Router should have selected gpt-4o
			expect(result.model).toBe('gpt-4o');
		});

		it('should include context signals in router request body', async () => {
			enableRouter();

			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			// Capture the serialized router request body for the assertions below.
			let capturedBody: string | undefined;
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((req: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					capturedBody = req.body;
					return Promise.resolve({
						ok: true,
						status: 200,
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({
							predicted_label: 'needs_reasoning',
							confidence: 0.85,
							latency_ms: 50,
							chosen_model: 'gpt-4o',
							candidate_models: ['gpt-4o', 'gpt-4o-mini'],
							scores: { needs_reasoning: 0.85, no_reasoning: 0.15 },
							sticky_override: false
						}))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['gpt-4o', 'gpt-4o-mini'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token'
					})
				);
			});

			automodeService = createService();

			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				references: [{ id: 'ref1', value: 'some ref' } as any],
				sessionId: 'test-session-123',
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint, gpt4oEndpoint]);

			// First-turn request: turn_number 1 and no previous_model yet.
			expect(capturedBody).toBeDefined();
			const parsed = JSON.parse(capturedBody!);
			expect(parsed.prompt).toBe('test prompt');
			expect(parsed.prompt_char_count).toBe('test prompt'.length);
			expect(parsed.reference_count).toBe(1);
			expect(parsed.turn_number).toBe(1);
			expect(parsed.session_id).toBe('test-session-123');
			expect(parsed.previous_model).toBeUndefined();
		});

		it('should not use router when routing is not enabled', async () => {
			// Routing not enabled via UseAutoModeRouting config
			automodeService = createService();

			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt'
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint]);

			// Verify that router API was NOT called (exp / config disabled)
			expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(
				expect.anything(),
				expect.objectContaining({ type: RequestType.ModelRouter })
			);
		});

		it('should not use router for terminal chat', async () => {
			enableRouter();

			automodeService = createService();

			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Terminal,
				prompt: 'test prompt'
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint]);

			// Verify that router API was NOT called for terminal chat
			expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(
				expect.anything(),
				expect.objectContaining({ type: RequestType.ModelRouter })
			);
		});
	});
325
326
	// Default (non-router) selection: first advertised model with a known
	// endpoint wins, with same-provider affinity applied on token refresh.
	describe('model selection', () => {
		it('should pick the first available model with a known endpoint on first mint', async () => {
			const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');
			mockApiResponse(['claude-sonnet', 'gpt-4o']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-first-mint'
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			// claude-sonnet is first in available_models and has a known endpoint
			expect(result.model).toBe('claude-sonnet');
		});

		it('should skip models without known endpoints and pick the first match', async () => {
			const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			// available_models has 'unknown-model' first, but no known endpoint for it
			mockApiResponse(['unknown-model', 'gpt-4o']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-skip-unknown'
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint]);
			expect(result.model).toBe('gpt-4o');
		});

		it('should prefer same provider model on token refresh', async () => {
			vi.useFakeTimers();
			const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const openaiMiniEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			// First mint: gpt-4o is first available, token expires in 1s to trigger immediate refresh
			mockApiResponse(['gpt-4o', 'claude-sonnet'], 'token-1', 1);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-affinity'
			};

			const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, openaiMiniEndpoint, claudeEndpoint]);
			expect(firstResult.model).toBe('gpt-4o');

			// Set up new token response, then advance timers to trigger refresh
			mockApiResponse(['claude-sonnet', 'gpt-4o-mini'], 'token-2');
			await vi.advanceTimersByTimeAsync(1);

			const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, openaiMiniEndpoint, claudeEndpoint]);
			// Should pick gpt-4o-mini because it's the first model from the same provider (OpenAI)
			expect(secondResult.model).toBe('gpt-4o-mini');
			vi.useRealTimers();
		});

		it('should fall back to first available model when no same-provider model exists on refresh', async () => {
			vi.useFakeTimers();
			const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			// First mint: gpt-4o is first available, token expires in 1s to trigger immediate refresh
			mockApiResponse(['gpt-4o', 'claude-sonnet'], 'token-1', 1);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-fallback'
			};

			const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			expect(firstResult.model).toBe('gpt-4o');

			// Set up new token response with only Anthropic models, then advance timers
			mockApiResponse(['claude-sonnet'], 'token-2');
			await vi.advanceTimersByTimeAsync(1);

			const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			// No OpenAI models available, should fall back to first available (claude-sonnet)
			expect(secondResult.model).toBe('claude-sonnet');
		});

		it('should return cached endpoint when session token has not changed', async () => {
			const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			mockApiResponse(['gpt-4o', 'claude-sonnet'], 'token-same');

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-cached'
			};

			const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			// Same object reference since token didn't change
			expect(secondResult).toBe(firstResult);
		});

		it('should throw when no available models match any known endpoint', async () => {
			mockApiResponse(['unknown-model-1', 'unknown-model-2']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-no-match'
			};

			await expect(
				automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint])
			).rejects.toThrow('no available model found');
		});
	});
450
451
describe('router fallback', () => {
452
function mockRouterResponse(available_models: string[], routerResult: { chosen_model: string; candidate_models: string[] }, session_token = 'test-token'): void {
453
(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
454
if (opts?.type === RequestType.ModelRouter) {
455
return Promise.resolve({
456
ok: true,
457
status: 200,
458
headers: createMockHeaders(),
459
text: vi.fn().mockResolvedValue(JSON.stringify({
460
predicted_label: 'needs_reasoning',
461
confidence: 0.9,
462
latency_ms: 30,
463
chosen_model: routerResult.chosen_model,
464
candidate_models: routerResult.candidate_models,
465
scores: { needs_reasoning: 0.9, no_reasoning: 0.1 },
466
sticky_override: false
467
}))
468
});
469
}
470
return Promise.resolve(
471
makeMockTokenResponse({
472
available_models,
473
expires_at: Math.floor(Date.now() / 1000) + 3600,
474
session_token,
475
})
476
);
477
});
478
}
479
480
		it('should fall back to default selection when router fetch throws', async () => {
			enableRouter();
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			// Token minting succeeds, but every router call rejects outright.
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					return Promise.reject(new Error('Network error'));
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['claude-sonnet', 'gpt-4o'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-router-error'
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [claudeEndpoint, gpt4oEndpoint]);
			// Should fall back to first available model (claude-sonnet)
			expect(result.model).toBe('claude-sonnet');
			// The failure must be logged, not swallowed silently.
			expect(mockLogService.error).toHaveBeenCalledWith(
				expect.stringContaining('Failed to get routed model'),
				expect.any(String)
			);
		});

		it('should fall back to default selection with routerTimeout reason when router times out', async () => {
			vi.useFakeTimers();
			enableRouter();
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((req: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					// Return a pending promise that rejects when the signal is aborted,
					// simulating a real in-flight request cancelled by the 1s timeout.
					return new Promise((_resolve, reject) => {
						const signal: AbortSignal = req.signal;
						if (signal?.aborted) {
							const err = new Error('The operation was aborted');
							err.name = 'AbortError';
							reject(err);
							return;
						}
						signal?.addEventListener('abort', () => {
							const err = new Error('The operation was aborted');
							err.name = 'AbortError';
							reject(err);
						});
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['claude-sonnet', 'gpt-4o'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-router-timeout'
			};

			// Start resolution before advancing time so the timeout is pending.
			const resultPromise = automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [claudeEndpoint, gpt4oEndpoint]);
			// Advance past the 1-second router timeout to trigger the abort
			await vi.advanceTimersByTimeAsync(1000);

			const result = await resultPromise;
			// Should fall back to first available model (claude-sonnet)
			expect(result.model).toBe('claude-sonnet');
			expect(mockLogService.error).toHaveBeenCalledWith(
				expect.stringContaining('routerTimeout'),
				expect.any(String)
			);
		});
567
568
		it('should fall back to default selection when router returns unknown model', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			// Router chooses a model we have no endpoint for.
			mockRouterResponse(
				['gpt-4o'],
				{ chosen_model: 'unknown-model', candidate_models: ['unknown-model'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-unknown-router-model'
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);
			// Router returned unknown model, should fall back to first available
			expect(result.model).toBe('gpt-4o');
		});

		it('should skip router on subsequent turns and return cached model', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const gpt4oMiniEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			// First turn: router picks gpt-4o
			mockRouterResponse(
				['gpt-4o', 'gpt-4o-mini', 'claude-sonnet'],
				{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'gpt-4o-mini', 'claude-sonnet'] }
			);

			automodeService = createService();
			const chatRequest1: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'first question',
				sessionId: 'session-same-provider'
			};

			const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest1 as ChatRequest, [gpt4oEndpoint, gpt4oMiniEndpoint, claudeEndpoint]);
			expect(firstResult.model).toBe('gpt-4o');

			// Second turn: router would return claude, but should be skipped (cached gpt-4o returned)
			mockRouterResponse(
				['gpt-4o', 'gpt-4o-mini', 'claude-sonnet'],
				{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet', 'gpt-4o-mini'] }
			);

			// Same sessionId as turn 1 — this is what keys the cache.
			const chatRequest2: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'second question',
				sessionId: 'session-same-provider'
			};

			const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest2 as ChatRequest, [gpt4oEndpoint, gpt4oMiniEndpoint, claudeEndpoint]);
			// Router is skipped after first turn — cached model returned
			expect(secondResult.model).toBe('gpt-4o');
		});
627
628
		it('should re-route on subsequent turns after invalidateRouterCache', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			// First turn: router picks gpt-4o
			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'claude-sonnet'] }
			);

			automodeService = createService();
			const chatRequest1: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'first question',
				sessionId: 'session-no-same-provider'
			};

			const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest1 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
			expect(firstResult.model).toBe('gpt-4o');

			// Invalidate the cache (simulates compaction)
			automodeService.invalidateRouterCache({ sessionId: 'session-no-same-provider' } as ChatRequest);

			// Second turn: router is re-run after invalidation, picks claude-sonnet
			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet'] }
			);

			const chatRequest2: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'second question',
				sessionId: 'session-no-same-provider'
			};

			const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest2 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
			expect(secondResult.model).toBe('claude-sonnet');
		});

		it('should not re-route when prompt has not changed (tool-calling iteration)', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'claude-sonnet'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'same prompt',
				sessionId: 'session-same-prompt'
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);

			// Reset to track further calls
			const routerCallCount = (mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mock.calls
				.filter((call: any[]) => call[1]?.type === RequestType.ModelRouter).length;
			expect(routerCallCount).toBe(1);

			// Second call with same prompt — should NOT call router again
			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);

			// Router call count must be unchanged after the repeat resolution.
			const routerCallCount2 = (mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mock.calls
				.filter((call: any[]) => call[1]?.type === RequestType.ModelRouter).length;
			expect(routerCallCount2).toBe(1);
		});
699
700
		it('should skip router on subsequent turns after image request routed on first turn', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }
			);

			automodeService = createService();

			// Turn 1: image request — router IS called now
			const imageRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'describe this image',
				sessionId: 'session-transient-fallback',
				references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
			};

			await automodeService.resolveAutoModeEndpoint(imageRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);

			expect(mockCAPIClientService.makeRequest).toHaveBeenCalledWith(
				expect.anything(),
				expect.objectContaining({ type: RequestType.ModelRouter })
			);
			// Reset mock call tracking
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockClear();
			// mockClear wipes the implementation too, so re-install it.
			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }
			);

			// Turn 2: new prompt — router should NOT be called (skipRouter after first turn)
			const textRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'write a function',
				sessionId: 'session-transient-fallback',
			};

			await automodeService.resolveAutoModeEndpoint(textRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);

			// Router should not have been called on turn 2
			expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(
				expect.anything(),
				expect.objectContaining({ type: RequestType.ModelRouter })
			);
		});

		it('should send has_image to router for image requests', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'describe this image',
				sessionId: 'session-vision-router',
				references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
			expect(result.model).toBe('gpt-4o');
			// Verify router WAS called (not skipped)
			const routerCall = (mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mock.calls.find(([, opts]) => opts?.type === RequestType.ModelRouter);
			expect(routerCall).toBeDefined();
			// Inspect the serialized request body for the has_image signal.
			const [routerRequestBody] = routerCall!;
			expect(JSON.parse(routerRequestBody.body).has_image).toBe(true);
		});
775
776
		it('should fall back to vision model when router returns no_vision_models error', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			// Router responds 400 with a structured error code in the JSON body.
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					return Promise.resolve({
						ok: false,
						status: 400,
						statusText: 'Bad Request',
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({ error: 'no_vision_models' }))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['gpt-4o', 'claude-sonnet'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'describe this image',
				sessionId: 'session-no-vision',
				references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
			// Should fall back to default selection, then vision fallback picks gpt-4o
			expect(result.model).toBe('gpt-4o');
			// Verify the router was called and the error code was passed through from the server
			expect(mockCAPIClientService.makeRequest).toHaveBeenCalledWith(
				expect.anything(),
				expect.objectContaining({ type: RequestType.ModelRouter })
			);
			expect(mockLogService.error).toHaveBeenCalledWith(
				expect.stringContaining('(no_vision_models)'),
				expect.anything()
			);
		});

		it('should fall back to routerError when router returns non-JSON error body', async () => {
			// When the router returns an HTML error page or other non-JSON body,
			// errorCode should be undefined and fallbackReason should be 'routerError'
			// — NOT the raw response body leaked into telemetry.
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					// Simulates a proxy/gateway failure returning HTML, not JSON.
					return Promise.resolve({
						ok: false,
						status: 502,
						statusText: 'Bad Gateway',
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue('<html><body>Bad Gateway</body></html>')
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['gpt-4o'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-html-error',
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);
			expect(result.model).toBe('gpt-4o');
			// Should log generic 'routerError', NOT the HTML body
			expect(mockLogService.error).toHaveBeenCalledWith(
				expect.stringContaining('(routerError)'),
				expect.anything()
			);
		});
863
864
it('should fall back to routerError when router returns JSON without error field', async () => {
865
// When the server returns valid JSON but without an 'error' field,
866
// errorCode should be undefined and fallbackReason should be 'routerError'.
867
enableRouter();
868
const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
869
870
(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
871
if (opts?.type === RequestType.ModelRouter) {
872
return Promise.resolve({
873
ok: false,
874
status: 400,
875
statusText: 'Bad Request',
876
headers: createMockHeaders(),
877
text: vi.fn().mockResolvedValue(JSON.stringify({ message: 'something went wrong' }))
878
});
879
}
880
return Promise.resolve(
881
makeMockTokenResponse({
882
available_models: ['gpt-4o'],
883
expires_at: Math.floor(Date.now() / 1000) + 3600,
884
session_token: 'test-token',
885
})
886
);
887
});
888
889
automodeService = createService();
890
const chatRequest: Partial<ChatRequest> = {
891
location: ChatLocation.Panel,
892
prompt: 'test prompt',
893
sessionId: 'session-json-no-error',
894
};
895
896
const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);
897
expect(result.model).toBe('gpt-4o');
898
expect(mockLogService.error).toHaveBeenCalledWith(
899
expect.stringContaining('(routerError)'),
900
expect.anything()
901
);
902
});
903
904
it('should be a no-op when invalidateRouterCache is called with unknown conversationId', async () => {
	automodeService = createService();
	const unknownSession = { sessionId: 'nonexistent-session' } as ChatRequest;
	// Invalidating a session that was never cached must not throw.
	automodeService.invalidateRouterCache(unknownSession);
});
it('should re-run router after invalidateRouterCache is called', async () => {
911
enableRouter();
912
const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
913
const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');
914
915
mockRouterResponse(
916
['gpt-4o', 'claude-sonnet'],
917
{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }
918
);
919
920
automodeService = createService();
921
const chatRequest: Partial<ChatRequest> = {
922
location: ChatLocation.Panel,
923
prompt: 'first question',
924
sessionId: 'session-invalidate'
925
};
926
927
const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
928
expect(firstResult.model).toBe('gpt-4o');
929
930
// Without invalidation, changing prompt should still return cached model
931
const chatRequest2: Partial<ChatRequest> = {
932
location: ChatLocation.Panel,
933
prompt: 'second question',
934
sessionId: 'session-invalidate'
935
};
936
const cachedResult = await automodeService.resolveAutoModeEndpoint(chatRequest2 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
937
expect(cachedResult.model).toBe('gpt-4o');
938
939
// Invalidate the cache
940
automodeService.invalidateRouterCache({ sessionId: 'session-invalidate' } as ChatRequest);
941
942
// Now the router should re-run and pick claude
943
mockRouterResponse(
944
['gpt-4o', 'claude-sonnet'],
945
{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet'] }
946
);
947
948
const chatRequest3: Partial<ChatRequest> = {
949
location: ChatLocation.Panel,
950
prompt: 'third question',
951
sessionId: 'session-invalidate'
952
};
953
const reEvalResult = await automodeService.resolveAutoModeEndpoint(chatRequest3 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
954
expect(reEvalResult.model).toBe('claude-sonnet');
955
});
956
});
957
958
describe('vision fallback', () => {
959
it('should fall back to vision-capable model when selected model does not support vision', async () => {
960
const nonVisionEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI', { supportsVision: false });
961
const visionEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
962
mockApiResponse(['gpt-4o-mini', 'gpt-4o']);
963
964
automodeService = createService();
965
const chatRequest: Partial<ChatRequest> = {
966
location: ChatLocation.Panel,
967
prompt: 'describe this image',
968
sessionId: 'session-vision-fallback',
969
references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
970
};
971
972
const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [nonVisionEndpoint, visionEndpoint]);
973
expect(result.model).toBe('gpt-4o');
974
});
975
976
it('should keep vision-capable model when it is already selected', async () => {
977
const visionEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
978
const nonVisionEndpoint = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });
979
mockApiResponse(['gpt-4o', 'claude-sonnet']);
980
981
automodeService = createService();
982
const chatRequest: Partial<ChatRequest> = {
983
location: ChatLocation.Panel,
984
prompt: 'describe this image',
985
sessionId: 'session-vision-already-ok',
986
references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
987
};
988
989
const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [visionEndpoint, nonVisionEndpoint]);
990
expect(result.model).toBe('gpt-4o');
991
});
992
993
it('should keep non-vision model when request has no image', async () => {
994
const nonVisionEndpoint = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });
995
const visionEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
996
mockApiResponse(['claude-sonnet', 'gpt-4o']);
997
998
automodeService = createService();
999
const chatRequest: Partial<ChatRequest> = {
1000
location: ChatLocation.Panel,
1001
prompt: 'write a function',
1002
sessionId: 'session-no-image'
1003
};
1004
1005
const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [nonVisionEndpoint, visionEndpoint]);
1006
expect(result.model).toBe('claude-sonnet');
1007
});
1008
1009
it('should warn and keep selected model when no vision-capable model is available', async () => {
1010
const nonVisionEndpoint1 = createEndpoint('gpt-4o-mini', 'OpenAI', { supportsVision: false });
1011
const nonVisionEndpoint2 = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });
1012
mockApiResponse(['gpt-4o-mini', 'claude-sonnet']);
1013
1014
automodeService = createService();
1015
const chatRequest: Partial<ChatRequest> = {
1016
location: ChatLocation.Panel,
1017
prompt: 'describe this image',
1018
sessionId: 'session-no-vision-available',
1019
references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
1020
};
1021
1022
const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [nonVisionEndpoint1, nonVisionEndpoint2]);
1023
// No vision model available, should keep the first available model and warn
1024
expect(result.model).toBe('gpt-4o-mini');
1025
expect(mockLogService.warn).toHaveBeenCalledWith(
1026
expect.stringContaining('no vision-capable model')
1027
);
1028
});
1029
});
1030
1031
describe('routerModelSelection telemetry', () => {
1032
function mockRouterResponse(available_models: string[], routerResult: { chosen_model: string; candidate_models: string[] }, session_token = 'test-token'): void {
1033
(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
1034
if (opts?.type === RequestType.ModelRouter) {
1035
return Promise.resolve({
1036
ok: true,
1037
status: 200,
1038
headers: createMockHeaders(),
1039
text: vi.fn().mockResolvedValue(JSON.stringify({
1040
predicted_label: 'needs_reasoning',
1041
confidence: 0.9,
1042
latency_ms: 30,
1043
chosen_model: routerResult.chosen_model,
1044
candidate_models: routerResult.candidate_models,
1045
scores: { needs_reasoning: 0.9, no_reasoning: 0.1 },
1046
sticky_override: false
1047
}))
1048
});
1049
}
1050
return Promise.resolve(
1051
makeMockTokenResponse({
1052
available_models,
1053
expires_at: Math.floor(Date.now() / 1000) + 3600,
1054
session_token,
1055
})
1056
);
1057
});
1058
}
1059
1060
it('should emit routerModelSelection with candidateModel and actualModel when router is used', async () => {
1061
enableRouter();
1062
const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
1063
const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');
1064
1065
mockRouterResponse(
1066
['gpt-4o', 'claude-sonnet'],
1067
{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'claude-sonnet'] }
1068
);
1069
1070
automodeService = createService();
1071
const chatRequest: Partial<ChatRequest> = {
1072
location: ChatLocation.Panel,
1073
prompt: 'test prompt',
1074
sessionId: 'session-telemetry-test'
1075
};
1076
1077
await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
1078
1079
const telemetryCalls = mockTelemetryService.sendMSFTTelemetryEvent.mock.calls;
1080
const selectionEvent = telemetryCalls.find((call: unknown[]) => call[0] === 'automode.routerModelSelection');
1081
expect(selectionEvent).toBeDefined();
1082
expect(selectionEvent![1]).toMatchObject({
1083
candidateModel: 'gpt-4o',
1084
actualModel: 'gpt-4o',
1085
overrideReason: 'none',
1086
});
1087
});
1088
1089
it('should emit overrideReason=clientOverride when vision fallback changes the model', async () => {
1090
enableRouter();
1091
const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
1092
const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });
1093
1094
// Router picks claude-sonnet (no vision), vision fallback should override to gpt-4o
1095
mockRouterResponse(
1096
['claude-sonnet', 'gpt-4o'],
1097
{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet', 'gpt-4o'] }
1098
);
1099
1100
automodeService = createService();
1101
const chatRequest: Partial<ChatRequest> = {
1102
location: ChatLocation.Panel,
1103
prompt: 'describe this image',
1104
sessionId: 'session-telemetry-vision',
1105
references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
1106
};
1107
1108
await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
1109
1110
const telemetryCalls = mockTelemetryService.sendMSFTTelemetryEvent.mock.calls;
1111
const selectionEvent = telemetryCalls.find((call: unknown[]) => call[0] === 'automode.routerModelSelection');
1112
expect(selectionEvent).toBeDefined();
1113
expect(selectionEvent![1]).toMatchObject({
1114
candidateModel: 'claude-sonnet',
1115
actualModel: 'gpt-4o',
1116
overrideReason: 'clientOverride',
1117
});
1118
});
1119
1120
it('should not emit routerModelSelection when router fails', async () => {
1121
enableRouter();
1122
const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
1123
1124
mockRouterResponse(
1125
['gpt-4o'],
1126
{ chosen_model: 'unknown-model', candidate_models: ['unknown-model'] }
1127
);
1128
1129
automodeService = createService();
1130
const chatRequest: Partial<ChatRequest> = {
1131
location: ChatLocation.Panel,
1132
prompt: 'test prompt',
1133
sessionId: 'session-telemetry-no-emit'
1134
};
1135
1136
await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);
1137
1138
const telemetryCalls = mockTelemetryService.sendMSFTTelemetryEvent.mock.calls;
1139
const selectionEvent = telemetryCalls.find((call: unknown[]) => call[0] === 'automode.routerModelSelection');
1140
// candidateModel is not set when router returns unknown model, so event should not emit
1141
expect(selectionEvent).toBeUndefined();
1142
});
1143
});
1144
1145
describe('available_models / knownEndpoints sync', () => {
1146
function mockRouterResponse(available_models: string[], routerResult: { chosen_model: string; candidate_models: string[] }, session_token = 'test-token'): void {
1147
(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
1148
if (opts?.type === RequestType.ModelRouter) {
1149
return Promise.resolve({
1150
ok: true,
1151
status: 200,
1152
headers: createMockHeaders(),
1153
text: vi.fn().mockResolvedValue(JSON.stringify({
1154
predicted_label: 'no_reasoning',
1155
confidence: 0.96,
1156
latency_ms: 23,
1157
chosen_model: routerResult.chosen_model,
1158
candidate_models: routerResult.candidate_models,
1159
scores: { needs_reasoning: 0.04, no_reasoning: 0.96 },
1160
sticky_override: false
1161
}))
1162
});
1163
}
1164
return Promise.resolve(
1165
makeMockTokenResponse({
1166
available_models,
1167
expires_at: Math.floor(Date.now() / 1000) + 3600,
1168
session_token,
1169
})
1170
);
1171
});
1172
}
1173
1174
it('should filter out available_models that have no matching knownEndpoint before sending to router', async () => {
1175
enableRouter();
1176
const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
1177
let capturedBody: string | undefined;
1178
(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((req: any, opts: any) => {
1179
if (opts?.type === RequestType.ModelRouter) {
1180
capturedBody = req.body;
1181
return Promise.resolve({
1182
ok: true,
1183
status: 200,
1184
headers: createMockHeaders(),
1185
text: vi.fn().mockResolvedValue(JSON.stringify({
1186
predicted_label: 'no_reasoning',
1187
confidence: 0.96,
1188
latency_ms: 23,
1189
chosen_model: 'gpt-4o',
1190
candidate_models: ['gpt-4o'],
1191
scores: { needs_reasoning: 0.04, no_reasoning: 0.96 },
1192
sticky_override: false
1193
}))
1194
});
1195
}
1196
return Promise.resolve(
1197
makeMockTokenResponse({
1198
available_models: ['claude-haiku-4.5', 'gpt-4o', 'claude-sonnet-4.6'],
1199
expires_at: Math.floor(Date.now() / 1000) + 3600,
1200
session_token: 'test-token',
1201
})
1202
);
1203
});
1204
1205
automodeService = createService();
1206
const chatRequest: Partial<ChatRequest> = {
1207
location: ChatLocation.Panel,
1208
prompt: 'what day is today',
1209
sessionId: 'session-filter-models'
1210
};
1211
1212
await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);
1213
1214
expect(capturedBody).toBeDefined();
1215
const parsed = JSON.parse(capturedBody!);
1216
expect(parsed.available_models).toEqual(['gpt-4o']);
1217
expect(parsed.available_models).not.toContain('claude-haiku-4.5');
1218
expect(parsed.available_models).not.toContain('claude-sonnet-4.6');
1219
expect(mockLogService.info).toHaveBeenCalledWith(
1220
expect.stringContaining('Filtered 2 unresolvable model(s)')
1221
);
1222
});
1223
1224
it('should iterate all candidate_models when first candidate has no endpoint', async () => {
1225
enableRouter();
1226
const gpt41Endpoint = createEndpoint('gpt-4.1', 'OpenAI');
1227
1228
mockRouterResponse(
1229
['gpt-4.1'],
1230
{ chosen_model: 'gpt-4.1', candidate_models: ['unknown-new-model', 'gpt-4.1'] }
1231
);
1232
1233
automodeService = createService();
1234
const chatRequest: Partial<ChatRequest> = {
1235
location: ChatLocation.Panel,
1236
prompt: 'what day is today',
1237
sessionId: 'session-iterate-candidates'
1238
};
1239
1240
const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt41Endpoint]);
1241
expect(result.model).toBe('gpt-4.1');
1242
});
1243
1244
it('should throw when all available_models are unknown to knownEndpoints', async () => {
1245
enableRouter();
1246
const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
1247
1248
(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
1249
if (opts?.type === RequestType.ModelRouter) {
1250
throw new Error('Router should not be called when no models are routable');
1251
}
1252
return Promise.resolve(
1253
makeMockTokenResponse({
1254
available_models: ['unknown-model-a', 'unknown-model-b'],
1255
expires_at: Math.floor(Date.now() / 1000) + 3600,
1256
session_token: 'test-token',
1257
})
1258
);
1259
});
1260
1261
automodeService = createService();
1262
const chatRequest: Partial<ChatRequest> = {
1263
location: ChatLocation.Panel,
1264
prompt: 'test prompt',
1265
sessionId: 'session-all-unknown'
1266
};
1267
1268
await expect(
1269
automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint])
1270
).rejects.toThrow('no available model found');
1271
expect(mockLogService.warn).toHaveBeenCalledWith(
1272
expect.stringContaining('No available_models matched knownEndpoints')
1273
);
1274
});
1275
});
1276
});
1277
1278