// Source: extensions/copilot/src/platform/endpoint/node/test/automodeService.spec.ts
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { RequestType } from '@vscode/copilot-api';6import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';7import type { ChatRequest } from 'vscode';8import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';9import { ChatLocation } from '../../../../vscodeTypes';10import { IAuthenticationService } from '../../../authentication/common/authentication';11import { ConfigKey, IConfigurationService } from '../../../configuration/common/configurationService';12import { DefaultsOnlyConfigurationService } from '../../../configuration/common/defaultsOnlyConfigurationService';13import { InMemoryConfigurationService } from '../../../configuration/test/common/inMemoryConfigurationService';14import { NullEnvService } from '../../../env/common/nullEnvService';15import { ILogService } from '../../../log/common/logService';16import { IChatEndpoint } from '../../../networking/common/networking';17import { NullRequestLogger } from '../../../requestLogger/node/nullRequestLogger';18import { IExperimentationService, NullExperimentationService } from '../../../telemetry/common/nullExperimentationService';19import { ITelemetryService } from '../../../telemetry/common/telemetry';20import { ICAPIClientService } from '../../common/capiClient';21import { AutomodeService } from '../automodeService';2223function createMockHeaders(entries: Record<string, string> = {}): { get(name: string): string | null } {24const lower: Record<string, string> = {};25for (const [k, v] of Object.entries(entries)) {26lower[k.toLowerCase()] = v;27}28return { get: (name: string) => lower[name.toLowerCase()] ?? 
null };29}3031/**32* Creates a mock response with a real stream-backed body so that middleware33* cloning (tee) works correctly. Token responses go through the middleware34* pipeline where {@link cloneResponse} reads the body stream.35*/36function makeMockTokenResponse(body: { available_models: string[]; expires_at: number; session_token: string }) {37const serialized = JSON.stringify(body);38return {39status: 200,40headers: createMockHeaders(),41body: new ReadableStream<Uint8Array>({42start(controller) {43controller.enqueue(new TextEncoder().encode(serialized));44controller.close();45},46}),47async text() { return serialized; },48async json() { return JSON.parse(serialized); },49};50}5152describe('AutomodeService', () => {53let automodeService: AutomodeService;54let mockCAPIClientService: ICAPIClientService;55let mockAuthService: IAuthenticationService;56let mockLogService: ILogService;57let mockInstantiationService: IInstantiationService;58let mockExpService: IExperimentationService;59let configurationService: IConfigurationService;60let mockChatEndpoint: IChatEndpoint;61let envService: NullEnvService;62let mockTelemetryService: ITelemetryService & { sendMSFTTelemetryEvent: ReturnType<typeof vi.fn> };6364function createEndpoint(model: string, provider: string, overrides?: Partial<IChatEndpoint>): IChatEndpoint {65return {66model,67modelProvider: provider,68displayName: model,69maxOutputTokens: 4096,70supportsToolCalls: true,71supportsVision: false,72supportsPrediction: false,73showInModelPicker: true,74isDefault: false,75isFallback: false,76policy: 'enabled',77...overrides,78} as unknown as IChatEndpoint;79}8081function createService(): AutomodeService {82return new AutomodeService(83mockCAPIClientService,84mockAuthService,85mockLogService,86mockInstantiationService,87mockExpService,88configurationService,89envService,90mockTelemetryService,91new NullRequestLogger()92);93}9495function mockApiResponse(available_models: string[], session_token = 'test-token', 
expiresInSeconds = 3600): void {96(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockResolvedValue(97makeMockTokenResponse({98available_models,99expires_at: Math.floor(Date.now() / 1000) + expiresInSeconds,100session_token,101})102);103}104105function enableRouter(): void {106(configurationService as InMemoryConfigurationService).setConfig(107ConfigKey.TeamInternal.UseAutoModeRouting,108true109);110}111112beforeEach(() => {113mockChatEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI');114115mockCAPIClientService = {116makeRequest: vi.fn().mockResolvedValue(117makeMockTokenResponse({118available_models: ['gpt-4o', 'gpt-4o-mini'],119expires_at: Math.floor(Date.now() / 1000) + 3600,120session_token: 'test-token'121})122)123} as unknown as ICAPIClientService;124125mockAuthService = {126getCopilotToken: vi.fn().mockResolvedValue({ token: 'test-auth-token' }),127onDidAuthenticationChange: vi.fn().mockReturnValue({ dispose: vi.fn() })128} as unknown as IAuthenticationService;129130mockLogService = {131trace: vi.fn(),132debug: vi.fn(),133info: vi.fn(),134warn: vi.fn(),135error: vi.fn()136} as unknown as ILogService;137138mockInstantiationService = {139createInstance: vi.fn().mockImplementation(140(_ctor: any, wrappedEndpoint: IChatEndpoint) => wrappedEndpoint141)142} as unknown as IInstantiationService;143144mockExpService = new NullExperimentationService();145146configurationService = new InMemoryConfigurationService(new DefaultsOnlyConfigurationService());147envService = new NullEnvService();148mockTelemetryService = {149sendTelemetryEvent: vi.fn(),150sendMSFTTelemetryEvent: vi.fn(),151sendTelemetryErrorEvent: vi.fn(),152sendMSFTTelemetryErrorEvent: vi.fn(),153sendSharedTelemetryEvent: vi.fn(),154sendEnhancedGHTelemetryEvent: vi.fn(),155} as unknown as ITelemetryService & { sendMSFTTelemetryEvent: ReturnType<typeof vi.fn> };156});157158afterEach(() => {159vi.useRealTimers();160});161162describe('resolveAutoModeEndpoint', () => {163it('should not use 
router for inline chat', async () => {164enableRouter();165166automodeService = createService();167168const chatRequest: Partial<ChatRequest> = {169location: ChatLocation.Editor,170prompt: 'test prompt',171};172173await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint]);174175// Verify that router API was NOT called for inline chat176expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(177expect.anything(),178expect.objectContaining({ type: RequestType.ModelRouter })179);180});181182it('should use router for panel chat when enabled', async () => {183enableRouter();184185const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');186187// Mock makeRequest to handle both auto mode token and router API calls188(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {189if (opts?.type === RequestType.ModelRouter) {190return Promise.resolve({191ok: true,192status: 200,193headers: createMockHeaders(),194text: vi.fn().mockResolvedValue(JSON.stringify({195predicted_label: 'needs_reasoning',196confidence: 0.85,197latency_ms: 50,198chosen_model: 'gpt-4o',199candidate_models: ['gpt-4o', 'gpt-4o-mini'],200scores: { needs_reasoning: 0.85, no_reasoning: 0.15 },201sticky_override: false202}))203});204}205return Promise.resolve(206makeMockTokenResponse({207available_models: ['gpt-4o', 'gpt-4o-mini'],208expires_at: Math.floor(Date.now() / 1000) + 3600,209session_token: 'test-token'210})211);212});213214automodeService = createService();215216const chatRequest: Partial<ChatRequest> = {217location: ChatLocation.Panel,218prompt: 'test prompt',219sessionId: 'session-router-panel'220};221222const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint, gpt4oEndpoint]);223224// Verify that router API was called for panel chat225expect(mockCAPIClientService.makeRequest).toHaveBeenCalledWith(226expect.objectContaining({ method: 'POST' 
}),227expect.objectContaining({ type: RequestType.ModelRouter })228);229// Router should have selected gpt-4o230expect(result.model).toBe('gpt-4o');231});232233it('should include context signals in router request body', async () => {234enableRouter();235236const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');237238let capturedBody: string | undefined;239(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((req: any, opts: any) => {240if (opts?.type === RequestType.ModelRouter) {241capturedBody = req.body;242return Promise.resolve({243ok: true,244status: 200,245headers: createMockHeaders(),246text: vi.fn().mockResolvedValue(JSON.stringify({247predicted_label: 'needs_reasoning',248confidence: 0.85,249latency_ms: 50,250chosen_model: 'gpt-4o',251candidate_models: ['gpt-4o', 'gpt-4o-mini'],252scores: { needs_reasoning: 0.85, no_reasoning: 0.15 },253sticky_override: false254}))255});256}257return Promise.resolve(258makeMockTokenResponse({259available_models: ['gpt-4o', 'gpt-4o-mini'],260expires_at: Math.floor(Date.now() / 1000) + 3600,261session_token: 'test-token'262})263);264});265266automodeService = createService();267268const chatRequest: Partial<ChatRequest> = {269location: ChatLocation.Panel,270prompt: 'test prompt',271references: [{ id: 'ref1', value: 'some ref' } as any],272sessionId: 'test-session-123',273};274275await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint, gpt4oEndpoint]);276277expect(capturedBody).toBeDefined();278const parsed = JSON.parse(capturedBody!);279expect(parsed.prompt).toBe('test prompt');280expect(parsed.prompt_char_count).toBe('test prompt'.length);281expect(parsed.reference_count).toBe(1);282expect(parsed.turn_number).toBe(1);283expect(parsed.session_id).toBe('test-session-123');284expect(parsed.previous_model).toBeUndefined();285});286287it('should not use router when routing is not enabled', async () => {288// Routing not enabled via UseAutoModeRouting 
config289automodeService = createService();290291const chatRequest: Partial<ChatRequest> = {292location: ChatLocation.Panel,293prompt: 'test prompt'294};295296await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint]);297298// Verify that router API was NOT called (exp / config disabled)299expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(300expect.anything(),301expect.objectContaining({ type: RequestType.ModelRouter })302);303});304305it('should not use router for terminal chat', async () => {306enableRouter();307308automodeService = createService();309310const chatRequest: Partial<ChatRequest> = {311location: ChatLocation.Terminal,312prompt: 'test prompt'313};314315await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint]);316317// Verify that router API was NOT called for terminal chat318expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(319expect.anything(),320expect.objectContaining({ type: RequestType.ModelRouter })321);322});323});324325describe('model selection', () => {326it('should pick the first available model with a known endpoint on first mint', async () => {327const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');328const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');329mockApiResponse(['claude-sonnet', 'gpt-4o']);330331automodeService = createService();332const chatRequest: Partial<ChatRequest> = {333location: ChatLocation.Panel,334prompt: 'test',335sessionId: 'session-first-mint'336};337338const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);339// claude-sonnet is first in available_models and has a known endpoint340expect(result.model).toBe('claude-sonnet');341});342343it('should skip models without known endpoints and pick the first match', async () => {344const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');345// available_models has 'unknown-model' 
first, but no known endpoint for it346mockApiResponse(['unknown-model', 'gpt-4o']);347348automodeService = createService();349const chatRequest: Partial<ChatRequest> = {350location: ChatLocation.Panel,351prompt: 'test',352sessionId: 'session-skip-unknown'353};354355const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint]);356expect(result.model).toBe('gpt-4o');357});358359it('should prefer same provider model on token refresh', async () => {360vi.useFakeTimers();361const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');362const openaiMiniEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI');363const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');364365// First mint: gpt-4o is first available, token expires in 1s to trigger immediate refresh366mockApiResponse(['gpt-4o', 'claude-sonnet'], 'token-1', 1);367368automodeService = createService();369const chatRequest: Partial<ChatRequest> = {370location: ChatLocation.Panel,371prompt: 'test',372sessionId: 'session-affinity'373};374375const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, openaiMiniEndpoint, claudeEndpoint]);376expect(firstResult.model).toBe('gpt-4o');377378// Set up new token response, then advance timers to trigger refresh379mockApiResponse(['claude-sonnet', 'gpt-4o-mini'], 'token-2');380await vi.advanceTimersByTimeAsync(1);381382const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, openaiMiniEndpoint, claudeEndpoint]);383// Should pick gpt-4o-mini because it's the first model from the same provider (OpenAI)384expect(secondResult.model).toBe('gpt-4o-mini');385vi.useRealTimers();386});387388it('should fall back to first available model when no same-provider model exists on refresh', async () => {389vi.useFakeTimers();390const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');391const claudeEndpoint = 
createEndpoint('claude-sonnet', 'Anthropic');

			// First mint: gpt-4o is first available, token expires in 1s to trigger immediate refresh
			mockApiResponse(['gpt-4o', 'claude-sonnet'], 'token-1', 1);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-fallback'
			};

			const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			expect(firstResult.model).toBe('gpt-4o');

			// Set up new token response with only Anthropic models, then advance timers
			mockApiResponse(['claude-sonnet'], 'token-2');
			await vi.advanceTimersByTimeAsync(1);

			const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			// No OpenAI models available, should fall back to first available (claude-sonnet)
			expect(secondResult.model).toBe('claude-sonnet');
		});

		it('should return cached endpoint when session token has not changed', async () => {
			const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			mockApiResponse(['gpt-4o', 'claude-sonnet'], 'token-same');

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-cached'
			};

			const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			// Same object reference since token didn't change
			expect(secondResult).toBe(firstResult);
		});

		it('should throw when no available models match any known endpoint', async () => {
			mockApiResponse(['unknown-model-1', 'unknown-model-2']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-no-match'
			};

			await expect(
				automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint])
			).rejects.toThrow('no available model found');
		});
	});

	describe('router fallback', () => {
		// Queues a router verdict alongside a successful token mint for the given models.
		function mockRouterResponse(available_models: string[], routerResult: { chosen_model: string; candidate_models: string[] }, session_token = 'test-token'): void {
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					return Promise.resolve({
						ok: true,
						status: 200,
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({
							predicted_label: 'needs_reasoning',
							confidence: 0.9,
							latency_ms: 30,
							chosen_model: routerResult.chosen_model,
							candidate_models: routerResult.candidate_models,
							scores: { needs_reasoning: 0.9, no_reasoning: 0.1 },
							sticky_override: false
						}))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models,
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token,
					})
				);
			});
		}

		it('should fall back to default selection when router fetch throws', async () => {
			enableRouter();
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					return Promise.reject(new Error('Network error'));
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['claude-sonnet', 'gpt-4o'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> =
{499location: ChatLocation.Panel,500prompt: 'test prompt',501sessionId: 'session-router-error'502};503504const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [claudeEndpoint, gpt4oEndpoint]);505// Should fall back to first available model (claude-sonnet)506expect(result.model).toBe('claude-sonnet');507expect(mockLogService.error).toHaveBeenCalledWith(508expect.stringContaining('Failed to get routed model'),509expect.any(String)510);511});512513it('should fall back to default selection with routerTimeout reason when router times out', async () => {514vi.useFakeTimers();515enableRouter();516const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');517const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');518519(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((req: any, opts: any) => {520if (opts?.type === RequestType.ModelRouter) {521// Return a pending promise that rejects when the signal is aborted,522// simulating a real in-flight request cancelled by the 1s timeout.523return new Promise((_resolve, reject) => {524const signal: AbortSignal = req.signal;525if (signal?.aborted) {526const err = new Error('The operation was aborted');527err.name = 'AbortError';528reject(err);529return;530}531signal?.addEventListener('abort', () => {532const err = new Error('The operation was aborted');533err.name = 'AbortError';534reject(err);535});536});537}538return Promise.resolve(539makeMockTokenResponse({540available_models: ['claude-sonnet', 'gpt-4o'],541expires_at: Math.floor(Date.now() / 1000) + 3600,542session_token: 'test-token',543})544);545});546547automodeService = createService();548const chatRequest: Partial<ChatRequest> = {549location: ChatLocation.Panel,550prompt: 'test prompt',551sessionId: 'session-router-timeout'552};553554const resultPromise = automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [claudeEndpoint, gpt4oEndpoint]);555// Advance past the 1-second router 
timeout to trigger the abort556await vi.advanceTimersByTimeAsync(1000);557558const result = await resultPromise;559// Should fall back to first available model (claude-sonnet)560expect(result.model).toBe('claude-sonnet');561expect(mockLogService.error).toHaveBeenCalledWith(562expect.stringContaining('routerTimeout'),563expect.any(String)564);565});566567it('should fall back to default selection when router returns unknown model', async () => {568enableRouter();569const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');570571mockRouterResponse(572['gpt-4o'],573{ chosen_model: 'unknown-model', candidate_models: ['unknown-model'] }574);575576automodeService = createService();577const chatRequest: Partial<ChatRequest> = {578location: ChatLocation.Panel,579prompt: 'test prompt',580sessionId: 'session-unknown-router-model'581};582583const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);584// Router returned unknown model, should fall back to first available585expect(result.model).toBe('gpt-4o');586});587588it('should skip router on subsequent turns and return cached model', async () => {589enableRouter();590const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');591const gpt4oMiniEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI');592const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');593594// First turn: router picks gpt-4o595mockRouterResponse(596['gpt-4o', 'gpt-4o-mini', 'claude-sonnet'],597{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'gpt-4o-mini', 'claude-sonnet'] }598);599600automodeService = createService();601const chatRequest1: Partial<ChatRequest> = {602location: ChatLocation.Panel,603prompt: 'first question',604sessionId: 'session-same-provider'605};606607const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest1 as ChatRequest, [gpt4oEndpoint, gpt4oMiniEndpoint, claudeEndpoint]);608expect(firstResult.model).toBe('gpt-4o');609610// Second turn: router would 
return claude, but should be skipped (cached gpt-4o returned)611mockRouterResponse(612['gpt-4o', 'gpt-4o-mini', 'claude-sonnet'],613{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet', 'gpt-4o-mini'] }614);615616const chatRequest2: Partial<ChatRequest> = {617location: ChatLocation.Panel,618prompt: 'second question',619sessionId: 'session-same-provider'620};621622const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest2 as ChatRequest, [gpt4oEndpoint, gpt4oMiniEndpoint, claudeEndpoint]);623// Router is skipped after first turn — cached model returned624expect(secondResult.model).toBe('gpt-4o');625});626627it('should re-route on subsequent turns after invalidateRouterCache', async () => {628enableRouter();629const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');630const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');631632// First turn: router picks gpt-4o633mockRouterResponse(634['gpt-4o', 'claude-sonnet'],635{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'claude-sonnet'] }636);637638automodeService = createService();639const chatRequest1: Partial<ChatRequest> = {640location: ChatLocation.Panel,641prompt: 'first question',642sessionId: 'session-no-same-provider'643};644645const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest1 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);646expect(firstResult.model).toBe('gpt-4o');647648// Invalidate the cache (simulates compaction)649automodeService.invalidateRouterCache({ sessionId: 'session-no-same-provider' } as ChatRequest);650651// Second turn: router is re-run after invalidation, picks claude-sonnet652mockRouterResponse(653['gpt-4o', 'claude-sonnet'],654{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet'] }655);656657const chatRequest2: Partial<ChatRequest> = {658location: ChatLocation.Panel,659prompt: 'second question',660sessionId: 'session-no-same-provider'661};662663const secondResult = await 
automodeService.resolveAutoModeEndpoint(chatRequest2 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);664expect(secondResult.model).toBe('claude-sonnet');665});666667it('should not re-route when prompt has not changed (tool-calling iteration)', async () => {668enableRouter();669const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');670const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');671672mockRouterResponse(673['gpt-4o', 'claude-sonnet'],674{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'claude-sonnet'] }675);676677automodeService = createService();678const chatRequest: Partial<ChatRequest> = {679location: ChatLocation.Panel,680prompt: 'same prompt',681sessionId: 'session-same-prompt'682};683684await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);685686// Reset to track further calls687const routerCallCount = (mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mock.calls688.filter((call: any[]) => call[1]?.type === RequestType.ModelRouter).length;689expect(routerCallCount).toBe(1);690691// Second call with same prompt — should NOT call router again692await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);693694const routerCallCount2 = (mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mock.calls695.filter((call: any[]) => call[1]?.type === RequestType.ModelRouter).length;696expect(routerCallCount2).toBe(1);697});698699it('should skip router on subsequent turns after image request routed on first turn', async () => {700enableRouter();701const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });702const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');703704mockRouterResponse(705['gpt-4o', 'claude-sonnet'],706{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }707);708709automodeService = createService();710711// Turn 1: image request — router IS called now712const 
imageRequest: Partial<ChatRequest> = {713location: ChatLocation.Panel,714prompt: 'describe this image',715sessionId: 'session-transient-fallback',716references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any717};718719await automodeService.resolveAutoModeEndpoint(imageRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);720721expect(mockCAPIClientService.makeRequest).toHaveBeenCalledWith(722expect.anything(),723expect.objectContaining({ type: RequestType.ModelRouter })724);725// Reset mock call tracking726(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockClear();727mockRouterResponse(728['gpt-4o', 'claude-sonnet'],729{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }730);731732// Turn 2: new prompt — router should NOT be called (skipRouter after first turn)733const textRequest: Partial<ChatRequest> = {734location: ChatLocation.Panel,735prompt: 'write a function',736sessionId: 'session-transient-fallback',737};738739await automodeService.resolveAutoModeEndpoint(textRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);740741// Router should not have been called on turn 2742expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(743expect.anything(),744expect.objectContaining({ type: RequestType.ModelRouter })745);746});747748it('should send has_image to router for image requests', async () => {749enableRouter();750const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });751const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');752753mockRouterResponse(754['gpt-4o', 'claude-sonnet'],755{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }756);757758automodeService = createService();759const chatRequest: Partial<ChatRequest> = {760location: ChatLocation.Panel,761prompt: 'describe this image',762sessionId: 'session-vision-router',763references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any764};765766const result = 
await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);767expect(result.model).toBe('gpt-4o');768// Verify router WAS called (not skipped)769const routerCall = (mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mock.calls.find(([, opts]) => opts?.type === RequestType.ModelRouter);770expect(routerCall).toBeDefined();771const [routerRequestBody] = routerCall!;772expect(JSON.parse(routerRequestBody.body).has_image).toBe(true);773});774775it('should fall back to vision model when router returns no_vision_models error', async () => {776enableRouter();777const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });778const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');779780(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {781if (opts?.type === RequestType.ModelRouter) {782return Promise.resolve({783ok: false,784status: 400,785statusText: 'Bad Request',786headers: createMockHeaders(),787text: vi.fn().mockResolvedValue(JSON.stringify({ error: 'no_vision_models' }))788});789}790return Promise.resolve(791makeMockTokenResponse({792available_models: ['gpt-4o', 'claude-sonnet'],793expires_at: Math.floor(Date.now() / 1000) + 3600,794session_token: 'test-token',795})796);797});798799automodeService = createService();800const chatRequest: Partial<ChatRequest> = {801location: ChatLocation.Panel,802prompt: 'describe this image',803sessionId: 'session-no-vision',804references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any805};806807const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);808// Should fall back to default selection, then vision fallback picks gpt-4o809expect(result.model).toBe('gpt-4o');810// Verify the router was called and the error code was passed through from the 
server
			expect(mockCAPIClientService.makeRequest).toHaveBeenCalledWith(
				expect.anything(),
				expect.objectContaining({ type: RequestType.ModelRouter })
			);
			expect(mockLogService.error).toHaveBeenCalledWith(
				expect.stringContaining('(no_vision_models)'),
				expect.anything()
			);
		});

		it('should fall back to routerError when router returns non-JSON error body', async () => {
			// When the router returns an HTML error page or other non-JSON body,
			// errorCode should be undefined and fallbackReason should be 'routerError'
			// — NOT the raw response body leaked into telemetry.
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					// Simulate a proxy-style failure: HTML body, not JSON.
					return Promise.resolve({
						ok: false,
						status: 502,
						statusText: 'Bad Gateway',
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue('<html><body>Bad Gateway</body></html>')
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['gpt-4o'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-html-error',
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);
			expect(result.model).toBe('gpt-4o');
			// Should log generic 'routerError', NOT the HTML body
			expect(mockLogService.error).toHaveBeenCalledWith(
				expect.stringContaining('(routerError)'),
				expect.anything()
			);
		});

		it('should fall back to routerError when router returns JSON without error field', async () => {
			// When the server returns valid JSON but without an 'error' field,
			// errorCode should be undefined and fallbackReason should be 'routerError'.
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					// Valid JSON body, but no 'error' field the service could surface.
					return Promise.resolve({
						ok: false,
						status: 400,
						statusText: 'Bad Request',
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({ message: 'something went wrong' }))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['gpt-4o'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-json-no-error',
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);
			expect(result.model).toBe('gpt-4o');
			expect(mockLogService.error).toHaveBeenCalledWith(
				expect.stringContaining('(routerError)'),
				expect.anything()
			);
		});

		it('should be a no-op when invalidateRouterCache is called with unknown conversationId', async () => {
			automodeService = createService();
			// Make the "must not throw" intent an explicit assertion rather than a
			// bare call with a comment — otherwise the test asserts nothing.
			expect(() => automodeService.invalidateRouterCache({ sessionId: 'nonexistent-session' } as ChatRequest)).not.toThrow();
		});

		it('should re-run router after invalidateRouterCache is called', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'first question',
				sessionId: 'session-invalidate'
			};

			const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
			expect(firstResult.model).toBe('gpt-4o');

			// Without invalidation, changing prompt should still return cached model
			const chatRequest2: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'second question',
				sessionId: 'session-invalidate'
			};
			const cachedResult = await automodeService.resolveAutoModeEndpoint(chatRequest2 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
			expect(cachedResult.model).toBe('gpt-4o');

			// Invalidate the cache
			automodeService.invalidateRouterCache({ sessionId: 'session-invalidate' } as ChatRequest);

			// Now the router should re-run and pick claude
			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet'] }
			);

			const chatRequest3: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'third question',
				sessionId: 'session-invalidate'
			};
			const reEvalResult = await automodeService.resolveAutoModeEndpoint(chatRequest3 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
			expect(reEvalResult.model).toBe('claude-sonnet');
		});
	});

	describe('vision fallback', () => {
		it('should fall back to vision-capable model when selected model does not support vision', async () => {
			const nonVisionEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI', { supportsVision: false });
			const visionEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
			mockApiResponse(['gpt-4o-mini', 'gpt-4o']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'describe this image',
				sessionId: 'session-vision-fallback',
				references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [nonVisionEndpoint, visionEndpoint]);
			expect(result.model).toBe('gpt-4o');
		});

		it('should keep vision-capable model when it is already selected', async () => {
			const visionEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
			const nonVisionEndpoint = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });
			mockApiResponse(['gpt-4o', 'claude-sonnet']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'describe this image',
				sessionId: 'session-vision-already-ok',
				references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [visionEndpoint, nonVisionEndpoint]);
			expect(result.model).toBe('gpt-4o');
		});

		it('should keep non-vision model when request has no image', async () => {
			const nonVisionEndpoint = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });
			const visionEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
			mockApiResponse(['claude-sonnet', 'gpt-4o']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'write a function',
				sessionId: 'session-no-image'
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [nonVisionEndpoint, visionEndpoint]);
			expect(result.model).toBe('claude-sonnet');
		});

		it('should warn and keep selected model when no vision-capable model is available', async () => {
			const nonVisionEndpoint1 = createEndpoint('gpt-4o-mini', 'OpenAI', { supportsVision: false });
			const nonVisionEndpoint2 = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });
			mockApiResponse(['gpt-4o-mini', 'claude-sonnet']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'describe this image',
				sessionId: 'session-no-vision-available',
				references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [nonVisionEndpoint1, nonVisionEndpoint2]);
			// No vision model available, should keep the first available model and warn
			expect(result.model).toBe('gpt-4o-mini');
			expect(mockLogService.warn).toHaveBeenCalledWith(
				expect.stringContaining('no vision-capable model')
			);
		});
	});

	describe('routerModelSelection telemetry', () => {
		// Router succeeds with a fixed 'needs_reasoning' classification; the token
		// endpoint advertises `available_models` under `session_token`.
		function mockRouterResponse(available_models: string[], routerResult: { chosen_model: string; candidate_models: string[] }, session_token = 'test-token'): void {
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					return Promise.resolve({
						ok: true,
						status: 200,
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({
							predicted_label: 'needs_reasoning',
							confidence: 0.9,
							latency_ms: 30,
							chosen_model: routerResult.chosen_model,
							candidate_models: routerResult.candidate_models,
							scores: { needs_reasoning: 0.9, no_reasoning: 0.1 },
							sticky_override: false
						}))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models,
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token,
					})
				);
			});
		}

		it('should emit routerModelSelection with candidateModel and actualModel when router is used', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'claude-sonnet'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-telemetry-test'
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);

			const telemetryCalls = mockTelemetryService.sendMSFTTelemetryEvent.mock.calls;
			const selectionEvent = telemetryCalls.find((call: unknown[]) => call[0] === 'automode.routerModelSelection');
			expect(selectionEvent).toBeDefined();
			expect(selectionEvent![1]).toMatchObject({
				candidateModel: 'gpt-4o',
				actualModel: 'gpt-4o',
				overrideReason: 'none',
			});
		});

		it('should emit overrideReason=clientOverride when vision fallback changes the model', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });

			// Router picks claude-sonnet (no vision), vision fallback should override to gpt-4o
			mockRouterResponse(
				['claude-sonnet', 'gpt-4o'],
				{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet', 'gpt-4o'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'describe this image',
				sessionId: 'session-telemetry-vision',
				references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);

			const telemetryCalls = mockTelemetryService.sendMSFTTelemetryEvent.mock.calls;
			const selectionEvent = telemetryCalls.find((call: unknown[]) => call[0] === 'automode.routerModelSelection');
			expect(selectionEvent).toBeDefined();
			expect(selectionEvent![1]).toMatchObject({
				candidateModel: 'claude-sonnet',
				actualModel: 'gpt-4o',
				overrideReason: 'clientOverride',
			});
		});

		it('should not emit routerModelSelection when router fails', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			mockRouterResponse(
				['gpt-4o'],
				{ chosen_model: 'unknown-model', candidate_models: ['unknown-model'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-telemetry-no-emit'
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);

			const telemetryCalls = mockTelemetryService.sendMSFTTelemetryEvent.mock.calls;
			const selectionEvent = telemetryCalls.find((call: unknown[]) => call[0] === 'automode.routerModelSelection');
			// candidateModel is not set when router returns unknown model, so event should not emit
			expect(selectionEvent).toBeUndefined();
		});
	});

	describe('available_models / knownEndpoints sync', () => {
		// Same shape as the telemetry suite's helper, but with a 'no_reasoning'
		// classification so these tests exercise the non-reasoning path.
		function mockRouterResponse(available_models: string[], routerResult: { chosen_model: string; candidate_models: string[] }, session_token = 'test-token'): void {
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					return Promise.resolve({
						ok: true,
						status: 200,
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({
							predicted_label: 'no_reasoning',
							confidence: 0.96,
							latency_ms: 23,
							chosen_model: routerResult.chosen_model,
							candidate_models: routerResult.candidate_models,
							scores: { needs_reasoning: 0.04, no_reasoning: 0.96 },
							sticky_override: false
						}))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models,
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token,
					})
				);
			});
		}

		it('should filter out available_models that have no matching knownEndpoint before sending to router', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			// Capture the router request body so we can inspect what was sent.
			let capturedBody: string | undefined;
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((req: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					capturedBody = req.body;
					return Promise.resolve({
						ok: true,
						status: 200,
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({
							predicted_label: 'no_reasoning',
							confidence: 0.96,
							latency_ms: 23,
							chosen_model: 'gpt-4o',
							candidate_models: ['gpt-4o'],
							scores: { needs_reasoning: 0.04, no_reasoning: 0.96 },
							sticky_override: false
						}))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['claude-haiku-4.5', 'gpt-4o', 'claude-sonnet-4.6'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'what day is today',
				sessionId: 'session-filter-models'
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);

			expect(capturedBody).toBeDefined();
			const parsed = JSON.parse(capturedBody!);
			expect(parsed.available_models).toEqual(['gpt-4o']);
			expect(parsed.available_models).not.toContain('claude-haiku-4.5');
			expect(parsed.available_models).not.toContain('claude-sonnet-4.6');
			expect(mockLogService.info).toHaveBeenCalledWith(
				expect.stringContaining('Filtered 2 unresolvable model(s)')
			);
		});

		it('should iterate all candidate_models when first candidate has no endpoint', async () => {
			enableRouter();
			const gpt41Endpoint = createEndpoint('gpt-4.1', 'OpenAI');

			mockRouterResponse(
				['gpt-4.1'],
				{ chosen_model: 'gpt-4.1', candidate_models: ['unknown-new-model', 'gpt-4.1'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'what day is today',
				sessionId: 'session-iterate-candidates'
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt41Endpoint]);
			expect(result.model).toBe('gpt-4.1');
		});

		it('should throw when all available_models are unknown to knownEndpoints', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					throw new Error('Router should not be called when no models are routable');
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['unknown-model-a', 'unknown-model-b'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-all-unknown'
			};

			await expect(
				automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint])
			).rejects.toThrow('no available model found');
			expect(mockLogService.warn).toHaveBeenCalledWith(
				expect.stringContaining('No available_models matched knownEndpoints')
			);
		});
	});
});