// Source: extensions/copilot/src/platform/endpoint/node/test/automodeService.spec.ts
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { RequestType } from '@vscode/copilot-api';6import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';7import type { ChatRequest } from 'vscode';8import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';9import { ChatLocation } from '../../../../vscodeTypes';10import { IAuthenticationService } from '../../../authentication/common/authentication';11import { ConfigKey, IConfigurationService } from '../../../configuration/common/configurationService';12import { DefaultsOnlyConfigurationService } from '../../../configuration/common/defaultsOnlyConfigurationService';13import { InMemoryConfigurationService } from '../../../configuration/test/common/inMemoryConfigurationService';14import { NullEnvService } from '../../../env/common/nullEnvService';15import { ILogService } from '../../../log/common/logService';16import { IChatEndpoint } from '../../../networking/common/networking';17import { NullRequestLogger } from '../../../requestLogger/node/nullRequestLogger';18import { IExperimentationService, NullExperimentationService } from '../../../telemetry/common/nullExperimentationService';19import { ITelemetryService } from '../../../telemetry/common/telemetry';20import { ICAPIClientService } from '../../common/capiClient';21import { AutomodeService } from '../automodeService';2223function createMockHeaders(entries: Record<string, string> = {}): { get(name: string): string | null } {24const lower: Record<string, string> = {};25for (const [k, v] of Object.entries(entries)) {26lower[k.toLowerCase()] = v;27}28return { get: (name: string) => lower[name.toLowerCase()] ?? 
null };29}3031/**32* Creates a mock response with a real stream-backed body so that middleware33* cloning (tee) works correctly. Token responses go through the middleware34* pipeline where {@link cloneResponse} reads the body stream.35*/36function makeMockTokenResponse(body: { available_models: string[]; expires_at: number; session_token: string }) {37const serialized = JSON.stringify(body);38return {39status: 200,40headers: createMockHeaders(),41body: new ReadableStream<Uint8Array>({42start(controller) {43controller.enqueue(new TextEncoder().encode(serialized));44controller.close();45},46}),47async text() { return serialized; },48async json() { return JSON.parse(serialized); },49};50}5152describe('AutomodeService', () => {53let automodeService: AutomodeService;54let mockCAPIClientService: ICAPIClientService;55let mockAuthService: IAuthenticationService;56let mockLogService: ILogService;57let mockInstantiationService: IInstantiationService;58let mockExpService: IExperimentationService;59let configurationService: IConfigurationService;60let mockChatEndpoint: IChatEndpoint;61let envService: NullEnvService;62let mockTelemetryService: ITelemetryService & { sendMSFTTelemetryEvent: ReturnType<typeof vi.fn> };6364function createEndpoint(model: string, provider: string, overrides?: Partial<IChatEndpoint>): IChatEndpoint {65return {66model,67modelProvider: provider,68displayName: model,69maxOutputTokens: 4096,70supportsToolCalls: true,71supportsVision: false,72supportsPrediction: false,73showInModelPicker: true,74isDefault: false,75isFallback: false,76policy: 'enabled',77...overrides,78} as unknown as IChatEndpoint;79}8081function createService(): AutomodeService {82return new AutomodeService(83mockCAPIClientService,84mockAuthService,85mockLogService,86mockInstantiationService,87mockExpService,88configurationService,89envService,90mockTelemetryService,91new NullRequestLogger()92);93}9495function mockApiResponse(available_models: string[], session_token = 'test-token', 
expiresInSeconds = 3600): void {96(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockResolvedValue(97makeMockTokenResponse({98available_models,99expires_at: Math.floor(Date.now() / 1000) + expiresInSeconds,100session_token,101})102);103}104105function enableRouter(): void {106(configurationService as InMemoryConfigurationService).setConfig(107ConfigKey.TeamInternal.UseAutoModeRouting,108true109);110}111112beforeEach(() => {113mockChatEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI');114115mockCAPIClientService = {116makeRequest: vi.fn().mockResolvedValue(117makeMockTokenResponse({118available_models: ['gpt-4o', 'gpt-4o-mini'],119expires_at: Math.floor(Date.now() / 1000) + 3600,120session_token: 'test-token'121})122)123} as unknown as ICAPIClientService;124125mockAuthService = {126getCopilotToken: vi.fn().mockResolvedValue({ token: 'test-auth-token' }),127onDidAuthenticationChange: vi.fn().mockReturnValue({ dispose: vi.fn() })128} as unknown as IAuthenticationService;129130mockLogService = {131trace: vi.fn(),132debug: vi.fn(),133info: vi.fn(),134warn: vi.fn(),135error: vi.fn()136} as unknown as ILogService;137138mockInstantiationService = {139createInstance: vi.fn().mockImplementation(140(_ctor: any, wrappedEndpoint: IChatEndpoint) => wrappedEndpoint141)142} as unknown as IInstantiationService;143144mockExpService = new NullExperimentationService();145146configurationService = new InMemoryConfigurationService(new DefaultsOnlyConfigurationService());147envService = new NullEnvService();148mockTelemetryService = {149sendTelemetryEvent: vi.fn(),150sendMSFTTelemetryEvent: vi.fn(),151sendTelemetryErrorEvent: vi.fn(),152sendMSFTTelemetryErrorEvent: vi.fn(),153sendSharedTelemetryEvent: vi.fn(),154sendEnhancedGHTelemetryEvent: vi.fn(),155} as unknown as ITelemetryService & { sendMSFTTelemetryEvent: ReturnType<typeof vi.fn> };156});157158afterEach(() => {159vi.useRealTimers();160});161162describe('resolveAutoModeEndpoint', () => {163it('should not use 
router for inline chat', async () => {164enableRouter();165166automodeService = createService();167168const chatRequest: Partial<ChatRequest> = {169location: ChatLocation.Editor,170prompt: 'test prompt',171};172173await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint]);174175// Verify that router API was NOT called for inline chat176expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(177expect.anything(),178expect.objectContaining({ type: RequestType.ModelRouter })179);180});181182it('should use router for panel chat when enabled', async () => {183enableRouter();184185const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');186187// Mock makeRequest to handle both auto mode token and router API calls188(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {189if (opts?.type === RequestType.ModelRouter) {190return Promise.resolve({191ok: true,192status: 200,193headers: createMockHeaders(),194text: vi.fn().mockResolvedValue(JSON.stringify({195predicted_label: 'needs_reasoning',196confidence: 0.85,197latency_ms: 50,198chosen_model: 'gpt-4o',199candidate_models: ['gpt-4o', 'gpt-4o-mini'],200scores: { needs_reasoning: 0.85, no_reasoning: 0.15 },201sticky_override: false202}))203});204}205return Promise.resolve(206makeMockTokenResponse({207available_models: ['gpt-4o', 'gpt-4o-mini'],208expires_at: Math.floor(Date.now() / 1000) + 3600,209session_token: 'test-token'210})211);212});213214automodeService = createService();215216const chatRequest: Partial<ChatRequest> = {217location: ChatLocation.Panel,218prompt: 'test prompt',219sessionId: 'session-router-panel'220};221222const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint, gpt4oEndpoint]);223224// Verify that router API was called for panel chat225expect(mockCAPIClientService.makeRequest).toHaveBeenCalledWith(226expect.objectContaining({ method: 'POST' 
}),227expect.objectContaining({ type: RequestType.ModelRouter })228);229// Router should have selected gpt-4o230expect(result.model).toBe('gpt-4o');231});232233it('should include context signals in router request body', async () => {234enableRouter();235236const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');237238let capturedBody: string | undefined;239(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((req: any, opts: any) => {240if (opts?.type === RequestType.ModelRouter) {241capturedBody = req.body;242return Promise.resolve({243ok: true,244status: 200,245headers: createMockHeaders(),246text: vi.fn().mockResolvedValue(JSON.stringify({247predicted_label: 'needs_reasoning',248confidence: 0.85,249latency_ms: 50,250chosen_model: 'gpt-4o',251candidate_models: ['gpt-4o', 'gpt-4o-mini'],252scores: { needs_reasoning: 0.85, no_reasoning: 0.15 },253sticky_override: false254}))255});256}257return Promise.resolve(258makeMockTokenResponse({259available_models: ['gpt-4o', 'gpt-4o-mini'],260expires_at: Math.floor(Date.now() / 1000) + 3600,261session_token: 'test-token'262})263);264});265266automodeService = createService();267268const chatRequest: Partial<ChatRequest> = {269location: ChatLocation.Panel,270prompt: 'test prompt',271references: [{ id: 'ref1', value: 'some ref' } as any],272sessionId: 'test-session-123',273};274275await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint, gpt4oEndpoint]);276277expect(capturedBody).toBeDefined();278const parsed = JSON.parse(capturedBody!);279expect(parsed.prompt).toBe('test prompt');280expect(parsed.prompt_char_count).toBe('test prompt'.length);281expect(parsed.reference_count).toBe(1);282expect(parsed.turn_number).toBe(1);283expect(parsed.session_id).toBe('test-session-123');284expect(parsed.previous_model).toBeUndefined();285});286287it('should not use router when routing is not enabled', async () => {288// Routing not enabled via UseAutoModeRouting 
config289automodeService = createService();290291const chatRequest: Partial<ChatRequest> = {292location: ChatLocation.Panel,293prompt: 'test prompt'294};295296await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint]);297298// Verify that router API was NOT called (exp / config disabled)299expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(300expect.anything(),301expect.objectContaining({ type: RequestType.ModelRouter })302);303});304305it('should not use router for terminal chat', async () => {306enableRouter();307308automodeService = createService();309310const chatRequest: Partial<ChatRequest> = {311location: ChatLocation.Terminal,312prompt: 'test prompt'313};314315await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint]);316317// Verify that router API was NOT called for terminal chat318expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(319expect.anything(),320expect.objectContaining({ type: RequestType.ModelRouter })321);322});323});324325describe('model selection', () => {326it('should pick the first available model with a known endpoint on first mint', async () => {327const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');328const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');329mockApiResponse(['claude-sonnet', 'gpt-4o']);330331automodeService = createService();332const chatRequest: Partial<ChatRequest> = {333location: ChatLocation.Panel,334prompt: 'test',335sessionId: 'session-first-mint'336};337338const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);339// claude-sonnet is first in available_models and has a known endpoint340expect(result.model).toBe('claude-sonnet');341});342343it('should skip models without known endpoints and pick the first match', async () => {344const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');345// available_models has 'unknown-model' 
first, but no known endpoint for it346mockApiResponse(['unknown-model', 'gpt-4o']);347348automodeService = createService();349const chatRequest: Partial<ChatRequest> = {350location: ChatLocation.Panel,351prompt: 'test',352sessionId: 'session-skip-unknown'353};354355const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint]);356expect(result.model).toBe('gpt-4o');357});358359it('should prefer same provider model on token refresh', async () => {360vi.useFakeTimers();361const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');362const openaiMiniEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI');363const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');364365// First mint: gpt-4o is first available, token expires in 1s to trigger immediate refresh366mockApiResponse(['gpt-4o', 'claude-sonnet'], 'token-1', 1);367368automodeService = createService();369const chatRequest: Partial<ChatRequest> = {370location: ChatLocation.Panel,371prompt: 'test',372sessionId: 'session-affinity'373};374375const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, openaiMiniEndpoint, claudeEndpoint]);376expect(firstResult.model).toBe('gpt-4o');377378// Set up new token response, then advance timers to trigger refresh379mockApiResponse(['claude-sonnet', 'gpt-4o-mini'], 'token-2');380await vi.advanceTimersByTimeAsync(1);381382const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, openaiMiniEndpoint, claudeEndpoint]);383// Should pick gpt-4o-mini because it's the first model from the same provider (OpenAI)384expect(secondResult.model).toBe('gpt-4o-mini');385vi.useRealTimers();386});387388it('should fall back to first available model when no same-provider model exists on refresh', async () => {389vi.useFakeTimers();390const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');391const claudeEndpoint = 
createEndpoint('claude-sonnet', 'Anthropic');

			// First mint: gpt-4o is first available, token expires in 1s to trigger immediate refresh
			mockApiResponse(['gpt-4o', 'claude-sonnet'], 'token-1', 1);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-fallback'
			};

			const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			expect(firstResult.model).toBe('gpt-4o');

			// Set up new token response with only Anthropic models, then advance timers
			mockApiResponse(['claude-sonnet'], 'token-2');
			await vi.advanceTimersByTimeAsync(1);

			const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			// No OpenAI models available, should fall back to first available (claude-sonnet)
			expect(secondResult.model).toBe('claude-sonnet');
		});

		it('should return cached endpoint when session token has not changed', async () => {
			const openaiEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			mockApiResponse(['gpt-4o', 'claude-sonnet'], 'token-same');

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-cached'
			};

			const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [openaiEndpoint, claudeEndpoint]);
			// Same object reference since token didn't change
			expect(secondResult).toBe(firstResult);
		});

		it('should throw when no available models match any known endpoint', async () => {
			mockApiResponse(['unknown-model-1', 'unknown-model-2']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test',
				sessionId: 'session-no-match'
			};

			await expect(
				automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint])
			).rejects.toThrow('no available model found');
		});
	});

	describe('router fallback', () => {
		// Queues a router verdict alongside a successful token mint for the given models.
		function mockRouterResponse(available_models: string[], routerResult: { chosen_model: string; candidate_models: string[] }, session_token = 'test-token'): void {
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					return Promise.resolve({
						ok: true,
						status: 200,
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({
							predicted_label: 'needs_reasoning',
							confidence: 0.9,
							latency_ms: 30,
							chosen_model: routerResult.chosen_model,
							candidate_models: routerResult.candidate_models,
							scores: { needs_reasoning: 0.9, no_reasoning: 0.1 },
							sticky_override: false
						}))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models,
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token,
					})
				);
			});
		}

		it('should fall back to default selection when router fetch throws', async () => {
			enableRouter();
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					return Promise.reject(new Error('Network error'));
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['claude-sonnet', 'gpt-4o'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> =
{499location: ChatLocation.Panel,500prompt: 'test prompt',501sessionId: 'session-router-error'502};503504const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [claudeEndpoint, gpt4oEndpoint]);505// Should fall back to first available model (claude-sonnet)506expect(result.model).toBe('claude-sonnet');507expect(mockLogService.error).toHaveBeenCalledWith(508expect.stringContaining('Failed to get routed model'),509expect.any(String)510);511});512513it('should fall back to default selection with routerTimeout reason when router times out', async () => {514vi.useFakeTimers();515enableRouter();516const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');517const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');518519(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((req: any, opts: any) => {520if (opts?.type === RequestType.ModelRouter) {521// Return a pending promise that rejects when the signal is aborted,522// simulating a real in-flight request cancelled by the 1s timeout.523return new Promise((_resolve, reject) => {524const signal: AbortSignal = req.signal;525if (signal?.aborted) {526const err = new Error('The operation was aborted');527err.name = 'AbortError';528reject(err);529return;530}531signal?.addEventListener('abort', () => {532const err = new Error('The operation was aborted');533err.name = 'AbortError';534reject(err);535});536});537}538return Promise.resolve(539makeMockTokenResponse({540available_models: ['claude-sonnet', 'gpt-4o'],541expires_at: Math.floor(Date.now() / 1000) + 3600,542session_token: 'test-token',543})544);545});546547automodeService = createService();548const chatRequest: Partial<ChatRequest> = {549location: ChatLocation.Panel,550prompt: 'test prompt',551sessionId: 'session-router-timeout'552};553554const resultPromise = automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [claudeEndpoint, gpt4oEndpoint]);555// Advance past the 1-second router 
timeout to trigger the abort556await vi.advanceTimersByTimeAsync(1000);557558const result = await resultPromise;559// Should fall back to first available model (claude-sonnet)560expect(result.model).toBe('claude-sonnet');561expect(mockLogService.error).toHaveBeenCalledWith(562expect.stringContaining('routerTimeout'),563expect.any(String)564);565});566567it('should fall back to default selection when router returns unknown model', async () => {568enableRouter();569const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');570571mockRouterResponse(572['gpt-4o'],573{ chosen_model: 'unknown-model', candidate_models: ['unknown-model'] }574);575576automodeService = createService();577const chatRequest: Partial<ChatRequest> = {578location: ChatLocation.Panel,579prompt: 'test prompt',580sessionId: 'session-unknown-router-model'581};582583const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);584// Router returned unknown model, should fall back to first available585expect(result.model).toBe('gpt-4o');586});587588it('should skip router on subsequent turns and return cached model', async () => {589enableRouter();590const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');591const gpt4oMiniEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI');592const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');593594// First turn: router picks gpt-4o595mockRouterResponse(596['gpt-4o', 'gpt-4o-mini', 'claude-sonnet'],597{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'gpt-4o-mini', 'claude-sonnet'] }598);599600automodeService = createService();601const chatRequest1: Partial<ChatRequest> = {602location: ChatLocation.Panel,603prompt: 'first question',604sessionId: 'session-same-provider'605};606607const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest1 as ChatRequest, [gpt4oEndpoint, gpt4oMiniEndpoint, claudeEndpoint]);608expect(firstResult.model).toBe('gpt-4o');609610// Second turn: router would 
return claude, but should be skipped (cached gpt-4o returned)611mockRouterResponse(612['gpt-4o', 'gpt-4o-mini', 'claude-sonnet'],613{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet', 'gpt-4o-mini'] }614);615616const chatRequest2: Partial<ChatRequest> = {617location: ChatLocation.Panel,618prompt: 'second question',619sessionId: 'session-same-provider'620};621622const secondResult = await automodeService.resolveAutoModeEndpoint(chatRequest2 as ChatRequest, [gpt4oEndpoint, gpt4oMiniEndpoint, claudeEndpoint]);623// Router is skipped after first turn — cached model returned624expect(secondResult.model).toBe('gpt-4o');625});626627it('should re-route on subsequent turns after invalidateRouterCache', async () => {628enableRouter();629const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');630const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');631632// First turn: router picks gpt-4o633mockRouterResponse(634['gpt-4o', 'claude-sonnet'],635{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'claude-sonnet'] }636);637638automodeService = createService();639const chatRequest1: Partial<ChatRequest> = {640location: ChatLocation.Panel,641prompt: 'first question',642sessionId: 'session-no-same-provider'643};644645const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest1 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);646expect(firstResult.model).toBe('gpt-4o');647648// Invalidate the cache (simulates compaction)649automodeService.invalidateRouterCache({ sessionId: 'session-no-same-provider' } as ChatRequest);650651// Second turn: router is re-run after invalidation, picks claude-sonnet652mockRouterResponse(653['gpt-4o', 'claude-sonnet'],654{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet'] }655);656657const chatRequest2: Partial<ChatRequest> = {658location: ChatLocation.Panel,659prompt: 'second question',660sessionId: 'session-no-same-provider'661};662663const secondResult = await 
automodeService.resolveAutoModeEndpoint(chatRequest2 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);664expect(secondResult.model).toBe('claude-sonnet');665});666667it('should not re-route when prompt has not changed (tool-calling iteration)', async () => {668enableRouter();669const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');670const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');671672mockRouterResponse(673['gpt-4o', 'claude-sonnet'],674{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'claude-sonnet'] }675);676677automodeService = createService();678const chatRequest: Partial<ChatRequest> = {679location: ChatLocation.Panel,680prompt: 'same prompt',681sessionId: 'session-same-prompt'682};683684await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);685686// Reset to track further calls687const routerCallCount = (mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mock.calls688.filter((call: any[]) => call[1]?.type === RequestType.ModelRouter).length;689expect(routerCallCount).toBe(1);690691// Second call with same prompt — should NOT call router again692await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);693694const routerCallCount2 = (mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mock.calls695.filter((call: any[]) => call[1]?.type === RequestType.ModelRouter).length;696expect(routerCallCount2).toBe(1);697});698699it('should skip router on subsequent turns after image request routed on first turn', async () => {700enableRouter();701const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });702const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');703704mockRouterResponse(705['gpt-4o', 'claude-sonnet'],706{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }707);708709automodeService = createService();710711// Turn 1: image request — router IS called now712const 
imageRequest: Partial<ChatRequest> = {713location: ChatLocation.Panel,714prompt: 'describe this image',715sessionId: 'session-transient-fallback',716references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any717};718719await automodeService.resolveAutoModeEndpoint(imageRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);720721expect(mockCAPIClientService.makeRequest).toHaveBeenCalledWith(722expect.anything(),723expect.objectContaining({ type: RequestType.ModelRouter })724);725// Reset mock call tracking726(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockClear();727mockRouterResponse(728['gpt-4o', 'claude-sonnet'],729{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }730);731732// Turn 2: new prompt — router should NOT be called (skipRouter after first turn)733const textRequest: Partial<ChatRequest> = {734location: ChatLocation.Panel,735prompt: 'write a function',736sessionId: 'session-transient-fallback',737};738739await automodeService.resolveAutoModeEndpoint(textRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);740741// Router should not have been called on turn 2742expect(mockCAPIClientService.makeRequest).not.toHaveBeenCalledWith(743expect.anything(),744expect.objectContaining({ type: RequestType.ModelRouter })745);746});747748it('should send has_image to router for image requests', async () => {749enableRouter();750const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });751const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');752753mockRouterResponse(754['gpt-4o', 'claude-sonnet'],755{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }756);757758automodeService = createService();759const chatRequest: Partial<ChatRequest> = {760location: ChatLocation.Panel,761prompt: 'describe this image',762sessionId: 'session-vision-router',763references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any764};765766const result = 
await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);767expect(result.model).toBe('gpt-4o');768// Verify router WAS called (not skipped)769const routerCall = (mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mock.calls.find(([, opts]) => opts?.type === RequestType.ModelRouter);770expect(routerCall).toBeDefined();771const [routerRequestBody] = routerCall!;772expect(JSON.parse(routerRequestBody.body).has_image).toBe(true);773});774775it('should fall back to vision model when router returns no_vision_models error', async () => {776enableRouter();777const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });778const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');779780(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {781if (opts?.type === RequestType.ModelRouter) {782return Promise.resolve({783ok: false,784status: 400,785statusText: 'Bad Request',786headers: createMockHeaders(),787text: vi.fn().mockResolvedValue(JSON.stringify({ error: 'no_vision_models' }))788});789}790return Promise.resolve(791makeMockTokenResponse({792available_models: ['gpt-4o', 'claude-sonnet'],793expires_at: Math.floor(Date.now() / 1000) + 3600,794session_token: 'test-token',795})796);797});798799automodeService = createService();800const chatRequest: Partial<ChatRequest> = {801location: ChatLocation.Panel,802prompt: 'describe this image',803sessionId: 'session-no-vision',804references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any805};806807const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);808// Should fall back to default selection, then vision fallback picks gpt-4o809expect(result.model).toBe('gpt-4o');810// Verify the router was called and the error code was passed through from the 
server
			expect(mockCAPIClientService.makeRequest).toHaveBeenCalledWith(
				expect.anything(),
				expect.objectContaining({ type: RequestType.ModelRouter })
			);
			expect(mockLogService.error).toHaveBeenCalledWith(
				expect.stringContaining('(no_vision_models)'),
				expect.anything()
			);
		});

		it('should fall back to routerError when router returns non-JSON error body', async () => {
			// When the router returns an HTML error page or other non-JSON body,
			// errorCode should be undefined and fallbackReason should be 'routerError'
			// — NOT the raw response body leaked into telemetry.
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					// Simulate a proxy-style failure: HTML body, not JSON.
					return Promise.resolve({
						ok: false,
						status: 502,
						statusText: 'Bad Gateway',
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue('<html><body>Bad Gateway</body></html>')
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['gpt-4o'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-html-error',
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);
			expect(result.model).toBe('gpt-4o');
			// Should log generic 'routerError', NOT the HTML body
			expect(mockLogService.error).toHaveBeenCalledWith(
				expect.stringContaining('(routerError)'),
				expect.anything()
			);
		});

		it('should fall back to routerError when router returns JSON without error field', async () => {
			// When the server returns valid JSON but without an 'error' field,
			// errorCode should be undefined and fallbackReason should be 'routerError'.
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					// Valid JSON body, but no 'error' field the service could surface.
					return Promise.resolve({
						ok: false,
						status: 400,
						statusText: 'Bad Request',
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({ message: 'something went wrong' }))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['gpt-4o'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-json-no-error',
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);
			expect(result.model).toBe('gpt-4o');
			expect(mockLogService.error).toHaveBeenCalledWith(
				expect.stringContaining('(routerError)'),
				expect.anything()
			);
		});

		it('should be a no-op when invalidateRouterCache is called with unknown conversationId', async () => {
			automodeService = createService();
			// Make the "must not throw" intent an explicit assertion rather than a
			// bare call with a comment — otherwise the test asserts nothing.
			expect(() => automodeService.invalidateRouterCache({ sessionId: 'nonexistent-session' } as ChatRequest)).not.toThrow();
		});

		it('should re-run router after invalidateRouterCache is called', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'first question',
				sessionId: 'session-invalidate'
			};

			const firstResult = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
			expect(firstResult.model).toBe('gpt-4o');

			// Without invalidation, changing prompt should still return cached model
			const chatRequest2: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'second question',
				sessionId: 'session-invalidate'
			};
			const cachedResult = await automodeService.resolveAutoModeEndpoint(chatRequest2 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
			expect(cachedResult.model).toBe('gpt-4o');

			// Invalidate the cache
			automodeService.invalidateRouterCache({ sessionId: 'session-invalidate' } as ChatRequest);

			// Now the router should re-run and pick claude
			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet'] }
			);

			const chatRequest3: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'third question',
				sessionId: 'session-invalidate'
			};
			const reEvalResult = await automodeService.resolveAutoModeEndpoint(chatRequest3 as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);
			expect(reEvalResult.model).toBe('claude-sonnet');
		});
	});

	describe('vision fallback', () => {
		it('should fall back to vision-capable model when selected model does not support vision', async () => {
			const nonVisionEndpoint = createEndpoint('gpt-4o-mini', 'OpenAI', { supportsVision: false });
			const visionEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
			mockApiResponse(['gpt-4o-mini', 'gpt-4o']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'describe this image',
				sessionId: 'session-vision-fallback',
				references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [nonVisionEndpoint, visionEndpoint]);
			expect(result.model).toBe('gpt-4o');
		});

		it('should keep vision-capable model when it is already selected', async () => {
			const visionEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
			const nonVisionEndpoint = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });
			mockApiResponse(['gpt-4o', 'claude-sonnet']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'describe this image',
				sessionId: 'session-vision-already-ok',
				references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [visionEndpoint, nonVisionEndpoint]);
			expect(result.model).toBe('gpt-4o');
		});

		it('should keep non-vision model when request has no image', async () => {
			const nonVisionEndpoint = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });
			const visionEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
			mockApiResponse(['claude-sonnet', 'gpt-4o']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'write a function',
				sessionId: 'session-no-image'
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [nonVisionEndpoint, visionEndpoint]);
			expect(result.model).toBe('claude-sonnet');
		});

		it('should warn and keep selected model when no vision-capable model is available', async () => {
			const nonVisionEndpoint1 = createEndpoint('gpt-4o-mini', 'OpenAI', { supportsVision: false });
			const nonVisionEndpoint2 = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });
			mockApiResponse(['gpt-4o-mini', 'claude-sonnet']);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'describe this image',
				sessionId: 'session-no-vision-available',
				references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [nonVisionEndpoint1, nonVisionEndpoint2]);
			// No vision model available, should keep the first available model and warn
			expect(result.model).toBe('gpt-4o-mini');
			expect(mockLogService.warn).toHaveBeenCalledWith(
				expect.stringContaining('no vision-capable model')
			);
		});
	});

	describe('routerModelSelection telemetry', () => {
		// Router succeeds with a fixed 'needs_reasoning' classification; the token
		// endpoint advertises `available_models` under `session_token`.
		function mockRouterResponse(available_models: string[], routerResult: { chosen_model: string; candidate_models: string[] }, session_token = 'test-token'): void {
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					return Promise.resolve({
						ok: true,
						status: 200,
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({
							predicted_label: 'needs_reasoning',
							confidence: 0.9,
							latency_ms: 30,
							chosen_model: routerResult.chosen_model,
							candidate_models: routerResult.candidate_models,
							scores: { needs_reasoning: 0.9, no_reasoning: 0.1 },
							sticky_override: false
						}))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models,
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token,
					})
				);
			});
		}

		it('should emit routerModelSelection with candidateModel and actualModel when router is used', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic');

			mockRouterResponse(
				['gpt-4o', 'claude-sonnet'],
				{ chosen_model: 'gpt-4o', candidate_models: ['gpt-4o', 'claude-sonnet'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-telemetry-test'
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);

			const telemetryCalls = mockTelemetryService.sendMSFTTelemetryEvent.mock.calls;
			const selectionEvent = telemetryCalls.find((call: unknown[]) => call[0] === 'automode.routerModelSelection');
			expect(selectionEvent).toBeDefined();
			expect(selectionEvent![1]).toMatchObject({
				candidateModel: 'gpt-4o',
				actualModel: 'gpt-4o',
				overrideReason: 'none',
			});
		});

		it('should emit overrideReason=clientOverride when vision fallback changes the model', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI', { supportsVision: true });
			const claudeEndpoint = createEndpoint('claude-sonnet', 'Anthropic', { supportsVision: false });

			// Router picks claude-sonnet (no vision), vision fallback should override to gpt-4o
			mockRouterResponse(
				['claude-sonnet', 'gpt-4o'],
				{ chosen_model: 'claude-sonnet', candidate_models: ['claude-sonnet', 'gpt-4o'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'describe this image',
				sessionId: 'session-telemetry-vision',
				references: [{ id: 'img', value: { mimeType: 'image/png', data: new Uint8Array() } }] as any
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint, claudeEndpoint]);

			const telemetryCalls = mockTelemetryService.sendMSFTTelemetryEvent.mock.calls;
			const selectionEvent = telemetryCalls.find((call: unknown[]) => call[0] === 'automode.routerModelSelection');
			expect(selectionEvent).toBeDefined();
			expect(selectionEvent![1]).toMatchObject({
				candidateModel: 'claude-sonnet',
				actualModel: 'gpt-4o',
				overrideReason: 'clientOverride',
			});
		});

		it('should not emit routerModelSelection when router fails', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			mockRouterResponse(
				['gpt-4o'],
				{ chosen_model: 'unknown-model', candidate_models: ['unknown-model'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-telemetry-no-emit'
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);

			const telemetryCalls = mockTelemetryService.sendMSFTTelemetryEvent.mock.calls;
			const selectionEvent = telemetryCalls.find((call: unknown[]) => call[0] === 'automode.routerModelSelection');
			// candidateModel is not set when router returns unknown model, so event should not emit
			expect(selectionEvent).toBeUndefined();
		});
	});

	describe('available_models / knownEndpoints sync', () => {
		// Same shape as the telemetry suite's helper, but with a 'no_reasoning'
		// classification so these tests exercise the non-reasoning path.
		function mockRouterResponse(available_models: string[], routerResult: { chosen_model: string; candidate_models: string[] }, session_token = 'test-token'): void {
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					return Promise.resolve({
						ok: true,
						status: 200,
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({
							predicted_label: 'no_reasoning',
							confidence: 0.96,
							latency_ms: 23,
							chosen_model: routerResult.chosen_model,
							candidate_models: routerResult.candidate_models,
							scores: { needs_reasoning: 0.04, no_reasoning: 0.96 },
							sticky_override: false
						}))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models,
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token,
					})
				);
			});
		}

		it('should filter out available_models that have no matching knownEndpoint before sending to router', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
			// Capture the router request body so we can inspect what was sent.
			let capturedBody: string | undefined;
			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((req: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					capturedBody = req.body;
					return Promise.resolve({
						ok: true,
						status: 200,
						headers: createMockHeaders(),
						text: vi.fn().mockResolvedValue(JSON.stringify({
							predicted_label: 'no_reasoning',
							confidence: 0.96,
							latency_ms: 23,
							chosen_model: 'gpt-4o',
							candidate_models: ['gpt-4o'],
							scores: { needs_reasoning: 0.04, no_reasoning: 0.96 },
							sticky_override: false
						}))
					});
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['claude-haiku-4.5', 'gpt-4o', 'claude-sonnet-4.6'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'what day is today',
				sessionId: 'session-filter-models'
			};

			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);

			expect(capturedBody).toBeDefined();
			const parsed = JSON.parse(capturedBody!);
			expect(parsed.available_models).toEqual(['gpt-4o']);
			expect(parsed.available_models).not.toContain('claude-haiku-4.5');
			expect(parsed.available_models).not.toContain('claude-sonnet-4.6');
			expect(mockLogService.info).toHaveBeenCalledWith(
				expect.stringContaining('Filtered 2 unresolvable model(s)')
			);
		});

		it('should iterate all candidate_models when first candidate has no endpoint', async () => {
			enableRouter();
			const gpt41Endpoint = createEndpoint('gpt-4.1', 'OpenAI');

			mockRouterResponse(
				['gpt-4.1'],
				{ chosen_model: 'gpt-4.1', candidate_models: ['unknown-new-model', 'gpt-4.1'] }
			);

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'what day is today',
				sessionId: 'session-iterate-candidates'
			};

			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt41Endpoint]);
			expect(result.model).toBe('gpt-4.1');
		});

		it('should throw when all available_models are unknown to knownEndpoints', async () => {
			enableRouter();
			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
				if (opts?.type === RequestType.ModelRouter) {
					throw new Error('Router should not be called when no models are routable');
				}
				return Promise.resolve(
					makeMockTokenResponse({
						available_models: ['unknown-model-a', 'unknown-model-b'],
						expires_at: Math.floor(Date.now() / 1000) + 3600,
						session_token: 'test-token',
					})
				);
			});

			automodeService = createService();
			const chatRequest: Partial<ChatRequest> = {
				location: ChatLocation.Panel,
				prompt: 'test prompt',
				sessionId: 'session-all-unknown'
			};

			await expect(
				automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint])
			).rejects.toThrow('no available model found');
			expect(mockLogService.warn).toHaveBeenCalledWith(
				expect.stringContaining('No available_models matched knownEndpoints')
			);
		});
	});
});