Path: blob/main/extensions/copilot/src/extension/byok/common/test/geminiMessageConverter.spec.ts
13405 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { Raw } from '@vscode/prompt-tsx';
import { describe, expect, it } from 'vitest';
import type { LanguageModelChatMessage } from 'vscode';
import { CustomDataPartMimeTypes } from '../../../../platform/endpoint/common/endpointTypes';
import { LanguageModelChatMessageRole, LanguageModelDataPart, LanguageModelTextPart, LanguageModelToolResultPart, LanguageModelTextPart as LMText } from '../../../../vscodeTypes';
import { apiMessageToGeminiMessage } from '../geminiMessageConverter';

/**
 * Tests for the Gemini message converter:
 *  - `apiMessageToGeminiMessage`: chat messages -> `{ contents, systemInstruction }`
 *  - `geminiMessagesToRawMessages`: Gemini contents -> Raw chat messages
 */
describe('GeminiMessageConverter', () => {
	it('should convert basic user and assistant messages', () => {
		const messages: LanguageModelChatMessage[] = [
			{
				role: LanguageModelChatMessageRole.User,
				content: [new LanguageModelTextPart('Hello, how are you?')],
				name: undefined
			},
			{
				role: LanguageModelChatMessageRole.Assistant,
				content: [new LanguageModelTextPart('I am doing well, thank you!')],
				name: undefined
			}
		];

		const result = apiMessageToGeminiMessage(messages);

		// The assistant role is expected to surface as Gemini's 'model' role.
		expect(result.contents).toHaveLength(2);
		expect(result.contents[0].role).toBe('user');
		expect(result.contents[0].parts).toBeDefined();
		expect(result.contents[0].parts![0].text).toBe('Hello, how are you?');
		expect(result.contents[1].role).toBe('model');
		expect(result.contents[1].parts).toBeDefined();
		expect(result.contents[1].parts![0].text).toBe('I am doing well, thank you!');
	});

	it('should handle system messages as system instruction', () => {
		const messages: LanguageModelChatMessage[] = [
			{
				role: LanguageModelChatMessageRole.System,
				content: [new LanguageModelTextPart('You are a helpful assistant.')],
				name: undefined
			},
			{
				role: LanguageModelChatMessageRole.User,
				content: [new LanguageModelTextPart('Hello!')],
				name: undefined
			}
		];

		const result = apiMessageToGeminiMessage(messages);

		// The system message must land in systemInstruction and not in contents.
		expect(result.systemInstruction).toBeDefined();
		expect(result.systemInstruction!.parts).toBeDefined();
		expect(result.systemInstruction!.parts![0].text).toBe('You are a helpful assistant.');
		expect(result.contents).toHaveLength(1);
		expect(result.contents[0].role).toBe('user');
	});

	it('should filter out empty text parts', () => {
		const messages: LanguageModelChatMessage[] = [
			{
				role: LanguageModelChatMessageRole.User,
				content: [
					new LanguageModelTextPart(''),
					new LanguageModelTextPart(' '),
					new LanguageModelTextPart('Hello!')
				],
				name: undefined
			}
		];

		const result = apiMessageToGeminiMessage(messages);

		expect(result.contents[0].parts).toBeDefined();
		expect(result.contents[0].parts!).toHaveLength(2); // Empty string filtered out, whitespace kept
		expect(result.contents[0].parts![0].text).toBe(' ');
		expect(result.contents[0].parts![1].text).toBe('Hello!');
	});

	it('should extract functionResponse parts from model message into subsequent user message and prune empty model', () => {
		// Simulate a model message that (incorrectly) contains only a tool result part
		const toolResult = new LanguageModelToolResultPart('myTool_12345', [new LanguageModelTextPart('{"foo":"bar"}')]);
		const messages: LanguageModelChatMessage[] = [
			{
				role: LanguageModelChatMessageRole.Assistant,
				content: [toolResult],
				name: undefined
			}
		];

		const { contents } = apiMessageToGeminiMessage(messages);

		// The original (empty) model message should be pruned; we expect a single user message with functionResponse
		expect(contents).toHaveLength(1);
		expect(contents[0].role).toBe('user');
		expect(contents[0].parts![0]).toHaveProperty('functionResponse');
		const fr: any = contents[0].parts![0];
		expect(fr.functionResponse.name).toBe('myTool'); // extracted from callId prefix
		expect(fr.functionResponse.response).toEqual({ foo: 'bar' });
	});

	it('should wrap array responses in an object', () => {
		const toolResult = new LanguageModelToolResultPart('listRepos_12345', [new LanguageModelTextPart('["repo1", "repo2"]')]);
		const messages: LanguageModelChatMessage[] = [
			{
				role: LanguageModelChatMessageRole.Assistant,
				content: [toolResult],
				name: undefined
			}
		];

		const result = apiMessageToGeminiMessage(messages);

		// A JSON array payload is expected under a `result` key rather than as the bare response.
		expect(result.contents).toHaveLength(1);
		expect(result.contents[0].role).toBe('user');
		const fr: any = result.contents[0].parts![0];
		expect(fr.functionResponse.response).toEqual({ result: ['repo1', 'repo2'] });
	});

	it('should be idempotent when called multiple times (no duplication)', () => {
		const toolResult = new LanguageModelToolResultPart('doThing_12345', [new LMText('{"value":42}')]);
		const messages: LanguageModelChatMessage[] = [
			{ role: LanguageModelChatMessageRole.Assistant, content: [new LMText('Result:'), toolResult], name: undefined }
		];
		const first = apiMessageToGeminiMessage(messages);
		const second = apiMessageToGeminiMessage(messages); // Re-run with same original messages

		// Both runs should yield identical normalized structure (model text + user tool response) without growth
		expect(first.contents.length).toBe(2);
		expect(second.contents.length).toBe(2);
		expect(first.contents[0].role).toBe('model');
		expect(first.contents[1].role).toBe('user');
		expect(second.contents[0].role).toBe('model');
		expect(second.contents[1].role).toBe('user');
	});

	describe('Image handling', () => {
		it('should handle LanguageModelDataPart as inline image data', () => {
			const imageData = new Uint8Array([137, 80, 78, 71, 13, 10, 26, 10]); // PNG header
			const imagePart = new LanguageModelDataPart(imageData, 'image/png');

			const messages: LanguageModelChatMessage[] = [
				{
					role: LanguageModelChatMessageRole.User,
					content: [new LanguageModelTextPart('Here is an image:'), imagePart as any],
					name: undefined
				}
			];

			const result = apiMessageToGeminiMessage(messages);

			expect(result.contents).toHaveLength(1);
			expect(result.contents[0].parts).toHaveLength(2);
			expect(result.contents[0].parts![0].text).toBe('Here is an image:');
			expect(result.contents[0].parts![1]).toHaveProperty('inlineData');
			const inlineData: any = result.contents[0].parts![1];
			expect(inlineData.inlineData.mimeType).toBe('image/png');
			// The raw bytes are expected to be carried as a base64 string.
			expect(inlineData.inlineData.data).toBe(Buffer.from(imageData).toString('base64'));
		});

		it('should filter out StatefulMarker and CacheControl data parts', () => {
			const imageData = new Uint8Array([137, 80, 78, 71]);
			const validImage = new LanguageModelDataPart(imageData, 'image/jpeg');
			const statefulMarker = new LanguageModelDataPart(new Uint8Array([1, 2, 3]), CustomDataPartMimeTypes.StatefulMarker);
			const cacheControl = new LanguageModelDataPart(new TextEncoder().encode('ephemeral'), CustomDataPartMimeTypes.CacheControl);

			const messages: LanguageModelChatMessage[] = [
				{
					role: LanguageModelChatMessageRole.User,
					content: [validImage as any, statefulMarker as any, cacheControl as any],
					name: undefined
				}
			];

			const result = apiMessageToGeminiMessage(messages);

			// Should only include the valid image, not the stateful marker or cache control
			expect(result.contents[0].parts).toHaveLength(1);
			expect(result.contents[0].parts![0]).toHaveProperty('inlineData');
			const inlineData: any = result.contents[0].parts![0];
			expect(inlineData.inlineData.mimeType).toBe('image/jpeg');
		});

		it('should handle images in tool result content with text', () => {
			const imageData = new Uint8Array([255, 216, 255, 224]); // JPEG header
			const imagePart = new LanguageModelDataPart(imageData, 'image/jpeg');
			const textPart = new LanguageModelTextPart('{"success": true}');

			const toolResult = new LanguageModelToolResultPart('processImage_12345', [textPart, imagePart as any]);
			const messages: LanguageModelChatMessage[] = [
				{
					role: LanguageModelChatMessageRole.Assistant,
					content: [toolResult],
					name: undefined
				}
			];

			const result = apiMessageToGeminiMessage(messages);

			// Should have a user message with function response
			expect(result.contents).toHaveLength(1);
			expect(result.contents[0].role).toBe('user');
			expect(result.contents[0].parts![0]).toHaveProperty('functionResponse');

			// The parsed JSON text and the image metadata are expected side by side in the response.
			const fr: any = result.contents[0].parts![0];
			expect(fr.functionResponse.name).toBe('processImage');
			expect(fr.functionResponse.response.success).toBe(true);
			expect(fr.functionResponse.response.images).toBeDefined();
			expect(fr.functionResponse.response.images).toHaveLength(1);
			expect(fr.functionResponse.response.images[0].mimeType).toBe('image/jpeg');
			expect(fr.functionResponse.response.images[0].size).toBe(imageData.length);
		});

		it('should handle images in tool result content without text', () => {
			const imageData1 = new Uint8Array([255, 216, 255, 224]); // JPEG header
			const imageData2 = new Uint8Array([137, 80, 78, 71, 13, 10, 26, 10]); // PNG header
			const imagePart1 = new LanguageModelDataPart(imageData1, 'image/jpeg');
			const imagePart2 = new LanguageModelDataPart(imageData2, 'image/png');

			const toolResult = new LanguageModelToolResultPart('generateImages_12345', [imagePart1 as any, imagePart2 as any]);
			const messages: LanguageModelChatMessage[] = [
				{
					role: LanguageModelChatMessageRole.Assistant,
					content: [toolResult],
					name: undefined
				}
			];

			const result = apiMessageToGeminiMessage(messages);

			expect(result.contents).toHaveLength(1);
			expect(result.contents[0].role).toBe('user');

			const fr: any = result.contents[0].parts![0];
			expect(fr.functionResponse.name).toBe('generateImages');
			expect(fr.functionResponse.response.images).toHaveLength(2);

			// First image
			expect(fr.functionResponse.response.images[0].mimeType).toBe('image/jpeg');
			expect(fr.functionResponse.response.images[0].size).toBe(imageData1.length);
			expect(fr.functionResponse.response.images[0].data).toBe(Buffer.from(imageData1).toString('base64'));

			// Second image
			expect(fr.functionResponse.response.images[1].mimeType).toBe('image/png');
			expect(fr.functionResponse.response.images[1].size).toBe(imageData2.length);
			expect(fr.functionResponse.response.images[1].data).toBe(Buffer.from(imageData2).toString('base64'));
		});

		it('should handle mixed text and filtered data parts in tool results', () => {
			const validImageData = new Uint8Array([255, 216]);
			const validImage = new LanguageModelDataPart(validImageData, 'image/jpeg');
			const statefulMarker = new LanguageModelDataPart(new Uint8Array([1, 2, 3]), CustomDataPartMimeTypes.StatefulMarker);
			const textPart = new LanguageModelTextPart('Result text');

			const toolResult = new LanguageModelToolResultPart('mixedContent_12345', [textPart, validImage as any, statefulMarker as any]);
			const messages: LanguageModelChatMessage[] = [
				{
					role: LanguageModelChatMessageRole.Assistant,
					content: [toolResult],
					name: undefined
				}
			];

			const result = apiMessageToGeminiMessage(messages);

			const fr: any = result.contents[0].parts![0];
			expect(fr.functionResponse.name).toBe('mixedContent');
			// Should include text and valid image, but not stateful marker
			expect(fr.functionResponse.response.result).toContain('Result text');
			expect(fr.functionResponse.response.result).toContain('[Contains 1 image(s) with types: image/jpeg]');
			expect(fr.functionResponse.response.images).toHaveLength(1);
			expect(fr.functionResponse.response.images[0].mimeType).toBe('image/jpeg');
		});
	});

	describe('geminiMessagesToRawMessages', () => {
		it('should convert function response with images to Raw format with image content parts', async () => {
			const { geminiMessagesToRawMessages } = await import('../geminiMessageConverter');

			// Fixture payloads, each paired below with the MIME type matching its magic bytes:
			// '/9j/' decodes to the JPEG SOI marker (FF D8 FF), 'iVBORw0KGgo' to the PNG signature.
			const jpegBase64 = '/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAYEBAQFBAYFBQYJBgUGCQsIBgYICwwKCgsKCgwQDAwMDAwMEAwODxAPDgwTExQUExMcGxsbHB8fHx8fHx8fHx//2wBDAQcHBw0MDRgQEBgaFREVGh8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx//wAARCAABAAEDAREAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAv/xAAUEAEAAAAAAAAAAAAAAAAAAAAA/8QAFQEBAQAAAAAAAAAAAAAAAAAAAAX/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMBAAIRAxEAPwCdABmX/9k=';
			const pngBase64 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==';

			// Simulate a Gemini Content with function response containing images
			const contents = [{
				role: 'user',
				parts: [{
					functionResponse: {
						name: 'generateImages',
						response: {
							success: true,
							images: [
								{
									mimeType: 'image/jpeg',
									size: 1024,
									data: jpegBase64
								},
								{
									mimeType: 'image/png',
									size: 512,
									data: pngBase64
								}
							]
						}
					}
				}]
			}];

			const rawMessages = geminiMessagesToRawMessages(contents);

			expect(rawMessages).toHaveLength(1);
			// Check the role - should be Raw.ChatRole.Tool enum value
			expect(rawMessages[0].role).toBe(Raw.ChatRole.Tool);

			// Type assertion for tool message
			const toolMessage = rawMessages[0] as any;
			expect(toolMessage.toolCallId).toBe('generateImages');
			expect(rawMessages[0].content).toHaveLength(3); // 2 images + 1 text part

			// Check first image
			expect(rawMessages[0].content[0].type).toBe(Raw.ChatCompletionContentPartKind.Image);
			const firstImage = rawMessages[0].content[0] as any;
			expect(firstImage.imageUrl?.url).toBe(`data:image/jpeg;base64,${jpegBase64}`);

			// Check second image
			expect(rawMessages[0].content[1].type).toBe(Raw.ChatCompletionContentPartKind.Image);
			const secondImage = rawMessages[0].content[1] as any;
			expect(secondImage.imageUrl?.url).toBe(`data:image/png;base64,${pngBase64}`);

			// Check text content with cleaned response
			expect(rawMessages[0].content[2].type).toBe(Raw.ChatCompletionContentPartKind.Text);
			const textPart = rawMessages[0].content[2] as any;
			const textContent = JSON.parse(textPart.text);
			expect(textContent.success).toBe(true);
			expect(textContent.images).toHaveLength(2);
			expect(textContent.images[0].mimeType).toBe('image/jpeg');
			expect(textContent.images[0].size).toBe(1024);
			expect(textContent.images[1].mimeType).toBe('image/png');
			expect(textContent.images[1].size).toBe(512);
			// Should not contain raw base64 data in text content
			expect(textContent.images[0]).not.toHaveProperty('data');
			expect(textContent.images[1]).not.toHaveProperty('data');
		});

		it('should handle function response without images normally', async () => {
			const { geminiMessagesToRawMessages } = await import('../geminiMessageConverter');

			const contents = [{
				role: 'user',
				parts: [{
					functionResponse: {
						name: 'textFunction',
						response: { result: 'success', value: 42 }
					}
				}]
			}];

			const rawMessages = geminiMessagesToRawMessages(contents);

			// Without images, the response is expected as a single JSON text part.
			expect(rawMessages).toHaveLength(1);
			expect(rawMessages[0].role).toBe(Raw.ChatRole.Tool);
			expect(rawMessages[0].content).toHaveLength(1);
			expect(rawMessages[0].content[0].type).toBe(Raw.ChatCompletionContentPartKind.Text);
			const textPart = rawMessages[0].content[0] as any;
			expect(JSON.parse(textPart.text)).toEqual({ result: 'success', value: 42 });
		});
	});
});