Path: blob/main/src/vs/workbench/contrib/chat/common/chatImageExtraction.ts
13401 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { decodeBase64, VSBuffer } from '../../../../base/common/buffer.js';
import { getExtensionForMimeType, getMediaMime } from '../../../../base/common/mime.js';
import { URI } from '../../../../base/common/uri.js';
import { localize } from '../../../../nls.js';
import { isLocation } from '../../../../editor/common/languages.js';
import { IChatResponseViewModel, IChatRequestViewModel, isRequestVM } from './model/chatViewModel.js';
import { ChatResponseResource } from './model/chatModel.js';
import { IChatContentInlineReference, IChatToolInvocation, IChatToolInvocationSerialized, IToolResultOutputDetailsSerialized } from './chatService/chatService.js';
import { isToolResultInputOutputDetails, isToolResultOutputDetails, IToolResultOutputDetails } from './tools/languageModelToolsService.js';
import { isImageVariableEntry } from './attachments/chatVariableEntries.js';

/**
 * A single image pulled out of a chat request or response, together with the
 * metadata needed to present it.
 */
export interface IChatExtractedImage {
	/** Identifier of the image (tool call id + output index, or the stringified URI). */
	readonly id: string;
	/** URI under which the image is addressed. */
	readonly uri: URI;
	/** Display name of the image. */
	readonly name: string;
	/** Mime type of the image; the extraction functions in this file only emit `image/*` values. */
	readonly mimeType: string;
	/** Raw image bytes. */
	readonly data: VSBuffer;
	/** Localized description of where the image came from (tool, file, attachment). */
	readonly source: string;
	/** Optional text shown alongside the image; `undefined` when there is none. */
	readonly caption: string | undefined;
}

/**
 * A titled group of extracted images.
 */
export interface IChatExtractedImageCollection {
	/** Identifier of the collection. */
	readonly id: string;
	/** Title of the collection. */
	readonly title: string;
	/** The images that belong to the collection, in extraction order. */
	readonly images: IChatExtractedImage[];
}

/**
 * Collects every image contained in a chat response.
 *
 * The response parts are visited in order. For tool invocations (live or serialized) the
 * images from the output details are added first, followed by the images referenced by the
 * invocation's message URIs. For inline references the referenced file is loaded through
 * {@link readFile} when its path maps to an image mime type.
 *
 * @param response The response view model whose parts are scanned.
 * @param readFile Callback used to load the bytes behind a URI.
 * @returns A collection whose id is built from the session resource and the response id, and
 * whose title is the text of the matching user request (or a localized default).
 */
export async function extractImagesFromChatResponse(
	response: IChatResponseViewModel,
	readFile: (uri: URI) => Promise<VSBuffer>,
): Promise<IChatExtractedImageCollection> {
	const collected: IChatExtractedImage[] = [];

	for (const part of response.response.value) {
		switch (part.kind) {
			case 'toolInvocation':
			case 'toolInvocationSerialized': {
				// Output-detail images come first, then images referenced from the message.
				const fromOutput = extractImagesFromToolInvocationOutputDetails(part, response.sessionResource);
				collected.push(...fromOutput);
				const fromMessage = await extractImagesFromToolInvocationMessages(part, readFile);
				collected.push(...fromMessage);
				break;
			}
			case 'inlineReference': {
				const referenced = await extractImageFromInlineReference(part, readFile);
				if (referenced) {
					collected.push(referenced);
				}
				break;
			}
		}
	}

	// The carousel title is the text of the user request this response answers.
	const owningRequest = response.session.getItems().find((candidate): candidate is IChatRequestViewModel => isRequestVM(candidate) && candidate.id === response.requestId);
	let title: string;
	if (owningRequest) {
		title = owningRequest.messageText;
	} else {
		title = localize('chatImageExtraction.defaultTitle', "Images");
	}

	return {
		id: `${response.sessionResource.toString()}_${response.id}`,
		title,
		images: collected,
	};
}

/**
 * Extracts the images embedded in the result details of a tool invocation.
 *
 * Two result shapes are handled: input/output details, where every non-text `embed` output
 * with an `image/*` mime type yields one image, and plain output details, where the single
 * output yields an image when its mime type is `image/*` and its data is available.
 *
 * @param toolInvocation The live or serialized tool invocation to inspect.
 * @param sessionResource Session URI used to build the per-image resource URI.
 * @returns The extracted images; empty when the result holds none.
 */
export function extractImagesFromToolInvocationOutputDetails(toolInvocation: IChatToolInvocation | IChatToolInvocationSerialized, sessionResource: URI): IChatExtractedImage[] {
	const found: IChatExtractedImage[] = [];
	const details = IChatToolInvocation.resultDetails(toolInvocation);

	// The caption is the text of the past tense message, falling back to the invocation message.
	const message = toolInvocation.pastTenseMessage ?? toolInvocation.invocationMessage;
	let caption: string | undefined;
	if (message) {
		caption = typeof message === 'string' ? message : message.value;
	}

	const addImage = (mimeType: string, data: VSBuffer, outputIndex: number): void => {
		const extension = getExtensionForMimeType(mimeType);
		const basename = extension ? `file${extension}` : 'file.bin';
		const uri = ChatResponseResource.createUri(sessionResource, toolInvocation.toolCallId, outputIndex, basename);
		// Images are numbered by their position among the images found so far (1-based).
		const ordinal = found.length + 1;
		found.push({
			id: `${toolInvocation.toolCallId}_${outputIndex}`,
			uri,
			name: localize('chatImageExtraction.imageName', "Image {0}", ordinal),
			mimeType,
			data,
			source: localize('chatImageExtraction.toolSource', "Tool: {0}", toolInvocation.toolId),
			caption,
		});
	};

	if (isToolResultInputOutputDetails(details)) {
		for (const [index, entry] of details.output.entries()) {
			if (entry.type !== 'embed' || !entry.mimeType?.startsWith('image/') || entry.isText) {
				continue;
			}
			addImage(entry.mimeType, decodeBase64(entry.value), index);
		}
	} else if (isToolResultOutputDetails(details)) {
		const { mimeType } = details.output;
		if (mimeType?.startsWith('image/')) {
			const bytes = getImageDataFromOutputDetails(details, toolInvocation);
			if (bytes) {
				addImage(mimeType, bytes, 0);
			}
		}
	}

	return found;
}

/**
 * Extracts the images referenced by the URIs attached to a tool invocation's message.
 *
 * @param toolInvocation The live or serialized tool invocation to inspect.
 * @param readFile Callback used to load the bytes behind a URI; URIs for which it rejects
 * are skipped.
 * @returns One image per message URI whose path maps to an `image/*` mime type and could be read.
 */
export async function extractImagesFromToolInvocationMessages(
	toolInvocation: IChatToolInvocation | IChatToolInvocationSerialized,
	readFile: (uri: URI) => Promise<VSBuffer>
): Promise<IChatExtractedImage[]> {
	// Use pastTenseMessage if available, otherwise fall back to invocationMessage.
	// When pastTenseMessage exists it visually replaces invocationMessage in the UI,
	// so only its URIs are considered; invocationMessage URIs are not a fallback.
	const message = toolInvocation.pastTenseMessage ?? toolInvocation.invocationMessage;
	if (!message || typeof message === 'string') {
		return [];
	}
	if (!message.uris) {
		return [];
	}
	const referencedUris = Object.values(message.uris);
	if (referencedUris.length === 0) {
		return [];
	}

	const found: IChatExtractedImage[] = [];
	for (const components of referencedUris) {
		const uri = URI.revive(components);
		const mimeType = getMediaMime(uri.path);
		if (!mimeType?.startsWith('image/')) {
			continue;
		}

		let bytes: VSBuffer;
		try {
			bytes = await readFile(uri);
		} catch {
			// An unreadable file is skipped so the remaining URIs are still processed.
			continue;
		}

		const basename = uri.path.split('/').pop() ?? 'image';
		found.push({
			id: uri.toString(),
			uri,
			name: basename,
			mimeType,
			data: bytes,
			source: localize('chatImageExtraction.toolSource', "Tool: {0}", toolInvocation.toolId),
			caption: message.value,
		});
	}
	return found;
}

/**
 * Returns the image bytes stored in tool result output details.
 *
 * For a serialized invocation the details are read through the serialized shape and its
 * `base64Data` is decoded; `undefined` is returned when that field is empty or missing.
 * Otherwise the output's `value` is returned as is.
 */
function getImageDataFromOutputDetails(resultDetails: IToolResultOutputDetails, toolInvocation: IChatToolInvocation | IChatToolInvocationSerialized): VSBuffer | undefined {
	if (toolInvocation.kind !== 'toolInvocationSerialized') {
		return resultDetails.output.value;
	}

	const { base64Data } = (resultDetails as unknown as IToolResultOutputDetailsSerialized).output;
	return base64Data ? decodeBase64(base64Data) : undefined;
}

/**
 * Loads the image behind an inline reference.
 *
 * @param part The inline reference part; its reference is a URI, a location, or an object
 * carrying a location.
 * @param readFile Callback used to load the bytes behind the resolved URI.
 * @returns The extracted image, or `undefined` when the URI path does not map to an
 * `image/*` mime type or {@link readFile} rejects.
 */
async function extractImageFromInlineReference(
	part: IChatContentInlineReference,
	readFile: (uri: URI) => Promise<VSBuffer>,
): Promise<IChatExtractedImage | undefined> {
	const reference = part.inlineReference;
	let target: URI;
	if (URI.isUri(reference)) {
		target = reference;
	} else if (isLocation(reference)) {
		target = reference.uri;
	} else {
		target = reference.location.uri;
	}

	const mimeType = getMediaMime(target.path);
	if (!mimeType?.startsWith('image/')) {
		return undefined;
	}

	let bytes: VSBuffer;
	try {
		bytes = await readFile(target);
	} catch {
		return undefined;
	}

	// Prefer the explicit name of the reference over the last path segment.
	const displayName = part.name ?? target.path.split('/').pop() ?? 'image';
	return {
		id: target.toString(),
		uri: target,
		name: displayName,
		mimeType,
		data: bytes,
		source: localize('chatImageExtraction.inlineReference', "File"),
		caption: undefined,
	};
}

/**
 * Converts a loosely typed value into a `Uint8Array`.
 *
 * - A `Uint8Array` is returned unchanged.
 * - An `ArrayBuffer` is wrapped in a new `Uint8Array` view.
 * - Any other non-array object is treated as an index-keyed map: its keys are sorted by their
 *   numeric value and the corresponding property values become the bytes.
 * - Everything else (falsy values, primitives, arrays) yields `undefined`.
 *
 * @param value The value to convert.
 * @returns The bytes, or `undefined` when the value cannot be interpreted.
 */
export function coerceImageBuffer(value: unknown): Uint8Array | undefined {
	if (value instanceof Uint8Array) {
		return value;
	}
	if (value instanceof ArrayBuffer) {
		return new Uint8Array(value);
	}
	if (!value || typeof value !== 'object' || Array.isArray(value)) {
		return undefined;
	}

	const indexed = value as Record<string, number>;
	const orderedKeys = Object.keys(indexed).sort((left, right) => Number(left) - Number(right));
	return new Uint8Array(orderedKeys.map(key => indexed[key]));
}

/**
 * Extract images from a chat request's variable attachments (user-attached images).
 *
 * Variables that are not image entries, or whose value cannot be coerced to bytes, are
 * skipped. The mime type falls back to the one derived from the variable name and finally to
 * `image/png`. The image URI is the first reference of the variable when that is a URI;
 * otherwise a `data:` URI is built from the variable id and its encoded name.
 *
 * @param request The request view model whose variables are scanned.
 * @returns The extracted images in variable order.
 */
export function extractImagesFromChatRequest(
	request: IChatRequestViewModel,
): IChatExtractedImage[] {
	const found: IChatExtractedImage[] = [];

	for (const attachment of request.variables) {
		if (!isImageVariableEntry(attachment)) {
			continue;
		}

		const bytes = coerceImageBuffer(attachment.value);
		if (!bytes) {
			continue;
		}

		const mimeType = attachment.mimeType ?? getMediaMime(attachment.name) ?? 'image/png';

		const firstReference = attachment.references?.[0]?.reference;
		let imageUri: URI;
		if (URI.isUri(firstReference)) {
			imageUri = firstReference;
		} else {
			imageUri = URI.from({ scheme: 'data', path: `${attachment.id}/${encodeURIComponent(attachment.name)}` });
		}

		found.push({
			id: imageUri.toString(),
			uri: imageUri,
			name: attachment.name,
			mimeType,
			data: VSBuffer.wrap(bytes),
			source: localize('chatImageExtraction.userAttachment', "Attachment"),
			caption: undefined,
		});
	}

	return found;
}