Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/workbench/contrib/chat/common/chatImageExtraction.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { decodeBase64, VSBuffer } from '../../../../base/common/buffer.js';
7
import { getExtensionForMimeType, getMediaMime } from '../../../../base/common/mime.js';
8
import { URI } from '../../../../base/common/uri.js';
9
import { localize } from '../../../../nls.js';
10
import { isLocation } from '../../../../editor/common/languages.js';
11
import { IChatResponseViewModel, IChatRequestViewModel, isRequestVM } from './model/chatViewModel.js';
12
import { ChatResponseResource } from './model/chatModel.js';
13
import { IChatContentInlineReference, IChatToolInvocation, IChatToolInvocationSerialized, IToolResultOutputDetailsSerialized } from './chatService/chatService.js';
14
import { isToolResultInputOutputDetails, isToolResultOutputDetails, IToolResultOutputDetails } from './tools/languageModelToolsService.js';
15
import { isImageVariableEntry } from './attachments/chatVariableEntries.js';
16
17
/**
 * A single image pulled out of a chat request or response, together with the
 * metadata needed to present it.
 */
export interface IChatExtractedImage {
	/**
	 * Identifier of the image. Producers in this file use either
	 * `<toolCallId>_<outputIndex>` (tool output details) or the string form of
	 * the image URI (message URIs, inline references, attachments).
	 */
	readonly id: string;
	/** URI that addresses the image. */
	readonly uri: URI;
	/** Display name, e.g. the file basename or a localized "Image N" label. */
	readonly name: string;
	/** MIME type of {@link data}. */
	readonly mimeType: string;
	/** Raw bytes of the image. */
	readonly data: VSBuffer;
	/** Localized label describing where the image came from (tool, file, attachment). */
	readonly source: string;
	/** Optional descriptive text; `undefined` when the producer has none. */
	readonly caption: string | undefined;
}
26
27
/**
 * A titled group of images extracted from one chat response.
 */
export interface IChatExtractedImageCollection {
	/** Identifier of the collection. */
	readonly id: string;
	/** Human-readable title shown for the group. */
	readonly title: string;
	/** The extracted images, in the order they were discovered. */
	readonly images: IChatExtractedImage[];
}
32
33
/**
 * Gathers every image contained in a chat response into one collection.
 *
 * Tool invocation parts contribute the images embedded in their output details
 * as well as images referenced by URI from their message. Inline reference
 * parts contribute the referenced file when its path maps to an image MIME
 * type. File contents are obtained through the {@link readFile} callback.
 *
 * @param response The response view model whose parts are scanned.
 * @param readFile Callback that resolves the bytes behind a URI.
 * @returns A collection whose id combines the session resource and the response
 * id, and whose title is the text of the matching request (or a localized
 * default when no matching request is found).
 */
export async function extractImagesFromChatResponse(
	response: IChatResponseViewModel,
	readFile: (uri: URI) => Promise<VSBuffer>,
): Promise<IChatExtractedImageCollection> {
	const collected: IChatExtractedImage[] = [];

	// Parts are handled strictly one after another so the resulting image
	// order follows the order of the parts in the response.
	for (const part of response.response.value) {
		switch (part.kind) {
			case 'toolInvocation':
			case 'toolInvocationSerialized': {
				// Embedded output images first, then images referenced from the message.
				collected.push(...extractImagesFromToolInvocationOutputDetails(part, response.sessionResource));
				collected.push(...await extractImagesFromToolInvocationMessages(part, readFile));
				break;
			}
			case 'inlineReference': {
				const referenced = await extractImageFromInlineReference(part, readFile);
				if (referenced) {
					collected.push(referenced);
				}
				break;
			}
		}
	}

	// The carousel title is the text of the user request this response answers.
	const originatingRequest = response.session.getItems().find(
		(candidate): candidate is IChatRequestViewModel => isRequestVM(candidate) && candidate.id === response.requestId
	);
	let title: string;
	if (originatingRequest) {
		title = originatingRequest.messageText;
	} else {
		title = localize('chatImageExtraction.defaultTitle', "Images");
	}

	return {
		id: `${response.sessionResource.toString()}_${response.id}`,
		title,
		images: collected,
	};
}
68
69
/**
 * Extracts the images carried in a tool invocation's result details.
 *
 * Two shapes of result details are recognised:
 * - input/output details: every `embed` output item with an `image/*` MIME type
 *   that is not flagged as text is decoded from base64;
 * - single output details: the output is used when its MIME type is `image/*`
 *   and image bytes can be obtained for it.
 *
 * Each image receives a URI created through `ChatResponseResource.createUri`
 * and is captioned with the invocation's past-tense message, falling back to
 * its invocation message.
 *
 * @param toolInvocation The live or serialized tool invocation to inspect.
 * @param sessionResource Session URI used when building each image URI.
 * @returns The extracted images; empty when the details contain none.
 */
export function extractImagesFromToolInvocationOutputDetails(toolInvocation: IChatToolInvocation | IChatToolInvocationSerialized, sessionResource: URI): IChatExtractedImage[] {
	const extracted: IChatExtractedImage[] = [];

	const details = IChatToolInvocation.resultDetails(toolInvocation);

	// The message may be a plain string or an object exposing its text as `value`.
	const message = toolInvocation.pastTenseMessage ?? toolInvocation.invocationMessage;
	let caption: string | undefined;
	if (message) {
		caption = typeof message === 'string' ? message : message.value;
	}

	const addImage = (mimeType: string, data: VSBuffer, outputIndex: number): void => {
		const extension = getExtensionForMimeType(mimeType);
		const basename = extension ? `file${extension}` : 'file.bin';
		extracted.push({
			id: `${toolInvocation.toolCallId}_${outputIndex}`,
			uri: ChatResponseResource.createUri(sessionResource, toolInvocation.toolCallId, outputIndex, basename),
			// Numbered by position among the images extracted so far, not by output index.
			name: localize('chatImageExtraction.imageName', "Image {0}", extracted.length + 1),
			mimeType,
			data,
			source: localize('chatImageExtraction.toolSource', "Tool: {0}", toolInvocation.toolId),
			caption,
		});
	};

	if (isToolResultInputOutputDetails(details)) {
		for (const [outputIndex, entry] of details.output.entries()) {
			if (entry.type === 'embed' && entry.mimeType?.startsWith('image/') && !entry.isText) {
				addImage(entry.mimeType, decodeBase64(entry.value), outputIndex);
			}
		}
	} else if (isToolResultOutputDetails(details)) {
		const singleOutput = details.output;
		if (singleOutput.mimeType?.startsWith('image/')) {
			const bytes = getImageDataFromOutputDetails(details, toolInvocation);
			if (bytes) {
				addImage(singleOutput.mimeType, bytes, 0);
			}
		}
	}

	return extracted;
}
111
112
/**
 * Extracts images referenced by URI from a tool invocation's message.
 *
 * Only one message is inspected: `pastTenseMessage` when present, otherwise
 * `invocationMessage`. When `pastTenseMessage` exists it visually replaces
 * `invocationMessage` in the UI, so the URIs of `invocationMessage` are not
 * consulted in that case.
 *
 * @param toolInvocation The live or serialized tool invocation to inspect.
 * @param readFile Callback that resolves the bytes behind a URI. URIs whose
 * read rejects are skipped.
 * @returns One image per message URI whose path maps to an `image/*` MIME type
 * and whose contents could be read.
 */
export async function extractImagesFromToolInvocationMessages(
	toolInvocation: IChatToolInvocation | IChatToolInvocationSerialized,
	readFile: (uri: URI) => Promise<VSBuffer>
): Promise<IChatExtractedImage[]> {
	const message = toolInvocation.pastTenseMessage ?? toolInvocation.invocationMessage;
	// Plain-string messages cannot carry a URI table.
	if (!message || typeof message === 'string') {
		return [];
	}

	const referencedUris = message.uris ? Object.values(message.uris) : [];
	if (referencedUris.length === 0) {
		return [];
	}

	const found: IChatExtractedImage[] = [];
	for (const components of referencedUris) {
		const uri = URI.revive(components);
		const mimeType = getMediaMime(uri.path);
		if (!mimeType?.startsWith('image/')) {
			continue;
		}

		let contents: VSBuffer;
		try {
			contents = await readFile(uri);
		} catch {
			// Best effort: an unreadable image is left out of the result.
			continue;
		}

		found.push({
			id: uri.toString(),
			uri,
			name: uri.path.split('/').pop() ?? 'image',
			mimeType,
			data: contents,
			source: localize('chatImageExtraction.toolSource', "Tool: {0}", toolInvocation.toolId),
			caption: message.value,
		});
	}
	return found;
}
149
150
/**
 * Returns the image bytes held by a tool result's output details.
 *
 * For a live invocation the output's `value` is returned as is. For a
 * serialized invocation the details are treated as
 * {@link IToolResultOutputDetailsSerialized} and the bytes are decoded from
 * `base64Data`.
 *
 * @param resultDetails The output details of the tool result.
 * @param toolInvocation The invocation the details belong to; its `kind`
 * selects which representation is read.
 * @returns The image bytes, or `undefined` when a serialized invocation has no
 * `base64Data`.
 */
function getImageDataFromOutputDetails(resultDetails: IToolResultOutputDetails, toolInvocation: IChatToolInvocation | IChatToolInvocationSerialized): VSBuffer | undefined {
	if (toolInvocation.kind !== 'toolInvocationSerialized') {
		return resultDetails.output.value;
	}

	// The declared parameter type describes the live shape; a serialized
	// invocation is read through the serialized shape instead.
	const { base64Data } = (resultDetails as unknown as IToolResultOutputDetailsSerialized).output;
	return base64Data ? decodeBase64(base64Data) : undefined;
}
161
162
/**
 * Turns an inline reference into an extracted image when it points at an image file.
 *
 * The reference may be a URI, a location, or an object exposing a `location`;
 * in every case the underlying URI is used.
 *
 * @param part The inline reference content part.
 * @param readFile Callback that resolves the bytes behind a URI.
 * @returns The image, or `undefined` when the URI path does not map to an
 * `image/*` MIME type or the read rejects.
 */
async function extractImageFromInlineReference(
	part: IChatContentInlineReference,
	readFile: (uri: URI) => Promise<VSBuffer>,
): Promise<IChatExtractedImage | undefined> {
	const reference = part.inlineReference;

	let target: URI;
	if (URI.isUri(reference)) {
		target = reference;
	} else if (isLocation(reference)) {
		target = reference.uri;
	} else {
		target = reference.location.uri;
	}

	const mimeType = getMediaMime(target.path);
	if (!mimeType?.startsWith('image/')) {
		return undefined;
	}

	let contents: VSBuffer;
	try {
		contents = await readFile(target);
	} catch {
		// Best effort: an unreadable file yields no image.
		return undefined;
	}

	return {
		id: target.toString(),
		uri: target,
		// Prefer the name given on the part, then the last path segment.
		name: part.name ?? target.path.split('/').pop() ?? 'image',
		mimeType,
		data: contents,
		source: localize('chatImageExtraction.inlineReference', "File"),
		caption: undefined,
	};
}
190
191
/**
 * Coerces a loosely-typed image payload into a `Uint8Array`.
 *
 * Accepted inputs:
 * - a `Uint8Array`, returned unchanged;
 * - an `ArrayBuffer`, wrapped without copying;
 * - a plain (non-array) object whose keys are all non-negative integer indices
 *   and whose values are all numbers, e.g. `{ "0": 137, "1": 80 }`. The values
 *   are emitted in ascending key order.
 *
 * Anything else yields `undefined`. This includes arrays, primitives, `null`,
 * and objects that carry a non-index key or a non-numeric value. Such objects
 * are not byte maps, and converting them would produce meaningless bytes.
 *
 * @param value The candidate payload.
 * @returns The bytes, or `undefined` when `value` is not a recognised byte container.
 */
export function coerceImageBuffer(value: unknown): Uint8Array | undefined {
	if (value instanceof Uint8Array) {
		return value;
	}
	if (value instanceof ArrayBuffer) {
		return new Uint8Array(value);
	}
	if (!value || typeof value !== 'object' || Array.isArray(value)) {
		return undefined;
	}

	const record = value as Record<string, unknown>;
	const indexedBytes: [number, number][] = [];
	for (const key of Object.keys(record)) {
		const byte = record[key];
		// Reject anything that is not a canonical array index mapped to a number.
		if (!/^(?:0|[1-9]\d*)$/.test(key) || typeof byte !== 'number') {
			return undefined;
		}
		indexedBytes.push([Number(key), byte]);
	}

	// Sort explicitly by numeric index rather than relying on key enumeration order.
	indexedBytes.sort((a, b) => a[0] - b[0]);
	return Uint8Array.from(indexedBytes, ([, byte]) => byte);
}
204
205
/**
 * Extracts the images a user attached to a chat request.
 *
 * Only variables recognised by `isImageVariableEntry` whose value can be
 * coerced to bytes are included. The MIME type is taken from the variable,
 * then derived from its name, and finally defaults to `image/png`.
 *
 * @param request The request view model whose variables are scanned.
 * @returns One image per usable image attachment, in variable order.
 */
export function extractImagesFromChatRequest(
	request: IChatRequestViewModel,
): IChatExtractedImage[] {
	const attachments: IChatExtractedImage[] = [];

	for (const entry of request.variables) {
		if (!isImageVariableEntry(entry)) {
			continue;
		}

		const bytes = coerceImageBuffer(entry.value);
		if (!bytes) {
			continue;
		}

		// Use the first reference when it is a real URI; otherwise synthesize a
		// `data:` URI from the variable id and name so the image is addressable.
		const firstReference = entry.references?.[0]?.reference;
		let imageUri: URI;
		if (URI.isUri(firstReference)) {
			imageUri = firstReference;
		} else {
			imageUri = URI.from({ scheme: 'data', path: `${entry.id}/${encodeURIComponent(entry.name)}` });
		}

		attachments.push({
			id: imageUri.toString(),
			uri: imageUri,
			name: entry.name,
			mimeType: entry.mimeType ?? getMediaMime(entry.name) ?? 'image/png',
			data: VSBuffer.wrap(bytes),
			source: localize('chatImageExtraction.userAttachment', "Attachment"),
			caption: undefined,
		});
	}

	return attachments;
}
235
236