Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/workbench/contrib/browserView/electron-browser/tools/screenshotBrowserTool.ts
13405 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import type { CancellationToken } from '../../../../../base/common/cancellation.js';
7
import { Codicon } from '../../../../../base/common/codicons.js';
8
import {
9
escapeMarkdownSyntaxTokens,
10
MarkdownString
11
} from '../../../../../base/common/htmlContent.js';
12
import { localize } from '../../../../../nls.js';
13
import { IPlaywrightService } from '../../../../../platform/browserView/common/playwrightService.js';
14
import { ToolDataSource, type CountTokensCallback, type IPreparedToolInvocation, type IToolData, type IToolImpl, type IToolInvocation, type IToolInvocationPreparationContext, type IToolResult, type ToolProgress } from '../../../chat/common/tools/languageModelToolsService.js';
15
import { IBrowserViewWorkbenchService } from '../../common/browserView.js';
16
import { errorResult, playwrightInvokeRaw } from './browserToolHelpers.js';
17
import { BrowserChatToolReferenceName } from '../../common/browserChatToolReferenceNames.js';
18
import { OpenPageToolId } from './openBrowserTool.js';
19
import { ReadBrowserToolData } from './readBrowserTool.js';
20
21
/**
 * Static metadata for the "Screenshot Page" browser tool: its identifiers,
 * user- and model-facing descriptions, icon, and the JSON schema of the
 * parameters it accepts. Only `pageId` is required by the schema.
 */
export const ScreenshotBrowserToolData: IToolData = {
	id: 'screenshot_page',
	toolReferenceName: BrowserChatToolReferenceName.ScreenshotPage,
	displayName: localize('screenshotBrowserTool.displayName', 'Screenshot Page'),
	userDescription: localize('screenshotBrowserTool.userDescription', 'Capture a screenshot of a browser page'),
	// The model description redirects the model to the read tool for anything actionable.
	modelDescription: `Capture a screenshot of the current browser page. You can't perform actions based on the screenshot; use ${ReadBrowserToolData.id} for actions.`,
	icon: Codicon.deviceCamera,
	source: ToolDataSource.Internal,
	inputSchema: {
		type: 'object',
		properties: {
			// Identifies which browser page to capture.
			pageId: {
				type: 'string',
				description: 'The browser page ID to capture, acquired from context or the open tool.'
			},
			// Element targeting: either an element reference ...
			ref: {
				type: 'string',
				description: 'Element reference to capture. If omitted, captures the whole viewport.'
			},
			// ... or a Playwright selector when no reference is available.
			selector: {
				type: 'string',
				description: 'Playwright selector of an element to capture when "ref" is not available. If omitted, captures the whole viewport.'
			},
			// Free-text label for the targeted element.
			element: {
				type: 'string',
				description: 'Human-readable description of the element to capture (e.g., "chart diagram", "product image").'
			},
			scrollIntoViewIfNeeded: {
				type: 'boolean',
				description: 'Whether to scroll the element into view before capturing. Defaults to false.',
			}
		},
		required: ['pageId'],
	},
};
56
57
/**
 * Parameters accepted by the screenshot tool; mirrors the tool's input schema.
 */
interface IScreenshotBrowserToolParams {
	/** ID of the browser page to capture. */
	pageId: string;

	/** Element reference to capture; when set, it is used instead of `selector`. */
	ref?: string;

	/** Playwright selector of the element to capture when no `ref` is given. */
	selector?: string;

	/** Human-readable description of the element, shown in the progress messages. */
	element?: string;

	/** Whether to scroll the targeted element into view before measuring it. */
	scrollIntoViewIfNeeded?: boolean;
}
64
65
/**
 * Tool implementation that captures a screenshot of a known browser page,
 * either of the whole viewport or cropped to the bounding box of one element.
 * The image is returned as a single `image/jpeg` data part.
 */
export class ScreenshotBrowserTool implements IToolImpl {
	constructor(
		@IBrowserViewWorkbenchService private readonly browserViewWorkbenchService: IBrowserViewWorkbenchService,
		@IPlaywrightService private readonly playwrightService: IPlaywrightService,
	) { }

	/**
	 * Produces the in-progress and past-tense messages for an invocation.
	 *
	 * When an `element` description is supplied, it is markdown-escaped and
	 * embedded in the messages; otherwise generic plain-string messages are used.
	 *
	 * @param _context Preparation context whose `parameters` carry the tool input.
	 * @param _token Cancellation token (unused).
	 * @returns The messages to display for this invocation.
	 */
	async prepareToolInvocation(_context: IToolInvocationPreparationContext, _token: CancellationToken): Promise<IPreparedToolInvocation | undefined> {
		const { element } = _context.parameters as IScreenshotBrowserToolParams;

		// No element description: fall back to the generic messages.
		if (!element) {
			return {
				invocationMessage: localize('browser.screenshot.invocation', "Capturing browser screenshot"),
				pastTenseMessage: localize('browser.screenshot.past', "Captured browser screenshot"),
			};
		}

		// The description is caller-supplied text, so escape it before placing it in markdown.
		const escapedElement = escapeMarkdownSyntaxTokens(element);
		const invocationMessage = new MarkdownString(localize('browser.screenshot.invocation.element', "Capturing screenshot of {0}", escapedElement));
		const pastTenseMessage = new MarkdownString(localize('browser.screenshot.past.element', "Captured screenshot of {0}", escapedElement));
		return { invocationMessage, pastTenseMessage };
	}

	/**
	 * Captures the screenshot.
	 *
	 * Returns an error result when no page ID is given or when no browser page
	 * with that ID is known. If a `ref` or `selector` is supplied, the element's
	 * bounding box is looked up through Playwright and used as the capture
	 * rectangle; if no box is returned, the capture is not restricted to a rectangle.
	 *
	 * @param invocation Invocation whose `parameters` carry the tool input.
	 * @param _countTokens Token counting callback (unused).
	 * @param _progress Progress reporter (unused).
	 * @param _token Cancellation token (unused).
	 * @returns A result holding the screenshot data, or an error result.
	 */
	async invoke(invocation: IToolInvocation, _countTokens: CountTokensCallback, _progress: ToolProgress, _token: CancellationToken): Promise<IToolResult> {
		const { pageId, ref, selector, scrollIntoViewIfNeeded } = invocation.parameters as IScreenshotBrowserToolParams;

		if (!pageId) {
			return errorResult(`No page ID provided. Use '${OpenPageToolId}' first.`);
		}

		// An element reference takes precedence over an explicit selector.
		const targetSelector = ref ? `aria-ref=${ref}` : selector;

		// Playwright's own screenshot methods are deliberately avoided: they cause brief
		// flashing on the page and don't handle zooming well. Playwright is only used
		// below to measure the element; the capture itself goes through the browser view.
		const knownView = this.browserViewWorkbenchService.getKnownBrowserViews().get(pageId);
		const browserViewModel = await knownView?.resolve();
		if (!browserViewModel) {
			return errorResult(`No browser page found with ID ${pageId}`);
		}

		// Only query Playwright when an element was targeted; a missing bounding box
		// is normalized to `undefined`.
		const pageRect = targetSelector
			? (await playwrightInvokeRaw(this.playwrightService, pageId, async (page, elementSelector, shouldScroll) => {
				const target = page.locator(elementSelector);
				if (shouldScroll) {
					await target.scrollIntoViewIfNeeded();
				}
				return target.boundingBox();
			}, targetSelector, scrollIntoViewIfNeeded)) || undefined
			: undefined;

		const screenshot = await browserViewModel.captureScreenshot({ pageRect });

		return {
			content: [
				{
					kind: 'data',
					value: {
						mimeType: 'image/jpeg',
						data: screenshot,
					},
				},
			],
		};
	}
}
127
128