Path: blob/main/src/vs/workbench/contrib/browserView/electron-browser/tools/screenshotBrowserTool.ts
13405 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import type { CancellationToken } from '../../../../../base/common/cancellation.js';
import { Codicon } from '../../../../../base/common/codicons.js';
import {
	escapeMarkdownSyntaxTokens,
	MarkdownString
} from '../../../../../base/common/htmlContent.js';
import { localize } from '../../../../../nls.js';
import { IPlaywrightService } from '../../../../../platform/browserView/common/playwrightService.js';
import { ToolDataSource, type CountTokensCallback, type IPreparedToolInvocation, type IToolData, type IToolImpl, type IToolInvocation, type IToolInvocationPreparationContext, type IToolResult, type ToolProgress } from '../../../chat/common/tools/languageModelToolsService.js';
import { IBrowserViewWorkbenchService } from '../../common/browserView.js';
import { errorResult, playwrightInvokeRaw } from './browserToolHelpers.js';
import { BrowserChatToolReferenceName } from '../../common/browserChatToolReferenceNames.js';
import { OpenPageToolId } from './openBrowserTool.js';
import { ReadBrowserToolData } from './readBrowserTool.js';

/**
 * Static descriptor for the "screenshot page" tool: its id, reference name,
 * localized user-facing labels, the description shown to the model, and the
 * JSON schema of the accepted input.
 */
export const ScreenshotBrowserToolData: IToolData = {
	id: 'screenshot_page',
	toolReferenceName: BrowserChatToolReferenceName.ScreenshotPage,
	displayName: localize('screenshotBrowserTool.displayName', 'Screenshot Page'),
	userDescription: localize('screenshotBrowserTool.userDescription', 'Capture a screenshot of a browser page'),
	// The line break after the first sentence is part of the description text.
	modelDescription: `Capture a screenshot of the current browser page. \nYou can't perform actions based on the screenshot; use ${ReadBrowserToolData.id} for actions.`,
	icon: Codicon.deviceCamera,
	source: ToolDataSource.Internal,
	inputSchema: {
		type: 'object',
		properties: {
			pageId: {
				type: 'string',
				description: 'The browser page ID to capture, acquired from context or the open tool.'
			},
			ref: {
				type: 'string',
				description: 'Element reference to capture. If omitted, captures the whole viewport.'
			},
			selector: {
				type: 'string',
				description: 'Playwright selector of an element to capture when "ref" is not available. If omitted, captures the whole viewport.'
			},
			element: {
				type: 'string',
				description: 'Human-readable description of the element to capture (e.g., "chart diagram", "product image").'
			},
			scrollIntoViewIfNeeded: {
				type: 'boolean',
				description: 'Whether to scroll the element into view before capturing. Defaults to false.',
			}
		},
		required: ['pageId'],
	},
};

/**
 * Shape of the parameters the tool reads from an invocation. Mirrors the
 * properties declared in {@link ScreenshotBrowserToolData}'s input schema.
 */
interface IScreenshotBrowserToolParams {
	/** ID of the browser page to capture. */
	pageId: string;
	/** Element reference; when set it is used instead of `selector`. */
	ref?: string;
	/** Playwright selector, used only when `ref` is not set. */
	selector?: string;
	/** Human-readable element description; only used in the progress messages. */
	element?: string;
	/** When truthy, the located element is scrolled into view before it is measured. */
	scrollIntoViewIfNeeded?: boolean;
}

/**
 * Tool implementation that captures a screenshot of a known browser view,
 * optionally restricted to the bounding box of a single element.
 */
export class ScreenshotBrowserTool implements IToolImpl {
	constructor(
		@IBrowserViewWorkbenchService private readonly browserViewWorkbenchService: IBrowserViewWorkbenchService,
		@IPlaywrightService private readonly playwrightService: IPlaywrightService,
	) { }

	/**
	 * Builds the in-progress and past-tense messages for an invocation.
	 *
	 * When the parameters carry an `element` description, the messages are
	 * markdown strings that mention it (with markdown syntax escaped);
	 * otherwise generic plain-string messages are returned.
	 *
	 * @param _context Preparation context; only its `parameters` are read.
	 * @param _token Cancellation token (not used).
	 * @returns The prepared invocation messages.
	 */
	async prepareToolInvocation(_context: IToolInvocationPreparationContext, _token: CancellationToken): Promise<IPreparedToolInvocation | undefined> {
		const { element } = _context.parameters as IScreenshotBrowserToolParams;

		// No element description: fall back to the generic messages.
		if (!element) {
			return {
				invocationMessage: localize('browser.screenshot.invocation', "Capturing browser screenshot"),
				pastTenseMessage: localize('browser.screenshot.past', "Captured browser screenshot"),
			};
		}

		// The description is model-provided text, so escape it before embedding it in markdown.
		const escapedElement = escapeMarkdownSyntaxTokens(element);
		const invocationMessage = new MarkdownString(localize('browser.screenshot.invocation.element', "Capturing screenshot of {0}", escapedElement));
		const pastTenseMessage = new MarkdownString(localize('browser.screenshot.past.element', "Captured screenshot of {0}", escapedElement));
		return { invocationMessage, pastTenseMessage };
	}

	/**
	 * Captures the screenshot.
	 *
	 * Returns an `errorResult(...)` when no page ID was given or when no known
	 * browser view resolves for that ID. Otherwise returns a result holding a
	 * single `data` part with mime type `image/jpeg`.
	 *
	 * @param invocation Tool invocation; only its `parameters` are read.
	 * @param _countTokens Token counting callback (not used).
	 * @param _progress Progress reporter (not used).
	 * @param _token Cancellation token (not used).
	 */
	async invoke(invocation: IToolInvocation, _countTokens: CountTokensCallback, _progress: ToolProgress, _token: CancellationToken): Promise<IToolResult> {
		const { pageId, ref, selector, scrollIntoViewIfNeeded } = invocation.parameters as IScreenshotBrowserToolParams;

		if (!pageId) {
			return errorResult(`No page ID provided. Use '${OpenPageToolId}' first.`);
		}

		// An element reference wins over an explicit selector.
		const targetSelector = ref ? `aria-ref=${ref}` : selector;

		// Playwright's own screenshot methods are deliberately avoided: they cause a brief
		// flash on the page and don't handle zooming well. The browser view model captures instead.
		const knownViews = this.browserViewWorkbenchService.getKnownBrowserViews();
		const browserViewModel = await knownViews.get(pageId)?.resolve();
		if (!browserViewModel) {
			return errorResult(`No browser page found with ID ${pageId}`);
		}

		// Playwright is only used to measure the target element. The callback relies solely on
		// its own parameters; the selector and scroll flag are handed over as explicit arguments.
		const elementBox = targetSelector
			? await playwrightInvokeRaw(this.playwrightService, pageId, async (page, target, shouldScroll) => {
				const locator = page.locator(target);
				if (shouldScroll) {
					await locator.scrollIntoViewIfNeeded();
				}
				return locator.boundingBox();
			}, targetSelector, scrollIntoViewIfNeeded)
			: undefined;

		// A falsy box (Playwright documents a null bounding box for elements that are not visible)
		// is normalized to undefined, so no page rectangle is passed to the capture call.
		// NOTE(review): presumably that captures the whole viewport — confirm against captureScreenshot.
		const pageRect = elementBox || undefined;
		const screenshot = await browserViewModel.captureScreenshot({ pageRect });

		return {
			content: [
				{
					kind: 'data',
					value: {
						mimeType: 'image/jpeg',
						data: screenshot,
					},
				},
			],
		};
	}
}