Path: blob/main/src/vs/workbench/contrib/chat/electron-browser/actions/voiceChatActions.ts
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { renderAsPlaintext } from '../../../../../base/browser/markdownRenderer.js';
import { RunOnceScheduler, disposableTimeout, raceCancellation } from '../../../../../base/common/async.js';
import { CancellationToken, CancellationTokenSource } from '../../../../../base/common/cancellation.js';
import { Codicon } from '../../../../../base/common/codicons.js';
import { Color } from '../../../../../base/common/color.js';
import { Event } from '../../../../../base/common/event.js';
import { KeyCode, KeyMod } from '../../../../../base/common/keyCodes.js';
import { Disposable, DisposableStore, MutableDisposable, toDisposable } from '../../../../../base/common/lifecycle.js';
import { isNumber } from '../../../../../base/common/types.js';
import { getCodeEditor } from '../../../../../editor/browser/editorBrowser.js';
import { EditorContextKeys } from '../../../../../editor/common/editorContextKeys.js';
import { localize, localize2 } from '../../../../../nls.js';
import { IAccessibilityService } from '../../../../../platform/accessibility/common/accessibility.js';
import { Action2, IAction2Options, MenuId } from '../../../../../platform/actions/common/actions.js';
import { CommandsRegistry, ICommandService } from '../../../../../platform/commands/common/commands.js';
import { IConfigurationService } from '../../../../../platform/configuration/common/configuration.js';
import { Extensions, IConfigurationRegistry } from '../../../../../platform/configuration/common/configurationRegistry.js';
import { ContextKeyExpr, ContextKeyExpression, IContextKeyService, RawContextKey } from '../../../../../platform/contextkey/common/contextkey.js';
import { IInstantiationService, ServicesAccessor } from '../../../../../platform/instantiation/common/instantiation.js';
import { IKeybindingService } from '../../../../../platform/keybinding/common/keybinding.js';
import { KeybindingWeight } from '../../../../../platform/keybinding/common/keybindingsRegistry.js';
import { Registry } from '../../../../../platform/registry/common/platform.js';
import { contrastBorder, focusBorder } from '../../../../../platform/theme/common/colorRegistry.js';
import { spinningLoading, syncing } from '../../../../../platform/theme/common/iconRegistry.js';
import { isHighContrast } from '../../../../../platform/theme/common/theme.js';
import { registerThemingParticipant } from '../../../../../platform/theme/common/themeService.js';
import { ActiveEditorContext } from '../../../../common/contextkeys.js';
import { IWorkbenchContribution } from '../../../../common/contributions.js';
import { ACTIVITY_BAR_FOREGROUND } from '../../../../common/theme.js';
import { IEditorService } from '../../../../services/editor/common/editorService.js';
import { IHostService } from '../../../../services/host/browser/host.js';
import { IWorkbenchLayoutService, Parts } from '../../../../services/layout/browser/layoutService.js';
import { IStatusbarEntry, IStatusbarEntryAccessor, IStatusbarService, StatusbarAlignment } from '../../../../services/statusbar/browser/statusbar.js';
import { AccessibilityVoiceSettingId, SpeechTimeoutDefault, accessibilityConfigurationNodeBase } from '../../../accessibility/browser/accessibilityConfiguration.js';
import { InlineChatController } from '../../../inlineChat/browser/inlineChatController.js';
import { CTX_INLINE_CHAT_FOCUSED, MENU_INLINE_CHAT_WIDGET_SECONDARY } from '../../../inlineChat/common/inlineChat.js';
import { NOTEBOOK_EDITOR_FOCUSED } from '../../../notebook/common/notebookContextKeys.js';
import { CONTEXT_SETTINGS_EDITOR } from '../../../preferences/common/preferences.js';
import { SearchContext } from '../../../search/common/constants.js';
import { TextToSpeechInProgress as GlobalTextToSpeechInProgress, HasSpeechProvider, ISpeechService, KeywordRecognitionStatus, SpeechToTextInProgress, SpeechToTextStatus, TextToSpeechStatus } from '../../../speech/common/speechService.js';
import { CHAT_CATEGORY } from '../../browser/actions/chatActions.js';
import { IChatExecuteActionContext } from '../../browser/actions/chatExecuteActions.js';
import { IChatWidget, IChatWidgetService, IQuickChatService } from '../../browser/chat.js';
import { IChatAgentService } from '../../common/participants/chatAgents.js';
import { ChatContextKeys } from '../../common/actions/chatContextKeys.js';
import { IChatResponseModel } from '../../common/model/chatModel.js';
import { KEYWORD_ACTIVIATION_SETTING_ID } from '../../common/chatService/chatService.js';
import { ChatResponseViewModel, IChatResponseViewModel, isResponseVM } from '../../common/model/chatViewModel.js';
import { ChatAgentLocation } from '../../common/constants.js';
import { VoiceChatInProgress as GlobalVoiceChatInProgress, IVoiceChatService } from '../../common/voiceChatService.js';
import './media/voiceChatActions.css';

//#region Speech to Text

type VoiceChatSessionContext = 'view' | 'inline' | 'quick' | 'editor';
const VoiceChatSessionContexts: VoiceChatSessionContext[] = ['view', 'inline', 'quick', 'editor'];

// Global Context Keys (set on global context key service)
const CanVoiceChat = ContextKeyExpr.and(ChatContextKeys.enabled, HasSpeechProvider);
const FocusInChatInput = ContextKeyExpr.or(CTX_INLINE_CHAT_FOCUSED, ChatContextKeys.inChatInput);

// Scoped Context Keys (set on per-chat-context scoped context key service)
const ScopedVoiceChatGettingReady = new RawContextKey<boolean>('scopedVoiceChatGettingReady', false, { type: 'boolean', description: localize('scopedVoiceChatGettingReady', "True when getting ready for receiving voice input from the microphone for voice chat. This key is only defined scoped, per chat context.") });
const ScopedVoiceChatInProgress = new RawContextKey<VoiceChatSessionContext | undefined>('scopedVoiceChatInProgress', undefined, { type: 'string', description: localize('scopedVoiceChatInProgress', "Defined as a location where voice recording from microphone is in progress for voice chat. This key is only defined scoped, per chat context.") });
const AnyScopedVoiceChatInProgress = ContextKeyExpr.or(...VoiceChatSessionContexts.map(context => ScopedVoiceChatInProgress.isEqualTo(context)));

enum VoiceChatSessionState {
	Stopped = 1,
	GettingReady,
	Started
}

interface IVoiceChatSessionController {

	readonly onDidAcceptInput: Event<unknown>;
	readonly onDidHideInput: Event<unknown>;

	readonly context: VoiceChatSessionContext;
	readonly scopedContextKeyService: IContextKeyService;

	updateState(state: VoiceChatSessionState): void;

	focusInput(): void;
	acceptInput(): Promise<IChatResponseModel | undefined>;
	updateInput(text: string): void;
	getInput(): string;

	setInputPlaceholder(text: string): void;
	clearInputPlaceholder(): void;
}

class VoiceChatSessionControllerFactory {

	static async create(accessor: ServicesAccessor, context: 'view' | 'inline' | 'quick' | 'focused'): Promise<IVoiceChatSessionController | undefined> {
		const chatWidgetService = accessor.get(IChatWidgetService);
		const quickChatService = accessor.get(IQuickChatService);
		const layoutService = accessor.get(IWorkbenchLayoutService);
		const editorService = accessor.get(IEditorService);

		switch (context) {
			case 'focused': {
				const controller = VoiceChatSessionControllerFactory.doCreateForFocusedChat(chatWidgetService, layoutService);
				return controller ?? VoiceChatSessionControllerFactory.create(accessor, 'view'); // fallback to 'view'
			}
			case 'view': {
				const chatWidget = await chatWidgetService.revealWidget();
				if (chatWidget) {
					return VoiceChatSessionControllerFactory.doCreateForChatWidget('view', chatWidget);
				}
				break;
			}
			case 'inline': {
				const activeCodeEditor = getCodeEditor(editorService.activeTextEditorControl);
				if (activeCodeEditor) {
					const inlineChat = InlineChatController.get(activeCodeEditor);
					if (inlineChat) {
						if (!inlineChat.isActive) {
							inlineChat.run();
						}
						return VoiceChatSessionControllerFactory.doCreateForChatWidget('inline', inlineChat.widget.chatWidget);
					}
				}
				break;
			}
			case 'quick': {
				quickChatService.open(); // this will populate focused chat widget in the chat widget service
				return VoiceChatSessionControllerFactory.create(accessor, 'focused');
			}
		}

		return undefined;
	}

	private static doCreateForFocusedChat(chatWidgetService: IChatWidgetService, layoutService: IWorkbenchLayoutService): IVoiceChatSessionController | undefined {
		const chatWidget = chatWidgetService.lastFocusedWidget;
		if (chatWidget?.hasInputFocus()) {

			// Figure out the context of the chat widget by asking
			// layout service for the part that has focus. Unfortunately
			// there is no better way because the widget does not know
			// its location.

			let context: VoiceChatSessionContext;
			if (layoutService.hasFocus(Parts.EDITOR_PART)) {
				context = chatWidget.location === ChatAgentLocation.Chat ? 'editor' : 'inline';
			} else if (
				[Parts.SIDEBAR_PART, Parts.PANEL_PART, Parts.AUXILIARYBAR_PART, Parts.TITLEBAR_PART, Parts.STATUSBAR_PART, Parts.BANNER_PART, Parts.ACTIVITYBAR_PART].some(part => layoutService.hasFocus(part))
			) {
				context = 'view';
			} else {
				context = 'quick';
			}

			return VoiceChatSessionControllerFactory.doCreateForChatWidget(context, chatWidget);
		}

		return undefined;
	}

	private static createChatContextKeyController(contextKeyService: IContextKeyService, context: VoiceChatSessionContext): (state: VoiceChatSessionState) => void {
		const contextVoiceChatGettingReady = ScopedVoiceChatGettingReady.bindTo(contextKeyService);
		const contextVoiceChatInProgress = ScopedVoiceChatInProgress.bindTo(contextKeyService);

		return (state: VoiceChatSessionState) => {
			switch (state) {
				case VoiceChatSessionState.GettingReady:
					contextVoiceChatGettingReady.set(true);
					contextVoiceChatInProgress.reset();
					break;
				case VoiceChatSessionState.Started:
					contextVoiceChatGettingReady.reset();
					contextVoiceChatInProgress.set(context);
					break;
				case VoiceChatSessionState.Stopped:
					contextVoiceChatGettingReady.reset();
					contextVoiceChatInProgress.reset();
					break;
			}
		};
	}

	private static doCreateForChatWidget(context: VoiceChatSessionContext, chatWidget: IChatWidget): IVoiceChatSessionController {
		return {
			context,
			scopedContextKeyService: chatWidget.scopedContextKeyService,
			onDidAcceptInput: chatWidget.onDidAcceptInput,
			onDidHideInput: chatWidget.onDidHide,
			focusInput: () => chatWidget.focusInput(),
			acceptInput: () => chatWidget.acceptInput(undefined, { isVoiceInput: true }),
			updateInput: text => chatWidget.setInput(text),
			getInput: () => chatWidget.getInput(),
			setInputPlaceholder: text => chatWidget.setInputPlaceholder(text),
			clearInputPlaceholder: () => chatWidget.resetInputPlaceholder(),
			updateState: VoiceChatSessionControllerFactory.createChatContextKeyController(chatWidget.scopedContextKeyService, context)
		};
	}
}

interface IVoiceChatSession {
	setTimeoutDisabled(disabled: boolean): void;

	accept(): void;
	stop(): void;
}

interface IActiveVoiceChatSession extends IVoiceChatSession {
	readonly id: number;
	readonly controller: IVoiceChatSessionController;
	readonly disposables: DisposableStore;

	hasRecognizedInput: boolean;
}

class VoiceChatSessions {

	private static instance: VoiceChatSessions | undefined = undefined;
	static getInstance(instantiationService: IInstantiationService): VoiceChatSessions {
		if (!VoiceChatSessions.instance) {
			VoiceChatSessions.instance = instantiationService.createInstance(VoiceChatSessions);
		}

		return VoiceChatSessions.instance;
	}

	private currentVoiceChatSession: IActiveVoiceChatSession | undefined = undefined;
	private voiceChatSessionIds = 0;

	constructor(
		@IVoiceChatService private readonly voiceChatService: IVoiceChatService,
		@IConfigurationService private readonly configurationService: IConfigurationService,
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@IAccessibilityService private readonly accessibilityService: IAccessibilityService
	) { }

	async start(controller: IVoiceChatSessionController, context?: IChatExecuteActionContext): Promise<IVoiceChatSession> {

		// Stop running text-to-speech or speech-to-text sessions in chats
		this.stop();
		ChatSynthesizerSessions.getInstance(this.instantiationService).stop();

		let disableTimeout = false;

		const sessionId = ++this.voiceChatSessionIds;
		const session: IActiveVoiceChatSession = this.currentVoiceChatSession = {
			id: sessionId,
			controller,
			hasRecognizedInput: false,
			disposables: new DisposableStore(),
			setTimeoutDisabled: (disabled: boolean) => { disableTimeout = disabled; },
			accept: () => this.accept(sessionId),
			stop: () => this.stop(sessionId, controller.context)
		};

		const cts = new CancellationTokenSource();
		session.disposables.add(toDisposable(() => cts.dispose(true)));

		session.disposables.add(controller.onDidAcceptInput(() => this.stop(sessionId, controller.context)));
		session.disposables.add(controller.onDidHideInput(() => this.stop(sessionId, controller.context)));

		controller.focusInput();

		controller.updateState(VoiceChatSessionState.GettingReady);

		const voiceChatSession = await this.voiceChatService.createVoiceChatSession(cts.token, { usesAgents: controller.context !== 'inline', model: context?.widget?.viewModel?.model });

		let inputValue = controller.getInput();

		let voiceChatTimeout = this.configurationService.getValue<number>(AccessibilityVoiceSettingId.SpeechTimeout);
		if (!isNumber(voiceChatTimeout) || voiceChatTimeout < 0) {
			voiceChatTimeout = SpeechTimeoutDefault;
		}

		const acceptTranscriptionScheduler = session.disposables.add(new RunOnceScheduler(() => this.accept(sessionId), voiceChatTimeout));
		session.disposables.add(voiceChatSession.onDidChange(({ status, text, waitingForInput }) => {
			if (cts.token.isCancellationRequested) {
				return;
			}

			switch (status) {
				case SpeechToTextStatus.Started:
					this.onDidSpeechToTextSessionStart(controller, session.disposables);
					break;
				case SpeechToTextStatus.Recognizing:
					if (text) {
						session.hasRecognizedInput = true;
						session.controller.updateInput(inputValue ? [inputValue, text].join(' ') : text);
						if (voiceChatTimeout > 0 && context?.voice?.disableTimeout !== true && !disableTimeout) {
							acceptTranscriptionScheduler.cancel();
						}
					}
					break;
				case SpeechToTextStatus.Recognized:
					if (text) {
						session.hasRecognizedInput = true;
						inputValue = inputValue ? [inputValue, text].join(' ') : text;
						session.controller.updateInput(inputValue);
						if (voiceChatTimeout > 0 && context?.voice?.disableTimeout !== true && !waitingForInput && !disableTimeout) {
							acceptTranscriptionScheduler.schedule();
						}
					}
					break;
				case SpeechToTextStatus.Stopped:
					this.stop(session.id, controller.context);
					break;
			}
		}));

		return session;
	}

	private onDidSpeechToTextSessionStart(controller: IVoiceChatSessionController, disposables: DisposableStore): void {
		controller.updateState(VoiceChatSessionState.Started);

		let dotCount = 0;

		const updatePlaceholder = () => {
			dotCount = (dotCount + 1) % 4;
			controller.setInputPlaceholder(`${localize('listening', "I'm listening")}${'.'.repeat(dotCount)}`);
			placeholderScheduler.schedule();
		};

		const placeholderScheduler = disposables.add(new RunOnceScheduler(updatePlaceholder, 500));
		updatePlaceholder();
	}

	stop(voiceChatSessionId = this.voiceChatSessionIds, context?: VoiceChatSessionContext): void {
		if (
			!this.currentVoiceChatSession ||
			this.voiceChatSessionIds !== voiceChatSessionId ||
			(context && this.currentVoiceChatSession.controller.context !== context)
		) {
			return;
		}

		this.currentVoiceChatSession.controller.clearInputPlaceholder();

		this.currentVoiceChatSession.controller.updateState(VoiceChatSessionState.Stopped);

		this.currentVoiceChatSession.disposables.dispose();
		this.currentVoiceChatSession = undefined;
	}

	async accept(voiceChatSessionId = this.voiceChatSessionIds): Promise<void> {
		if (
			!this.currentVoiceChatSession ||
			this.voiceChatSessionIds !== voiceChatSessionId
		) {
			return;
		}

		if (!this.currentVoiceChatSession.hasRecognizedInput) {
			// If we have an active session but without recognized
			// input, we do not want to just accept the input that
			// was maybe typed before. But we still want to stop the
			// voice session because `acceptInput` would do that.
			this.stop(voiceChatSessionId, this.currentVoiceChatSession.controller.context);
			return;
		}

		const controller = this.currentVoiceChatSession.controller;
		const response = await controller.acceptInput();
		if (!response) {
			return;
		}
		const autoSynthesize = this.configurationService.getValue<'on' | 'off'>(AccessibilityVoiceSettingId.AutoSynthesize);
		if (autoSynthesize === 'on' || (autoSynthesize !== 'off' && !this.accessibilityService.isScreenReaderOptimized())) {
			let context: IVoiceChatSessionController | 'focused';
			if (controller.context === 'inline') {
				// This is ugly, but the lightweight inline chat turns into
				// a different widget as soon as a response comes in, so we fallback to
				// picking up from the focused chat widget
				context = 'focused';
			} else {
				context = controller;
			}
			ChatSynthesizerSessions.getInstance(this.instantiationService).start(this.instantiationService.invokeFunction(accessor => ChatSynthesizerSessionController.create(accessor, context, response)));
		}
	}
}

export const VOICE_KEY_HOLD_THRESHOLD = 500;

async function startVoiceChatWithHoldMode(id: string, accessor: ServicesAccessor, target: 'view' | 'inline' | 'quick' | 'focused', context?: IChatExecuteActionContext): Promise<void> {
	const instantiationService = accessor.get(IInstantiationService);
	const keybindingService = accessor.get(IKeybindingService);

	const holdMode = keybindingService.enableKeybindingHoldMode(id);

	const controller = await VoiceChatSessionControllerFactory.create(accessor, target);
	if (!controller) {
		return;
	}

	const session = await VoiceChatSessions.getInstance(instantiationService).start(controller, context);

	let acceptVoice = false;
	const handle = disposableTimeout(() => {
		acceptVoice = true;
		session?.setTimeoutDisabled(true); // disable accept on timeout when hold mode runs for VOICE_KEY_HOLD_THRESHOLD
	}, VOICE_KEY_HOLD_THRESHOLD);
	await holdMode;
	handle.dispose();

	if (acceptVoice) {
		session.accept();
	}
}

class VoiceChatWithHoldModeAction extends Action2 {

	constructor(desc: Readonly<IAction2Options>, private readonly target: 'view' | 'inline' | 'quick') {
		super(desc);
	}

	run(accessor: ServicesAccessor, context?: IChatExecuteActionContext): Promise<void> {
		return startVoiceChatWithHoldMode(this.desc.id, accessor, this.target, context);
	}
}

export class VoiceChatInChatViewAction extends VoiceChatWithHoldModeAction {

	static readonly ID = 'workbench.action.chat.voiceChatInChatView';

	constructor() {
		super({
			id: VoiceChatInChatViewAction.ID,
			title: localize2('workbench.action.chat.voiceChatInView.label', "Voice Chat in Chat View"),
			category: CHAT_CATEGORY,
			precondition: CanVoiceChat,
			f1: true
		}, 'view');
	}
}

export class HoldToVoiceChatInChatViewAction extends Action2 {

	static readonly ID = 'workbench.action.chat.holdToVoiceChatInChatView';

	constructor() {
		super({
			id: HoldToVoiceChatInChatViewAction.ID,
			title: localize2('workbench.action.chat.holdToVoiceChatInChatView.label', "Hold to Voice Chat in Chat View"),
			keybinding: {
				weight: KeybindingWeight.WorkbenchContrib,
				when: ContextKeyExpr.and(
					CanVoiceChat,
					ChatContextKeys.requestInProgress.negate(), // disable when a chat request is in progress
					FocusInChatInput?.negate(), // when already in chat input, disable this action and prefer to start voice chat directly
					EditorContextKeys.focus.negate(), // do not steal the inline-chat keybinding
					NOTEBOOK_EDITOR_FOCUSED.negate(), // do not steal the notebook keybinding
					SearchContext.SearchViewFocusedKey.negate(), // do not steal the search keybinding
					CONTEXT_SETTINGS_EDITOR.negate(), // do not steal the settings editor keybinding
				),
				primary: KeyMod.CtrlCmd | KeyCode.KeyI
			}
		});
	}

	override async run(accessor: ServicesAccessor, context?: IChatExecuteActionContext): Promise<void> {

		// The intent of this action is to provide 2 modes to align with what `Ctrlcmd+I` does in inline chat:
		// - if the user presses and holds, we start voice chat in the chat view
		// - if the user presses and releases quickly enough, we just open the chat view without voice chat

		const instantiationService = accessor.get(IInstantiationService);
		const keybindingService = accessor.get(IKeybindingService);
		const widgetService = accessor.get(IChatWidgetService);

		const holdMode = keybindingService.enableKeybindingHoldMode(HoldToVoiceChatInChatViewAction.ID);

		let session: IVoiceChatSession | undefined;
		const handle = disposableTimeout(async () => {
			const controller = await VoiceChatSessionControllerFactory.create(accessor, 'view');
			if (controller) {
				session = await VoiceChatSessions.getInstance(instantiationService).start(controller, context);
				session.setTimeoutDisabled(true);
			}
		}, VOICE_KEY_HOLD_THRESHOLD);

		(await widgetService.revealWidget())?.focusInput();

		await holdMode;
		handle.dispose();

		if (session) {
			session.accept();
		}
	}
}

export class InlineVoiceChatAction extends VoiceChatWithHoldModeAction {

	static readonly ID = 'workbench.action.chat.inlineVoiceChat';

	constructor() {
		super({
			id: InlineVoiceChatAction.ID,
			title: localize2('workbench.action.chat.inlineVoiceChat', "Inline Voice Chat"),
			category: CHAT_CATEGORY,
			precondition: ContextKeyExpr.and(
				CanVoiceChat,
				ActiveEditorContext,
			),
			f1: true
		}, 'inline');
	}
}

export class QuickVoiceChatAction extends VoiceChatWithHoldModeAction {

	static readonly ID = 'workbench.action.chat.quickVoiceChat';

	constructor() {
		super({
			id: QuickVoiceChatAction.ID,
			title: localize2('workbench.action.chat.quickVoiceChat.label', "Quick Voice Chat"),
			category: CHAT_CATEGORY,
			precondition: CanVoiceChat,
			f1: true
		}, 'quick');
	}
}

const primaryVoiceActionMenu = (when: ContextKeyExpression | undefined) => {
	return [
		{
			id: MenuId.ChatExecute,
			when: ContextKeyExpr.and(ChatContextKeys.location.isEqualTo(ChatAgentLocation.Chat), when),
			group: 'navigation',
			order: 3
		},
		{
			id: MenuId.ChatExecute,
			when: ContextKeyExpr.and(ChatContextKeys.location.isEqualTo(ChatAgentLocation.Chat).negate(), when),
			group: 'navigation',
			order: 2
		}
	];
};

export class StartVoiceChatAction extends Action2 {

	static readonly ID = 'workbench.action.chat.startVoiceChat';

	constructor() {
		super({
			id: StartVoiceChatAction.ID,
			title: localize2('workbench.action.chat.startVoiceChat.label', "Start Voice Chat"),
			category: CHAT_CATEGORY,
			f1: true,
			keybinding: {
				weight: KeybindingWeight.WorkbenchContrib,
				when: ContextKeyExpr.and(
					FocusInChatInput, // scope this action to chat input fields only
					EditorContextKeys.focus.negate(), // do not steal the editor inline-chat keybinding
					NOTEBOOK_EDITOR_FOCUSED.negate() // do not steal the notebook inline-chat keybinding
				),
				primary: KeyMod.CtrlCmd | KeyCode.KeyI
			},
			icon: Codicon.mic,
			precondition: ContextKeyExpr.and(
				CanVoiceChat,
				ScopedVoiceChatGettingReady.negate(), // disable when voice chat is getting ready
				SpeechToTextInProgress.negate() // disable when speech to text is in progress
			),
			menu: primaryVoiceActionMenu(ContextKeyExpr.and(
				HasSpeechProvider,
				ScopedChatSynthesisInProgress.negate(), // hide when text to speech is in progress
				AnyScopedVoiceChatInProgress?.negate(), // hide when voice chat is in progress
			))
		});
	}

	async run(accessor: ServicesAccessor, context?: IChatExecuteActionContext): Promise<void> {
		const widget = context?.widget;
		if (widget) {
			// if we already get a context when the action is executed
			// from a toolbar within the chat widget, then make sure
			// to move focus into the input field so that the controller
			// is properly retrieved
			widget.focusInput();
		}

		return startVoiceChatWithHoldMode(this.desc.id, accessor, 'focused', context);
	}
}

export class StopListeningAction extends Action2 {

	static readonly ID = 'workbench.action.chat.stopListening';

	constructor() {
		super({
			id: StopListeningAction.ID,
			title: localize2('workbench.action.chat.stopListening.label', "Stop Listening"),
			category: CHAT_CATEGORY,
			f1: true,
			keybinding: {
				weight: KeybindingWeight.WorkbenchContrib + 100,
				primary: KeyCode.Escape,
				when: AnyScopedVoiceChatInProgress
			},
			icon: spinningLoading,
			precondition: GlobalVoiceChatInProgress, // need global context here because of `f1: true`
			menu: primaryVoiceActionMenu(AnyScopedVoiceChatInProgress)
		});
	}

	async run(accessor: ServicesAccessor): Promise<void> {
		VoiceChatSessions.getInstance(accessor.get(IInstantiationService)).stop();
	}
}

export class StopListeningAndSubmitAction extends Action2 {

	static readonly ID = 'workbench.action.chat.stopListeningAndSubmit';

	constructor() {
		super({
			id: StopListeningAndSubmitAction.ID,
			title: localize2('workbench.action.chat.stopListeningAndSubmit.label', "Stop Listening and Submit"),
			category: CHAT_CATEGORY,
			f1: true,
			keybinding: {
				weight: KeybindingWeight.WorkbenchContrib,
				when: ContextKeyExpr.and(
					FocusInChatInput,
					AnyScopedVoiceChatInProgress
				),
				primary: KeyMod.CtrlCmd | KeyCode.KeyI
			},
			precondition: GlobalVoiceChatInProgress // need global context here because of `f1: true`
		});
	}

	run(accessor: ServicesAccessor): void {
		VoiceChatSessions.getInstance(accessor.get(IInstantiationService)).accept();
	}
}

//#endregion

//#region Text to Speech

const ScopedChatSynthesisInProgress = new RawContextKey<boolean>('scopedChatSynthesisInProgress', false, { type: 'boolean', description: localize('scopedChatSynthesisInProgress', "True when text to speech is in progress reading a chat response aloud. This key is only defined scoped, per chat context.") });

interface IChatSynthesizerSessionController {

	readonly onDidHideChat: Event<unknown>;

	readonly contextKeyService: IContextKeyService;
	readonly response: IChatResponseModel;
}

class ChatSynthesizerSessionController {

	static create(accessor: ServicesAccessor, context: IVoiceChatSessionController | 'focused', response: IChatResponseModel): IChatSynthesizerSessionController {
		if (context === 'focused') {
			return ChatSynthesizerSessionController.doCreateForFocusedChat(accessor, response);
		} else {
			return {
				onDidHideChat: context.onDidHideInput,
				contextKeyService: context.scopedContextKeyService,
				response
			};
		}
	}

	private static doCreateForFocusedChat(accessor: ServicesAccessor, response: IChatResponseModel): IChatSynthesizerSessionController {
		const chatWidgetService = accessor.get(IChatWidgetService);
		const contextKeyService = accessor.get(IContextKeyService);
		let chatWidget = chatWidgetService.getWidgetBySessionResource(response.session.sessionResource);
		if (chatWidget?.location === ChatAgentLocation.EditorInline) {
			chatWidget = chatWidgetService.lastFocusedWidget; // workaround for https://github.com/microsoft/vscode/issues/212785
		}

		return {
			onDidHideChat: chatWidget?.onDidHide ?? Event.None,
			contextKeyService: chatWidget?.scopedContextKeyService ?? contextKeyService,
			response
		};
	}
}

interface IChatSynthesizerContext {
	readonly ignoreCodeBlocks: boolean;
	insideCodeBlock: boolean;
}

class ChatSynthesizerSessions {

	private static instance: ChatSynthesizerSessions | undefined = undefined;
	static getInstance(instantiationService: IInstantiationService): ChatSynthesizerSessions {
		if (!ChatSynthesizerSessions.instance) {
			ChatSynthesizerSessions.instance = instantiationService.createInstance(ChatSynthesizerSessions);
		}

		return ChatSynthesizerSessions.instance;
	}

	private activeSession: CancellationTokenSource | undefined = undefined;

	constructor(
		@ISpeechService private readonly speechService: ISpeechService,
		@IConfigurationService private readonly configurationService: IConfigurationService,
		@IInstantiationService private readonly instantiationService: IInstantiationService
	) { }

	async start(controller: IChatSynthesizerSessionController): Promise<void> {

		// Stop running text-to-speech or speech-to-text sessions in chats
		this.stop();
		VoiceChatSessions.getInstance(this.instantiationService).stop();

		const activeSession = this.activeSession = new CancellationTokenSource();

		const disposables = new DisposableStore();
		disposables.add(activeSession.token.onCancellationRequested(() => disposables.dispose()));

		const session = await this.speechService.createTextToSpeechSession(activeSession.token, 'chat');

		if (activeSession.token.isCancellationRequested) {
			return;
		}

		disposables.add(controller.onDidHideChat(() => this.stop()));

		const scopedChatToSpeechInProgress = ScopedChatSynthesisInProgress.bindTo(controller.contextKeyService);
		disposables.add(toDisposable(() => scopedChatToSpeechInProgress.reset()));

		disposables.add(session.onDidChange(e => {
			switch (e.status) {
				case TextToSpeechStatus.Started:
					scopedChatToSpeechInProgress.set(true);
					break;
				case TextToSpeechStatus.Stopped:
					scopedChatToSpeechInProgress.reset();
					break;
			}
		}));

		for await (const chunk of this.nextChatResponseChunk(controller.response, activeSession.token)) {
			if (activeSession.token.isCancellationRequested) {
				return;
			}

			await raceCancellation(session.synthesize(chunk), activeSession.token);
		}
	}

	private async *nextChatResponseChunk(response: IChatResponseModel, token: CancellationToken): AsyncIterable<string> {
		const context: IChatSynthesizerContext = {
			ignoreCodeBlocks: this.configurationService.getValue<boolean>(AccessibilityVoiceSettingId.IgnoreCodeBlocks),
			insideCodeBlock: false
		};

		let totalOffset = 0;
		let complete = false;
		do {
			const responseLength = response.response.toString().length;
			const { chunk, offset } = this.parseNextChatResponseChunk(response, totalOffset, context);
			totalOffset = offset;
			complete = response.isComplete;

			if (chunk) {
				yield chunk;
			}

			if (token.isCancellationRequested) {
				return;
			}

			if (!complete && responseLength === response.response.toString().length) {
				await raceCancellation(Event.toPromise(response.onDidChange), token); // wait for the response to change
			}
		} while (!token.isCancellationRequested && !complete);
	}

	private parseNextChatResponseChunk(response: IChatResponseModel, offset: number, context: IChatSynthesizerContext): { readonly chunk: string | undefined; readonly offset: number } {
		let chunk: string | undefined = undefined;

		const text = response.response.toString();

		if (response.isComplete) {
			chunk = text.substring(offset);
			offset = text.length + 1;
		} else {
			const res = parseNextChatResponseChunk(text, offset);
			chunk = res.chunk;
			offset = res.offset;
		}

		if (chunk && context.ignoreCodeBlocks) {
			chunk = this.filterCodeBlocks(chunk, context);
		}

		return {
			chunk: chunk ? renderAsPlaintext({ value: chunk }) : chunk, // convert markdown to plain text
			offset
		};
	}

	private filterCodeBlocks(chunk: string, context: IChatSynthesizerContext): string {
		return chunk.split('\n')
			.filter(line => {
				if (line.trimStart().startsWith('```')) {
					context.insideCodeBlock = !context.insideCodeBlock;
					return false;
				}
				return !context.insideCodeBlock;
			})
			.join('\n');
	}

	stop(): void {
		this.activeSession?.dispose(true);
		this.activeSession = undefined;
	}
}

const sentenceDelimiter = ['.', '!', '?', ':'];
const lineDelimiter = '\n';
const wordDelimiter = ' ';

export function parseNextChatResponseChunk(text: string, offset: number): { readonly chunk: string | undefined; readonly offset: number } {
	let chunk: string | undefined = undefined;

	for (let i = text.length - 1; i >= offset; i--) { // going from end to start to produce largest chunks
		const cur = text[i];
		const next = text[i + 1];
		if (
			sentenceDelimiter.includes(cur) && next === wordDelimiter || // end of sentence
			lineDelimiter === cur // end of line
		) {
			chunk = text.substring(offset, i + 1).trim();
			offset = i + 1;
			break;
		}
	}

	return { chunk, offset };
}
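
// Illustrative example (assumed input, not exercised by the code above): `parseNextChatResponseChunk`
// scans backwards so that it yields the largest chunk ending at a sentence or line boundary. Given the
// streamed text "Hello world. How are" and offset 0, it returns { chunk: 'Hello world.', offset: 12 },
// because '.' followed by ' ' marks a sentence boundary; called again with offset 12 it returns
// { chunk: undefined, offset: 12 } until more text ending in '. ', '! ', '? ', ': ' or a newline arrives.
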
export class ReadChatResponseAloud extends Action2 {
	constructor() {
		super({
			id: 'workbench.action.chat.readChatResponseAloud',
			title: localize2('workbench.action.chat.readChatResponseAloud', "Read Aloud"),
			icon: Codicon.unmute,
			precondition: CanVoiceChat,
			menu: [{
				id: MenuId.ChatMessageFooter,
				when: ContextKeyExpr.and(
					CanVoiceChat,
					ChatContextKeys.isResponse, // only for responses
					ScopedChatSynthesisInProgress.negate(), // but not when already in progress
					ChatContextKeys.responseIsFiltered.negate(), // and not when response is filtered
				),
				group: 'navigation',
				order: -10 // first
			}, {
				id: MENU_INLINE_CHAT_WIDGET_SECONDARY,
				when: ContextKeyExpr.and(
					CanVoiceChat,
					ChatContextKeys.isResponse, // only for responses
					ScopedChatSynthesisInProgress.negate(), // but not when already in progress
					ChatContextKeys.responseIsFiltered.negate() // and not when response is filtered
				),
				group: 'navigation',
				order: -10 // first
			}]
		});
	}

	run(accessor: ServicesAccessor, ...args: unknown[]) {
		const instantiationService = accessor.get(IInstantiationService);
		const chatWidgetService = accessor.get(IChatWidgetService);

		let response: IChatResponseViewModel | undefined = undefined;
		if (args.length > 0) {
			const responseArg = args[0];
			if (isResponseVM(responseArg)) {
				response = responseArg;
			}
		} else {
			const chatWidget = chatWidgetService.lastFocusedWidget;
			if (chatWidget) {

				// pick focused response
				const focus = chatWidget.getFocus();
				if (focus instanceof ChatResponseViewModel) {
					response = focus;
				}

				// pick the last response
				else {
					const chatViewModel = chatWidget.viewModel;
					if (chatViewModel) {
						const items = chatViewModel.getItems();
						for (let i = items.length - 1; i >= 0; i--) {
							const item = items[i];
							if (isResponseVM(item)) {
								response = item;
								break;
							}
						}
					}
				}
			}
		}

		if (!response) {
			return;
		}

		const controller = ChatSynthesizerSessionController.create(accessor, 'focused', response.model);
		ChatSynthesizerSessions.getInstance(instantiationService).start(controller);
	}
}

export class StopReadAloud extends Action2 {

	static readonly ID = 'workbench.action.speech.stopReadAloud';

	constructor() {
		super({
			id: StopReadAloud.ID,
			icon: syncing,
			title: localize2('workbench.action.speech.stopReadAloud', "Stop Reading Aloud"),
			f1: true,
			category: CHAT_CATEGORY,
			precondition: GlobalTextToSpeechInProgress, // need global context here because of `f1: true`
			keybinding: {
				weight: KeybindingWeight.WorkbenchContrib + 100,
				primary: KeyCode.Escape,
				when: ScopedChatSynthesisInProgress
			},
			menu: primaryVoiceActionMenu(ScopedChatSynthesisInProgress)
		});
	}

	async run(accessor: ServicesAccessor) {
		ChatSynthesizerSessions.getInstance(accessor.get(IInstantiationService)).stop();
	}
}

export class StopReadChatItemAloud extends Action2 {

	static readonly ID = 'workbench.action.chat.stopReadChatItemAloud';

	constructor() {
		super({
			id: StopReadChatItemAloud.ID,
			icon: Codicon.mute,
			title: localize2('workbench.action.chat.stopReadChatItemAloud', "Stop Reading Aloud"),
			precondition: ScopedChatSynthesisInProgress,
			keybinding: {
				weight: KeybindingWeight.WorkbenchContrib + 100,
				primary: KeyCode.Escape,
			},
			menu: [
				{
					id: MenuId.ChatMessageFooter,
					when: ContextKeyExpr.and(
						ScopedChatSynthesisInProgress, // only when in progress
						ChatContextKeys.isResponse, // only for responses
						ChatContextKeys.responseIsFiltered.negate() // but not when response is filtered
					),
					group: 'navigation',
					order: -10 // first
				},
				{
					id: MENU_INLINE_CHAT_WIDGET_SECONDARY,
					when: ContextKeyExpr.and(
						ScopedChatSynthesisInProgress, // only when in progress
						ChatContextKeys.isResponse, // only for responses
						ChatContextKeys.responseIsFiltered.negate() // but not when response is filtered
					),
					group: 'navigation',
					order: -10 // first
				}
			]
		});
	}

	async run(accessor: ServicesAccessor, ...args: unknown[]) {
		ChatSynthesizerSessions.getInstance(accessor.get(IInstantiationService)).stop();
	}
}

//#endregion

//#region Keyword Recognition

function supportsKeywordActivation(configurationService: IConfigurationService, speechService: ISpeechService, chatAgentService: IChatAgentService): boolean {
	if (!speechService.hasSpeechProvider || !chatAgentService.getDefaultAgent(ChatAgentLocation.Chat)) {
		return false;
	}

	const value = configurationService.getValue(KEYWORD_ACTIVIATION_SETTING_ID);

	return typeof value === 'string' && value !== KeywordActivationContribution.SETTINGS_VALUE.OFF;
}

export class KeywordActivationContribution extends Disposable implements IWorkbenchContribution {

	static readonly ID = 'workbench.contrib.keywordActivation';

	static SETTINGS_VALUE = {
		OFF: 'off',
		INLINE_CHAT: 'inlineChat',
		QUICK_CHAT: 'quickChat',
		VIEW_CHAT: 'chatInView',
		CHAT_IN_CONTEXT: 'chatInContext'
	};

	private activeSession: CancellationTokenSource | undefined = undefined;

	constructor(
		@ISpeechService private readonly speechService: ISpeechService,
		@IConfigurationService private readonly configurationService: IConfigurationService,
		@ICommandService private readonly commandService: ICommandService,
		@IInstantiationService instantiationService: IInstantiationService,
		@IEditorService private readonly editorService: IEditorService,
		@IHostService private readonly hostService: IHostService,
		@IChatAgentService private readonly chatAgentService: IChatAgentService,
	) {
		super();

		this._register(instantiationService.createInstance(KeywordActivationStatusEntry));

		this.registerListeners();
	}

	private registerListeners(): void {
		this._register(Event.runAndSubscribe(this.speechService.onDidChangeHasSpeechProvider, () => {
			this.updateConfiguration();
			this.handleKeywordActivation();
		}));

		const onDidAddDefaultAgent = this._register(this.chatAgentService.onDidChangeAgents(() => {
			if (this.chatAgentService.getDefaultAgent(ChatAgentLocation.Chat)) {
				this.updateConfiguration();
				this.handleKeywordActivation();

				onDidAddDefaultAgent.dispose();
			}
		}));

		this._register(this.speechService.onDidStartSpeechToTextSession(() => this.handleKeywordActivation()));
		this._register(this.speechService.onDidEndSpeechToTextSession(() => this.handleKeywordActivation()));

		this._register(this.configurationService.onDidChangeConfiguration(e => {
			if (e.affectsConfiguration(KEYWORD_ACTIVIATION_SETTING_ID)) {
				this.handleKeywordActivation();
			}
		}));
	}

	private updateConfiguration(): void {
		if (!this.speechService.hasSpeechProvider || !this.chatAgentService.getDefaultAgent(ChatAgentLocation.Chat)) {
			return; // these settings require a speech and chat provider
		}

		const registry = Registry.as<IConfigurationRegistry>(Extensions.Configuration);
		registry.registerConfiguration({
			...accessibilityConfigurationNodeBase,
			properties: {
				[KEYWORD_ACTIVIATION_SETTING_ID]: {
					'type': 'string',
					'enum': [
						KeywordActivationContribution.SETTINGS_VALUE.OFF,
						KeywordActivationContribution.SETTINGS_VALUE.VIEW_CHAT,
						KeywordActivationContribution.SETTINGS_VALUE.QUICK_CHAT,
						KeywordActivationContribution.SETTINGS_VALUE.INLINE_CHAT,
						KeywordActivationContribution.SETTINGS_VALUE.CHAT_IN_CONTEXT
					],
					'enumDescriptions': [
						localize('voice.keywordActivation.off', "Keyword activation is disabled."),
						localize('voice.keywordActivation.chatInView', "Keyword activation is enabled and listening for 'Hey Code' to start a voice chat session in the chat view."),
						localize('voice.keywordActivation.quickChat', "Keyword activation is enabled and listening for 'Hey Code' to start a voice chat session in the quick chat."),
						localize('voice.keywordActivation.inlineChat', "Keyword activation is enabled and listening for 'Hey Code' to start a voice chat session in the active editor if possible."),
						localize('voice.keywordActivation.chatInContext', "Keyword activation is enabled and listening for 'Hey Code' to start a voice chat session in the active editor or view depending on keyboard focus.")
					],
					'description': localize('voice.keywordActivation', "Controls whether the keyword phrase 'Hey Code' is recognized to start a voice chat session. Enabling this will start recording from the microphone but the audio is processed locally and never sent to a server."),
					'default': 'off',
					'tags': ['accessibility']
				}
			}
		});
	}

	private handleKeywordActivation(): void {
		const enabled =
			supportsKeywordActivation(this.configurationService, this.speechService, this.chatAgentService) &&
			!this.speechService.hasActiveSpeechToTextSession;
		if (
			(enabled && this.activeSession) ||
			(!enabled && !this.activeSession)
		) {
			return; // already running or stopped
		}

		// Start keyword activation
		if (enabled) {
			this.enableKeywordActivation();
		}

		// Stop keyword activation
		else {
			this.disableKeywordActivation();
		}
	}

	private async enableKeywordActivation(): Promise<void> {
		const session = this.activeSession = new CancellationTokenSource();
		const result = await this.speechService.recognizeKeyword(session.token);
		if (session.token.isCancellationRequested || session !== this.activeSession) {
			return; // cancelled
		}

		this.activeSession = undefined;

		if (result === KeywordRecognitionStatus.Recognized) {
			if (this.hostService.hasFocus) {
				this.commandService.executeCommand(this.getKeywordCommand());
			}

			// Immediately start another keyword activation session
			// because we cannot assume that the command we execute
			// will trigger a speech recognition session.

			this.handleKeywordActivation();
		}
	}

	private getKeywordCommand(): string {
		const setting = this.configurationService.getValue(KEYWORD_ACTIVIATION_SETTING_ID);
		switch (setting) {
			case KeywordActivationContribution.SETTINGS_VALUE.INLINE_CHAT:
				return InlineVoiceChatAction.ID;
			case KeywordActivationContribution.SETTINGS_VALUE.QUICK_CHAT:
				return QuickVoiceChatAction.ID;
			case KeywordActivationContribution.SETTINGS_VALUE.CHAT_IN_CONTEXT: {
				const activeCodeEditor = getCodeEditor(this.editorService.activeTextEditorControl);
				if (activeCodeEditor?.hasWidgetFocus()) {
					return InlineVoiceChatAction.ID;
				}
			}
			default:
				return VoiceChatInChatViewAction.ID;
		}
	}

	private disableKeywordActivation(): void {
		this.activeSession?.dispose(true);
		this.activeSession = undefined;
	}

	override dispose(): void {
		this.activeSession?.dispose();

		super.dispose();
	}
}

class KeywordActivationStatusEntry extends Disposable {

	private readonly entry = this._register(new MutableDisposable<IStatusbarEntryAccessor>());

	private static STATUS_NAME = localize('keywordActivation.status.name', "Voice Keyword Activation");
	private static STATUS_COMMAND = 'keywordActivation.status.command';
	private static STATUS_ACTIVE = localize('keywordActivation.status.active', "Listening to 'Hey Code'...");
	private static STATUS_INACTIVE = localize('keywordActivation.status.inactive', "Waiting for voice chat to end...");

	constructor(
		@ISpeechService private readonly speechService: ISpeechService,
		@IStatusbarService private readonly statusbarService: IStatusbarService,
		@ICommandService private readonly commandService: ICommandService,
		@IConfigurationService private readonly configurationService: IConfigurationService,
		@IChatAgentService private readonly chatAgentService: IChatAgentService
	) {
		super();

		this._register(CommandsRegistry.registerCommand(KeywordActivationStatusEntry.STATUS_COMMAND, () => this.commandService.executeCommand('workbench.action.openSettings', KEYWORD_ACTIVIATION_SETTING_ID)));

		this.registerListeners();
		this.updateStatusEntry();
	}

	private registerListeners(): void {
		this._register(this.speechService.onDidStartKeywordRecognition(() => this.updateStatusEntry()));
		this._register(this.speechService.onDidEndKeywordRecognition(() => this.updateStatusEntry()));
		this._register(this.configurationService.onDidChangeConfiguration(e => {
			if (e.affectsConfiguration(KEYWORD_ACTIVIATION_SETTING_ID)) {
				this.updateStatusEntry();
			}
		}));
	}

	private updateStatusEntry(): void {
		const visible = supportsKeywordActivation(this.configurationService, this.speechService, this.chatAgentService);
		if (visible) {
			if (!this.entry.value) {
				this.createStatusEntry();
			}

			this.updateStatusLabel();
		} else {
			this.entry.clear();
		}
	}

	private createStatusEntry() {
		this.entry.value = this.statusbarService.addEntry(this.getStatusEntryProperties(), 'status.voiceKeywordActivation', StatusbarAlignment.RIGHT, 103);
	}

	private getStatusEntryProperties(): IStatusbarEntry {
		return {
			name: KeywordActivationStatusEntry.STATUS_NAME,
			text: this.speechService.hasActiveKeywordRecognition ? '$(mic-filled)' : '$(mic)',
			tooltip: this.speechService.hasActiveKeywordRecognition ? KeywordActivationStatusEntry.STATUS_ACTIVE : KeywordActivationStatusEntry.STATUS_INACTIVE,
			ariaLabel: this.speechService.hasActiveKeywordRecognition ? KeywordActivationStatusEntry.STATUS_ACTIVE : KeywordActivationStatusEntry.STATUS_INACTIVE,
			command: KeywordActivationStatusEntry.STATUS_COMMAND,
			kind: 'prominent',
			showInAllWindows: true
		};
	}

	private updateStatusLabel(): void {
		this.entry.value?.update(this.getStatusEntryProperties());
	}
}

//#endregion

registerThemingParticipant((theme, collector) => {
	let activeRecordingColor: Color | undefined;
	let activeRecordingDimmedColor: Color | undefined;
	if (!isHighContrast(theme.type)) {
		activeRecordingColor = theme.getColor(ACTIVITY_BAR_FOREGROUND) ?? theme.getColor(focusBorder);
		activeRecordingDimmedColor = activeRecordingColor?.transparent(0.38);
	} else {
		activeRecordingColor = theme.getColor(contrastBorder);
		activeRecordingDimmedColor = theme.getColor(contrastBorder);
	}

	// Show a "microphone" or "pulse" icon that glows via an outline animation while speech-to-text or text-to-speech is in progress.
	collector.addRule(`
		.monaco-workbench.monaco-enable-motion .interactive-input-part .monaco-action-bar .action-label.codicon-sync.codicon-modifier-spin:not(.disabled),
		.monaco-workbench.monaco-enable-motion .interactive-input-part .monaco-action-bar .action-label.codicon-loading.codicon-modifier-spin:not(.disabled) {
			color: ${activeRecordingColor};
			outline: 1px solid ${activeRecordingColor};
			outline-offset: -1px;
			animation: pulseAnimation 1s infinite;
			border-radius: 50%;
		}

		.monaco-workbench.monaco-enable-motion .interactive-input-part .monaco-action-bar .action-label.codicon-sync.codicon-modifier-spin:not(.disabled)::before,
		.monaco-workbench.monaco-enable-motion .interactive-input-part .monaco-action-bar .action-label.codicon-loading.codicon-modifier-spin:not(.disabled)::before {
			position: absolute;
			outline: 1px solid ${activeRecordingColor};
			outline-offset: 2px;
			border-radius: 50%;
			width: 16px;
			height: 16px;
		}

		.monaco-workbench.monaco-enable-motion .interactive-input-part .monaco-action-bar .action-label.codicon-sync.codicon-modifier-spin:not(.disabled)::after,
		.monaco-workbench.monaco-enable-motion .interactive-input-part .monaco-action-bar .action-label.codicon-loading.codicon-modifier-spin:not(.disabled)::after {
			outline: 2px solid ${activeRecordingColor};
			outline-offset: -1px;
			animation: pulseAnimation 1500ms cubic-bezier(0.75, 0, 0.25, 1) infinite;
		}

		@keyframes pulseAnimation {
			0% {
				outline-width: 2px;
			}
			62% {
				outline-width: 5px;
				outline-color: ${activeRecordingDimmedColor};
			}
			100% {
				outline-width: 2px;
			}
		}
	`);
});