// Path: blob/main/src/vs/workbench/contrib/chat/common/voiceChatService.ts
// 3296 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { localize } from '../../../../nls.js';
import { CancellationToken } from '../../../../base/common/cancellation.js';
import { Emitter, Event } from '../../../../base/common/event.js';
import { Disposable, DisposableStore } from '../../../../base/common/lifecycle.js';
import { rtrim } from '../../../../base/common/strings.js';
import { IContextKey, IContextKeyService, RawContextKey } from '../../../../platform/contextkey/common/contextkey.js';
import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
import { IChatAgentService } from './chatAgents.js';
import { IChatModel } from './chatModel.js';
import { chatAgentLeader, chatSubcommandLeader } from './chatParserTypes.js';
import { ISpeechService, ISpeechToTextEvent, SpeechToTextStatus } from '../../speech/common/speechService.js';

export const IVoiceChatService = createDecorator<IVoiceChatService>('voiceChatService');

/**
 * Options for `IVoiceChatService.createVoiceChatSession`.
 */
export interface IVoiceChatSessionOptions {

	/**
	 * When `true`, spoken agent prefixes ("at workspace") are recognized
	 * and a spoken slash command that is not preceded by an agent is
	 * rewritten to include the agent that owns the command.
	 */
	readonly usesAgents?: boolean;

	/**
	 * The chat model the session belongs to.
	 */
	readonly model?: IChatModel;
}

export interface IVoiceChatService {

	readonly _serviceBrand: undefined;

	/**
	 * Similar to `ISpeechService.createSpeechToTextSession`, but with
	 * support for agent prefixes and command prefixes. For example,
	 * if the user says "at workspace slash fix this problem", the result
	 * will be "@workspace /fix this problem".
	 */
	createVoiceChatSession(token: CancellationToken, options: IVoiceChatSessionOptions): Promise<IVoiceChatSession>;
}

export interface IVoiceChatTextEvent extends ISpeechToTextEvent {

	/**
	 * This property will be `true` when the text recognized
	 * so far only consists of agent prefixes (`@workspace`)
	 * and/or command prefixes (`@workspace /fix`).
	 */
	readonly waitingForInput?: boolean;
}

export interface IVoiceChatSession {

	/**
	 * Fires for every event of the underlying speech-to-text session,
	 * with recognized text rewritten to use agent and command prefixes.
	 */
	readonly onDidChange: Event<IVoiceChatTextEvent>;
}

/**
 * The agent (and optionally the slash command) that a spoken phrase maps to.
 */
interface IPhraseValue {
	readonly agent: string;
	readonly command?: string;
}

/**
 * Selects which parts of an `IPhraseValue` are rendered by `toText`.
 */
enum PhraseTextType {
	AGENT = 1,
	COMMAND = 2,
	AGENT_AND_COMMAND = 3
}

export const VoiceChatInProgress = new RawContextKey<boolean>('voiceChatInProgress', false, { type: 'boolean', description: localize('voiceChatInProgress', "A speech-to-text session is in progress for chat.") });

export class VoiceChatService extends Disposable implements IVoiceChatService {

	readonly _serviceBrand: undefined;

	private static readonly AGENT_PREFIX = chatAgentLeader;
	private static readonly COMMAND_PREFIX = chatSubcommandLeader;

	// The spoken word for each prefix character, as it appears mid-sentence.
	private static readonly PHRASES_LOWER = {
		[this.AGENT_PREFIX]: 'at',
		[this.COMMAND_PREFIX]: 'slash'
	};

	// The spoken word for each prefix character, as it appears capitalized.
	private static readonly PHRASES_UPPER = {
		[this.AGENT_PREFIX]: 'At',
		[this.COMMAND_PREFIX]: 'Slash'
	};

	// Agent name -> the word that is matched in speech instead of the name.
	private static readonly CHAT_AGENT_ALIAS = new Map<string, string>([['vscode', 'code']]);

	private readonly voiceChatInProgress: IContextKey<boolean>;

	// Number of sessions that reported `Started` and have not yet stopped
	// or been canceled. The context key is reset when this reaches 0.
	private activeVoiceChatSessions = 0;

	constructor(
		@ISpeechService private readonly speechService: ISpeechService,
		@IChatAgentService private readonly chatAgentService: IChatAgentService,
		@IContextKeyService contextKeyService: IContextKeyService
	) {
		super();

		this.voiceChatInProgress = VoiceChatInProgress.bindTo(contextKeyService);
	}

	/**
	 * Builds the lookup table from lower-cased spoken phrases to the agent
	 * and command they stand for. For every activated agent this registers
	 * "at <agent>", and for each of its slash commands both "slash <command>"
	 * and "at <agent> slash <command>".
	 *
	 * If several agents contribute a slash command with the same name, the
	 * agent iterated last owns the bare "slash <command>" phrase.
	 *
	 * @param model Currently not used by this method.
	 */
	private createPhrases(model?: IChatModel): Map<string, IPhraseValue> {
		const phrases = new Map<string, IPhraseValue>();

		for (const agent of this.chatAgentService.getActivatedAgents()) {
			const agentPhrase = `${VoiceChatService.PHRASES_LOWER[VoiceChatService.AGENT_PREFIX]} ${VoiceChatService.CHAT_AGENT_ALIAS.get(agent.name) ?? agent.name}`.toLowerCase();
			phrases.set(agentPhrase, { agent: agent.name });

			for (const slashCommand of agent.slashCommands) {
				const slashCommandPhrase = `${VoiceChatService.PHRASES_LOWER[VoiceChatService.COMMAND_PREFIX]} ${slashCommand.name}`.toLowerCase();
				phrases.set(slashCommandPhrase, { agent: agent.name, command: slashCommand.name });

				const agentSlashCommandPhrase = `${agentPhrase} ${slashCommandPhrase}`.toLowerCase();
				phrases.set(agentSlashCommandPhrase, { agent: agent.name, command: slashCommand.name });
			}
		}

		return phrases;
	}

	/**
	 * Renders a phrase value as chat input text, e.g. `@workspace`,
	 * `/fix` or `@workspace /fix`, depending on `type`.
	 */
	private toText(value: IPhraseValue, type: PhraseTextType): string {
		switch (type) {
			case PhraseTextType.AGENT:
				return `${VoiceChatService.AGENT_PREFIX}${value.agent}`;
			case PhraseTextType.COMMAND:
				return `${VoiceChatService.COMMAND_PREFIX}${value.command}`;
			case PhraseTextType.AGENT_AND_COMMAND:
				return `${VoiceChatService.AGENT_PREFIX}${value.agent} ${VoiceChatService.COMMAND_PREFIX}${value.command}`;
		}
	}

	/**
	 * Creates a speech-to-text session whose recognized text is rewritten
	 * so that spoken agent and command prefixes become `@agent` and
	 * `/command`.
	 *
	 * @param token Cancels the session. On cancellation all listeners and
	 * the returned event are disposed.
	 * @param options See `IVoiceChatSessionOptions`.
	 * @returns A session whose `onDidChange` forwards the (possibly
	 * rewritten) events of the underlying speech-to-text session.
	 */
	async createVoiceChatSession(token: CancellationToken, options: IVoiceChatSessionOptions): Promise<IVoiceChatSession> {
		const disposables = new DisposableStore();

		// Whether THIS session currently contributes to the shared
		// `activeVoiceChatSessions` counter. Without it, a session that never
		// started, or one that stops and is then canceled, would decrement on
		// behalf of another session and clear the context key too early.
		let countedAsActive = false;

		const onSessionStoppedOrCanceled = (dispose: boolean) => {
			if (countedAsActive) {
				countedAsActive = false;

				this.activeVoiceChatSessions = Math.max(0, this.activeVoiceChatSessions - 1);
				if (this.activeVoiceChatSessions === 0) {
					this.voiceChatInProgress.reset();
				}
			}

			if (dispose) {
				disposables.dispose();
			}
		};

		disposables.add(token.onCancellationRequested(() => onSessionStoppedOrCanceled(true)));

		// Once an agent or command was seen in a final (`Recognized`) result,
		// it is not looked for again in later utterances of this session.
		let detectedAgent = false;
		let detectedSlashCommand = false;

		const emitter = disposables.add(new Emitter<IVoiceChatTextEvent>());
		const session = await this.speechService.createSpeechToTextSession(token, 'chat');

		if (token.isCancellationRequested) {
			onSessionStoppedOrCanceled(true);

			// The store is disposed at this point: do not register further
			// listeners on it, nothing would ever dispose them again.
			return {
				onDidChange: emitter.event
			};
		}

		const phrases = this.createPhrases(options.model);
		disposables.add(session.onDidChange(e => {
			switch (e.status) {
				case SpeechToTextStatus.Recognizing:
				case SpeechToTextStatus.Recognized: {
					let massagedEvent: IVoiceChatTextEvent = e;
					if (e.text) {
						const startsWithAgent = e.text.startsWith(VoiceChatService.PHRASES_UPPER[VoiceChatService.AGENT_PREFIX]) || e.text.startsWith(VoiceChatService.PHRASES_LOWER[VoiceChatService.AGENT_PREFIX]);
						const startsWithSlashCommand = e.text.startsWith(VoiceChatService.PHRASES_UPPER[VoiceChatService.COMMAND_PREFIX]) || e.text.startsWith(VoiceChatService.PHRASES_LOWER[VoiceChatService.COMMAND_PREFIX]);
						if (startsWithAgent || startsWithSlashCommand) {
							const originalWords = e.text.split(' ');
							let transformedWords: string[] | undefined;

							// `true` when the text consists of nothing but the prefix words
							let waitingForInput = false;

							// Check for agent + slash command
							if (options.usesAgents && startsWithAgent && !detectedAgent && !detectedSlashCommand && originalWords.length >= 4) {
								const phrase = phrases.get(originalWords.slice(0, 4).map(word => this.normalizeWord(word)).join(' '));
								if (phrase) {
									transformedWords = [this.toText(phrase, PhraseTextType.AGENT_AND_COMMAND), ...originalWords.slice(4)];

									waitingForInput = originalWords.length === 4;

									if (e.status === SpeechToTextStatus.Recognized) {
										detectedAgent = true;
										detectedSlashCommand = true;
									}
								}
							}

							// Check for agent (if not done already)
							if (options.usesAgents && startsWithAgent && !detectedAgent && !transformedWords && originalWords.length >= 2) {
								const phrase = phrases.get(originalWords.slice(0, 2).map(word => this.normalizeWord(word)).join(' '));
								if (phrase) {
									transformedWords = [this.toText(phrase, PhraseTextType.AGENT), ...originalWords.slice(2)];

									waitingForInput = originalWords.length === 2;

									if (e.status === SpeechToTextStatus.Recognized) {
										detectedAgent = true;
									}
								}
							}

							// Check for slash command (if not done already)
							if (startsWithSlashCommand && !detectedSlashCommand && !transformedWords && originalWords.length >= 2) {
								const phrase = phrases.get(originalWords.slice(0, 2).map(word => this.normalizeWord(word)).join(' '));
								if (phrase) {
									// When agents are supported and none was detected before,
									// rewrite `/fix` to `@workspace /fix` so that the command
									// is sent to the agent that owns it. Otherwise only the
									// command itself is emitted.
									const textType = options.usesAgents && !detectedAgent ?
										PhraseTextType.AGENT_AND_COMMAND :
										PhraseTextType.COMMAND;
									transformedWords = [this.toText(phrase, textType), ...originalWords.slice(2)];

									waitingForInput = originalWords.length === 2;

									if (e.status === SpeechToTextStatus.Recognized) {
										detectedSlashCommand = true;
									}
								}
							}

							massagedEvent = {
								status: e.status,
								text: (transformedWords ?? originalWords).join(' '),
								waitingForInput
							};
						}
					}
					emitter.fire(massagedEvent);
					break;
				}
				case SpeechToTextStatus.Started:
					// Count each session at most once, even if it reports
					// `Started` repeatedly without stopping in between.
					if (!countedAsActive) {
						countedAsActive = true;
						this.activeVoiceChatSessions++;
					}
					this.voiceChatInProgress.set(true);
					emitter.fire(e);
					break;
				case SpeechToTextStatus.Stopped:
					onSessionStoppedOrCanceled(false);
					emitter.fire(e);
					break;
				case SpeechToTextStatus.Error:
					emitter.fire(e);
					break;
			}
		}));

		return {
			onDidChange: emitter.event
		};
	}

	/**
	 * Prepares a recognized word for phrase lookup: removes trailing `.`,
	 * then trailing `,`, then trailing `?` (in that order) and lower-cases
	 * the result.
	 */
	private normalizeWord(word: string): string {
		word = rtrim(word, '.');
		word = rtrim(word, ',');
		word = rtrim(word, '?');

		return word.toLowerCase();
	}
}