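// --- Web viewer page chrome captured with the file (not part of the source) ---
// Book a Demo!
// CoCalc Logo Icon
// Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
// microsoft
// GitHub Repository: microsoft/vscode
// Path: blob/main/src/vs/workbench/contrib/chat/common/voiceChatService.ts
// 3296 views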
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { localize } from '../../../../nls.js';
7
import { CancellationToken } from '../../../../base/common/cancellation.js';
8
import { Emitter, Event } from '../../../../base/common/event.js';
9
import { Disposable, DisposableStore } from '../../../../base/common/lifecycle.js';
10
import { rtrim } from '../../../../base/common/strings.js';
11
import { IContextKey, IContextKeyService, RawContextKey } from '../../../../platform/contextkey/common/contextkey.js';
12
import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
13
import { IChatAgentService } from './chatAgents.js';
14
import { IChatModel } from './chatModel.js';
15
import { chatAgentLeader, chatSubcommandLeader } from './chatParserTypes.js';
16
import { ISpeechService, ISpeechToTextEvent, SpeechToTextStatus } from '../../speech/common/speechService.js';
17
18
// Identifier for the voice chat service, created via `createDecorator` under the
// id 'voiceChatService'. It intentionally shares its name with the
// `IVoiceChatService` interface declared in this file.
// NOTE(review): presumably consumed as a constructor-parameter decorator, the same
// way `@ISpeechService` is used by `VoiceChatService` — confirm against consumers.
export const IVoiceChatService = createDecorator<IVoiceChatService>('voiceChatService');
19
20
/**
 * Options accepted by `IVoiceChatService.createVoiceChatSession`.
 */
export interface IVoiceChatSessionOptions {
	/**
	 * When truthy, spoken agent prefixes ("at <agent>") are recognized, and a
	 * spoken slash command that arrives before any agent was detected is
	 * emitted together with its agent (`@agent /command`).
	 */
	readonly usesAgents?: boolean;

	/**
	 * Chat model associated with the session. It is forwarded to the phrase
	 * table construction in `VoiceChatService`.
	 */
	readonly model?: IChatModel;
}
24
25
/**
 * Service that produces speech-to-text sessions tailored to chat input.
 */
export interface IVoiceChatService {

	readonly _serviceBrand: undefined;

	/**
	 * Similar to `ISpeechService.createSpeechToTextSession`, but with
	 * support for agent prefixes and command prefixes. For example,
	 * if the user says "at workspace slash fix this problem", the result
	 * will be "@workspace /fix this problem".
	 *
	 * @param token cancellation token for the session.
	 * @param options controls agent detection, see `IVoiceChatSessionOptions`.
	 * @returns a session whose `onDidChange` event reports the rewritten text.
	 */
	createVoiceChatSession(token: CancellationToken, options: IVoiceChatSessionOptions): Promise<IVoiceChatSession>;
}
37
38
/**
 * Speech-to-text event as emitted by a voice chat session: the underlying
 * `ISpeechToTextEvent` plus chat specific state.
 */
export interface IVoiceChatTextEvent extends ISpeechToTextEvent {

	/**
	 * This property will be `true` when the text recognized
	 * so far only consists of agent prefixes (`@workspace`)
	 * and/or command prefixes (`@workspace /fix`).
	 */
	readonly waitingForInput?: boolean;
}
47
48
/**
 * Handle returned by `IVoiceChatService.createVoiceChatSession`.
 */
export interface IVoiceChatSession {
	/** Fires for every (possibly rewritten) speech-to-text event of the session. */
	readonly onDidChange: Event<IVoiceChatTextEvent>;
}
51
52
/**
 * Value stored for a spoken phrase in the phrase table: the agent the phrase
 * maps to and, for phrases that include a slash command, that command's name.
 */
interface IPhraseValue {
	/** Name of the chat agent (without the leading agent prefix). */
	readonly agent: string;

	/** Name of the slash command (without the leading command prefix), if any. */
	readonly command?: string;
}
56
57
/**
 * Selects which textual form `VoiceChatService.toText` produces for a phrase.
 */
enum PhraseTextType {
	/** Agent only, e.g. `@workspace`. */
	AGENT = 1,
	/** Slash command only, e.g. `/fix`. */
	COMMAND = 2,
	/** Agent followed by slash command, e.g. `@workspace /fix`. */
	AGENT_AND_COMMAND = 3
}
62
63
// Boolean context key 'voiceChatInProgress' (default `false`). `VoiceChatService`
// binds it in its constructor, sets it to `true` when a session reports
// `SpeechToTextStatus.Started`, and resets it once its active-session counter
// drops back to zero.
export const VoiceChatInProgress = new RawContextKey<boolean>('voiceChatInProgress', false, { type: 'boolean', description: localize('voiceChatInProgress', "A speech-to-text session is in progress for chat.") });
64
65
/**
 * Default `IVoiceChatService` implementation.
 *
 * Wraps a speech-to-text session from `ISpeechService` and rewrites spoken
 * agent / slash command phrases at the start of the recognized text
 * ("at workspace slash fix ...") into their chat syntax ("@workspace /fix ...").
 * It also maintains the `VoiceChatInProgress` context key while at least one
 * of its sessions is running.
 */
export class VoiceChatService extends Disposable implements IVoiceChatService {

	readonly _serviceBrand: undefined;

	private static readonly AGENT_PREFIX = chatAgentLeader;
	private static readonly COMMAND_PREFIX = chatSubcommandLeader;

	/** Spoken words for the agent / command prefixes, lower-case spelling. */
	private static readonly PHRASES_LOWER = {
		[this.AGENT_PREFIX]: 'at',
		[this.COMMAND_PREFIX]: 'slash'
	};

	/** Spoken words for the agent / command prefixes, capitalized spelling. */
	private static readonly PHRASES_UPPER = {
		[this.AGENT_PREFIX]: 'At',
		[this.COMMAND_PREFIX]: 'Slash'
	};

	/** Agent name -> word that is expected to be spoken instead of the name. */
	private static readonly CHAT_AGENT_ALIAS = new Map<string, string>([['vscode', 'code']]);

	private readonly voiceChatInProgress: IContextKey<boolean>;

	/** Number of sessions that reported `Started` and have not stopped or been cancelled yet. */
	private activeVoiceChatSessions = 0;

	constructor(
		@ISpeechService private readonly speechService: ISpeechService,
		@IChatAgentService private readonly chatAgentService: IChatAgentService,
		@IContextKeyService contextKeyService: IContextKeyService
	) {
		super();

		this.voiceChatInProgress = VoiceChatInProgress.bindTo(contextKeyService);
	}

	/**
	 * Builds the lookup table from lower-cased spoken phrases to the agent
	 * (and optionally slash command) they stand for. For every activated agent
	 * three kinds of keys are registered: "at <agent>", "slash <command>" and
	 * "at <agent> slash <command>". A "slash <command>" key registered by a
	 * later agent overwrites the one of an earlier agent.
	 *
	 * @param model currently not read by this method.
	 */
	private createPhrases(model?: IChatModel): Map<string, IPhraseValue> {
		const phrases = new Map<string, IPhraseValue>();

		for (const agent of this.chatAgentService.getActivatedAgents()) {
			const agentPhrase = `${VoiceChatService.PHRASES_LOWER[VoiceChatService.AGENT_PREFIX]} ${VoiceChatService.CHAT_AGENT_ALIAS.get(agent.name) ?? agent.name}`.toLowerCase();
			phrases.set(agentPhrase, { agent: agent.name });

			for (const slashCommand of agent.slashCommands) {
				const slashCommandPhrase = `${VoiceChatService.PHRASES_LOWER[VoiceChatService.COMMAND_PREFIX]} ${slashCommand.name}`.toLowerCase();
				phrases.set(slashCommandPhrase, { agent: agent.name, command: slashCommand.name });

				const agentSlashCommandPhrase = `${agentPhrase} ${slashCommandPhrase}`.toLowerCase();
				phrases.set(agentSlashCommandPhrase, { agent: agent.name, command: slashCommand.name });
			}
		}

		return phrases;
	}

	/**
	 * Renders a phrase value in chat syntax.
	 *
	 * @param value agent and (optional) command to render.
	 * @param type which parts to render.
	 */
	private toText(value: IPhraseValue, type: PhraseTextType): string {
		switch (type) {
			case PhraseTextType.AGENT:
				return `${VoiceChatService.AGENT_PREFIX}${value.agent}`;
			case PhraseTextType.COMMAND:
				return `${VoiceChatService.COMMAND_PREFIX}${value.command}`;
			case PhraseTextType.AGENT_AND_COMMAND:
				return `${VoiceChatService.AGENT_PREFIX}${value.agent} ${VoiceChatService.COMMAND_PREFIX}${value.command}`;
		}
	}

	/**
	 * Creates a speech-to-text session for chat and returns a session whose
	 * `onDidChange` event forwards the speech events, with leading agent /
	 * slash command phrases rewritten into chat syntax.
	 *
	 * @param token cancelling it disposes the session's listeners and emitter.
	 * @param options `usesAgents` enables agent detection; `model` is passed on
	 * to the phrase table construction.
	 */
	async createVoiceChatSession(token: CancellationToken, options: IVoiceChatSessionOptions): Promise<IVoiceChatSession> {
		const disposables = new DisposableStore();

		// Whether THIS session currently contributes to `activeVoiceChatSessions`.
		// Without it, a cancellation before `Started` (or a cancellation after
		// `Stopped`) would decrement the shared counter on behalf of another,
		// still running session.
		let countedAsActive = false;

		const onSessionStoppedOrCanceled = (dispose: boolean) => {
			if (countedAsActive) {
				countedAsActive = false;

				this.activeVoiceChatSessions = Math.max(0, this.activeVoiceChatSessions - 1);
				if (this.activeVoiceChatSessions === 0) {
					this.voiceChatInProgress.reset();
				}
			}

			if (dispose) {
				disposables.dispose();
			}
		};

		disposables.add(token.onCancellationRequested(() => onSessionStoppedOrCanceled(true)));

		// Once an agent / slash command was part of a final (`Recognized`)
		// result, later results are no longer checked for that kind of prefix.
		let detectedAgent = false;
		let detectedSlashCommand = false;

		const emitter = disposables.add(new Emitter<IVoiceChatTextEvent>());
		const session = await this.speechService.createSpeechToTextSession(token, 'chat');

		if (token.isCancellationRequested) {
			onSessionStoppedOrCanceled(true);

			// Everything is disposed at this point: do not subscribe to the
			// speech session, the listener could not be cleaned up anymore.
			return {
				onDidChange: emitter.event
			};
		}

		const phrases = this.createPhrases(options.model);
		disposables.add(session.onDidChange(e => {
			switch (e.status) {
				case SpeechToTextStatus.Recognizing:
				case SpeechToTextStatus.Recognized: {
					let massagedEvent: IVoiceChatTextEvent = e;
					if (e.text) {
						const startsWithAgent = e.text.startsWith(VoiceChatService.PHRASES_UPPER[VoiceChatService.AGENT_PREFIX]) || e.text.startsWith(VoiceChatService.PHRASES_LOWER[VoiceChatService.AGENT_PREFIX]);
						const startsWithSlashCommand = e.text.startsWith(VoiceChatService.PHRASES_UPPER[VoiceChatService.COMMAND_PREFIX]) || e.text.startsWith(VoiceChatService.PHRASES_LOWER[VoiceChatService.COMMAND_PREFIX]);
						if (startsWithAgent || startsWithSlashCommand) {
							const originalWords = e.text.split(' ');
							let transformedWords: string[] | undefined;

							let waitingForInput = false;

							// Check for agent + slash command
							if (options.usesAgents && startsWithAgent && !detectedAgent && !detectedSlashCommand && originalWords.length >= 4) {
								const phrase = phrases.get(originalWords.slice(0, 4).map(word => this.normalizeWord(word)).join(' '));
								if (phrase) {
									transformedWords = [this.toText(phrase, PhraseTextType.AGENT_AND_COMMAND), ...originalWords.slice(4)];

									waitingForInput = originalWords.length === 4;

									if (e.status === SpeechToTextStatus.Recognized) {
										detectedAgent = true;
										detectedSlashCommand = true;
									}
								}
							}

							// Check for agent (if not done already)
							if (options.usesAgents && startsWithAgent && !detectedAgent && !transformedWords && originalWords.length >= 2) {
								const phrase = phrases.get(originalWords.slice(0, 2).map(word => this.normalizeWord(word)).join(' '));
								if (phrase) {
									transformedWords = [this.toText(phrase, PhraseTextType.AGENT), ...originalWords.slice(2)];

									waitingForInput = originalWords.length === 2;

									if (e.status === SpeechToTextStatus.Recognized) {
										detectedAgent = true;
									}
								}
							}

							// Check for slash command (if not done already)
							if (startsWithSlashCommand && !detectedSlashCommand && !transformedWords && originalWords.length >= 2) {
								const phrase = phrases.get(originalWords.slice(0, 2).map(word => this.normalizeWord(word)).join(' '));
								if (phrase) {
									transformedWords = [this.toText(phrase, options.usesAgents && !detectedAgent ?
										PhraseTextType.AGENT_AND_COMMAND : 	// rewrite `/fix` to `@workspace /fix` in this case
										PhraseTextType.COMMAND				// when we have not yet detected an agent before
									), ...originalWords.slice(2)];

									waitingForInput = originalWords.length === 2;

									if (e.status === SpeechToTextStatus.Recognized) {
										detectedSlashCommand = true;
									}
								}
							}

							massagedEvent = {
								status: e.status,
								text: (transformedWords ?? originalWords).join(' '),
								waitingForInput
							};
						}
					}
					emitter.fire(massagedEvent);
					break;
				}
				case SpeechToTextStatus.Started:
					if (!countedAsActive) {
						countedAsActive = true;
						this.activeVoiceChatSessions++;
					}
					this.voiceChatInProgress.set(true);
					emitter.fire(e);
					break;
				case SpeechToTextStatus.Stopped:
					onSessionStoppedOrCanceled(false);
					emitter.fire(e);
					break;
				case SpeechToTextStatus.Error:
					emitter.fire(e);
					break;
			}
		}));

		return {
			onDidChange: emitter.event
		};
	}

	/**
	 * Lower-cases a spoken word and removes trailing `.`, `,` and `?`
	 * characters so that it can be matched against the phrase table.
	 */
	private normalizeWord(word: string): string {
		// Repeat until nothing changes so that mixed trailing punctuation
		// such as "fix.?" is removed regardless of its order (assumes `rtrim`
		// only ever removes trailing occurrences of the given character).
		let previous: string;
		do {
			previous = word;
			word = rtrim(word, '.');
			word = rtrim(word, ',');
			word = rtrim(word, '?');
		} while (word !== previous);

		return word.toLowerCase();
	}
}
252
253