Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/platform/windows/tts_driver_onecore.cpp
46006 views
1
/**************************************************************************/
2
/* tts_driver_onecore.cpp */
3
/**************************************************************************/
4
/* This file is part of: */
5
/* GODOT ENGINE */
6
/* https://godotengine.org */
7
/**************************************************************************/
8
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10
/* */
11
/* Permission is hereby granted, free of charge, to any person obtaining */
12
/* a copy of this software and associated documentation files (the */
13
/* "Software"), to deal in the Software without restriction, including */
14
/* without limitation the rights to use, copy, modify, merge, publish, */
15
/* distribute, sublicense, and/or sell copies of the Software, and to */
16
/* permit persons to whom the Software is furnished to do so, subject to */
17
/* the following conditions: */
18
/* */
19
/* The above copyright notice and this permission notice shall be */
20
/* included in all copies or substantial portions of the Software. */
21
/* */
22
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29
/**************************************************************************/
30
31
#include "tts_driver_onecore.h"
32
33
#include "core/object/callable_mp.h"
34
#include "servers/display/display_server.h"
35
36
// Pointer to the live driver instance; assigned in the constructor and reset to nullptr in the destructor.
TTSDriverOneCore *TTSDriverOneCore::singleton = nullptr;
37
38
void TTSDriverOneCore::_speech_index_mark(int p_msg_id, int p_index_mark) {
39
DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_BOUNDARY, p_msg_id, p_index_mark);
40
}
41
42
void TTSDriverOneCore::_speech_cancel(int p_msg_id) {
43
DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, p_msg_id);
44
}
45
46
void TTSDriverOneCore::_speech_end(int p_msg_id) {
47
DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_ENDED, p_msg_id);
48
}
49
50
void TTSDriverOneCore::_dispose_current(bool p_silent, bool p_canceled) {
51
if (media.get() != nullptr) {
52
for (const TrackData &T : tracks) {
53
T.track.CueEntered(T.token);
54
}
55
tracks.clear();
56
media->MediaFailed(singleton->token_f);
57
media->MediaEnded(singleton->token_e);
58
if (!ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) {
59
media->PlaybackMediaMarkerReached(singleton->token_s);
60
}
61
media->Close();
62
media.reset();
63
64
if (!p_silent) {
65
if (p_canceled) {
66
callable_mp(this, &TTSDriverOneCore::_speech_cancel).call_deferred(id);
67
} else {
68
callable_mp(this, &TTSDriverOneCore::_speech_end).call_deferred(id);
69
}
70
}
71
id = -1;
72
string = Char16String();
73
playing = false;
74
paused = false;
75
offset = 0;
76
}
77
}
78
79
void TTSDriverOneCore::process_events() {
80
if (update_requested && !paused && queue.size() > 0 && !is_speaking()) {
81
TTSUtterance &message = queue.front()->get();
82
_dispose_current(true);
83
playing = true;
84
85
SpeechSynthesizer synth = SpeechSynthesizer();
86
87
if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) {
88
synth.Options().IncludeWordBoundaryMetadata(true);
89
}
90
if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 5)) {
91
synth.Options().SpeakingRate(CLAMP(message.rate, 0.5, 6.0));
92
synth.Options().AudioPitch(CLAMP(message.pitch, 0.0, 2.0));
93
synth.Options().AudioVolume(CLAMP((double)message.volume / 100.0, 0.0, 1.0));
94
}
95
96
winrt::hstring name = winrt::hstring((const wchar_t *)message.voice.utf16().get_data());
97
IVectorView<VoiceInformation> voices = SpeechSynthesizer::AllVoices();
98
for (uint32_t i = 0; i < voices.Size(); i++) {
99
VoiceInformation voice = voices.GetAt(i);
100
if (voice.Id() == name) {
101
synth.Voice(voice);
102
break;
103
}
104
}
105
106
string = message.text.utf16();
107
winrt::hstring text = winrt::hstring((const wchar_t *)string.get_data());
108
109
SpeechSynthesisStream stream = synth.SynthesizeTextToStreamAsync(text).get();
110
111
media = std::make_shared<MediaPlayer>();
112
token_f = media->MediaFailed([=, this](const MediaPlayer &p_sender, const MediaPlayerFailedEventArgs &p_args) {
113
_dispose_current(false, true);
114
});
115
token_e = media->MediaEnded([=, this](const MediaPlayer &p_sender, const IInspectable &p_args) {
116
_dispose_current(false, false);
117
});
118
if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) {
119
MediaPlaybackItem mitem = MediaPlaybackItem(MediaSource::CreateFromStream(stream, stream.ContentType()));
120
media->Source(mitem);
121
MediaPlaybackTimedMetadataTrackList list = mitem.TimedMetadataTracks();
122
123
for (uint32_t i = 0; i < list.Size(); i++) {
124
TimedMetadataTrack track = list.GetAt(i);
125
if (track.TimedMetadataKind() == TimedMetadataKind::Speech) {
126
winrt::event_token token = track.CueEntered([=, this](const TimedMetadataTrack &p_sender, const MediaCueEventArgs &p_args) {
127
SpeechCue sq;
128
p_args.Cue().as(sq);
129
int32_t pos16 = sq.StartPositionInInput().Value();
130
int pos = 0;
131
for (int j = 0; j < MIN(pos16, string.length()); j++) {
132
char16_t c = string[j];
133
if ((c & 0xfffffc00) == 0xd800) {
134
j++;
135
}
136
pos++;
137
}
138
callable_mp(singleton, &TTSDriverOneCore::_speech_index_mark).call_deferred(id, pos);
139
});
140
tracks.push_back({ track, token });
141
list.SetPresentationMode(i, TimedMetadataTrackPresentationMode::ApplicationPresented);
142
}
143
}
144
} else {
145
media->Source(MediaSource::CreateFromStream(stream, stream.ContentType()));
146
token_s = media->PlaybackMediaMarkerReached([=, this](const MediaPlayer &p_sender, const PlaybackMediaMarkerReachedEventArgs &p_args) {
147
offset += p_args.PlaybackMediaMarker().Text().size() + 1;
148
int pos = 0;
149
for (int j = 0; j < MIN(offset, string.length()); j++) {
150
char16_t c = string[j];
151
if ((c & 0xfffffc00) == 0xd800) {
152
j++;
153
}
154
pos++;
155
}
156
callable_mp(singleton, &TTSDriverOneCore::_speech_index_mark).call_deferred(id, pos);
157
});
158
}
159
media->AutoPlay(true);
160
161
id = message.id;
162
update_requested = false;
163
paused = false;
164
165
media->Play();
166
167
DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_STARTED, message.id);
168
queue.pop_front();
169
}
170
}
171
172
bool TTSDriverOneCore::is_speaking() const {
173
return playing;
174
}
175
176
bool TTSDriverOneCore::is_paused() const {
177
return paused;
178
}
179
180
// Builds an Array with one Dictionary per voice reported by
// SpeechSynthesizer::AllVoices(). Each dictionary has the keys "id", "name"
// (the voice's display name) and "language".
Array TTSDriverOneCore::get_voices() const {
	// Converts a WinRT string (UTF-16) into an engine String.
	const auto to_engine_string = [](const winrt::hstring &p_value) -> String {
		return String::utf16((const char16_t *)p_value.c_str(), p_value.size());
	};

	Array result;
	IVectorView<VoiceInformation> installed = SpeechSynthesizer::AllVoices();
	for (uint32_t idx = 0; idx < installed.Size(); idx++) {
		VoiceInformation info = installed.GetAt(idx);

		Dictionary entry;
		entry["id"] = to_engine_string(info.Id());
		entry["name"] = to_engine_string(info.DisplayName());
		entry["language"] = to_engine_string(info.Language());
		result.push_back(entry);
	}
	return result;
}
198
199
void TTSDriverOneCore::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int64_t p_utterance_id, bool p_interrupt) {
200
if (p_interrupt) {
201
stop();
202
}
203
204
if (p_text.is_empty()) {
205
DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, p_utterance_id);
206
return;
207
}
208
209
TTSUtterance message;
210
message.text = p_text;
211
message.voice = p_voice;
212
message.volume = CLAMP(p_volume, 0, 100);
213
message.pitch = CLAMP(p_pitch, 0.f, 2.f);
214
message.rate = CLAMP(p_rate, 0.1f, 10.f);
215
message.id = p_utterance_id;
216
queue.push_back(message);
217
218
if (is_paused()) {
219
resume();
220
} else {
221
update_requested = true;
222
}
223
}
224
225
void TTSDriverOneCore::pause() {
226
if (!paused && playing) {
227
media->Pause();
228
paused = true;
229
}
230
}
231
232
void TTSDriverOneCore::resume() {
233
if (paused && playing) {
234
media->Play();
235
paused = false;
236
}
237
}
238
239
void TTSDriverOneCore::stop() {
240
for (TTSUtterance &message : queue) {
241
DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, message.id);
242
}
243
queue.clear();
244
_dispose_current(false, true);
245
}
246
247
bool TTSDriverOneCore::init() {
248
if (!ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 1)) {
249
print_verbose("Text-to-Speech: Cannot initialize OneCore driver, API contract not present!");
250
return false;
251
}
252
if (SpeechSynthesizer::AllVoices().Size() == 0) {
253
print_verbose("Text-to-Speech: Cannot initialize OneCore driver, no voices found!");
254
return false;
255
}
256
print_verbose("Text-to-Speech: OneCore initialized.");
257
return true;
258
}
259
260
// Registers the new instance as the driver singleton.
TTSDriverOneCore::TTSDriverOneCore() {
	singleton = this;
}
263
264
// Disposes of the current utterance as canceled (non-silent), then
// unregisters the singleton.
TTSDriverOneCore::~TTSDriverOneCore() {
	const bool silent = false;
	const bool canceled = true;
	_dispose_current(silent, canceled);

	singleton = nullptr;
}
268
269