CoCalc -- tts_driver

GitHub Repository: godotengine/godot
Path: blob/master/platform/windows/tts_driver_onecore.cpp
⁴⁶⁰⁰⁶ views
1
/**************************************************************************/
2
/*  tts_driver_onecore.cpp                                                */
3
/**************************************************************************/
4
/*                         This file is part of:                          */
5
/*                             GODOT ENGINE                               */
6
/*                        https://godotengine.org                         */
7
/**************************************************************************/
8
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
10
/*                                                                        */
11
/* Permission is hereby granted, free of charge, to any person obtaining  */
12
/* a copy of this software and associated documentation files (the        */
13
/* "Software"), to deal in the Software without restriction, including    */
14
/* without limitation the rights to use, copy, modify, merge, publish,    */
15
/* distribute, sublicense, and/or sell copies of the Software, and to     */
16
/* permit persons to whom the Software is furnished to do so, subject to  */
17
/* the following conditions:                                              */
18
/*                                                                        */
19
/* The above copyright notice and this permission notice shall be         */
20
/* included in all copies or substantial portions of the Software.        */
21
/*                                                                        */
22
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
23
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
24
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
26
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
27
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
28
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
29
/**************************************************************************/
30

31
#include "tts_driver_onecore.h"
32

33
#include "core/object/callable_mp.h"
34
#include "servers/display/display_server.h"
35

36
// Pointer to the live driver instance; set by the constructor and reset to nullptr by the destructor.
TTSDriverOneCore *TTSDriverOneCore::singleton = nullptr;
37

38
void TTSDriverOneCore::_speech_index_mark(int p_msg_id, int p_index_mark) {
39
	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_BOUNDARY, p_msg_id, p_index_mark);
40
}
41

42
void TTSDriverOneCore::_speech_cancel(int p_msg_id) {
43
	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, p_msg_id);
44
}
45

46
void TTSDriverOneCore::_speech_end(int p_msg_id) {
47
	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_ENDED, p_msg_id);
48
}
49

50
void TTSDriverOneCore::_dispose_current(bool p_silent, bool p_canceled) {
51
	if (media.get() != nullptr) {
52
		for (const TrackData &T : tracks) {
53
			T.track.CueEntered(T.token);
54
		}
55
		tracks.clear();
56
		media->MediaFailed(singleton->token_f);
57
		media->MediaEnded(singleton->token_e);
58
		if (!ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) {
59
			media->PlaybackMediaMarkerReached(singleton->token_s);
60
		}
61
		media->Close();
62
		media.reset();
63

64
		if (!p_silent) {
65
			if (p_canceled) {
66
				callable_mp(this, &TTSDriverOneCore::_speech_cancel).call_deferred(id);
67
			} else {
68
				callable_mp(this, &TTSDriverOneCore::_speech_end).call_deferred(id);
69
			}
70
		}
71
		id = -1;
72
		string = Char16String();
73
		playing = false;
74
		paused = false;
75
		offset = 0;
76
	}
77
}
78

79
void TTSDriverOneCore::process_events() {
80
	if (update_requested && !paused && queue.size() > 0 && !is_speaking()) {
81
		TTSUtterance &message = queue.front()->get();
82
		_dispose_current(true);
83
		playing = true;
84

85
		SpeechSynthesizer synth = SpeechSynthesizer();
86

87
		if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) {
88
			synth.Options().IncludeWordBoundaryMetadata(true);
89
		}
90
		if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 5)) {
91
			synth.Options().SpeakingRate(CLAMP(message.rate, 0.5, 6.0));
92
			synth.Options().AudioPitch(CLAMP(message.pitch, 0.0, 2.0));
93
			synth.Options().AudioVolume(CLAMP((double)message.volume / 100.0, 0.0, 1.0));
94
		}
95

96
		winrt::hstring name = winrt::hstring((const wchar_t *)message.voice.utf16().get_data());
97
		IVectorView<VoiceInformation> voices = SpeechSynthesizer::AllVoices();
98
		for (uint32_t i = 0; i < voices.Size(); i++) {
99
			VoiceInformation voice = voices.GetAt(i);
100
			if (voice.Id() == name) {
101
				synth.Voice(voice);
102
				break;
103
			}
104
		}
105

106
		string = message.text.utf16();
107
		winrt::hstring text = winrt::hstring((const wchar_t *)string.get_data());
108

109
		SpeechSynthesisStream stream = synth.SynthesizeTextToStreamAsync(text).get();
110

111
		media = std::make_shared<MediaPlayer>();
112
		token_f = media->MediaFailed([=, this](const MediaPlayer &p_sender, const MediaPlayerFailedEventArgs &p_args) {
113
			_dispose_current(false, true);
114
		});
115
		token_e = media->MediaEnded([=, this](const MediaPlayer &p_sender, const IInspectable &p_args) {
116
			_dispose_current(false, false);
117
		});
118
		if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) {
119
			MediaPlaybackItem mitem = MediaPlaybackItem(MediaSource::CreateFromStream(stream, stream.ContentType()));
120
			media->Source(mitem);
121
			MediaPlaybackTimedMetadataTrackList list = mitem.TimedMetadataTracks();
122

123
			for (uint32_t i = 0; i < list.Size(); i++) {
124
				TimedMetadataTrack track = list.GetAt(i);
125
				if (track.TimedMetadataKind() == TimedMetadataKind::Speech) {
126
					winrt::event_token token = track.CueEntered([=, this](const TimedMetadataTrack &p_sender, const MediaCueEventArgs &p_args) {
127
						SpeechCue sq;
128
						p_args.Cue().as(sq);
129
						int32_t pos16 = sq.StartPositionInInput().Value();
130
						int pos = 0;
131
						for (int j = 0; j < MIN(pos16, string.length()); j++) {
132
							char16_t c = string[j];
133
							if ((c & 0xfffffc00) == 0xd800) {
134
								j++;
135
							}
136
							pos++;
137
						}
138
						callable_mp(singleton, &TTSDriverOneCore::_speech_index_mark).call_deferred(id, pos);
139
					});
140
					tracks.push_back({ track, token });
141
					list.SetPresentationMode(i, TimedMetadataTrackPresentationMode::ApplicationPresented);
142
				}
143
			}
144
		} else {
145
			media->Source(MediaSource::CreateFromStream(stream, stream.ContentType()));
146
			token_s = media->PlaybackMediaMarkerReached([=, this](const MediaPlayer &p_sender, const PlaybackMediaMarkerReachedEventArgs &p_args) {
147
				offset += p_args.PlaybackMediaMarker().Text().size() + 1;
148
				int pos = 0;
149
				for (int j = 0; j < MIN(offset, string.length()); j++) {
150
					char16_t c = string[j];
151
					if ((c & 0xfffffc00) == 0xd800) {
152
						j++;
153
					}
154
					pos++;
155
				}
156
				callable_mp(singleton, &TTSDriverOneCore::_speech_index_mark).call_deferred(id, pos);
157
			});
158
		}
159
		media->AutoPlay(true);
160

161
		id = message.id;
162
		update_requested = false;
163
		paused = false;
164

165
		media->Play();
166

167
		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_STARTED, message.id);
168
		queue.pop_front();
169
	}
170
}
171

172
bool TTSDriverOneCore::is_speaking() const {
173
	return playing;
174
}
175

176
bool TTSDriverOneCore::is_paused() const {
177
	return paused;
178
}
179

180
// Returns one Dictionary per voice reported by SpeechSynthesizer::AllVoices(),
// with the keys "id", "name" (display name) and "language".
Array TTSDriverOneCore::get_voices() const {
	// Converts a WinRT UTF-16 string into a Godot String.
	const auto to_godot_string = [](const winrt::hstring &p_value) {
		return String::utf16((const char16_t *)p_value.c_str(), p_value.size());
	};

	Array result;

	IVectorView<VoiceInformation> installed = SpeechSynthesizer::AllVoices();
	const uint32_t count = installed.Size();
	for (uint32_t idx = 0; idx < count; idx++) {
		VoiceInformation info = installed.GetAt(idx);

		Dictionary entry;
		entry["id"] = to_godot_string(info.Id());
		entry["name"] = to_godot_string(info.DisplayName());
		entry["language"] = to_godot_string(info.Language());
		result.push_back(entry);
	}
	return result;
}
198

199
void TTSDriverOneCore::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int64_t p_utterance_id, bool p_interrupt) {
200
	if (p_interrupt) {
201
		stop();
202
	}
203

204
	if (p_text.is_empty()) {
205
		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, p_utterance_id);
206
		return;
207
	}
208

209
	TTSUtterance message;
210
	message.text = p_text;
211
	message.voice = p_voice;
212
	message.volume = CLAMP(p_volume, 0, 100);
213
	message.pitch = CLAMP(p_pitch, 0.f, 2.f);
214
	message.rate = CLAMP(p_rate, 0.1f, 10.f);
215
	message.id = p_utterance_id;
216
	queue.push_back(message);
217

218
	if (is_paused()) {
219
		resume();
220
	} else {
221
		update_requested = true;
222
	}
223
}
224

225
void TTSDriverOneCore::pause() {
226
	if (!paused && playing) {
227
		media->Pause();
228
		paused = true;
229
	}
230
}
231

232
void TTSDriverOneCore::resume() {
233
	if (paused && playing) {
234
		media->Play();
235
		paused = false;
236
	}
237
}
238

239
void TTSDriverOneCore::stop() {
240
	for (TTSUtterance &message : queue) {
241
		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, message.id);
242
	}
243
	queue.clear();
244
	_dispose_current(false, true);
245
}
246

247
bool TTSDriverOneCore::init() {
248
	if (!ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 1)) {
249
		print_verbose("Text-to-Speech: Cannot initialize OneCore driver, API contract not present!");
250
		return false;
251
	}
252
	if (SpeechSynthesizer::AllVoices().Size() == 0) {
253
		print_verbose("Text-to-Speech: Cannot initialize OneCore driver, no voices found!");
254
		return false;
255
	}
256
	print_verbose("Text-to-Speech: OneCore initialized.");
257
	return true;
258
}
259

260
// Registers the newly constructed driver as the singleton instance.
TTSDriverOneCore::TTSDriverOneCore() {
	TTSDriverOneCore::singleton = this;
}
263

264
// Disposes of the current utterance (with a cancel notification) and then
// unregisters the singleton.
TTSDriverOneCore::~TTSDriverOneCore() {
	const bool silent = false;
	const bool canceled = true;
	// Tear down playback first, while `singleton` still points at this instance.
	_dispose_current(silent, canceled);
	TTSDriverOneCore::singleton = nullptr;
}
268

269
Product

Resources

Company