Path: blob/master/platform/windows/tts_driver_onecore.cpp
46006 views
/**************************************************************************/1/* tts_driver_onecore.cpp */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */28/**************************************************************************/2930#include "tts_driver_onecore.h"3132#include "core/object/callable_mp.h"33#include "servers/display/display_server.h"3435TTSDriverOneCore *TTSDriverOneCore::singleton = nullptr;3637void TTSDriverOneCore::_speech_index_mark(int p_msg_id, int p_index_mark) {38DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_BOUNDARY, p_msg_id, p_index_mark);39}4041void TTSDriverOneCore::_speech_cancel(int p_msg_id) {42DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, p_msg_id);43}4445void TTSDriverOneCore::_speech_end(int p_msg_id) {46DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_ENDED, p_msg_id);47}4849void TTSDriverOneCore::_dispose_current(bool p_silent, bool p_canceled) {50if (media.get() != nullptr) {51for (const TrackData &T : tracks) {52T.track.CueEntered(T.token);53}54tracks.clear();55media->MediaFailed(singleton->token_f);56media->MediaEnded(singleton->token_e);57if (!ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) {58media->PlaybackMediaMarkerReached(singleton->token_s);59}60media->Close();61media.reset();6263if (!p_silent) {64if (p_canceled) {65callable_mp(this, &TTSDriverOneCore::_speech_cancel).call_deferred(id);66} else {67callable_mp(this, &TTSDriverOneCore::_speech_end).call_deferred(id);68}69}70id = -1;71string = Char16String();72playing = false;73paused = false;74offset = 0;75}76}7778void TTSDriverOneCore::process_events() {79if (update_requested && !paused && queue.size() > 0 && !is_speaking()) {80TTSUtterance &message = queue.front()->get();81_dispose_current(true);82playing = true;8384SpeechSynthesizer synth = SpeechSynthesizer();8586if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) {87synth.Options().IncludeWordBoundaryMetadata(true);88}89if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 5)) {90synth.Options().SpeakingRate(CLAMP(message.rate, 0.5, 6.0));91synth.Options().AudioPitch(CLAMP(message.pitch, 0.0, 2.0));92synth.Options().AudioVolume(CLAMP((double)message.volume / 100.0, 0.0, 1.0));93}9495winrt::hstring name = winrt::hstring((const wchar_t *)message.voice.utf16().get_data());96IVectorView<VoiceInformation> voices = SpeechSynthesizer::AllVoices();97for (uint32_t i = 0; i < voices.Size(); i++) {98VoiceInformation voice = voices.GetAt(i);99if (voice.Id() == name) {100synth.Voice(voice);101break;102}103}104105string = message.text.utf16();106winrt::hstring text = winrt::hstring((const wchar_t *)string.get_data());107108SpeechSynthesisStream stream = synth.SynthesizeTextToStreamAsync(text).get();109110media = std::make_shared<MediaPlayer>();111token_f = media->MediaFailed([=, this](const MediaPlayer &p_sender, const MediaPlayerFailedEventArgs &p_args) {112_dispose_current(false, true);113});114token_e = media->MediaEnded([=, this](const MediaPlayer &p_sender, const IInspectable &p_args) {115_dispose_current(false, false);116});117if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) {118MediaPlaybackItem mitem = MediaPlaybackItem(MediaSource::CreateFromStream(stream, stream.ContentType()));119media->Source(mitem);120MediaPlaybackTimedMetadataTrackList list = mitem.TimedMetadataTracks();121122for (uint32_t i = 0; i < list.Size(); i++) {123TimedMetadataTrack track = list.GetAt(i);124if (track.TimedMetadataKind() == TimedMetadataKind::Speech) {125winrt::event_token token = track.CueEntered([=, this](const TimedMetadataTrack &p_sender, const MediaCueEventArgs &p_args) {126SpeechCue sq;127p_args.Cue().as(sq);128int32_t pos16 = sq.StartPositionInInput().Value();129int pos = 0;130for (int j = 0; j < MIN(pos16, string.length()); j++) {131char16_t c = string[j];132if ((c & 0xfffffc00) == 0xd800) {133j++;134}135pos++;136}137callable_mp(singleton, &TTSDriverOneCore::_speech_index_mark).call_deferred(id, pos);138});139tracks.push_back({ track, token });140list.SetPresentationMode(i, TimedMetadataTrackPresentationMode::ApplicationPresented);141}142}143} else {144media->Source(MediaSource::CreateFromStream(stream, stream.ContentType()));145token_s = media->PlaybackMediaMarkerReached([=, this](const MediaPlayer &p_sender, const PlaybackMediaMarkerReachedEventArgs &p_args) {146offset += p_args.PlaybackMediaMarker().Text().size() + 1;147int pos = 0;148for (int j = 0; j < MIN(offset, string.length()); j++) {149char16_t c = string[j];150if ((c & 0xfffffc00) == 0xd800) {151j++;152}153pos++;154}155callable_mp(singleton, &TTSDriverOneCore::_speech_index_mark).call_deferred(id, pos);156});157}158media->AutoPlay(true);159160id = message.id;161update_requested = false;162paused = false;163164media->Play();165166DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_STARTED, message.id);167queue.pop_front();168}169}170171bool TTSDriverOneCore::is_speaking() const {172return playing;173}174175bool TTSDriverOneCore::is_paused() const {176return paused;177}178179Array TTSDriverOneCore::get_voices() const {180Array list;181182IVectorView<VoiceInformation> voices = SpeechSynthesizer::AllVoices();183for (uint32_t i = 0; i < voices.Size(); i++) {184VoiceInformation voice = voices.GetAt(i);185winrt::hstring vname = voice.DisplayName();186winrt::hstring vid = voice.Id();187winrt::hstring vlang = voice.Language();188189Dictionary voice_d;190voice_d["id"] = String::utf16((const char16_t *)vid.c_str(), vid.size());191voice_d["name"] = String::utf16((const char16_t *)vname.c_str(), vname.size());192voice_d["language"] = String::utf16((const char16_t *)vlang.c_str(), vlang.size());193list.push_back(voice_d);194}195return list;196}197198void TTSDriverOneCore::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int64_t p_utterance_id, bool p_interrupt) {199if (p_interrupt) {200stop();201}202203if (p_text.is_empty()) {204DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, p_utterance_id);205return;206}207208TTSUtterance message;209message.text = p_text;210message.voice = p_voice;211message.volume = CLAMP(p_volume, 0, 100);212message.pitch = CLAMP(p_pitch, 0.f, 2.f);213message.rate = CLAMP(p_rate, 0.1f, 10.f);214message.id = p_utterance_id;215queue.push_back(message);216217if (is_paused()) {218resume();219} else {220update_requested = true;221}222}223224void TTSDriverOneCore::pause() {225if (!paused && playing) {226media->Pause();227paused = true;228}229}230231void TTSDriverOneCore::resume() {232if (paused && playing) {233media->Play();234paused = false;235}236}237238void TTSDriverOneCore::stop() {239for (TTSUtterance &message : queue) {240DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, message.id);241}242queue.clear();243_dispose_current(false, true);244}245246bool TTSDriverOneCore::init() {247if (!ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 1)) {248print_verbose("Text-to-Speech: Cannot initialize OneCore driver, API contract not present!");249return false;250}251if (SpeechSynthesizer::AllVoices().Size() == 0) {252print_verbose("Text-to-Speech: Cannot initialize OneCore driver, no voices found!");253return false;254}255print_verbose("Text-to-Speech: OneCore initialized.");256return true;257}258259TTSDriverOneCore::TTSDriverOneCore() {260singleton = this;261}262263TTSDriverOneCore::~TTSDriverOneCore() {264_dispose_current(false, true);265singleton = nullptr;266}267268269