Path: blob/master/examples/cppwin/TensorflowTTSCppInference/FastSpeech2.cpp
1559 views
#include "FastSpeech2.h"

#include <memory>
#include <stdexcept>

// NOTE(review): this class owns `FastSpeech` through a raw pointer and deletes it in the
// destructor. If the header does not delete or define the copy/move operations, copying a
// FastSpeech2 would lead to a double delete - confirm against FastSpeech2.h.

/// Creates an instance with no model loaded.
/// DoInference() throws until Initialize() has succeeded.
FastSpeech2::FastSpeech2()
{
    FastSpeech = nullptr;
}

/// Creates an instance and tries to load the model found in SavedModelFolder.
/// The result of Initialize() is not reported here; when loading fails the instance is
/// simply left unloaded and DoInference() will throw.
FastSpeech2::FastSpeech2(const std::string& SavedModelFolder)
{
    // Initialize() releases whatever FastSpeech points at, so it must hold a defined value first.
    FastSpeech = nullptr;
    Initialize(SavedModelFolder);
}

/// Loads (or reloads) the model from SavedModelFolder.
/// @param SavedModelFolder Path handed to the Model constructor.
/// @return true when the Model was constructed, false when its construction threw.
///         On failure the instance is left without a model.
bool FastSpeech2::Initialize(const std::string& SavedModelFolder)
{
    // Release a model loaded by an earlier call; otherwise re-initializing would leak it.
    delete FastSpeech;
    FastSpeech = nullptr;

    try {
        FastSpeech = new Model(SavedModelFolder);
    }
    catch (...) {
        // Deliberately best-effort: any construction failure is reported through the return
        // value. FastSpeech is still nullptr here because the assignment never happened.
        return false;
    }

    return true;
}

/// Runs the loaded model once and returns a copy of its output.
/// @param InputIDs  Values fed to "serving_default_input_ids" with shape {1, InputIDs.size()}.
/// @param SpeakerID Single value fed to "serving_default_speaker_ids".
/// @param Speed     Single value fed to "serving_default_speed_ratios".
/// @param Energy    Single value fed to "serving_default_energy_ratios".
/// @param F0        Single value fed to "serving_default_f0_ratios".
/// @param EmotionID Single value fed to "serving_default_emotion_ids"; pass -1 to omit that
///                  input entirely.
/// @return The "StatefulPartitionedCall" output copied through VoxUtil::CopyTensor<float>.
/// @throws std::invalid_argument when no model is loaded.
TFTensor<float> FastSpeech2::DoInference(const std::vector<int32_t>& InputIDs, int32_t SpeakerID, float Speed, float Energy, float F0, int32_t EmotionID)
{
    if (!FastSpeech)
        throw std::invalid_argument("Tried to do inference on unloaded or invalid model!");

    // Convenience reference so the pointer is not dereferenced over and over.
    Model& Mdl = *FastSpeech;

    // Input tensors that are always supplied.
    Tensor input_ids{ Mdl, "serving_default_input_ids" };
    Tensor energy_ratios{ Mdl, "serving_default_energy_ratios" };
    Tensor f0_ratios{ Mdl, "serving_default_f0_ratios" };
    Tensor speaker_ids{ Mdl, "serving_default_speaker_ids" };
    Tensor speed_ratios{ Mdl, "serving_default_speed_ratios" };

    // Optional emotion input. It is owned by a unique_ptr so that it is released even when
    // set_data(), run() or CopyTensor() throws further down.
    std::unique_ptr<Tensor> emotion_ids;
    const bool HasEmotion = (EmotionID != -1);
    if (HasEmotion)
    {
        emotion_ids = std::make_unique<Tensor>(Mdl, "serving_default_emotion_ids");
        emotion_ids->set_data(std::vector<int32_t>{ EmotionID });
    }

    // Shape of the input IDs: a leading dimension of 1, our equivalent to tf.expand_dims.
    const std::vector<int64_t> InputIDShape = { 1, static_cast<int64_t>(InputIDs.size()) };

    input_ids.set_data(InputIDs, InputIDShape);
    energy_ratios.set_data(std::vector<float>{ Energy });
    f0_ratios.set_data(std::vector<float>{ F0 });
    speaker_ids.set_data(std::vector<int32_t>{ SpeakerID });
    speed_ratios.set_data(std::vector<float>{ Speed });

    // Output tensor.
    Tensor output{ Mdl, "StatefulPartitionedCall" };

    // Input tensors in the order they are handed to run(); the emotion tensor, if any, goes last.
    std::vector<Tensor*> inputs = { &input_ids, &speaker_ids, &speed_ratios, &f0_ratios, &energy_ratios };
    if (HasEmotion)
        inputs.push_back(emotion_ids.get());

    // Do inference.
    Mdl.run(inputs, output);

    // Copy the result out before the local tensors are destroyed.
    return VoxUtil::CopyTensor<float>(output);
}

/// Releases the loaded model, if any.
FastSpeech2::~FastSpeech2()
{
    // delete on a null pointer is a no-op, so no guard is needed.
    delete FastSpeech;
}