Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
TensorSpeech
GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/examples/cppwin/TensorflowTTSCppInference/FastSpeech2.cpp
1559 views
1
#include "FastSpeech2.h"

#include <memory>
#include <stdexcept>
// Construct an empty wrapper: no model is loaded until Initialize() succeeds.
FastSpeech2::FastSpeech2() : FastSpeech(nullptr)
{
}
// Convenience constructor: immediately try to load the SavedModel from
// SavedModelFolder. Failure is deliberately silent here — FastSpeech stays
// null and a later DoInference() call will throw; use Initialize() directly
// when the caller needs the success/failure result.
FastSpeech2::FastSpeech2(const std::string& SavedModelFolder)
{
    (void)Initialize(SavedModelFolder);
}
bool FastSpeech2::Initialize(const std::string & SavedModelFolder)
17
{
18
try {
19
FastSpeech = new Model(SavedModelFolder);
20
}
21
catch (...) {
22
FastSpeech = nullptr;
23
return false;
24
25
}
26
return true;
27
}
28
29
// Run FastSpeech2 inference over a sequence of phoneme/token IDs.
//
// InputIDs  : token IDs for the text to synthesize (batched as [1, N]).
// SpeakerID : speaker index for multi-speaker models.
// Speed     : duration ratio (1.0 = normal speed).
// Energy    : energy ratio (1.0 = normal).
// F0        : pitch ratio (1.0 = normal).
// EmotionID : emotion index for multi-emotion models; -1 means the model
//             has no emotion input and none is fed.
//
// Returns the model's output copied into a TFTensor<float>.
// Throws std::invalid_argument if no model is loaded.
TFTensor<float> FastSpeech2::DoInference(const std::vector<int32_t>& InputIDs, int32_t SpeakerID, float Speed, float Energy, float F0, int32_t EmotionID)
{
    if (!FastSpeech)
        throw std::invalid_argument("Tried to do inference on unloaded or invalid model!");

    // Convenience reference so that we don't have to constantly dereference pointers.
    Model& Mdl = *FastSpeech;

    // Define the input tensors by their serving-signature names.
    Tensor input_ids{ Mdl, "serving_default_input_ids" };
    Tensor energy_ratios{ Mdl, "serving_default_energy_ratios" };
    Tensor f0_ratios{ Mdl, "serving_default_f0_ratios" };
    Tensor speaker_ids{ Mdl, "serving_default_speaker_ids" };
    Tensor speed_ratios{ Mdl, "serving_default_speed_ratios" };

    // Only multi-emotion models have this extra input. Held in a unique_ptr
    // so it is released even if run()/CopyTensor() throws — the original
    // raw new/delete leaked the tensor on any exception path.
    std::unique_ptr<Tensor> emotion_ids;
    if (EmotionID != -1)
    {
        emotion_ids.reset(new Tensor{ Mdl, "serving_default_emotion_ids" });
        emotion_ids->set_data(std::vector<int32_t>{ EmotionID });
    }

    // This is the shape of the input IDs, our equivalent to tf.expand_dims.
    const std::vector<int64_t> InputIDShape = { 1, (int64_t)InputIDs.size() };

    input_ids.set_data(InputIDs, InputIDShape);
    energy_ratios.set_data(std::vector<float>{ Energy });
    f0_ratios.set_data(std::vector<float>{ F0 });
    speaker_ids.set_data(std::vector<int32_t>{ SpeakerID });
    speed_ratios.set_data(std::vector<float>{ Speed });

    // Define output tensor.
    Tensor output{ Mdl, "StatefulPartitionedCall" };

    // Vector of input tensors, in the order the original code fed them.
    std::vector<Tensor*> inputs = { &input_ids, &speaker_ids, &speed_ratios, &f0_ratios, &energy_ratios };
    if (emotion_ids)
        inputs.push_back(emotion_ids.get());

    // Do inference.
    FastSpeech->run(inputs, output);

    // Copy the result into our own tensor type and return it;
    // emotion_ids (if any) is freed automatically by unique_ptr.
    return VoxUtil::CopyTensor<float>(output);
}
// Release the owned Model, if any.
FastSpeech2::~FastSpeech2()
{
    // delete on a null pointer is a no-op, so no guard is needed.
    delete FastSpeech;
}