Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
TensorSpeech
GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/examples/cppwin/TensorflowTTSCppInference/Voice.cpp
1559 views
1
#include "Voice.h"
2
#include "ext/ZCharScanner.h"
3
4
5
6
std::vector<int32_t> Voice::PhonemesToID(const std::string & InTxt)
7
{
8
ZStringDelimiter Delim(InTxt);
9
Delim.AddDelimiter(" ");
10
11
std::vector<int32_t> VecPhones;
12
VecPhones.reserve(Delim.szTokens());
13
14
for (const auto& Pho : Delim.GetTokens())
15
{
16
size_t ArrID = 0;
17
18
if (VoxUtil::FindInVec<std::string>(Pho, Phonemes, ArrID))
19
VecPhones.push_back(PhonemeIDs[ArrID]);
20
else
21
std::cout << "Voice::PhonemesToID() WARNING: Unknown phoneme " << Pho << std::endl;
22
23
24
25
}
26
27
28
return VecPhones;
29
30
}
31
32
void Voice::ReadPhonemes(const std::string &PhonemePath)
33
{
34
std::ifstream Phone(PhonemePath);
35
36
std::string Line;
37
while (std::getline(Phone, Line))
38
{
39
if (Line.find("\t") == std::string::npos)
40
continue;
41
42
43
ZStringDelimiter Deline(Line);
44
Deline.AddDelimiter("\t");
45
46
Phonemes.push_back(Deline[0]);
47
PhonemeIDs.push_back(stoi(Deline[1]));
48
49
50
51
52
}
53
54
}
55
56
void Voice::ReadSpeakers(const std::string &SpeakerPath)
57
{
58
Speakers = GetLinedFile(SpeakerPath);
59
60
}
61
62
void Voice::ReadEmotions(const std::string &EmotionPath)
63
{
64
Emotions = GetLinedFile(EmotionPath);
65
66
}
67
68
void Voice::ReadModelInfo(const std::string &ModelInfoPath)
69
{
70
71
ModelInfo = "";
72
std::vector<std::string> MiLines = GetLinedFile(ModelInfoPath);
73
74
for (const std::string& ss : MiLines)
75
ModelInfo += ss + "\n";
76
77
78
}
79
80
// Reads the text file at Path and returns its lines.
//
// Only lines longer than one character are kept. If the stream is not in a
// good state after opening (e.g. the file does not exist), an empty vector is
// returned.
//
// NOTE(review): the length test also drops lines that are exactly one
// character long, not just empty ones -- confirm that is intended.
std::vector<std::string> Voice::GetLinedFile(const std::string &Path)
{
    std::vector<std::string> Lines;
    std::ifstream Input(Path);

    if (Input.good())
    {
        for (std::string Current; std::getline(Input, Current); )
        {
            if (Current.size() <= 1)
                continue;

            Lines.push_back(Current);
        }
    }

    return Lines;
}
100
101
// Builds a voice from the directory VoxPath.
//
// In order: initializes MelPredictor with "<VoxPath>/melgen" and Vocoder with
// "<VoxPath>/vocoder", passes InPhn to Processor.Initialize when it is
// non-null, loads VoxInfo from "<VoxPath>/info.json", stores inName in Name,
// and then reads phonemes.txt, speakers.txt, emotions.txt and info.txt from
// the same directory.
Voice::Voice(const std::string & VoxPath, const std::string &inName, Phonemizer *InPhn)
{
    // Joins an entry name (with its leading '/') onto the voice directory.
    const auto InVoxDir = [&VoxPath](const char* Entry) { return VoxPath + Entry; };

    MelPredictor.Initialize(InVoxDir("/melgen"));
    Vocoder.Initialize(InVoxDir("/vocoder"));

    if (InPhn != nullptr)
        Processor.Initialize(InPhn);

    VoxInfo = VoxUtil::ReadModelJSON(InVoxDir("/info.json"));
    Name = inName;

    ReadPhonemes(InVoxDir("/phonemes.txt"));
    ReadSpeakers(InVoxDir("/speakers.txt"));
    ReadEmotions(InVoxDir("/emotions.txt"));
    ReadModelInfo(InVoxDir("/info.txt"));
}
126
127
// Passes InPhn to Processor.Initialize.
// The pointer is forwarded as-is; no null check is performed here.
void Voice::AddPhonemizer(Phonemizer *InPhn)
{
    this->Processor.Initialize(InPhn);
}
133
134
135
std::vector<float> Voice::Vocalize(const std::string & Prompt, float Speed, int32_t SpeakerID, float Energy, float F0, int32_t EmotionID)
136
{
137
138
std::string PhoneticTxt = Processor.ProcessTextPhonetic(Prompt,Phonemes,(ETTSLanguage::Enum)VoxInfo.Language);
139
140
TFTensor<float> Mel = MelPredictor.DoInference(PhonemesToID(PhoneticTxt), SpeakerID, Speed, Energy, F0,EmotionID);
141
142
TFTensor<float> AuData = Vocoder.DoInference(Mel);
143
144
145
int64_t Width = AuData.Shape[0];
146
int64_t Height = AuData.Shape[1];
147
int64_t Depth = AuData.Shape[2];
148
//int z = 0;
149
150
std::vector<float> AudioData;
151
AudioData.resize(Height);
152
153
// Code to access 1D array as if it were 3D
154
for (int64_t x = 0; x < Width;x++)
155
{
156
for (int64_t z = 0;z < Depth;z++)
157
{
158
for (int64_t y = 0; y < Height;y++) {
159
int64_t Index = x * Height * Depth + y * Depth + z;
160
AudioData[(size_t)y] = AuData.Data[(size_t)Index];
161
162
}
163
164
}
165
}
166
167
168
return AudioData;
169
}
170
171
172
// Nothing to release by hand; the destructor body is empty.
Voice::~Voice() = default;
175
176