Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
TensorSpeech
GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/examples/cppwin/TensorflowTTSCppInference/FastSpeech2.cpp
1559 views
1
#include "FastSpeech2.h"

#include <memory>
#include <stdexcept>
// Construct an empty wrapper: no model is loaded until Initialize() succeeds.
FastSpeech2::FastSpeech2() : FastSpeech(nullptr)
{
}
// Convenience constructor: immediately try to load the SavedModel from
// SavedModelFolder. Failure is deliberately silent here — FastSpeech stays
// null and a later DoInference() call will throw; use Initialize() directly
// when the caller needs the success/failure result.
FastSpeech2::FastSpeech2(const std::string& SavedModelFolder)
{
    (void)Initialize(SavedModelFolder);
}
bool FastSpeech2::Initialize(const std::string & SavedModelFolder)
17
{
18
try {
19
FastSpeech = new Model(SavedModelFolder);
20
}
21
catch (...) {
22
FastSpeech = nullptr;
23
return false;
24
25
}
26
return true;
27
}
28
29
// Run FastSpeech2 inference over a sequence of phoneme/token IDs.
//
// InputIDs  : token IDs for the text to synthesize (batched as [1, N]).
// SpeakerID : speaker index for multi-speaker models.
// Speed     : duration ratio (1.0 = normal speed).
// Energy    : energy ratio (1.0 = normal).
// F0        : pitch ratio (1.0 = normal).
// EmotionID : emotion index for multi-emotion models; -1 means the model
//             has no emotion input and none is fed.
//
// Returns the model's output copied into a TFTensor<float>.
// Throws std::invalid_argument if no model is loaded.
TFTensor<float> FastSpeech2::DoInference(const std::vector<int32_t>& InputIDs, int32_t SpeakerID, float Speed, float Energy, float F0, int32_t EmotionID)
{
    if (!FastSpeech)
        throw std::invalid_argument("Tried to do inference on unloaded or invalid model!");

    // Convenience reference so that we don't have to constantly dereference pointers.
    Model& Mdl = *FastSpeech;

    // Define the input tensors by their serving-signature names.
    Tensor input_ids{ Mdl, "serving_default_input_ids" };
    Tensor energy_ratios{ Mdl, "serving_default_energy_ratios" };
    Tensor f0_ratios{ Mdl, "serving_default_f0_ratios" };
    Tensor speaker_ids{ Mdl, "serving_default_speaker_ids" };
    Tensor speed_ratios{ Mdl, "serving_default_speed_ratios" };

    // Only multi-emotion models have this extra input. Held in a unique_ptr
    // so it is released even if run()/CopyTensor() throws — the original
    // raw new/delete leaked the tensor on any exception path.
    std::unique_ptr<Tensor> emotion_ids;
    if (EmotionID != -1)
    {
        emotion_ids.reset(new Tensor{ Mdl, "serving_default_emotion_ids" });
        emotion_ids->set_data(std::vector<int32_t>{ EmotionID });
    }

    // This is the shape of the input IDs, our equivalent to tf.expand_dims.
    const std::vector<int64_t> InputIDShape = { 1, (int64_t)InputIDs.size() };

    input_ids.set_data(InputIDs, InputIDShape);
    energy_ratios.set_data(std::vector<float>{ Energy });
    f0_ratios.set_data(std::vector<float>{ F0 });
    speaker_ids.set_data(std::vector<int32_t>{ SpeakerID });
    speed_ratios.set_data(std::vector<float>{ Speed });

    // Define output tensor.
    Tensor output{ Mdl, "StatefulPartitionedCall" };

    // Vector of input tensors, in the order the original code fed them.
    std::vector<Tensor*> inputs = { &input_ids, &speaker_ids, &speed_ratios, &f0_ratios, &energy_ratios };
    if (emotion_ids)
        inputs.push_back(emotion_ids.get());

    // Do inference.
    FastSpeech->run(inputs, output);

    // Copy the result into our own tensor type and return it;
    // emotion_ids (if any) is freed automatically by unique_ptr.
    return VoxUtil::CopyTensor<float>(output);
}
// Release the owned Model, if any.
FastSpeech2::~FastSpeech2()
{
    // delete on a null pointer is a no-op, so no guard is needed.
    delete FastSpeech;
}