# Source: TensorFlowTTS — examples/fastspeech2_libritts/fastspeech2_dataset.py
# -*- coding: utf-8 -*-
# Copyright 2020 TensorFlowTTS Team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Dataset modules."""

import os

import numpy as np
import tensorflow as tf

from tensorflow_tts.datasets.abstract_dataset import AbstractDataset
from tensorflow_tts.utils import find_files


def average_by_duration(x, durs):
    """Average a frame-level sequence (f0/energy) over per-charactor durations.

    Args:
        x (np.ndarray): Frame-level values; length equals ``durs.sum()``.
        durs (np.ndarray): Integer duration (frame count) per charactor.

    Returns:
        np.ndarray: float32 array of shape ``(len(durs),)``; each entry is the
        mean of the non-zero frames belonging to that charactor, or 0.0 when
        the charactor has no non-zero frames (e.g. unvoiced f0, zero duration).
    """
    # Charactor idx covers frames [durs_cum[idx], durs_cum[idx + 1]).
    durs_cum = np.cumsum(np.pad(durs, (1, 0)))

    # calculate charactor f0/energy
    x_char = np.zeros((durs.shape[0],), dtype=np.float32)
    # NOTE: the original bounded this loop with range(durs.sum()) — the total
    # frame count, not the charactor count; enumerate over the boundary pairs
    # is the intended iteration.
    for idx, (start, end) in enumerate(zip(durs_cum[:-1], durs_cum[1:])):
        values = x[start:end][np.where(x[start:end] != 0.0)[0]]
        # Guard the empty case: np.mean([]) = nan.
        x_char[idx] = np.mean(values) if len(values) > 0 else 0.0

    return x_char.astype(np.float32)
def tf_average_by_duration(x, durs):
    """Graph-mode wrapper: run `average_by_duration` as a tf.numpy_function."""
    return tf.numpy_function(average_by_duration, [x, durs], tf.float32)
class CharactorDurationF0EnergyMelDataset(AbstractDataset):
    """Tensorflow Charactor Duration F0 Energy Mel dataset."""

    def __init__(
        self,
        root_dir,
        charactor_query="*-ids.npy",
        mel_query="*-norm-feats.npy",
        duration_query="*-durations.npy",
        f0_query="*-raw-f0.npy",
        energy_query="*-raw-energy.npy",
        f0_stat="./dump/stats_f0.npy",
        energy_stat="./dump/stats_energy.npy",
        charactor_load_fn=np.load,
        mel_load_fn=np.load,
        duration_load_fn=np.load,
        f0_load_fn=np.load,
        energy_load_fn=np.load,
        mel_length_threshold=0,
        speakers_map=None,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            charactor_query (str): Query to find charactor files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            duration_query (str): Query to find duration files in root_dir.
            f0_query (str): Query to find f0 files in root_dir.
            energy_query (str): Query to find energy files in root_dir.
            f0_stat (str): str path of f0_stat.
            energy_stat (str): str path of energy_stat.
            charactor_load_fn (func): Function to load charactor file.
            mel_load_fn (func): Function to load feature file.
            duration_load_fn (func): Function to load duration file.
            f0_load_fn (func): Function to load f0 file.
            energy_load_fn (func): Function to load energy file.
            mel_length_threshold (int): Threshold to remove short feature files.
            speakers_map (dict): Speakers map generated in dataset preprocessing.

        Raises:
            ValueError: If charactor_query does not target ``.npy`` files.
        """
        # find all of charactor and mel files.
        charactor_files = sorted(find_files(root_dir, charactor_query))
        mel_files = sorted(find_files(root_dir, mel_query))
        duration_files = sorted(find_files(root_dir, duration_query))
        f0_files = sorted(find_files(root_dir, f0_query))
        energy_files = sorted(find_files(root_dir, energy_query))

        # assert the number of files; per-utterance files are matched purely by
        # sorted order, so all five lists must have the same length.
        # (Fixed: the original message interpolated "${root_dir}", leaving a
        # stray "$" in the output.)
        assert len(mel_files) != 0, f"Not found any mels files in {root_dir}."
        assert (
            len(mel_files)
            == len(charactor_files)
            == len(duration_files)
            == len(f0_files)
            == len(energy_files)
        ), "Number of charactor, mel, duration, f0 and energy files are different"

        assert (
            speakers_map is not None
        ), "No speakers map found. Did you set --dataset_mapping?"

        if ".npy" in charactor_query:
            # Strip the leading "*" so "<utt_id>-ids.npy" -> "<utt_id>".
            suffix = charactor_query[1:]
            utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
        else:
            # Fail fast: the original fell through and hit a NameError on
            # utt_ids below for non-.npy queries.
            raise ValueError(
                f"charactor_query must match .npy files, got {charactor_query!r}"
            )

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.charactor_files = charactor_files
        self.duration_files = duration_files
        self.f0_files = f0_files
        self.energy_files = energy_files
        self.mel_load_fn = mel_load_fn
        self.charactor_load_fn = charactor_load_fn
        self.duration_load_fn = duration_load_fn
        self.f0_load_fn = f0_load_fn
        self.energy_load_fn = energy_load_fn
        self.mel_length_threshold = mel_length_threshold
        self.speakers_map = speakers_map
        # utt_id is expected to be "<speaker>_<rest>", so the prefix before the
        # first "_" indexes speakers_map.
        self.speakers = [self.speakers_map[i.split("_")[0]] for i in self.utt_ids]
        print("Speaker: utt_id", list(zip(self.speakers, self.utt_ids)))
        # Stats arrays hold [mean, std] used below for normalization.
        self.f0_stat = np.load(f0_stat)
        self.energy_stat = np.load(energy_stat)

    def get_args(self):
        """Return the arguments handed to `generator` (the utterance ids)."""
        return [self.utt_ids]

    def _norm_mean_std(self, x, mean, std):
        """Mean/std-normalize x, keeping exact zeros (unvoiced frames) at zero."""
        zero_idxs = np.where(x == 0.0)[0]
        x = (x - mean) / std
        x[zero_idxs] = 0.0
        return x

    def _norm_mean_std_tf(self, x, mean, std):
        """Graph-mode wrapper around `_norm_mean_std`."""
        x = tf.numpy_function(self._norm_mean_std, [x, mean, std], tf.float32)
        return x

    def generator(self, utt_ids):
        """Yield a dict of file paths and speaker id for each utterance.

        Relies on the index alignment between utt_ids and the sorted file
        lists built in __init__.
        """
        for i, utt_id in enumerate(utt_ids):
            mel_file = self.mel_files[i]
            charactor_file = self.charactor_files[i]
            duration_file = self.duration_files[i]
            f0_file = self.f0_files[i]
            energy_file = self.energy_files[i]
            speaker_id = self.speakers[i]

            items = {
                "utt_ids": utt_id,
                "mel_files": mel_file,
                "charactor_files": charactor_file,
                "duration_files": duration_file,
                "f0_files": f0_file,
                "energy_files": energy_file,
                "speaker_ids": speaker_id,
            }

            yield items

    @tf.function
    def _load_data(self, items):
        """Load per-utterance arrays from disk and build the training dict."""
        mel = tf.numpy_function(np.load, [items["mel_files"]], tf.float32)
        charactor = tf.numpy_function(np.load, [items["charactor_files"]], tf.int32)
        duration = tf.numpy_function(np.load, [items["duration_files"]], tf.int32)
        f0 = tf.numpy_function(np.load, [items["f0_files"]], tf.float32)
        energy = tf.numpy_function(np.load, [items["energy_files"]], tf.float32)

        # Normalize raw f0/energy with the dataset-level [mean, std] stats.
        f0 = self._norm_mean_std_tf(f0, self.f0_stat[0], self.f0_stat[1])
        energy = self._norm_mean_std_tf(
            energy, self.energy_stat[0], self.energy_stat[1]
        )

        # calculate charactor f0/energy
        f0 = tf_average_by_duration(f0, duration)
        energy = tf_average_by_duration(energy, duration)

        items = {
            "utt_ids": items["utt_ids"],
            "input_ids": charactor,
            "speaker_ids": items["speaker_ids"],
            "duration_gts": duration,
            "f0_gts": f0,
            "energy_gts": energy,
            "mel_gts": mel,
            "mel_lengths": len(mel),  # number of mel frames
        }

        return items

    def create(
        self,
        allow_cache=False,
        batch_size=1,
        is_shuffle=False,
        map_fn=None,
        reshuffle_each_iteration=True,
    ):
        """Create tf.dataset function.

        Args:
            allow_cache (bool): Cache loaded examples in memory after epoch 1.
            batch_size (int): Batch size for padded_batch.
            is_shuffle (bool): Shuffle the whole dataset each epoch.
            map_fn (func): Unused here; kept for interface compatibility.
            reshuffle_each_iteration (bool): Passed to Dataset.shuffle.

        Returns:
            tf.data.Dataset: Batched, padded, prefetched dataset.
        """
        output_types = self.get_output_dtypes()
        datasets = tf.data.Dataset.from_generator(
            self.generator, output_types=output_types, args=(self.get_args())
        )

        # load data
        datasets = datasets.map(
            lambda items: self._load_data(items), tf.data.experimental.AUTOTUNE
        )

        # Drop examples shorter than the mel length threshold.
        datasets = datasets.filter(
            lambda x: x["mel_lengths"] > self.mel_length_threshold
        )

        if allow_cache:
            datasets = datasets.cache()

        if is_shuffle:
            datasets = datasets.shuffle(
                self.get_len_dataset(),
                reshuffle_each_iteration=reshuffle_each_iteration,
            )

        # define padded shapes
        padded_shapes = {
            "utt_ids": [],
            "input_ids": [None],
            "speaker_ids": [],
            "duration_gts": [None],
            "f0_gts": [None],
            "energy_gts": [None],
            "mel_gts": [None, None],
            "mel_lengths": [],
        }

        datasets = datasets.padded_batch(
            batch_size, padded_shapes=padded_shapes, drop_remainder=True
        )
        datasets = datasets.prefetch(tf.data.experimental.AUTOTUNE)
        return datasets

    def get_output_dtypes(self):
        """Return the dtype dict matching the items yielded by `generator`."""
        output_types = {
            "utt_ids": tf.string,
            "mel_files": tf.string,
            "charactor_files": tf.string,
            "duration_files": tf.string,
            "f0_files": tf.string,
            "energy_files": tf.string,
            "speaker_ids": tf.int32,
        }
        return output_types

    def get_len_dataset(self):
        """Return the number of utterances in the dataset."""
        return len(self.utt_ids)

    def __name__(self):
        return "CharactorDurationF0EnergyMelDataset"