Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
TensorSpeech
GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/examples/fastspeech2/fastspeech2_dataset.py
1558 views
1
# -*- coding: utf-8 -*-
2
# Copyright 2020 Minh Nguyen (@dathudeptrai)
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
# http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
"""Dataset modules."""
16
17
import itertools
18
import logging
19
import os
20
import random
21
22
import numpy as np
23
import tensorflow as tf
24
25
from tensorflow_tts.datasets.abstract_dataset import AbstractDataset
26
from tensorflow_tts.utils import find_files
27
28
29
def average_by_duration(x, durs):
    """Average frame-level values (e.g. f0/energy) over each charactor's duration span.

    Args:
        x (np.ndarray): Frame-level values, shape (mel_len,), where
            mel_len == durs.sum().
        durs (np.ndarray): Integer frame durations per charactor, shape (n_chars,).

    Returns:
        np.ndarray: Charactor-level means, shape (n_chars,), dtype float32.
            Zero-valued frames are excluded from each mean; a span with no
            non-zero frames yields 0.0 (np.mean of an empty array is nan).
    """
    # Cumulative boundaries: charactor i owns frames durs_cum[i]:durs_cum[i + 1].
    durs_cum = np.cumsum(np.pad(durs, (1, 0)))

    # calculate charactor f0/energy
    x_char = np.zeros((durs.shape[0],), dtype=np.float32)
    # FIX: the original iterated zip(range(durs.sum()), ...) — the mel frame
    # count instead of the charactor count. zip truncation masked it whenever
    # durs.sum() >= len(durs), but it silently skipped trailing charactors
    # otherwise. Enumerating the span boundaries is always correct.
    for idx, (start, end) in enumerate(zip(durs_cum[:-1], durs_cum[1:])):
        values = x[start:end][np.where(x[start:end] != 0.0)[0]]
        x_char[idx] = np.mean(values) if len(values) > 0 else 0.0  # np.mean([]) = nan.

    return x_char.astype(np.float32)
def tf_average_by_duration(x, durs):
    """Graph-compatible wrapper: run `average_by_duration` inside a tf graph.

    Delegates to the numpy implementation via ``tf.numpy_function`` and
    returns a float32 tensor of charactor-level means.
    """
    return tf.numpy_function(average_by_duration, [x, durs], tf.float32)
class CharactorDurationF0EnergyMelDataset(AbstractDataset):
    """Tensorflow Charactor Duration F0 Energy Mel dataset.

    Pairs up dumped charactor-id, mel, duration, f0 and energy .npy files
    found under ``root_dir`` and exposes them as a batched ``tf.data.Dataset``
    for FastSpeech2 training.
    """

    def __init__(
        self,
        root_dir,
        charactor_query="*-ids.npy",
        mel_query="*-norm-feats.npy",
        duration_query="*-durations.npy",
        f0_query="*-raw-f0.npy",
        energy_query="*-raw-energy.npy",
        f0_stat="./dump/stats_f0.npy",
        energy_stat="./dump/stats_energy.npy",
        charactor_load_fn=np.load,
        mel_load_fn=np.load,
        duration_load_fn=np.load,
        f0_load_fn=np.load,
        energy_load_fn=np.load,
        mel_length_threshold=0,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            charactor_query (str): Query to find charactor files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            duration_query (str): Query to find duration files in root_dir.
            f0_query (str): Query to find f0 files in root_dir.
            energy_query (str): Query to find energy files in root_dir.
            f0_stat (str): str path of f0_stat.
            energy_stat (str): str path of energy_stat.
            charactor_load_fn (func): Function to load charactor file.
            mel_load_fn (func): Function to load feature file.
            duration_load_fn (func): Function to load duration file.
            f0_load_fn (func): Function to load f0 file.
            energy_load_fn (func): Function to load energy file.
            mel_length_threshold (int): Threshold to remove short feature files.

        """
        # find all of charactor and mel files.
        charactor_files = sorted(find_files(root_dir, charactor_query))
        mel_files = sorted(find_files(root_dir, mel_query))
        duration_files = sorted(find_files(root_dir, duration_query))
        f0_files = sorted(find_files(root_dir, f0_query))
        energy_files = sorted(find_files(root_dir, energy_query))

        # assert the number of files
        # FIX: the original message used ${root_dir}, which rendered a literal
        # '$' before the path; the intended f-string placeholder is {root_dir}.
        assert len(mel_files) != 0, f"Not found any mels files in {root_dir}."
        assert (
            len(mel_files)
            == len(charactor_files)
            == len(duration_files)
            == len(f0_files)
            == len(energy_files)
        ), "Number of charactor, mel, duration, f0 and energy files are different"

        if ".npy" in charactor_query:
            # Strip the leading glob '*' to get the filename suffix, then drop
            # it from each basename to recover the utterance id.
            suffix = charactor_query[1:]
            utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
        else:
            # FIX: utt_ids was previously undefined (NameError) for queries
            # without ".npy"; fall back to the bare filename without extension.
            utt_ids = [
                os.path.splitext(os.path.basename(f))[0] for f in charactor_files
            ]

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.charactor_files = charactor_files
        self.duration_files = duration_files
        self.f0_files = f0_files
        self.energy_files = energy_files
        self.mel_load_fn = mel_load_fn
        self.charactor_load_fn = charactor_load_fn
        self.duration_load_fn = duration_load_fn
        self.f0_load_fn = f0_load_fn
        self.energy_load_fn = energy_load_fn
        self.mel_length_threshold = mel_length_threshold

        # [mean, std] arrays used to z-normalize f0/energy in _load_data.
        self.f0_stat = np.load(f0_stat)
        self.energy_stat = np.load(energy_stat)

    def get_args(self):
        """Return the argument list passed to ``generator`` by ``create``."""
        return [self.utt_ids]

    def _norm_mean_std(self, x, mean, std):
        """Z-normalize ``x`` while leaving exact-zero entries (unvoiced) at 0."""
        zero_idxs = np.where(x == 0.0)[0]
        x = (x - mean) / std
        x[zero_idxs] = 0.0
        return x

    def _norm_mean_std_tf(self, x, mean, std):
        """Graph-compatible wrapper around ``_norm_mean_std``."""
        x = tf.numpy_function(self._norm_mean_std, [x, mean, std], tf.float32)
        return x

    def generator(self, utt_ids):
        """Yield one dict of file paths (plus utt_id) per utterance.

        Relies on all file lists being sorted identically so that index ``i``
        refers to the same utterance in every list.
        """
        for i, utt_id in enumerate(utt_ids):
            items = {
                "utt_ids": utt_id,
                "mel_files": self.mel_files[i],
                "charactor_files": self.charactor_files[i],
                "duration_files": self.duration_files[i],
                "f0_files": self.f0_files[i],
                "energy_files": self.energy_files[i],
            }
            yield items

    @tf.function
    def _load_data(self, items):
        """Load the .npy payloads for one utterance and build the training dict.

        f0/energy are z-normalized with the dataset statistics, then averaged
        per charactor using the duration alignment.
        """
        mel = tf.numpy_function(np.load, [items["mel_files"]], tf.float32)
        charactor = tf.numpy_function(np.load, [items["charactor_files"]], tf.int32)
        duration = tf.numpy_function(np.load, [items["duration_files"]], tf.int32)
        f0 = tf.numpy_function(np.load, [items["f0_files"]], tf.float32)
        energy = tf.numpy_function(np.load, [items["energy_files"]], tf.float32)

        f0 = self._norm_mean_std_tf(f0, self.f0_stat[0], self.f0_stat[1])
        energy = self._norm_mean_std_tf(
            energy, self.energy_stat[0], self.energy_stat[1]
        )

        # calculate charactor f0/energy
        f0 = tf_average_by_duration(f0, duration)
        energy = tf_average_by_duration(energy, duration)

        items = {
            "utt_ids": items["utt_ids"],
            "input_ids": charactor,
            # single-speaker dataset: speaker id is hard-coded to 0.
            "speaker_ids": 0,
            "duration_gts": duration,
            "f0_gts": f0,
            "energy_gts": energy,
            "mel_gts": mel,
            "mel_lengths": len(mel),  # number of mel frames (first dimension)
        }

        return items

    def create(
        self,
        allow_cache=False,
        batch_size=1,
        is_shuffle=False,
        map_fn=None,
        reshuffle_each_iteration=True,
    ):
        """Create tf.dataset function.

        Args:
            allow_cache (bool): Cache loaded examples in memory after first epoch.
            batch_size (int): Batch size; incomplete final batches are dropped.
            is_shuffle (bool): Shuffle with a buffer covering the whole dataset.
            map_fn (func): Unused here; kept for interface compatibility.
            reshuffle_each_iteration (bool): Reshuffle on every epoch.

        Returns:
            tf.data.Dataset: Padded, batched, prefetched dataset of dicts.
        """
        output_types = self.get_output_dtypes()
        datasets = tf.data.Dataset.from_generator(
            self.generator, output_types=output_types, args=(self.get_args())
        )

        # load data
        datasets = datasets.map(
            lambda items: self._load_data(items), tf.data.experimental.AUTOTUNE
        )

        # drop utterances whose mel is too short to be useful.
        datasets = datasets.filter(
            lambda x: x["mel_lengths"] > self.mel_length_threshold
        )

        if allow_cache:
            datasets = datasets.cache()

        if is_shuffle:
            datasets = datasets.shuffle(
                self.get_len_dataset(),
                reshuffle_each_iteration=reshuffle_each_iteration,
            )

        # define padded shapes ([] = scalar, [None] = variable-length 1-D, ...)
        padded_shapes = {
            "utt_ids": [],
            "input_ids": [None],
            "speaker_ids": [],
            "duration_gts": [None],
            "f0_gts": [None],
            "energy_gts": [None],
            "mel_gts": [None, None],
            "mel_lengths": [],
        }

        datasets = datasets.padded_batch(
            batch_size, padded_shapes=padded_shapes, drop_remainder=True
        )
        datasets = datasets.prefetch(tf.data.experimental.AUTOTUNE)
        return datasets

    def get_output_dtypes(self):
        """Return the dtype dict for ``tf.data.Dataset.from_generator``."""
        output_types = {
            "utt_ids": tf.string,
            "mel_files": tf.string,
            "charactor_files": tf.string,
            "duration_files": tf.string,
            "f0_files": tf.string,
            "energy_files": tf.string,
        }
        return output_types

    def get_len_dataset(self):
        """Return the number of utterances found at construction time."""
        return len(self.utt_ids)

    def __name__(self):
        # NOTE(review): defined as a method (not the usual module/function
        # __name__ attribute); kept as-is since project code may call it.
        return "CharactorDurationF0EnergyMelDataset"