# Source file: examples/fastspeech2/fastspeech2_dataset.py
# -*- coding: utf-8 -*-
# Copyright 2020 Minh Nguyen (@dathudeptrai)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Dataset modules."""

import itertools
import logging
import os
import random

import numpy as np
import tensorflow as tf

from tensorflow_tts.datasets.abstract_dataset import AbstractDataset
from tensorflow_tts.utils import find_files


def average_by_duration(x, durs):
    """Average frame-level values over each charactor's duration span.

    Args:
        x (np.ndarray): Frame-level values (e.g. f0 or energy) of length
            at least ``durs.sum()``.
        durs (np.ndarray): Integer durations (frame counts) per charactor.

    Returns:
        np.ndarray: Charactor-level means, shape ``(len(durs),)``, float32.
            Zero-valued frames are excluded from each mean; a span with no
            non-zero frames yields 0.0 (guards against np.mean([]) == nan).
    """
    mel_len = durs.sum()
    # durs_cum[i] is the index of the first frame belonging to charactor i.
    durs_cum = np.cumsum(np.pad(durs, (1, 0)))

    # calculate charactor f0/energy
    x_char = np.zeros((durs.shape[0],), dtype=np.float32)
    for idx, start, end in zip(range(mel_len), durs_cum[:-1], durs_cum[1:]):
        values = x[start:end][np.where(x[start:end] != 0.0)[0]]
        x_char[idx] = np.mean(values) if len(values) > 0 else 0.0  # np.mean([]) = nan.

    return x_char.astype(np.float32)


def tf_average_by_duration(x, durs):
    """Graph-compatible wrapper around :func:`average_by_duration`.

    Uses ``tf.numpy_function`` so the numpy implementation can run inside a
    ``tf.data`` pipeline / ``tf.function``.
    """
    outs = tf.numpy_function(average_by_duration, [x, durs], tf.float32)
    return outs


class CharactorDurationF0EnergyMelDataset(AbstractDataset):
    """Tensorflow Charactor Duration F0 Energy Mel dataset."""

    def __init__(
        self,
        root_dir,
        charactor_query="*-ids.npy",
        mel_query="*-norm-feats.npy",
        duration_query="*-durations.npy",
        f0_query="*-raw-f0.npy",
        energy_query="*-raw-energy.npy",
        f0_stat="./dump/stats_f0.npy",
        energy_stat="./dump/stats_energy.npy",
        charactor_load_fn=np.load,
        mel_load_fn=np.load,
        duration_load_fn=np.load,
        f0_load_fn=np.load,
        energy_load_fn=np.load,
        mel_length_threshold=0,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            charactor_query (str): Query to find charactor files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            duration_query (str): Query to find duration files in root_dir.
            f0_query (str): Query to find f0 files in root_dir.
            energy_query (str): Query to find energy files in root_dir.
            f0_stat (str): str path of f0_stat.
            energy_stat (str): str path of energy_stat.
            charactor_load_fn (func): Function to load charactor file.
            mel_load_fn (func): Function to load feature file.
            duration_load_fn (func): Function to load duration file.
            f0_load_fn (func): Function to load f0 file.
            energy_load_fn (func): Function to load energy file.
            mel_length_threshold (int): Threshold to remove short feature files.

        Raises:
            ValueError: If charactor_query does not target ``.npy`` files
                (utterance ids are derived from the ``.npy`` filenames).

        """
        # find all of charactor and mel files.
        charactor_files = sorted(find_files(root_dir, charactor_query))
        mel_files = sorted(find_files(root_dir, mel_query))
        duration_files = sorted(find_files(root_dir, duration_query))
        f0_files = sorted(find_files(root_dir, f0_query))
        energy_files = sorted(find_files(root_dir, energy_query))

        # assert the number of files
        # NOTE: original message interpolated "${root_dir}" — the stray "$"
        # is dropped here since f-strings use plain "{root_dir}".
        assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."
        assert (
            len(mel_files)
            == len(charactor_files)
            == len(duration_files)
            == len(f0_files)
            == len(energy_files)
        ), (
            "Number of charactor, mel, duration, f0 and energy files are different: "
            f"charactor={len(charactor_files)}, mel={len(mel_files)}, "
            f"duration={len(duration_files)}, f0={len(f0_files)}, "
            f"energy={len(energy_files)}."
        )

        if ".npy" in charactor_query:
            suffix = charactor_query[1:]
            utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
        else:
            # Previously this silently fell through and crashed later with a
            # NameError on utt_ids; fail fast with an explicit message instead.
            raise ValueError(
                f"charactor_query must match .npy files, got {charactor_query!r}."
            )

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.charactor_files = charactor_files
        self.duration_files = duration_files
        self.f0_files = f0_files
        self.energy_files = energy_files
        self.mel_load_fn = mel_load_fn
        self.charactor_load_fn = charactor_load_fn
        self.duration_load_fn = duration_load_fn
        self.f0_load_fn = f0_load_fn
        self.energy_load_fn = energy_load_fn
        self.mel_length_threshold = mel_length_threshold

        # [mean, std] arrays used to normalize raw f0/energy in _load_data.
        self.f0_stat = np.load(f0_stat)
        self.energy_stat = np.load(energy_stat)

    def get_args(self):
        """Return generator arguments (the list of utterance ids)."""
        return [self.utt_ids]

    def _norm_mean_std(self, x, mean, std):
        """Mean/std normalize x, keeping exact zeros (unvoiced frames) at zero."""
        zero_idxs = np.where(x == 0.0)[0]
        x = (x - mean) / std
        x[zero_idxs] = 0.0
        return x

    def _norm_mean_std_tf(self, x, mean, std):
        """Graph-compatible wrapper around :meth:`_norm_mean_std`."""
        x = tf.numpy_function(self._norm_mean_std, [x, mean, std], tf.float32)
        return x

    def generator(self, utt_ids):
        """Yield one dict of file paths (and utt id) per utterance.

        Relies on all file lists being sorted identically so index i refers
        to the same utterance in every list.
        """
        for i, utt_id in enumerate(utt_ids):
            mel_file = self.mel_files[i]
            charactor_file = self.charactor_files[i]
            duration_file = self.duration_files[i]
            f0_file = self.f0_files[i]
            energy_file = self.energy_files[i]

            items = {
                "utt_ids": utt_id,
                "mel_files": mel_file,
                "charactor_files": charactor_file,
                "duration_files": duration_file,
                "f0_files": f0_file,
                "energy_files": energy_file,
            }

            yield items

    @tf.function
    def _load_data(self, items):
        """Load .npy payloads for one utterance and build training features.

        Raw f0/energy are normalized with the dumped stats, then averaged per
        charactor using the duration alignment.
        """
        mel = tf.numpy_function(np.load, [items["mel_files"]], tf.float32)
        charactor = tf.numpy_function(np.load, [items["charactor_files"]], tf.int32)
        duration = tf.numpy_function(np.load, [items["duration_files"]], tf.int32)
        f0 = tf.numpy_function(np.load, [items["f0_files"]], tf.float32)
        energy = tf.numpy_function(np.load, [items["energy_files"]], tf.float32)

        f0 = self._norm_mean_std_tf(f0, self.f0_stat[0], self.f0_stat[1])
        energy = self._norm_mean_std_tf(
            energy, self.energy_stat[0], self.energy_stat[1]
        )

        # calculate charactor f0/energy
        f0 = tf_average_by_duration(f0, duration)
        energy = tf_average_by_duration(energy, duration)

        items = {
            "utt_ids": items["utt_ids"],
            "input_ids": charactor,
            "speaker_ids": 0,  # single-speaker dataset.
            "duration_gts": duration,
            "f0_gts": f0,
            "energy_gts": energy,
            "mel_gts": mel,
            "mel_lengths": len(mel),
        }

        return items

    def create(
        self,
        allow_cache=False,
        batch_size=1,
        is_shuffle=False,
        map_fn=None,
        reshuffle_each_iteration=True,
    ):
        """Create tf.dataset function.

        Args:
            allow_cache (bool): Cache loaded examples in memory after filtering.
            batch_size (int): Padded-batch size (remainder batches dropped).
            is_shuffle (bool): Shuffle over the full dataset length.
            map_fn (func): Unused here; kept for interface compatibility.
            reshuffle_each_iteration (bool): Reshuffle on every epoch.

        Returns:
            tf.data.Dataset: Batched dataset of padded feature dicts.
        """
        output_types = self.get_output_dtypes()
        datasets = tf.data.Dataset.from_generator(
            self.generator, output_types=output_types, args=(self.get_args())
        )

        # load data
        datasets = datasets.map(
            lambda items: self._load_data(items), tf.data.experimental.AUTOTUNE
        )

        # drop utterances with too-short mels.
        datasets = datasets.filter(
            lambda x: x["mel_lengths"] > self.mel_length_threshold
        )

        if allow_cache:
            datasets = datasets.cache()

        if is_shuffle:
            datasets = datasets.shuffle(
                self.get_len_dataset(),
                reshuffle_each_iteration=reshuffle_each_iteration,
            )

        # define padded shapes
        padded_shapes = {
            "utt_ids": [],
            "input_ids": [None],
            "speaker_ids": [],
            "duration_gts": [None],
            "f0_gts": [None],
            "energy_gts": [None],
            "mel_gts": [None, None],
            "mel_lengths": [],
        }

        datasets = datasets.padded_batch(
            batch_size, padded_shapes=padded_shapes, drop_remainder=True
        )
        datasets = datasets.prefetch(tf.data.experimental.AUTOTUNE)
        return datasets

    def get_output_dtypes(self):
        """Return the generator's output dtypes (all path/id strings)."""
        output_types = {
            "utt_ids": tf.string,
            "mel_files": tf.string,
            "charactor_files": tf.string,
            "duration_files": tf.string,
            "f0_files": tf.string,
            "energy_files": tf.string,
        }
        return output_types

    def get_len_dataset(self):
        """Return the number of utterances in the dataset."""
        return len(self.utt_ids)

    def __name__(self):
        return "CharactorDurationF0EnergyMelDataset"