# Path: examples/fastspeech2_libritts/fastspeech2_dataset.py
# -*- coding: utf-8 -*-
# Copyright 2020 TensorFlowTTS Team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Dataset modules."""

import os

import numpy as np
import tensorflow as tf

from tensorflow_tts.datasets.abstract_dataset import AbstractDataset
from tensorflow_tts.utils import find_files


def average_by_duration(x, durs):
    """Average frame-level values over each charactor's duration span.

    Args:
        x (np.ndarray): 1-D frame-level values (e.g. raw f0 or energy).
        durs (np.ndarray): Integer frame durations, one entry per charactor.

    Returns:
        np.ndarray: Charactor-level averages, shape (len(durs),), float32.
    """
    # durs_cum[i] is the index of the first frame belonging to charactor i,
    # so (durs_cum[i], durs_cum[i + 1]) bounds charactor i's frame span.
    durs_cum = np.cumsum(np.pad(durs, (1, 0)))

    # calculate charactor f0/energy
    x_char = np.zeros((durs.shape[0],), dtype=np.float32)
    for idx, (start, end) in enumerate(zip(durs_cum[:-1], durs_cum[1:])):
        # Average only the non-zero frames of the span (zeros mark
        # unvoiced/invalid frames, see _norm_mean_std below).
        values = x[start:end][np.where(x[start:end] != 0.0)[0]]
        # Guard the empty case explicitly: np.mean([]) would yield NaN.
        x_char[idx] = np.mean(values) if len(values) > 0 else 0.0

    return x_char.astype(np.float32)


def tf_average_by_duration(x, durs):
    """Graph-compatible wrapper of average_by_duration via tf.numpy_function."""
    outs = tf.numpy_function(average_by_duration, [x, durs], tf.float32)
    return outs


class CharactorDurationF0EnergyMelDataset(AbstractDataset):
    """Tensorflow Charactor Duration F0 Energy Mel dataset."""

    def __init__(
        self,
        root_dir,
        charactor_query="*-ids.npy",
        mel_query="*-norm-feats.npy",
        duration_query="*-durations.npy",
        f0_query="*-raw-f0.npy",
        energy_query="*-raw-energy.npy",
        f0_stat="./dump/stats_f0.npy",
        energy_stat="./dump/stats_energy.npy",
        charactor_load_fn=np.load,
        mel_load_fn=np.load,
        duration_load_fn=np.load,
        f0_load_fn=np.load,
        energy_load_fn=np.load,
        mel_length_threshold=0,
        speakers_map=None,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            charactor_query (str): Query to find charactor files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            duration_query (str): Query to find duration files in root_dir.
            f0_query (str): Query to find f0 files in root_dir.
            energy_query (str): Query to find energy files in root_dir.
            f0_stat (str): str path of f0_stat.
            energy_stat (str): str path of energy_stat.
            charactor_load_fn (func): Function to load charactor file.
            mel_load_fn (func): Function to load feature file.
            duration_load_fn (func): Function to load duration file.
            f0_load_fn (func): Function to load f0 file.
            energy_load_fn (func): Function to load energy file.
            mel_length_threshold (int): Threshold to remove short feature files.
            speakers_map (dict): Speakers map generated in dataset preprocessing.

        """
        # find all of charactor and mel files.
        charactor_files = sorted(find_files(root_dir, charactor_query))
        mel_files = sorted(find_files(root_dir, mel_query))
        duration_files = sorted(find_files(root_dir, duration_query))
        f0_files = sorted(find_files(root_dir, f0_query))
        energy_files = sorted(find_files(root_dir, energy_query))

        # assert the number of files
        # (fixed: "${root_dir}" inside the f-string printed a literal "$")
        assert len(mel_files) != 0, f"Not found any mels files in {root_dir}."
        assert (
            len(mel_files)
            == len(charactor_files)
            == len(duration_files)
            == len(f0_files)
            == len(energy_files)
        ), "Number of charactor, mel, duration, f0 and energy files are different"

        assert (
            speakers_map is not None
        ), "No speakers map found. Did you set --dataset_mapping?"

        # Utterance id = file basename with the query suffix stripped.
        # NOTE(review): utt_ids is only bound when the query ends in ".npy";
        # a non-npy query would raise NameError below — TODO confirm intended.
        if ".npy" in charactor_query:
            suffix = charactor_query[1:]
            utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.charactor_files = charactor_files
        self.duration_files = duration_files
        self.f0_files = f0_files
        self.energy_files = energy_files
        self.mel_load_fn = mel_load_fn
        self.charactor_load_fn = charactor_load_fn
        self.duration_load_fn = duration_load_fn
        self.f0_load_fn = f0_load_fn
        self.energy_load_fn = energy_load_fn
        self.mel_length_threshold = mel_length_threshold
        self.speakers_map = speakers_map
        # Utterance ids look like "<speaker>_<rest>"; map the speaker prefix
        # to its integer id via the preprocessing-time speakers_map.
        self.speakers = [self.speakers_map[i.split("_")[0]] for i in self.utt_ids]
        print("Speaker: utt_id", list(zip(self.speakers, self.utt_ids)))
        # Stats arrays are indexed as [mean, std] in _load_data.
        self.f0_stat = np.load(f0_stat)
        self.energy_stat = np.load(energy_stat)

    def get_args(self):
        """Return generator arguments (the utterance-id list)."""
        return [self.utt_ids]

    def _norm_mean_std(self, x, mean, std):
        """Mean/std-normalize x, keeping exact zeros at zero.

        Zero entries mark unvoiced/invalid frames and must stay zero after
        normalization so downstream averaging can skip them.
        """
        zero_idxs = np.where(x == 0.0)[0]
        x = (x - mean) / std
        x[zero_idxs] = 0.0
        return x

    def _norm_mean_std_tf(self, x, mean, std):
        """Graph-compatible wrapper of _norm_mean_std."""
        x = tf.numpy_function(self._norm_mean_std, [x, mean, std], tf.float32)
        return x

    def generator(self, utt_ids):
        """Yield per-utterance dicts of file paths and speaker ids."""
        for i, utt_id in enumerate(utt_ids):
            mel_file = self.mel_files[i]
            charactor_file = self.charactor_files[i]
            duration_file = self.duration_files[i]
            f0_file = self.f0_files[i]
            energy_file = self.energy_files[i]
            speaker_id = self.speakers[i]

            items = {
                "utt_ids": utt_id,
                "mel_files": mel_file,
                "charactor_files": charactor_file,
                "duration_files": duration_file,
                "f0_files": f0_file,
                "energy_files": energy_file,
                "speaker_ids": speaker_id,
            }

            yield items

    @tf.function
    def _load_data(self, items):
        """Load .npy payloads for one utterance and build the training dict.

        f0/energy are mean/std-normalized (zeros preserved), then averaged
        per charactor using the duration alignment.
        """
        mel = tf.numpy_function(np.load, [items["mel_files"]], tf.float32)
        charactor = tf.numpy_function(np.load, [items["charactor_files"]], tf.int32)
        duration = tf.numpy_function(np.load, [items["duration_files"]], tf.int32)
        f0 = tf.numpy_function(np.load, [items["f0_files"]], tf.float32)
        energy = tf.numpy_function(np.load, [items["energy_files"]], tf.float32)

        f0 = self._norm_mean_std_tf(f0, self.f0_stat[0], self.f0_stat[1])
        energy = self._norm_mean_std_tf(
            energy, self.energy_stat[0], self.energy_stat[1]
        )

        # calculate charactor f0/energy
        f0 = tf_average_by_duration(f0, duration)
        energy = tf_average_by_duration(energy, duration)

        items = {
            "utt_ids": items["utt_ids"],
            "input_ids": charactor,
            "speaker_ids": items["speaker_ids"],
            "duration_gts": duration,
            "f0_gts": f0,
            "energy_gts": energy,
            "mel_gts": mel,
            "mel_lengths": len(mel),
        }

        return items

    def create(
        self,
        allow_cache=False,
        batch_size=1,
        is_shuffle=False,
        map_fn=None,
        reshuffle_each_iteration=True,
    ):
        """Create tf.dataset function."""
        output_types = self.get_output_dtypes()
        datasets = tf.data.Dataset.from_generator(
            self.generator, output_types=output_types, args=(self.get_args())
        )

        # load data
        datasets = datasets.map(
            lambda items: self._load_data(items), tf.data.experimental.AUTOTUNE
        )

        # drop utterances whose mel is too short to be useful
        datasets = datasets.filter(
            lambda x: x["mel_lengths"] > self.mel_length_threshold
        )

        if allow_cache:
            datasets = datasets.cache()

        if is_shuffle:
            datasets = datasets.shuffle(
                self.get_len_dataset(),
                reshuffle_each_iteration=reshuffle_each_iteration,
            )

        # define padded shapes
        padded_shapes = {
            "utt_ids": [],
            "input_ids": [None],
            "speaker_ids": [],
            "duration_gts": [None],
            "f0_gts": [None],
            "energy_gts": [None],
            "mel_gts": [None, None],
            "mel_lengths": [],
        }

        datasets = datasets.padded_batch(
            batch_size, padded_shapes=padded_shapes, drop_remainder=True
        )
        datasets = datasets.prefetch(tf.data.experimental.AUTOTUNE)
        return datasets

    def get_output_dtypes(self):
        """Return the dtype dict matching the generator's yielded items."""
        output_types = {
            "utt_ids": tf.string,
            "mel_files": tf.string,
            "charactor_files": tf.string,
            "duration_files": tf.string,
            "f0_files": tf.string,
            "energy_files": tf.string,
            "speaker_ids": tf.int32,
        }
        return output_types

    def get_len_dataset(self):
        """Return the number of utterances."""
        return len(self.utt_ids)

    def __name__(self):
        return "CharactorDurationF0EnergyMelDataset"