Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
TensorSpeech
GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/tensorflow_tts/models/melgan.py
1558 views
1
# -*- coding: utf-8 -*-
2
# Copyright 2020 The MelGAN Authors and Minh Nguyen (@dathudeptrai)
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
# http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
"""MelGAN Modules."""
16
17
import numpy as np
18
import tensorflow as tf
19
20
from tensorflow_tts.models import BaseModel
21
from tensorflow_tts.utils import GroupConv1D, WeightNormalization
22
23
24
def get_initializer(initializer_seed=42):
    """Return a seeded `GlorotNormal` (Xavier normal) initializer.

    Args:
        initializer_seed: int, seed for the initializer (default: 42).

    Returns:
        A `tf.keras.initializers.GlorotNormal` instance seeded with
        `initializer_seed`.
    """
    initializer = tf.keras.initializers.GlorotNormal(seed=initializer_seed)
    return initializer
32
33
34
class TFReflectionPad1d(tf.keras.layers.Layer):
    """Tensorflow ReflectionPad1d module."""

    def __init__(self, padding_size, padding_type="REFLECT", **kwargs):
        """Initialize TFReflectionPad1d module.

        Args:
            padding_size (int): Number of frames padded on each side of the
                time axis.
            padding_type (str): One of "CONSTANT", "REFLECT" or "SYMMETRIC"
                (default: "REFLECT"); forwarded to `tf.pad`.
        """
        super().__init__(**kwargs)
        self.padding_size = padding_size
        self.padding_type = padding_type

    def call(self, x):
        """Pad the time axis of the input.

        Args:
            x (Tensor): Input tensor (B, T, C).

        Returns:
            Tensor: Padded tensor (B, T + 2 * padding_size, C).
        """
        # Pad only the time dimension; batch and channel axes are untouched.
        time_padding = [self.padding_size, self.padding_size]
        paddings = [[0, 0], time_padding, [0, 0]]
        return tf.pad(x, paddings, self.padding_type)
60
61
62
class TFConvTranspose1d(tf.keras.layers.Layer):
    """Tensorflow ConvTranspose1d module (emulated with Conv2DTranspose)."""

    def __init__(
        self,
        filters,
        kernel_size,
        strides,
        padding,
        is_weight_norm,
        initializer_seed,
        **kwargs
    ):
        """Initialize TFConvTranspose1d module.

        Args:
            filters (int): Number of filters.
            kernel_size (int): Kernel size.
            strides (int): Stride width.
            padding (str): Padding type ("same" or "valid").
            is_weight_norm (bool): Whether to wrap the conv in weight
                normalization.
            initializer_seed (int): Seed for the kernel initializer.
        """
        super().__init__(**kwargs)
        # A 1D transposed convolution is emulated by Conv2DTranspose over a
        # dummy spatial axis of size 1 (added/removed in `call`).
        self.conv1d_transpose = tf.keras.layers.Conv2DTranspose(
            filters=filters,
            kernel_size=(kernel_size, 1),
            strides=(strides, 1),
            # BUGFIX: was hard-coded to "same", silently ignoring the
            # `padding` argument; honor the caller's choice instead.
            padding=padding,
            kernel_initializer=get_initializer(initializer_seed),
        )
        if is_weight_norm:
            self.conv1d_transpose = WeightNormalization(self.conv1d_transpose)

    def call(self, x):
        """Calculate forward propagation.

        Args:
            x (Tensor): Input tensor (B, T, C).

        Returns:
            Tensor: Output tensor (B, T', C').
        """
        x = tf.expand_dims(x, 2)  # (B, T, 1, C)
        x = self.conv1d_transpose(x)
        x = tf.squeeze(x, 2)  # (B, T', C')
        return x
104
105
106
class TFResidualStack(tf.keras.layers.Layer):
    """Tensorflow ResidualStack module."""

    def __init__(
        self,
        kernel_size,
        filters,
        dilation_rate,
        use_bias,
        nonlinear_activation,
        nonlinear_activation_params,
        is_weight_norm,
        initializer_seed,
        **kwargs
    ):
        """Initialize TFResidualStack module.

        Args:
            kernel_size (int): Kernel size.
            filters (int): Number of filters.
            dilation_rate (int): Dilation rate.
            use_bias (bool): Whether to add bias parameter in convolution layers.
            nonlinear_activation (str): Activation function module name
                (looked up on `tf.keras.layers`).
            nonlinear_activation_params (dict): Hyperparameters for the
                activation function.
            is_weight_norm (bool): Whether to apply weight normalization.
            initializer_seed (int): Seed for kernel initializers.
        """
        super().__init__(**kwargs)
        activation_cls = getattr(tf.keras.layers, nonlinear_activation)
        # Main branch: activation -> pad -> dilated conv -> activation -> 1x1 conv.
        # Each conv gets its own freshly-seeded initializer, matching the
        # per-layer seeding of the rest of the file.
        self.blocks = [
            activation_cls(**nonlinear_activation_params),
            TFReflectionPad1d((kernel_size - 1) // 2 * dilation_rate),
            tf.keras.layers.Conv1D(
                filters=filters,
                kernel_size=kernel_size,
                dilation_rate=dilation_rate,
                use_bias=use_bias,
                kernel_initializer=get_initializer(initializer_seed),
            ),
            activation_cls(**nonlinear_activation_params),
            tf.keras.layers.Conv1D(
                filters=filters,
                kernel_size=1,
                use_bias=use_bias,
                kernel_initializer=get_initializer(initializer_seed),
            ),
        ]
        # 1x1 conv on the skip connection so channel counts always match.
        self.shortcut = tf.keras.layers.Conv1D(
            filters=filters,
            kernel_size=1,
            use_bias=use_bias,
            kernel_initializer=get_initializer(initializer_seed),
            name="shortcut",
        )

        # apply weightnorm
        if is_weight_norm:
            self._apply_weightnorm(self.blocks)
            self.shortcut = WeightNormalization(self.shortcut)

    def call(self, x):
        """Calculate forward propagation.

        Args:
            x (Tensor): Input tensor (B, T, C).

        Returns:
            Tensor: Output tensor (B, T, C).
        """
        residual = tf.identity(x)
        for block in self.blocks:
            residual = block(residual)
        return self.shortcut(x) + residual

    def _apply_weightnorm(self, list_layers):
        """Try apply weightnorm for all layer in list_layers."""
        for idx in range(len(list_layers)):
            try:
                layer_name = list_layers[idx].name.lower()
                if "conv1d" in layer_name or "dense" in layer_name:
                    list_layers[idx] = WeightNormalization(list_layers[idx])
            except Exception:
                # best effort: layers without a usable name are left as-is
                pass
188
189
190
class TFMelGANGenerator(BaseModel):
    """Tensorflow MelGAN generator module."""

    def __init__(self, config, **kwargs):
        """Initialize TFMelGANGenerator module.

        Args:
            config: config object of Melgan generator.
        """
        super().__init__(**kwargs)

        # sanity-check hyper parameters: enough filters to halve at every
        # upsampling stage, and cleanly divisible by 2**n_stages
        assert config.filters >= np.prod(config.upsample_scales)
        assert config.filters % (2 ** len(config.upsample_scales)) == 0

        # initial layer: reflection pad followed by a plain conv
        layers = [
            TFReflectionPad1d(
                (config.kernel_size - 1) // 2,
                padding_type=config.padding_type,
                name="first_reflect_padding",
            ),
            tf.keras.layers.Conv1D(
                filters=config.filters,
                kernel_size=config.kernel_size,
                use_bias=config.use_bias,
                kernel_initializer=get_initializer(config.initializer_seed),
            ),
        ]

        for i, upsample_scale in enumerate(config.upsample_scales):
            # upsampling stage: activation + transposed conv, halving channels
            layers.append(
                getattr(tf.keras.layers, config.nonlinear_activation)(
                    **config.nonlinear_activation_params
                )
            )
            layers.append(
                TFConvTranspose1d(
                    filters=config.filters // (2 ** (i + 1)),
                    kernel_size=upsample_scale * 2,
                    strides=upsample_scale,
                    padding="same",
                    is_weight_norm=config.is_weight_norm,
                    initializer_seed=config.initializer_seed,
                    name="conv_transpose_._{}".format(i),
                )
            )

            # residual stacks with exponentially growing dilation
            for j in range(config.stacks):
                layers.append(
                    TFResidualStack(
                        kernel_size=config.stack_kernel_size,
                        filters=config.filters // (2 ** (i + 1)),
                        dilation_rate=config.stack_kernel_size ** j,
                        use_bias=config.use_bias,
                        nonlinear_activation=config.nonlinear_activation,
                        nonlinear_activation_params=config.nonlinear_activation_params,
                        is_weight_norm=config.is_weight_norm,
                        initializer_seed=config.initializer_seed,
                        name="residual_stack_._{}._._{}".format(i, j),
                    )
                )

        # final layers: activation + pad + conv projecting to out_channels
        # (float32 so mixed-precision runs still emit float32 audio)
        layers.append(
            getattr(tf.keras.layers, config.nonlinear_activation)(
                **config.nonlinear_activation_params
            )
        )
        layers.append(
            TFReflectionPad1d(
                (config.kernel_size - 1) // 2,
                padding_type=config.padding_type,
                name="last_reflect_padding",
            )
        )
        layers.append(
            tf.keras.layers.Conv1D(
                filters=config.out_channels,
                kernel_size=config.kernel_size,
                use_bias=config.use_bias,
                kernel_initializer=get_initializer(config.initializer_seed),
                dtype=tf.float32,
            )
        )
        if config.use_final_nolinear_activation:
            layers.append(tf.keras.layers.Activation("tanh", dtype=tf.float32))

        if config.is_weight_norm is True:
            self._apply_weightnorm(layers)

        self.melgan = tf.keras.models.Sequential(layers)

    def call(self, mels, **kwargs):
        """Calculate forward propagation.

        Args:
            mels (Tensor): Input mel spectrogram (B, T, 80).

        Returns:
            Tensor: Output tensor (B, T * prod(upsample_scales), out_channels).
        """
        return self.inference(mels)

    @tf.function(
        input_signature=[
            tf.TensorSpec(shape=[None, None, 80], dtype=tf.float32, name="mels")
        ]
    )
    def inference(self, mels):
        """Run the generator on a batch of mel spectrograms."""
        return self.melgan(mels)

    @tf.function(
        input_signature=[
            tf.TensorSpec(shape=[1, None, 80], dtype=tf.float32, name="mels")
        ]
    )
    def inference_tflite(self, mels):
        """Run the generator with a fixed batch-size-1 signature (TFLite export)."""
        return self.melgan(mels)

    def _apply_weightnorm(self, list_layers):
        """Try apply weightnorm for all layer in list_layers."""
        for idx in range(len(list_layers)):
            try:
                layer_name = list_layers[idx].name.lower()
                if "conv1d" in layer_name or "dense" in layer_name:
                    list_layers[idx] = WeightNormalization(list_layers[idx])
            except Exception:
                # best effort: layers without a usable name are left as-is
                pass

    def _build(self):
        """Build model by passing fake input."""
        fake_mels = tf.random.uniform(shape=[1, 100, 80], dtype=tf.float32)
        self(fake_mels)
317
318
319
class TFMelGANDiscriminator(tf.keras.layers.Layer):
    """Tensorflow MelGAN discriminator module."""

    def __init__(
        self,
        out_channels=1,
        kernel_sizes=None,
        filters=16,
        max_downsample_filters=1024,
        use_bias=True,
        downsample_scales=None,
        nonlinear_activation="LeakyReLU",
        nonlinear_activation_params=None,
        padding_type="REFLECT",
        is_weight_norm=True,
        initializer_seed=0.02,
        **kwargs
    ):
        """Initilize MelGAN discriminator module.

        Args:
            out_channels (int): Number of output channels.
            kernel_sizes (list): List of two kernel sizes. The prod will be used for the first conv layer,
                and the first and the second kernel sizes will be used for the last two layers.
                For example if kernel_sizes = [5, 3], the first layer kernel size will be 5 * 3 = 15.
                the last two layers' kernel size will be 5 and 3, respectively.
                Defaults to [5, 3].
            filters (int): Initial number of filters for conv layer.
            max_downsample_filters (int): Maximum number of filters for downsampling layers.
            use_bias (bool): Whether to add bias parameter in convolution layers.
            downsample_scales (list): List of downsampling scales. Defaults to [4, 4, 4, 4].
            nonlinear_activation (str): Activation function module name.
            nonlinear_activation_params (dict): Hyperparameters for activation function.
                Defaults to {"alpha": 0.2}.
            padding_type (str): Padding type (support only "REFLECT", "CONSTANT", "SYMMETRIC")
            is_weight_norm (bool): Whether to apply weight normalization.
            initializer_seed: Seed for kernel initializers.
                NOTE(review): the historical default is the float 0.02, which
                looks like a stddev rather than a seed — callers normally pass
                config.initializer_seed; confirm before relying on the default.
        """
        super().__init__(**kwargs)

        # Resolve mutable defaults here instead of in the signature
        # (avoids the shared-mutable-default pitfall).
        if kernel_sizes is None:
            kernel_sizes = [5, 3]
        if downsample_scales is None:
            downsample_scales = [4, 4, 4, 4]
        if nonlinear_activation_params is None:
            nonlinear_activation_params = {"alpha": 0.2}

        # check kernel_size is valid: exactly two odd sizes
        assert len(kernel_sizes) == 2
        assert kernel_sizes[0] % 2 == 1
        assert kernel_sizes[1] % 2 == 1

        # add first layer: pad + wide conv (kernel = prod of the two sizes) + activation
        discriminator = [
            TFReflectionPad1d(
                (np.prod(kernel_sizes) - 1) // 2, padding_type=padding_type
            ),
            tf.keras.layers.Conv1D(
                filters=filters,
                kernel_size=int(np.prod(kernel_sizes)),
                use_bias=use_bias,
                kernel_initializer=get_initializer(initializer_seed),
            ),
            getattr(tf.keras.layers, nonlinear_activation)(
                **nonlinear_activation_params
            ),
        ]

        # add downsample layers: strided grouped convs with growing channel counts
        in_chs = filters
        with tf.keras.utils.CustomObjectScope({"GroupConv1D": GroupConv1D}):
            for downsample_scale in downsample_scales:
                out_chs = min(in_chs * downsample_scale, max_downsample_filters)
                discriminator += [
                    GroupConv1D(
                        filters=out_chs,
                        kernel_size=downsample_scale * 10 + 1,
                        strides=downsample_scale,
                        padding="same",
                        use_bias=use_bias,
                        groups=in_chs // 4,
                        kernel_initializer=get_initializer(initializer_seed),
                    )
                ]
                discriminator += [
                    getattr(tf.keras.layers, nonlinear_activation)(
                        **nonlinear_activation_params
                    )
                ]
                in_chs = out_chs

        # add final layers: two plain convs, the last projecting to out_channels
        out_chs = min(in_chs * 2, max_downsample_filters)
        discriminator += [
            tf.keras.layers.Conv1D(
                filters=out_chs,
                kernel_size=kernel_sizes[0],
                padding="same",
                use_bias=use_bias,
                kernel_initializer=get_initializer(initializer_seed),
            )
        ]
        discriminator += [
            getattr(tf.keras.layers, nonlinear_activation)(
                **nonlinear_activation_params
            )
        ]
        discriminator += [
            tf.keras.layers.Conv1D(
                filters=out_channels,
                kernel_size=kernel_sizes[1],
                padding="same",
                use_bias=use_bias,
                kernel_initializer=get_initializer(initializer_seed),
            )
        ]

        if is_weight_norm is True:
            self._apply_weightnorm(discriminator)

        # NOTE(review): attribute name is historically misspelled
        # ("disciminator"); kept as-is for checkpoint/caller compatibility.
        self.disciminator = discriminator

    def call(self, x, **kwargs):
        """Calculate forward propagation.

        Args:
            x (Tensor): Input noise signal (B, T, 1).

        Returns:
            List: List of output tensors of each layer (feature maps for
                feature-matching losses; the last entry is the final logits).
        """
        outs = []
        for f in self.disciminator:
            x = f(x)
            outs += [x]
        return outs

    def _apply_weightnorm(self, list_layers):
        """Try apply weightnorm for all layer in list_layers."""
        for i in range(len(list_layers)):
            try:
                layer_name = list_layers[i].name.lower()
                if "conv1d" in layer_name or "dense" in layer_name:
                    list_layers[i] = WeightNormalization(list_layers[i])
            except Exception:
                # best effort: layers without a usable name are left as-is
                pass
452
453
454
class TFMelGANMultiScaleDiscriminator(BaseModel):
    """MelGAN multi-scale discriminator module."""

    def __init__(self, config, **kwargs):
        """Initilize MelGAN multi-scale discriminator module.

        Args:
            config: config object for melgan discriminator
        """
        super().__init__(**kwargs)
        self.discriminator = []

        # one discriminator per scale, all built from the same config
        for scale_idx in range(config.scales):
            self.discriminator.append(
                TFMelGANDiscriminator(
                    out_channels=config.out_channels,
                    kernel_sizes=config.kernel_sizes,
                    filters=config.filters,
                    max_downsample_filters=config.max_downsample_filters,
                    use_bias=config.use_bias,
                    downsample_scales=config.downsample_scales,
                    nonlinear_activation=config.nonlinear_activation,
                    nonlinear_activation_params=config.nonlinear_activation_params,
                    padding_type=config.padding_type,
                    is_weight_norm=config.is_weight_norm,
                    initializer_seed=config.initializer_seed,
                    name="melgan_discriminator_scale_._{}".format(scale_idx),
                )
            )

        # pooling layer applied between scales to downsample the signal
        pooling_cls = getattr(tf.keras.layers, config.downsample_pooling)
        self.pooling = pooling_cls(**config.downsample_pooling_params)

    def call(self, x, **kwargs):
        """Calculate forward propagation.

        Args:
            x (Tensor): Input noise signal (B, T, 1).

        Returns:
            List: List of list of each discriminator outputs, which consists of each layer output tensors.
        """
        outs = []
        for discriminator in self.discriminator:
            outs.append(discriminator(x))
            # downsample before feeding the next scale
            x = self.pooling(x)
        return outs
499
500