Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
TensorSpeech
GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/test/test_hifigan.py
1558 views
1
# -*- coding: utf-8 -*-
2
# Copyright 2020 TensorFlowTTS Team
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
# http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
16
import logging
17
import os
18
19
import pytest
20
import tensorflow as tf
21
22
from tensorflow_tts.configs import (
23
HifiGANDiscriminatorConfig,
24
HifiGANGeneratorConfig,
25
MelGANDiscriminatorConfig,
26
)
27
from tensorflow_tts.models import (
28
TFHifiGANGenerator,
29
TFHifiGANMultiPeriodDiscriminator,
30
TFMelGANMultiScaleDiscriminator,
31
)
32
33
from examples.hifigan.train_hifigan import TFHifiGANDiscriminator
34
35
# An empty CUDA_VISIBLE_DEVICES exposes no CUDA devices to this process, so
# the models in this module are built without a GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Verbose root-logger output: timestamp, originating module and line, level,
# then the message.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
)
41
42
43
def make_hifigan_generator_args(**kwargs):
    """Build the keyword arguments used to configure a HiFi-GAN generator.

    A fixed set of default hyper-parameters is created and any
    caller-supplied overrides are laid on top of it.

    Args:
        **kwargs: Entries that replace a default of the same name, or are
            added as extra entries when no such default exists.

    Returns:
        dict: The merged settings. A new dictionary (with new nested lists
        and dicts) is built on every call, so callers may mutate the result
        without affecting later calls.
    """
    defaults = dict(
        out_channels=1,
        kernel_size=7,
        filters=128,
        use_bias=True,
        upsample_scales=[8, 8, 2, 2],
        stacks=3,
        stack_kernel_size=[3, 7, 11],
        stack_dilation_rate=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
        nonlinear_activation="LeakyReLU",
        nonlinear_activation_params={"alpha": 0.2},
        padding_type="REFLECT",
        # Key spelling ("nolinear") is kept exactly as the original uses it.
        use_final_nolinear_activation=True,
        is_weight_norm=True,
        initializer_seed=42,
    )
    defaults.update(kwargs)
    return defaults
62
63
64
def make_hifigan_discriminator_args(**kwargs):
    """Build keyword arguments for the two HiFi-GAN discriminator configs.

    Two independent default dictionaries are created: one for the
    multi-period discriminator and one for the multi-scale discriminator.
    The same overrides are applied to both of them.

    Args:
        **kwargs: Entries merged into *both* dictionaries, replacing a
            default of the same name or being added as a new entry.

    Returns:
        list: ``[multi_period_args, multi_scale_args]``, in that order.
    """
    defaults_multiperiod = dict(
        out_channels=1,
        period_scales=[2, 3, 5, 7, 11],
        n_layers=5,
        kernel_size=5,
        strides=3,
        filters=8,
        filter_scales=4,
        max_filters=1024,
        nonlinear_activation="LeakyReLU",
        nonlinear_activation_params={"alpha": 0.2},
        is_weight_norm=True,
        initializer_seed=42,
    )
    defaults_multiperiod.update(kwargs)

    defaults_multiscale = dict(
        out_channels=1,
        scales=3,
        downsample_pooling="AveragePooling1D",
        downsample_pooling_params={"pool_size": 4, "strides": 2},
        kernel_sizes=[5, 3],
        filters=16,
        max_downsample_filters=1024,
        use_bias=True,
        downsample_scales=[4, 4, 4, 4],
        nonlinear_activation="LeakyReLU",
        nonlinear_activation_params={"alpha": 0.2},
        padding_type="REFLECT",
    )
    defaults_multiscale.update(kwargs)

    return [defaults_multiperiod, defaults_multiscale]
96
97
98
@pytest.mark.parametrize(
    "dict_g, dict_d, dict_loss",
    [
        ({}, {}, {}),
        ({"kernel_size": 3}, {}, {}),
        ({"filters": 1024}, {}, {}),
        ({"stack_kernel_size": [1, 2, 3]}, {}, {}),
        ({"stack_kernel_size": [3, 5, 7], "stacks": 3}, {}, {}),
        ({"upsample_scales": [4, 4, 4, 4]}, {}, {}),
        ({"upsample_scales": [8, 8, 2, 2]}, {}, {}),
        ({"filters": 1024, "upsample_scales": [8, 8, 2, 2]}, {}, {}),
    ],
)
def test_hifigan_trainable(dict_g, dict_d, dict_loss):
    """Construct a HiFi-GAN generator and combined discriminator.

    Each parametrized case supplies overrides for the generator defaults;
    every case listed above passes empty discriminator and loss overrides.
    The test contains no assertions: it passes when the configs and models
    can be instantiated without raising.

    Args:
        dict_g: Overrides forwarded to ``make_hifigan_generator_args``.
        dict_d: Overrides forwarded to ``make_hifigan_discriminator_args``
            (applied to both the multi-period and multi-scale settings).
        dict_loss: Part of the parametrization; not referenced by the
            statements in this function.
    """
    # NOTE(review): batch_size and batch_length are not referenced by the
    # statements below; kept so the function's contents stay unchanged.
    batch_size = 4
    batch_length = 4096

    # Plain keyword dictionaries: one for the generator, one per discriminator.
    args_g = make_hifigan_generator_args(**dict_g)
    args_d_p, args_d_s = make_hifigan_discriminator_args(**dict_d)

    # Turn the dictionaries into the config objects the models take.
    args_g = HifiGANGeneratorConfig(**args_g)
    args_d_p = HifiGANDiscriminatorConfig(**args_d_p)
    args_d_s = MelGANDiscriminatorConfig(**args_d_s)

    generator = TFHifiGANGenerator(args_g)

    # The training-script discriminator wraps the multi-period and
    # multi-scale discriminators.
    discriminator_p = TFHifiGANMultiPeriodDiscriminator(args_d_p)
    discriminator_s = TFMelGANMultiScaleDiscriminator(args_d_s)
    discriminator = TFHifiGANDiscriminator(discriminator_p, discriminator_s)
126
127