CoCalc -- test_fastspeech2.py

GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/test/test_fastspeech2.py
1558 views
1
# -*- coding: utf-8 -*-
2
# Copyright 2020 Minh Nguyen (@dathudeptrai)
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
import logging
17
import os
18
import yaml
19

20
import pytest
21
import tensorflow as tf
22

23
from tensorflow_tts.configs import FastSpeech2Config
24
from tensorflow_tts.models import TFFastSpeech2
25
from tensorflow_tts.utils import return_strategy
26

27
from examples.fastspeech2.train_fastspeech2 import FastSpeech2Trainer
28

29
# Set CUDA_VISIBLE_DEVICES to an empty string so that no CUDA device is
# exposed to TensorFlow and the tests in this module run on CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Verbose logging with the emitting module and line number, to make test
# failures easier to trace.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
)
35

36

37
@pytest.mark.parametrize("new_size", [100, 200, 300])
def test_fastspeech_resize_positional_embeddings(new_size):
    """Check that saved weights still load after resizing positional embeddings.

    A model is built from the default ``FastSpeech2Config`` and its weights are
    saved. The positional embeddings are then resized to ``new_size`` and the
    saved weights are loaded back by name with ``skip_mismatch=True``. The test
    passes when none of these steps raises.

    Args:
        new_size: Value passed to ``resize_positional_embeddings``.
    """
    import tempfile

    config = FastSpeech2Config()
    fastspeech2 = TFFastSpeech2(config, name="fastspeech")
    fastspeech2._build()

    # Write the checkpoint into a throwaway directory so the test neither
    # leaves ``test.h5`` behind in the working directory nor shares the file
    # with other (possibly concurrent) test runs.
    with tempfile.TemporaryDirectory() as tmp_dir:
        weights_path = os.path.join(tmp_dir, "test.h5")
        fastspeech2.save_weights(weights_path)
        fastspeech2.resize_positional_embeddings(new_size)
        fastspeech2.load_weights(weights_path, by_name=True, skip_mismatch=True)
45

46

47
@pytest.mark.parametrize(
    "var_train_expr, config_path",
    [
        (None, "./examples/fastspeech2/conf/fastspeech2.v1.yaml"),
        ("embeddings|encoder", "./examples/fastspeech2/conf/fastspeech2.v1.yaml"),
        ("embeddings|encoder", "./examples/fastspeech2/conf/fastspeech2.v2.yaml"),
        ("embeddings|encoder", "./examples/fastspeech2/conf/fastspeech2.baker.v2.yaml"),
        ("embeddings|encoder", "./examples/fastspeech2/conf/fastspeech2.kss.v1.yaml"),
        ("embeddings|encoder", "./examples/fastspeech2/conf/fastspeech2.kss.v2.yaml"),
    ],
)
def test_fastspeech2_train_some_layers(var_train_expr, config_path):
    """Check how ``var_train_expr`` affects the trainer's trainable variables.

    After compiling a ``FastSpeech2Trainer`` with a freshly built model, the
    number of variables in ``trainer._trainable_variables`` must equal the
    number of ``model.trainable_variables`` when ``var_train_expr`` is
    ``None``, and must be strictly smaller otherwise.

    Args:
        var_train_expr: Value stored under the ``"var_train_expr"`` key of the
            trainer config, or ``None``.
        config_path: Path of the YAML training config to load.
    """
    model_config = FastSpeech2Config(n_speakers=5)
    model = TFFastSpeech2(model_config)
    model._build()
    # ``learning_rate`` is the supported argument name; ``lr`` is a deprecated
    # alias that newer Keras releases no longer accept.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    # NOTE(review): ``yaml.Loader`` can construct arbitrary Python objects.
    # That is acceptable for config files checked into the repository, but
    # must not be used on untrusted input.
    with open(config_path) as f:
        train_config = yaml.load(f, Loader=yaml.Loader)

    train_config.update({"outdir": "./", "var_train_expr": var_train_expr})

    strategy = return_strategy()

    trainer = FastSpeech2Trainer(
        config=train_config,
        strategy=strategy,
        steps=0,
        epochs=0,
        is_mixed_precision=False,
    )
    trainer.compile(model, optimizer)

    len_trainable_vars = len(trainer._trainable_variables)
    all_trainable_vars = len(model.trainable_variables)

    if var_train_expr is None:
        tf.debugging.assert_equal(len_trainable_vars, all_trainable_vars)
    else:
        tf.debugging.assert_less(len_trainable_vars, all_trainable_vars)
84

85

86
@pytest.mark.parametrize("num_hidden_layers,n_speakers", [(2, 1), (3, 2), (4, 3)])
def test_fastspeech_trainable(num_hidden_layers, n_speakers):
    """Run two gradient steps on a small FastSpeech2 model and time the second.

    The model is configured with ``num_hidden_layers`` encoder layers and
    ``num_hidden_layers + 1`` decoder layers. Each step computes a mean squared
    error loss on the duration and mel outputs and applies the gradients with
    Adam. The test passes when both steps run without raising; the wall-clock
    time of the second step is printed.

    Args:
        num_hidden_layers: Number of encoder hidden layers (the decoder gets
            one more).
        n_speakers: Number of speakers passed to ``FastSpeech2Config``.
    """
    import time

    config = FastSpeech2Config(
        encoder_num_hidden_layers=num_hidden_layers,
        decoder_num_hidden_layers=num_hidden_layers + 1,
        n_speakers=n_speakers,
    )

    fastspeech2 = TFFastSpeech2(config, name="fastspeech")
    # ``learning_rate`` is the supported argument name; ``lr`` is a deprecated
    # alias that newer Keras releases no longer accept.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    # fake inputs: a batch of one sequence with ten entries
    input_ids = tf.convert_to_tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], tf.int32)
    speaker_ids = tf.convert_to_tensor([0], tf.int32)
    duration_gts = tf.convert_to_tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], tf.int32)
    f0_gts = tf.convert_to_tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], tf.float32)
    energy_gts = tf.convert_to_tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], tf.float32)

    mel_gts = tf.random.uniform(shape=[1, 10, 80], dtype=tf.float32)

    @tf.function
    def one_step_training():
        with tf.GradientTape() as tape:
            mel_outputs_before, _, duration_outputs, _, _ = fastspeech2(
                input_ids, speaker_ids, duration_gts, f0_gts, energy_gts, training=True,
            )
            duration_loss = tf.keras.losses.MeanSquaredError()(
                duration_gts, duration_outputs
            )
            mel_loss = tf.keras.losses.MeanSquaredError()(mel_gts, mel_outputs_before)
            loss = duration_loss + mel_loss
        gradients = tape.gradient(loss, fastspeech2.trainable_variables)
        optimizer.apply_gradients(zip(gradients, fastspeech2.trainable_variables))

        tf.print(loss)

    # The first call is a warm-up (it includes tracing of the tf.function), so
    # only the second call is timed.
    one_step_training()
    start = time.perf_counter()
    one_step_training()
    print(time.perf_counter() - start)
130

131
Product

Resources

Company