Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
TensorSpeech
GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/test/test_fastspeech.py
1558 views
1
# -*- coding: utf-8 -*-
2
# Copyright 2020 Minh Nguyen (@dathudeptrai)
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
# http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
16
import logging
17
import os
18
19
import pytest
20
import tensorflow as tf
21
22
from tensorflow_tts.configs import FastSpeechConfig
23
from tensorflow_tts.models import TFFastSpeech
24
25
os.environ["CUDA_VISIBLE_DEVICES"] = ""
26
27
logging.basicConfig(
28
level=logging.DEBUG,
29
format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
30
)
31
32
33
@pytest.mark.parametrize("new_size", [100, 200, 300])
def test_fastspeech_resize_positional_embeddings(new_size):
    """Check that positional embeddings can be resized after a save/load cycle.

    Builds a FastSpeech model, saves its weights, resizes the positional
    embedding table to ``new_size``, then reloads the saved weights with
    ``skip_mismatch=True`` so the resized embedding layer is left untouched
    while all other layers are restored.
    """
    config = FastSpeechConfig()
    fastspeech = TFFastSpeech(config, name="fastspeech")
    fastspeech._build()
    weights_path = "./test.h5"
    fastspeech.save_weights(weights_path)
    try:
        fastspeech.resize_positional_embeddings(new_size)
        # by_name + skip_mismatch: restore every layer except the resized
        # positional embeddings, whose shape no longer matches the checkpoint.
        fastspeech.load_weights(weights_path, by_name=True, skip_mismatch=True)
    finally:
        # Clean up the checkpoint so parametrized runs don't leak files
        # into the working directory.
        if os.path.exists(weights_path):
            os.remove(weights_path)
@pytest.mark.parametrize("num_hidden_layers,n_speakers", [(2, 1), (3, 2), (4, 3)])
def test_fastspeech_trainable(num_hidden_layers, n_speakers):
    """Smoke-test that FastSpeech trains end-to-end for one graph-compiled step.

    Builds a model with asymmetric encoder/decoder depths and ``n_speakers``
    speaker embeddings, runs two training steps on fake inputs (the second is
    timed, after tf.function tracing has happened on the first), and checks
    nothing raises while gradients flow through all trainable variables.
    """
    import time

    config = FastSpeechConfig(
        encoder_num_hidden_layers=num_hidden_layers,
        decoder_num_hidden_layers=num_hidden_layers + 1,
        n_speakers=n_speakers,
    )

    fastspeech = TFFastSpeech(config, name="fastspeech")
    # "learning_rate" is the canonical kwarg; "lr" is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    # Fake inputs: a single utterance of 10 tokens, one frame per token.
    input_ids = tf.convert_to_tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], tf.int32)
    speaker_ids = tf.convert_to_tensor([0], tf.int32)
    duration_gts = tf.convert_to_tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], tf.int32)

    # Ground-truth mel spectrogram: (batch=1, frames=10, mel_bins=80).
    mel_gts = tf.random.uniform(shape=[1, 10, 80], dtype=tf.float32)

    @tf.function
    def one_step_training():
        with tf.GradientTape() as tape:
            mel_outputs_before, _, duration_outputs = fastspeech(
                input_ids, speaker_ids, duration_gts, training=True
            )
            duration_loss = tf.keras.losses.MeanSquaredError()(
                duration_gts, duration_outputs
            )
            mel_loss = tf.keras.losses.MeanSquaredError()(mel_gts, mel_outputs_before)
            loss = duration_loss + mel_loss
        gradients = tape.gradient(loss, fastspeech.trainable_variables)
        optimizer.apply_gradients(zip(gradients, fastspeech.trainable_variables))

        tf.print(loss)

    # First call triggers tf.function tracing; only time the second (steady
    # state) call so the printed duration reflects per-step execution cost.
    for i in range(2):
        if i == 1:
            start = time.time()
        one_step_training()
    print(time.time() - start)