Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
TensorSpeech
GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/test/test_fastspeech.py
1558 views
1
# -*- coding: utf-8 -*-
2
# Copyright 2020 Minh Nguyen (@dathudeptrai)
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
# http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
16
import logging
17
import os
18
19
import pytest
20
import tensorflow as tf
21
22
from tensorflow_tts.configs import FastSpeechConfig
23
from tensorflow_tts.models import TFFastSpeech
24
25
os.environ["CUDA_VISIBLE_DEVICES"] = ""
26
27
logging.basicConfig(
28
level=logging.DEBUG,
29
format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
30
)
31
32
33
@pytest.mark.parametrize("new_size", [100, 200, 300])
def test_fastspeech_resize_positional_embeddings(new_size):
    """Check that positional embeddings can be resized after a save/load cycle.

    Builds a FastSpeech model, saves its weights, resizes the positional
    embedding table to ``new_size``, then reloads the saved weights with
    ``skip_mismatch=True`` so the resized embedding layer is left untouched
    while all other layers are restored.
    """
    config = FastSpeechConfig()
    fastspeech = TFFastSpeech(config, name="fastspeech")
    fastspeech._build()
    weights_path = "./test.h5"
    fastspeech.save_weights(weights_path)
    try:
        fastspeech.resize_positional_embeddings(new_size)
        # by_name + skip_mismatch: restore every layer except the resized
        # positional embeddings, whose shape no longer matches the checkpoint.
        fastspeech.load_weights(weights_path, by_name=True, skip_mismatch=True)
    finally:
        # Clean up the checkpoint so parametrized runs don't leak files
        # into the working directory.
        if os.path.exists(weights_path):
            os.remove(weights_path)
@pytest.mark.parametrize("num_hidden_layers,n_speakers", [(2, 1), (3, 2), (4, 3)])
def test_fastspeech_trainable(num_hidden_layers, n_speakers):
    """Smoke-test that FastSpeech trains end-to-end for one graph-compiled step.

    Builds a model with asymmetric encoder/decoder depths and ``n_speakers``
    speaker embeddings, runs two training steps on fake inputs (the second is
    timed, after tf.function tracing has happened on the first), and checks
    nothing raises while gradients flow through all trainable variables.
    """
    import time

    config = FastSpeechConfig(
        encoder_num_hidden_layers=num_hidden_layers,
        decoder_num_hidden_layers=num_hidden_layers + 1,
        n_speakers=n_speakers,
    )

    fastspeech = TFFastSpeech(config, name="fastspeech")
    # "learning_rate" is the canonical kwarg; "lr" is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    # Fake inputs: a single utterance of 10 tokens, one frame per token.
    input_ids = tf.convert_to_tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], tf.int32)
    speaker_ids = tf.convert_to_tensor([0], tf.int32)
    duration_gts = tf.convert_to_tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], tf.int32)

    # Ground-truth mel spectrogram: (batch=1, frames=10, mel_bins=80).
    mel_gts = tf.random.uniform(shape=[1, 10, 80], dtype=tf.float32)

    @tf.function
    def one_step_training():
        with tf.GradientTape() as tape:
            mel_outputs_before, _, duration_outputs = fastspeech(
                input_ids, speaker_ids, duration_gts, training=True
            )
            duration_loss = tf.keras.losses.MeanSquaredError()(
                duration_gts, duration_outputs
            )
            mel_loss = tf.keras.losses.MeanSquaredError()(mel_gts, mel_outputs_before)
            loss = duration_loss + mel_loss
        gradients = tape.gradient(loss, fastspeech.trainable_variables)
        optimizer.apply_gradients(zip(gradients, fastspeech.trainable_variables))

        tf.print(loss)

    # First call triggers tf.function tracing; only time the second (steady
    # state) call so the printed duration reflects per-step execution cost.
    for i in range(2):
        if i == 1:
            start = time.time()
        one_step_training()
    print(time.time() - start)