Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
TensorSpeech
GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/examples/mfa_extraction/fix_mismatch.py
1558 views
1
# -*- coding: utf-8 -*-
2
# Copyright 2020 TensorFlowTTS Team.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
# http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
"""Fix mismatch between sum durations and mel lengths."""
16
17
import numpy as np
18
import os
19
from tqdm import tqdm
20
import click
21
import logging
22
import sys
23
24
25
# Root logger setup: emit DEBUG-and-above records on stdout, each prefixed
# with a timestamp, the emitting module and the source line number.
logging.basicConfig(
    format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
    stream=sys.stdout,
    level=logging.DEBUG,
)
30
31
32
def _shrink_tail(durations, excess):
    """Remove ``excess`` frames from the tail of ``durations`` in place.

    Walks backwards from the last entry, zeroing entries until the excess is
    absorbed. As in the original loop bounds, the first two entries are never
    modified.

    Returns the number of frames that could NOT be removed (0 on success).
    """
    for j in range(1, len(durations) - 1):
        if excess <= 0:
            break
        available = durations[-j]
        if available >= excess:
            # This entry alone can absorb what is left.
            durations[-j] -= excess
            excess = 0
            break
        # Not enough here: consume the whole entry and keep walking back.
        durations[-j] = 0
        excess -= available
    return excess


@click.command()
@click.option("--base_path", default="dump")
@click.option("--trimmed_dur_path", default="dataset/trimmed-durations")
@click.option("--dur_path", default="dataset/durations")
@click.option("--use_norm", default="f")
def fix(base_path: str, dur_path: str, trimmed_dur_path: str, use_norm: str):
    """Make each utterance's durations sum to the length of its mel features.

    For the "train" and "valid" subsets under BASE_PATH, every entry of the
    "ids" folder is processed: the mel features are loaded ("norm-feats" when
    USE_NORM is "t", otherwise "raw-feats"), the durations are loaded from
    TRIMMED_DUR_PATH (falling back to DUR_PATH), and a corrected copy is
    written to the subset's "fix_dur" folder as int32. Summary statistics are
    logged per subset.
    """
    # The feature folder name doubles as the file-name suffix.
    feats_kind = "norm-feats" if use_norm == "t" else "raw-feats"

    for t in ["train", "valid"]:
        mfa_longer = []
        mfa_shorter = []
        big_diff = []
        not_fixed = []
        pre_path = os.path.join(base_path, t)
        os.makedirs(os.path.join(pre_path, "fix_dur"), exist_ok=True)

        logging.info(f"FIXING {t} set ...\n")
        for i in tqdm(os.listdir(os.path.join(pre_path, "ids"))):
            # Everything before the first "-" identifies the utterance.
            utt_id = i.split("-")[0]
            dur_name = f"{utt_id}-durations.npy"

            mel = np.load(
                os.path.join(pre_path, feats_kind, f"{utt_id}-{feats_kind}.npy")
            )

            try:
                dur = np.load(os.path.join(trimmed_dur_path, dur_name))
            except OSError:
                # No readable trimmed durations: fall back to the untrimmed
                # ones. Only I/O errors trigger the fallback, so Ctrl-C and
                # genuine bugs are no longer swallowed.
                dur = np.load(os.path.join(dur_path, dur_name))

            l_mel = len(mel)
            dur_s = np.sum(dur)
            cloned = np.array(dur, copy=True)
            diff = abs(l_mel - dur_s)

            if diff > 30:  # more than 30 frames (about 300 ms per the original note)
                big_diff.append([i, diff])

            if dur_s > l_mel:
                # Durations overshoot the mel length: trim from the end.
                if _shrink_tail(cloned, diff) > 0:
                    # Some excess is left over, so the file is still mismatched.
                    not_fixed.append(i)
                mfa_longer.append(diff)
            elif dur_s < l_mel:
                # Durations fall short: pad the last entry.
                cloned[-1] += diff
                mfa_shorter.append(diff)

            np.save(
                os.path.join(pre_path, "fix_dur", dur_name),
                cloned.astype(np.int32),
                allow_pickle=False,
            )

        logging.info(
            f"{t} stats: number of mfa with longer duration: {len(mfa_longer)}, total diff: {sum(mfa_longer)}"
            f", mean diff: {sum(mfa_longer)/len(mfa_longer) if len(mfa_longer) > 0 else 0}"
        )
        logging.info(
            f"{t} stats: number of mfa with shorter duration: {len(mfa_shorter)}, total diff: {sum(mfa_shorter)}"
            f", mean diff: {sum(mfa_shorter)/len(mfa_shorter) if len(mfa_shorter) > 0 else 0}"
        )
        logging.info(
            f"{t} stats: number of files with a ''big'' duration diff: {len(big_diff)} if number>1 you should check it"
        )
        logging.info(f"{t} stats: not fixed len: {len(not_fixed)}\n")
118
119
120
# Invoke the click command only when this file is executed as a script.
if __name__ == "__main__":
    fix()
122
123