Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
prophesier
GitHub Repository: prophesier/diff-svc
Path: blob/main/modules/parallel_wavegan/utils/utils.py
694 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 Tomoki Hayashi
4
# MIT License (https://opensource.org/licenses/MIT)
5
6
"""Utility functions."""
7
8
import fnmatch
9
import logging
10
import os
11
import sys
12
13
import h5py
14
import numpy as np
15
16
17
def find_files(root_dir, query="*.wav", include_root_dir=True):
    """Find files recursively.

    Walks ``root_dir`` (following symbolic links) and collects every file
    whose basename matches the ``fnmatch``-style pattern ``query``.

    Args:
        root_dir (str): Root directory to search.
        query (str): Filename pattern to match (``fnmatch`` syntax).
        include_root_dir (bool): If False, the returned paths are relative
            to ``root_dir`` instead of being prefixed with it.

    Returns:
        list: List of found filenames, in ``os.walk`` order.

    """
    files = []
    for root, _dirnames, filenames in os.walk(root_dir, followlinks=True):
        files.extend(
            os.path.join(root, filename)
            for filename in fnmatch.filter(filenames, query)
        )

    if not include_root_dir:
        # Use relpath rather than str.replace(root_dir + "/", ""): replace
        # removes every occurrence of the substring (not only the leading
        # prefix), hard-codes "/" as separator, and strips nothing when
        # root_dir is given with a trailing separator.
        files = [os.path.relpath(file_, root_dir) for file_ in files]

    return files
37
38
39
def read_hdf5(hdf5_name, hdf5_path):
    """Read hdf5 dataset.

    Args:
        hdf5_name (str): Filename of hdf5 file.
        hdf5_path (str): Dataset name in hdf5 file.

    Return:
        any: Dataset values.

    Note:
        If the file or the dataset does not exist, an error is logged and
        the process exits through ``sys.exit(1)`` (raises ``SystemExit``).

    """
    if not os.path.exists(hdf5_name):
        logging.error(f"There is no such a hdf5 file ({hdf5_name}).")
        sys.exit(1)

    # The context manager guarantees the file is closed on every exit path,
    # including the SystemExit raised below and any error while reading.
    with h5py.File(hdf5_name, "r") as hdf5_file:
        if hdf5_path not in hdf5_file:
            logging.error(f"There is no such a data in hdf5 file. ({hdf5_path})")
            sys.exit(1)

        # Indexing with [()] reads the dataset contents into memory, so the
        # returned value stays valid after the file is closed.
        return hdf5_file[hdf5_path][()]
64
65
66
def write_hdf5(hdf5_name, hdf5_path, write_data, is_overwrite=True):
    """Write dataset to hdf5.

    Args:
        hdf5_name (str): Hdf5 dataset filename.
        hdf5_path (str): Dataset path in hdf5.
        write_data (ndarray): Data to write.
        is_overwrite (bool): Whether to overwrite dataset.

    Note:
        If the dataset already exists and ``is_overwrite`` is False, an
        error is logged and the process exits through ``sys.exit(1)``.

    """
    # convert to numpy array
    write_data = np.array(write_data)

    # make sure the parent folder exists; exist_ok avoids failing when the
    # folder appears between an existence check and the creation
    folder_name = os.path.dirname(hdf5_name)
    if folder_name:
        os.makedirs(folder_name, exist_ok=True)

    # open an existing file in r+ mode so other datasets are kept,
    # otherwise create a new file with w mode
    mode = "r+" if os.path.exists(hdf5_name) else "w"

    # the context manager closes the file on every exit path, including
    # the SystemExit below and any failure inside create_dataset
    with h5py.File(hdf5_name, mode) as hdf5_file:
        if hdf5_path in hdf5_file:
            if not is_overwrite:
                logging.error("Dataset in hdf5 file already exists. "
                              "if you want to overwrite, please set is_overwrite = True.")
                sys.exit(1)
            logging.warning("Dataset in hdf5 file already exists. "
                            "recreate dataset in hdf5.")
            del hdf5_file[hdf5_path]

        # write data to hdf5
        hdf5_file.create_dataset(hdf5_path, data=write_data)
        hdf5_file.flush()
107
108
109
class HDF5ScpLoader(object):
    """Loader class for a feats.scp file of hdf5 file.

    Each non-empty line of the scp file holds a key followed by the path of
    an hdf5 file, optionally suffixed with ``:<dataset path>``.

    Examples:
        key1 /some/path/a.h5:feats
        key2 /some/path/b.h5:feats
        key3 /some/path/c.h5:feats
        key4 /some/path/d.h5:feats
        ...
        >>> loader = HDF5ScpLoader("hdf5.scp")
        >>> array = loader["key1"]

        key1 /some/path/a.h5
        key2 /some/path/b.h5
        key3 /some/path/c.h5
        key4 /some/path/d.h5
        ...
        >>> loader = HDF5ScpLoader("hdf5.scp", "feats")
        >>> array = loader["key1"]

    """

    def __init__(self, feats_scp, default_hdf5_path="feats"):
        """Initialize HDF5 scp loader.

        Args:
            feats_scp (str): Kaldi-style feats.scp file with hdf5 format.
            default_hdf5_path (str): Path in hdf5 file. If the scp contain the info, not used.

        Raises:
            ValueError: If a non-empty line does not contain both a key and
                a value.

        """
        self.default_hdf5_path = default_hdf5_path
        self.data = {}
        with open(feats_scp, encoding='utf-8') as f:
            for line_no, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline at the end of the
                    # file) carry no entry and are skipped.
                    continue
                # Split on the first whitespace run only, so that a value
                # containing spaces stays in one piece.
                fields = line.split(maxsplit=1)
                if len(fields) != 2:
                    raise ValueError(
                        f"Invalid line {line_no} in {feats_scp}: "
                        f"expected '<key> <path>', got {line!r}"
                    )
                key, value = fields
                self.data[key] = value

    def get_path(self, key):
        """Get hdf5 file path for a given key."""
        return self.data[key]

    def __getitem__(self, key):
        """Get ndarray for a given key."""
        p = self.data[key]
        if ":" in p:
            # Split on the last colon only, so that earlier colons in the
            # file name do not produce more than two parts.
            hdf5_name, hdf5_path = p.rsplit(":", 1)
            return read_hdf5(hdf5_name, hdf5_path)
        return read_hdf5(p, self.default_hdf5_path)

    def __len__(self):
        """Return the length of the scp file."""
        return len(self.data)

    def __iter__(self):
        """Return the iterator of the scp file."""
        return iter(self.data)

    def keys(self):
        """Return the keys of the scp file."""
        return self.data.keys()
170
171