Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
snakers4
GitHub Repository: snakers4/silero-vad
Path: blob/master/examples/cpp_libtorch/wav.h
1179 views
1
// Copyright (c) 2016 Personal (Binbin Zhang)
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
16
#ifndef FRONTEND_WAV_H_
17
#define FRONTEND_WAV_H_
18
19
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>
#include <string>
#include <vector>
26
27
// #include "utils/log.h"
28
29
namespace wav {
30
31
// In-memory image of the canonical 44-byte RIFF/WAVE header: a RIFF
// descriptor, a 16-byte "fmt " chunk and the header of the "data" chunk.
//
// Every field uses a fixed-width type so the struct has the same layout as
// the bytes stored on disk; the static_assert below enforces this.
struct WavHeader {
  char riff[4];               // "RIFF"
  uint32_t size;              // total file size in bytes, minus 8
  char wav[4];                // "WAVE"
  char fmt[4];                // "fmt "
  uint32_t fmt_size;          // size of the fmt chunk payload (16 for plain PCM)
  uint16_t format;            // 1 = integer PCM, 3 = IEEE float
  uint16_t channels;          // number of interleaved channels
  uint32_t sample_rate;       // sample frames per second
  uint32_t bytes_per_second;  // sample_rate * channels * bytes per sample
  uint16_t block_size;        // channels * bytes per sample
  uint16_t bit;               // bits per sample
  char data[4];               // "data"
  uint32_t data_size;         // size of the sample data in bytes
};

static_assert(sizeof(WavHeader) == 44,
              "WavHeader must match the 44-byte on-disk WAVE header");
46
47
// Loads a RIFF/WAVE file into memory as interleaved float samples.
//
// Supported encodings: 8-bit unsigned, 16/24/32-bit signed integer PCM and
// 32-bit IEEE float. Integer samples are scaled by their own full-scale value
// so that every encoding yields values in [-1.0, 1.0); float samples are
// copied unchanged. All multi-byte fields are decoded explicitly as
// little-endian, so parsing does not depend on host byte order or on struct
// padding.
//
// The sample buffer is owned by a std::vector, which makes the reader safely
// copyable and movable.
class WavReader {
 public:
  WavReader() = default;

  // Equivalent to default construction followed by Open(filename). If loading
  // fails the reader is left empty (data() == nullptr, num_samples() == 0).
  explicit WavReader(const std::string& filename) { Open(filename); }

  // Replaces the current contents with the audio stored in `filename`.
  //
  // Returns true on success. On failure (file cannot be opened, not a
  // RIFF/WAVE file, missing "fmt "/"data" chunk, unsupported encoding) a
  // diagnostic is printed to stdout, the reader is left empty and false is
  // returned.
  bool Open(const std::string& filename) {
    Reset();
    FILE* fp = fopen(filename.c_str(), "rb");
    if (fp == nullptr) {
      std::cout << "Error in read " << filename << '\n';
      return false;
    }
    const bool ok = Load(fp);
    fclose(fp);  // single exit point: the handle is released on every path
    if (!ok) Reset();
    return ok;
  }

  int num_channel() const { return num_channel_; }
  int sample_rate() const { return sample_rate_; }
  int bits_per_sample() const { return bits_per_sample_; }
  // Number of sample frames, i.e. sample points per channel.
  int num_samples() const { return num_samples_; }

  // Interleaved samples (num_samples() * num_channel() values), or nullptr
  // when nothing is loaded. The pointer is invalidated by the next Open().
  const float* data() const { return data_.empty() ? nullptr : data_.data(); }

 private:
  // Little-endian field decoders.
  static uint16_t LoadU16(const unsigned char* p) {
    return static_cast<uint16_t>(p[0] | (p[1] << 8));
  }
  static uint32_t LoadU32(const unsigned char* p) {
    return static_cast<uint32_t>(p[0]) | (static_cast<uint32_t>(p[1]) << 8) |
           (static_cast<uint32_t>(p[2]) << 16) |
           (static_cast<uint32_t>(p[3]) << 24);
  }

  // Converts one encoded sample starting at `p` to a float. `bits` must be
  // 8, 16, 24 or 32; `ieee_float` selects float decoding for 32-bit data.
  static float DecodeSample(const unsigned char* p, int bits, bool ieee_float) {
    switch (bits) {
      case 8:
        // 8-bit WAVE samples are unsigned with the zero level at 128.
        return static_cast<float>(static_cast<int>(p[0]) - 128) / 128.0f;
      case 16:
        return static_cast<float>(static_cast<int16_t>(LoadU16(p))) / 32768.0f;
      case 24: {
        // Place the 24 bits in the top of a 32-bit word so the sign bit lands
        // in the right position, then scale as a 32-bit value.
        const uint32_t bits24 = (static_cast<uint32_t>(p[0]) << 8) |
                                (static_cast<uint32_t>(p[1]) << 16) |
                                (static_cast<uint32_t>(p[2]) << 24);
        return static_cast<float>(static_cast<int32_t>(bits24)) / 2147483648.0f;
      }
      default: {
        const uint32_t word = LoadU32(p);
        if (ieee_float) {
          static_assert(sizeof(float) == sizeof(uint32_t),
                        "IEEE float samples require a 32-bit float");
          float value;
          memcpy(&value, &word, sizeof(value));
          return value;
        }
        return static_cast<float>(static_cast<int32_t>(word)) / 2147483648.0f;
      }
    }
  }

  // Returns the reader to its default-constructed (empty) state.
  void Reset() {
    num_channel_ = 0;
    sample_rate_ = 0;
    bits_per_sample_ = 0;
    num_samples_ = 0;
    data_.clear();
  }

  // Parses an already opened file. Returns false on any malformed or
  // unsupported input; the caller closes `fp` and resets the state.
  bool Load(FILE* fp) {
    // RIFF container header: "RIFF" <u32 size> "WAVE".
    unsigned char riff[12];
    if (fread(riff, 1, sizeof(riff), fp) != sizeof(riff) ||
        memcmp(riff, "RIFF", 4) != 0 || memcmp(riff + 8, "WAVE", 4) != 0) {
      printf("WaveData: expect a RIFF/WAVE file.\n");
      return false;
    }

    // Walk the chunk list until "data". Anything other than "fmt " (usually
    // "fact" or "LIST") is skipped.
    bool have_fmt = false;
    uint16_t format = 0;
    uint16_t channels = 0;
    uint16_t bits = 0;
    uint32_t sample_rate = 0;
    uint32_t data_size = 0;
    for (;;) {
      unsigned char chunk[8];  // 4-byte id + little-endian 32-bit size
      if (fread(chunk, 1, sizeof(chunk), fp) != sizeof(chunk)) {
        // End of file (or read error) before "data": stop instead of spinning.
        printf("WaveData: no \"data\" chunk found.\n");
        return false;
      }
      const uint32_t chunk_size = LoadU32(chunk + 4);
      if (memcmp(chunk, "data", 4) == 0) {
        data_size = chunk_size;
        break;
      }

      uint32_t to_skip = chunk_size;
      if (memcmp(chunk, "fmt ", 4) == 0) {
        if (chunk_size < 16) {
          printf("WaveData: expect PCM format data "
                 "to have fmt chunk of at least size 16.\n");
          return false;
        }
        unsigned char fmt[16];
        if (fread(fmt, 1, sizeof(fmt), fp) != sizeof(fmt)) {
          printf("WaveData: truncated fmt chunk.\n");
          return false;
        }
        format = LoadU16(fmt);
        channels = LoadU16(fmt + 2);
        sample_rate = LoadU32(fmt + 4);
        bits = LoadU16(fmt + 14);
        have_fmt = true;
        to_skip -= 16;  // extension bytes of a larger fmt chunk are ignored
      }

      // RIFF chunks are word aligned: an odd-sized payload is followed by one
      // pad byte. The bound keeps the offset representable in a 32-bit long,
      // so a hostile size can never turn into a backwards seek.
      if (to_skip >= 0x7FFFFFFFu ||
          fseek(fp, static_cast<long>(to_skip + (chunk_size & 1u)),
                SEEK_CUR) != 0) {
        printf("WaveData: malformed chunk.\n");
        return false;
      }
    }

    if (!have_fmt) {
      printf("WaveData: \"data\" chunk is not preceded by a \"fmt \" chunk.\n");
      return false;
    }
    if (channels == 0) {
      printf("WaveData: invalid channel count 0.\n");
      return false;
    }
    // The format tag is only consulted for 32-bit data, where it separates
    // integer PCM (1) from IEEE float (3).
    const bool ieee_float = (bits == 32 && format == 3);
    const bool supported = bits == 8 || bits == 16 || bits == 24 ||
                           (bits == 32 && (format == 1 || ieee_float));
    if (!supported) {
      printf("unsupported quantization bits\n");
      return false;
    }

    // Bound the payload by what the file actually holds. Streaming writers
    // leave the size field at 0 (or a placeholder), in which case everything
    // up to the end of the file is treated as sample data.
    const long data_begin = ftell(fp);
    if (data_begin < 0 || fseek(fp, 0, SEEK_END) != 0) {
      printf("WaveData: cannot determine file size.\n");
      return false;
    }
    const long file_end = ftell(fp);
    if (file_end < data_begin || fseek(fp, data_begin, SEEK_SET) != 0) {
      printf("WaveData: cannot determine file size.\n");
      return false;
    }
    const size_t available = static_cast<size_t>(file_end - data_begin);
    size_t data_bytes = data_size;
    if (data_bytes == 0 || data_bytes > available) data_bytes = available;

    const size_t bytes_per_sample = bits / 8;
    const size_t num_data = data_bytes / bytes_per_sample;
    num_channel_ = channels;
    sample_rate_ = static_cast<int>(sample_rate);
    bits_per_sample_ = bits;

    std::cout << "num_channel_ :" << num_channel_ << '\n';
    std::cout << "sample_rate_ :" << sample_rate_ << '\n';
    std::cout << "bits_per_sample_:" << bits_per_sample_ << '\n';
    std::cout << "num_samples :" << num_data << '\n';
    std::cout << "num_data_size :" << data_bytes << '\n';

    // Decode block by block: one fread per block instead of one per sample,
    // without holding a second full-size copy of the file in memory.
    data_.reserve(num_data);
    unsigned char block[12 * 1024];  // multiple of every sample width (1-4)
    while (data_.size() < num_data) {
      size_t want = num_data - data_.size();
      const size_t capacity = sizeof(block) / bytes_per_sample;
      if (want > capacity) want = capacity;
      const size_t got = fread(block, bytes_per_sample, want, fp);
      for (size_t i = 0; i < got; ++i) {
        data_.push_back(
            DecodeSample(block + i * bytes_per_sample, bits, ieee_float));
      }
      if (got < want) break;  // read error: keep the samples decoded so far
    }
    num_samples_ = static_cast<int>(data_.size() / channels);
    return true;
  }

  int num_channel_ = 0;
  int sample_rate_ = 0;
  int bits_per_sample_ = 0;
  int num_samples_ = 0;  // sample points per channel
  std::vector<float> data_;  // interleaved samples
};
169
170
// Writes interleaved float samples as an integer PCM WAVE file with the
// canonical 44-byte header.
//
// Samples are NOT rescaled: each float is truncated toward zero and stored as
// an integer of the requested width, so the input must already be expressed
// in that integer range (for example [-32768, 32767] for 16 bits). Values
// outside the range saturate and NaN is written as 0. Samples normalised to
// [-1, 1] therefore have to be scaled by the caller first.
//
// The writer does not own `data`; the buffer must hold
// num_samples * num_channel floats and stay valid until Write() returns.
class WavWriter {
 public:
  WavWriter(const float* data, int num_samples, int num_channel,
            int sample_rate, int bits_per_sample)
      : data_(data),
        num_samples_(num_samples),
        num_channel_(num_channel),
        sample_rate_(sample_rate),
        bits_per_sample_(bits_per_sample) {}

  // Writes the header followed by all samples to `filename`, replacing any
  // existing file. Supported widths are 8, 16 and 32 bits. Problems (bad
  // parameters, file cannot be created, write error) are reported on stdout;
  // no file is created for invalid parameters.
  void Write(const std::string& filename) {
    if (bits_per_sample_ != 8 && bits_per_sample_ != 16 &&
        bits_per_sample_ != 32) {
      printf("unsupported quantization bits\n");
      return;
    }
    if (num_channel_ <= 0 || num_samples_ < 0 ||
        (data_ == nullptr && num_samples_ > 0)) {
      printf("WavWriter: invalid sample buffer or channel layout.\n");
      return;
    }

    // "wb": binary mode, otherwise 0x0A bytes are rewritten on platforms
    // that translate line endings.
    FILE* fp = fopen(filename.c_str(), "wb");
    if (fp == nullptr) {
      printf("Error in write %s\n", filename.c_str());
      return;
    }

    const uint32_t bytes_per_sample =
        static_cast<uint32_t>(bits_per_sample_) / 8;
    const uint32_t block_size =
        static_cast<uint32_t>(num_channel_) * bytes_per_sample;
    const uint32_t data_size = static_cast<uint32_t>(num_samples_) * block_size;

    // Serialise the header field by field (little-endian) so the output does
    // not depend on host byte order or struct padding.
    unsigned char header[44];
    memcpy(header, "RIFF", 4);
    StoreU32(header + 4, static_cast<uint32_t>(sizeof(header)) - 8 + data_size);
    memcpy(header + 8, "WAVEfmt ", 8);
    StoreU32(header + 16, 16);  // fmt chunk payload size
    StoreU16(header + 20, 1);   // format tag: integer PCM
    StoreU16(header + 22, static_cast<uint32_t>(num_channel_));
    StoreU32(header + 24, static_cast<uint32_t>(sample_rate_));
    StoreU32(header + 28, static_cast<uint32_t>(sample_rate_) * block_size);
    StoreU16(header + 32, block_size);
    StoreU16(header + 34, static_cast<uint32_t>(bits_per_sample_));
    memcpy(header + 36, "data", 4);
    StoreU32(header + 40, data_size);
    fwrite(header, 1, sizeof(header), fp);

    const size_t total =
        static_cast<size_t>(num_samples_) * static_cast<size_t>(num_channel_);
    for (size_t i = 0; i < total; ++i) {
      unsigned char bytes[4];
      switch (bits_per_sample_) {
        case 8:
          // Accepts both the signed (-128..127) and the unsigned (0..255)
          // convention; the stored byte is the value modulo 256.
          bytes[0] = static_cast<unsigned char>(
              static_cast<int>(Saturate(data_[i], -128.0f, 255.0f)));
          break;
        case 16:
          StoreU16(bytes, static_cast<uint32_t>(static_cast<int32_t>(
                              Saturate(data_[i], -32768.0f, 32767.0f))));
          break;
        default:
          // 2147483520 is the largest float that still fits in an int32.
          StoreU32(bytes,
                   static_cast<uint32_t>(static_cast<int32_t>(
                       Saturate(data_[i], -2147483648.0f, 2147483520.0f))));
          break;
      }
      fwrite(bytes, 1, bytes_per_sample, fp);
    }

    if (ferror(fp) != 0) {
      printf("Error in write %s\n", filename.c_str());
    }
    fclose(fp);
  }

 private:
  // Little-endian field encoders; only the low 16/32 bits of `v` are stored.
  static void StoreU16(unsigned char* p, uint32_t v) {
    p[0] = static_cast<unsigned char>(v & 0xFFu);
    p[1] = static_cast<unsigned char>((v >> 8) & 0xFFu);
  }
  static void StoreU32(unsigned char* p, uint32_t v) {
    StoreU16(p, v);
    StoreU16(p + 2, v >> 16);
  }

  // Limits `v` to [lo, hi] so the following float-to-integer conversion is
  // always defined; NaN maps to 0.
  static float Saturate(float v, float lo, float hi) {
    if (v != v) return 0.0f;
    if (v < lo) return lo;
    if (v > hi) return hi;
    return v;
  }

  const float* data_;  // not owned
  int num_samples_;    // sample points per channel
  int num_channel_;
  int sample_rate_;
  int bits_per_sample_;
};
232
233
}  // namespace wav
234
235
#endif // FRONTEND_WAV_H_
236
237