Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
snakers4
GitHub Repository: snakers4/silero-vad
Path: blob/master/examples/cpp/wav.h
1179 views
1
// Copyright (c) 2016 Personal (Binbin Zhang)
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#ifndef FRONTEND_WAV_H_
16
#define FRONTEND_WAV_H_
17
18
19
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>
#include <string>
#include <vector>
28
29
// #include "utils/log.h"
30
31
namespace wav {
32
33
// In-memory image of the canonical 44-byte RIFF/WAVE header (a 16-byte "fmt "
// chunk immediately followed by the "data" chunk header).
//
// The struct is read from and written to disk as raw bytes, so every field has
// an explicit width and the total size is checked at compile time. Multi-byte
// fields are stored in host byte order; WAV files are little-endian, so a
// little-endian host is assumed.
struct WavHeader {
  char riff[4];               // "RIFF"
  uint32_t size;              // bytes that follow this field (header - 8 + data)
  char wav[4];                // "WAVE"
  char fmt[4];                // "fmt "
  uint32_t fmt_size;          // size of the fmt chunk body; 16 for plain PCM
  uint16_t format;            // 1 = integer PCM, 3 = IEEE float
  uint16_t channels;          // number of interleaved channels
  uint32_t sample_rate;       // sample frames per second
  uint32_t bytes_per_second;  // sample_rate * channels * bytes per sample
  uint16_t block_size;        // channels * bytes per sample
  uint16_t bit;               // bits per sample
  char data[4];               // "data"
  uint32_t data_size;         // size of the sample payload in bytes
};

static_assert(sizeof(WavHeader) == 44,
              "WavHeader must match the 44-byte on-disk WAV header exactly");
48
49
class WavReader {
50
public:
51
WavReader() : data_(nullptr) {}
52
explicit WavReader(const std::string& filename) { Open(filename); }
53
54
bool Open(const std::string& filename) {
55
FILE* fp = fopen(filename.c_str(), "rb"); //文件读取
56
if (NULL == fp) {
57
std::cout << "Error in read " << filename;
58
return false;
59
}
60
61
WavHeader header;
62
fread(&header, 1, sizeof(header), fp);
63
if (header.fmt_size < 16) {
64
printf("WaveData: expect PCM format data "
65
"to have fmt chunk of at least size 16.\n");
66
return false;
67
} else if (header.fmt_size > 16) {
68
int offset = 44 - 8 + header.fmt_size - 16;
69
fseek(fp, offset, SEEK_SET);
70
fread(header.data, 8, sizeof(char), fp);
71
}
72
// check "riff" "WAVE" "fmt " "data"
73
74
// Skip any sub-chunks between "fmt" and "data". Usually there will
75
// be a single "fact" sub chunk, but on Windows there can also be a
76
// "list" sub chunk.
77
while (0 != strncmp(header.data, "data", 4)) {
78
// We will just ignore the data in these chunks.
79
fseek(fp, header.data_size, SEEK_CUR);
80
// read next sub chunk
81
fread(header.data, 8, sizeof(char), fp);
82
}
83
84
if (header.data_size == 0) {
85
int offset = ftell(fp);
86
fseek(fp, 0, SEEK_END);
87
header.data_size = ftell(fp) - offset;
88
fseek(fp, offset, SEEK_SET);
89
}
90
91
num_channel_ = header.channels;
92
sample_rate_ = header.sample_rate;
93
bits_per_sample_ = header.bit;
94
int num_data = header.data_size / (bits_per_sample_ / 8);
95
data_ = new float[num_data]; // Create 1-dim array
96
num_samples_ = num_data / num_channel_;
97
98
std::cout << "num_channel_ :" << num_channel_ << std::endl;
99
std::cout << "sample_rate_ :" << sample_rate_ << std::endl;
100
std::cout << "bits_per_sample_:" << bits_per_sample_ << std::endl;
101
std::cout << "num_samples :" << num_data << std::endl;
102
std::cout << "num_data_size :" << header.data_size << std::endl;
103
104
switch (bits_per_sample_) {
105
case 8: {
106
char sample;
107
for (int i = 0; i < num_data; ++i) {
108
fread(&sample, 1, sizeof(char), fp);
109
data_[i] = static_cast<float>(sample) / 32768;
110
}
111
break;
112
}
113
case 16: {
114
int16_t sample;
115
for (int i = 0; i < num_data; ++i) {
116
fread(&sample, 1, sizeof(int16_t), fp);
117
data_[i] = static_cast<float>(sample) / 32768;
118
}
119
break;
120
}
121
case 32:
122
{
123
if (header.format == 1) //S32
124
{
125
int sample;
126
for (int i = 0; i < num_data; ++i) {
127
fread(&sample, 1, sizeof(int), fp);
128
data_[i] = static_cast<float>(sample) / 32768;
129
}
130
}
131
else if (header.format == 3) // IEEE-float
132
{
133
float sample;
134
for (int i = 0; i < num_data; ++i) {
135
fread(&sample, 1, sizeof(float), fp);
136
data_[i] = static_cast<float>(sample);
137
}
138
}
139
else {
140
printf("unsupported quantization bits\n");
141
}
142
break;
143
}
144
default:
145
printf("unsupported quantization bits\n");
146
break;
147
}
148
149
fclose(fp);
150
return true;
151
}
152
153
int num_channel() const { return num_channel_; }
154
int sample_rate() const { return sample_rate_; }
155
int bits_per_sample() const { return bits_per_sample_; }
156
int num_samples() const { return num_samples_; }
157
158
~WavReader() {
159
delete[] data_;
160
}
161
162
const float* data() const { return data_; }
163
164
private:
165
int num_channel_;
166
int sample_rate_;
167
int bits_per_sample_;
168
int num_samples_; // sample points per channel
169
float* data_;
170
};
171
172
class WavWriter {
173
public:
174
WavWriter(const float* data, int num_samples, int num_channel,
175
int sample_rate, int bits_per_sample)
176
: data_(data),
177
num_samples_(num_samples),
178
num_channel_(num_channel),
179
sample_rate_(sample_rate),
180
bits_per_sample_(bits_per_sample) {}
181
182
void Write(const std::string& filename) {
183
FILE* fp = fopen(filename.c_str(), "w");
184
// init char 'riff' 'WAVE' 'fmt ' 'data'
185
WavHeader header;
186
char wav_header[44] = {0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57,
187
0x41, 0x56, 0x45, 0x66, 0x6d, 0x74, 0x20, 0x10, 0x00,
188
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
189
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
190
0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00};
191
memcpy(&header, wav_header, sizeof(header));
192
header.channels = num_channel_;
193
header.bit = bits_per_sample_;
194
header.sample_rate = sample_rate_;
195
header.data_size = num_samples_ * num_channel_ * (bits_per_sample_ / 8);
196
header.size = sizeof(header) - 8 + header.data_size;
197
header.bytes_per_second =
198
sample_rate_ * num_channel_ * (bits_per_sample_ / 8);
199
header.block_size = num_channel_ * (bits_per_sample_ / 8);
200
201
fwrite(&header, 1, sizeof(header), fp);
202
203
for (int i = 0; i < num_samples_; ++i) {
204
for (int j = 0; j < num_channel_; ++j) {
205
switch (bits_per_sample_) {
206
case 8: {
207
char sample = static_cast<char>(data_[i * num_channel_ + j]);
208
fwrite(&sample, 1, sizeof(sample), fp);
209
break;
210
}
211
case 16: {
212
int16_t sample = static_cast<int16_t>(data_[i * num_channel_ + j]);
213
fwrite(&sample, 1, sizeof(sample), fp);
214
break;
215
}
216
case 32: {
217
int sample = static_cast<int>(data_[i * num_channel_ + j]);
218
fwrite(&sample, 1, sizeof(sample), fp);
219
break;
220
}
221
}
222
}
223
}
224
fclose(fp);
225
}
226
227
private:
228
const float* data_;
229
int num_samples_; // total float points in data_
230
int num_channel_;
231
int sample_rate_;
232
int bits_per_sample_;
233
};
234
235
} // namespace wav
236
237
#endif // FRONTEND_WAV_H_
238
239