Path: blob/master/examples/cpp_libtorch_deprecated/wav.h
1897 views
// Copyright (c) 2016 Personal (Binbin Zhang)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef FRONTEND_WAV_H_
#define FRONTEND_WAV_H_

#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>
#include <limits>
#include <string>
#include <vector>

// #include "utils/log.h"

namespace wav {

/// On-disk layout of a canonical 44-byte WAV header: a RIFF descriptor, a
/// 16-byte "fmt " chunk and the header of the following chunk.
///
/// The struct is read from and written to files as raw bytes, so the fields
/// are interpreted in the host's byte order.
struct WavHeader {
  char riff[4];  // "RIFF"
  unsigned int size;
  char wav[4];  // "WAVE"
  char fmt[4];  // "fmt "
  unsigned int fmt_size;
  uint16_t format;
  uint16_t channels;
  unsigned int sample_rate;
  unsigned int bytes_per_second;
  uint16_t block_size;
  uint16_t bit;
  char data[4];  // "data"
  unsigned int data_size;
};

// WavReader and WavWriter copy this struct to and from the file verbatim, so
// it must match the 44-byte on-disk header exactly (no padding).
static_assert(sizeof(WavHeader) == 44,
              "WavHeader must match the 44-byte on-disk WAV header");

/// Loads the samples of a WAV file into memory as floats.
///
/// Supported encodings: 8-bit, 16-bit, 32-bit integer (format tag 1) and
/// 32-bit IEEE float (format tag 3). Integer samples are divided by 32768;
/// float samples are stored unchanged. Samples are kept in file order.
class WavReader {
 public:
  /// Creates an empty reader: data() is nullptr and all counters are 0.
  WavReader() = default;

  /// Creates a reader and tries to load @p filename. The result of Open() is
  /// discarded; on failure the reader is left empty (data() == nullptr).
  explicit WavReader(const std::string& filename) { Open(filename); }

  /// Loads @p filename, replacing anything loaded previously.
  ///
  /// @return true when the header was parsed and every sample was read;
  ///         false otherwise, in which case the reader is left empty.
  bool Open(const std::string& filename) {
    Reset();
    FILE* fp = fopen(filename.c_str(), "rb");  // open for binary reading
    if (NULL == fp) {
      std::cout << "Error in read " << filename;
      return false;
    }
    const bool ok = Load(fp);
    fclose(fp);  // single exit point: the handle is closed on every path
    if (!ok) {
      Reset();  // never expose a partially decoded file
    }
    return ok;
  }

  int num_channel() const { return num_channel_; }
  int sample_rate() const { return sample_rate_; }
  int bits_per_sample() const { return bits_per_sample_; }
  /// Number of sample points per channel.
  int num_samples() const { return num_samples_; }

  /// Pointer to num_samples() * num_channel() floats, or nullptr when no
  /// samples are loaded. The pointer is invalidated by the next Open().
  const float* data() const { return data_.empty() ? nullptr : data_.data(); }

 private:
  /// Returns the reader to the empty state.
  void Reset() {
    num_channel_ = 0;
    sample_rate_ = 0;
    bits_per_sample_ = 0;
    num_samples_ = 0;
    data_.clear();
  }

  /// Reads an 8-byte chunk header (4-byte tag + 4-byte size) at the current
  /// file position into header->data / header->data_size.
  static bool ReadChunkHeader(FILE* fp, WavHeader* header) {
    char raw[sizeof(header->data) + sizeof(header->data_size)];
    if (fread(raw, 1, sizeof(raw), fp) != sizeof(raw)) {
      return false;
    }
    memcpy(header->data, raw, sizeof(header->data));
    memcpy(&header->data_size, raw + sizeof(header->data),
           sizeof(header->data_size));
    return true;
  }

  /// Reads data_.size() samples of type SampleT in one call and stores each
  /// one as static_cast<float>(sample) / divisor.
  template <typename SampleT>
  bool ReadSamples(FILE* fp, float divisor) {
    std::vector<SampleT> raw(data_.size());
    if (!raw.empty() &&
        fread(raw.data(), sizeof(SampleT), raw.size(), fp) != raw.size()) {
      printf("WaveData: failed to read sample data.\n");
      return false;
    }
    for (size_t i = 0; i < raw.size(); ++i) {
      data_[i] = static_cast<float>(raw[i]) / divisor;
    }
    return true;
  }

  /// Parses the header and samples from an already opened file.
  /// Returns false on any malformed, truncated or unsupported input.
  bool Load(FILE* fp) {
    // The file size bounds every size field read from the file below.
    if (0 != fseek(fp, 0, SEEK_END)) {
      return false;
    }
    const long file_size = ftell(fp);
    if (file_size < 0 || 0 != fseek(fp, 0, SEEK_SET)) {
      return false;
    }

    WavHeader header;
    if (fread(&header, 1, sizeof(header), fp) != sizeof(header)) {
      printf("WaveData: file is too short to contain a WAV header.\n");
      return false;
    }
    if (0 != strncmp(header.riff, "RIFF", 4) ||
        0 != strncmp(header.wav, "WAVE", 4) ||
        0 != strncmp(header.fmt, "fmt ", 4)) {
      printf("WaveData: missing RIFF/WAVE/fmt tags.\n");
      return false;
    }

    if (header.fmt_size < 16) {
      printf(
          "WaveData: expect PCM format data "
          "to have fmt chunk of at least size 16.\n");
      return false;
    }
    if (header.fmt_size > 16) {
      // With a 16-byte fmt chunk the next chunk header sits at offset 36
      // (44 - 8). A longer fmt chunk pushes it back by the extra bytes, so
      // the 8 bytes already read into header.data/data_size are re-read.
      const long next_chunk = 44 - 8;
      const unsigned long extra = header.fmt_size - 16UL;
      if (extra > static_cast<unsigned long>(file_size - next_chunk) ||
          0 != fseek(fp, next_chunk + static_cast<long>(extra), SEEK_SET) ||
          !ReadChunkHeader(fp, &header)) {
        printf("WaveData: truncated or malformed fmt chunk.\n");
        return false;
      }
    }

    // Skip any sub-chunks between "fmt" and "data". Usually there will
    // be a single "fact" sub chunk, but on Windows there can also be a
    // "list" sub chunk.
    // NOTE(review): RIFF pads odd-sized chunks to an even length; the pad
    // byte is not skipped here (unchanged from the previous behaviour).
    while (0 != strncmp(header.data, "data", 4)) {
      const long pos = ftell(fp);
      // Each pass must advance within the file; this bounds the loop and
      // rejects sizes that would wrap when converted to a seek offset.
      if (pos < 0 || pos > file_size ||
          header.data_size > static_cast<unsigned long>(file_size - pos) ||
          0 != fseek(fp, static_cast<long>(header.data_size), SEEK_CUR) ||
          !ReadChunkHeader(fp, &header)) {
        printf("WaveData: no data chunk found.\n");
        return false;
      }
    }

    const long data_start = ftell(fp);
    if (data_start < 0 || data_start > file_size) {
      return false;
    }
    // A data size of 0 means "use the rest of the file". A size larger than
    // what is left in the file is clamped to the bytes actually present.
    unsigned long data_bytes =
        static_cast<unsigned long>(file_size - data_start);
    if (header.data_size != 0 && header.data_size < data_bytes) {
      data_bytes = header.data_size;
    }

    num_channel_ = header.channels;
    sample_rate_ = header.sample_rate;
    bits_per_sample_ = header.bit;

    const unsigned long bytes_per_sample = header.bit / 8;
    if (bytes_per_sample == 0) {  // would divide by zero below
      printf("unsupported quantization bits\n");
      return false;
    }
    if (num_channel_ == 0) {  // would divide by zero below
      printf("WaveData: channel count must not be zero.\n");
      return false;
    }
    const unsigned long num_data = data_bytes / bytes_per_sample;
    if (num_data > static_cast<unsigned long>(INT_MAX)) {
      printf("WaveData: too many samples.\n");
      return false;
    }
    data_.assign(num_data, 0.0f);  // one flat array, all channels
    num_samples_ = static_cast<int>(num_data) / num_channel_;

    std::cout << "num_channel_ :" << num_channel_ << std::endl;
    std::cout << "sample_rate_ :" << sample_rate_ << std::endl;
    std::cout << "bits_per_sample_:" << bits_per_sample_ << std::endl;
    std::cout << "num_samples :" << num_data << std::endl;
    std::cout << "num_data_size :" << data_bytes << std::endl;

    // NOTE(review): every integer width is divided by 32768, and 8-bit
    // samples are read as plain `char`. Only 16-bit input ends up in
    // [-1, 1); confirm the intended scaling for 8-bit and 32-bit integer
    // input before relying on it.
    switch (bits_per_sample_) {
      case 8:
        return ReadSamples<char>(fp, 32768.0f);
      case 16:
        return ReadSamples<int16_t>(fp, 32768.0f);
      case 32:
        if (header.format == 1) {  // S32
          return ReadSamples<int32_t>(fp, 32768.0f);
        }
        if (header.format == 3) {  // IEEE-float, stored unchanged
          return ReadSamples<float>(fp, 1.0f);
        }
        break;
      default:
        break;
    }
    printf("unsupported quantization bits\n");
    return false;
  }

  int num_channel_ = 0;
  int sample_rate_ = 0;
  int bits_per_sample_ = 0;
  int num_samples_ = 0;  // sample points per channel
  std::vector<float> data_;
};

/// Writes a float buffer to a PCM (format tag 1) WAV file.
///
/// The buffer is borrowed, not copied: it must hold
/// num_samples * num_channel floats and outlive the call to Write().
class WavWriter {
 public:
  WavWriter(const float* data, int num_samples, int num_channel,
            int sample_rate, int bits_per_sample)
      : data_(data),
        num_samples_(num_samples),
        num_channel_(num_channel),
        sample_rate_(sample_rate),
        bits_per_sample_(bits_per_sample) {}

  /// Writes a 44-byte header followed by the samples converted to 8-, 16- or
  /// 32-bit integers. For any other bit depth only the header is written.
  /// Failures are reported on stdout; nothing is returned.
  ///
  /// NOTE(review): samples are truncated to integers without rescaling,
  /// whereas WavReader divides by 32768 on load, so a write/read round trip
  /// changes the scale of the data.
  void Write(const std::string& filename) {
    // "wb": text mode would translate bytes on platforms that distinguish
    // text from binary streams.
    FILE* fp = fopen(filename.c_str(), "wb");
    if (NULL == fp) {
      std::cout << "Error in write " << filename;
      return;
    }

    const int bytes_per_sample = bits_per_sample_ / 8;
    WavHeader header = {};
    memcpy(header.riff, "RIFF", 4);
    memcpy(header.wav, "WAVE", 4);
    memcpy(header.fmt, "fmt ", 4);
    memcpy(header.data, "data", 4);
    header.fmt_size = 16;
    header.format = 1;  // integer PCM
    header.channels = static_cast<uint16_t>(num_channel_);
    header.bit = static_cast<uint16_t>(bits_per_sample_);
    header.sample_rate = sample_rate_;
    header.data_size = num_samples_ * num_channel_ * bytes_per_sample;
    header.size = sizeof(header) - 8 + header.data_size;
    header.bytes_per_second = sample_rate_ * num_channel_ * bytes_per_sample;
    header.block_size = static_cast<uint16_t>(num_channel_ * bytes_per_sample);

    bool ok = fwrite(&header, 1, sizeof(header), fp) == sizeof(header);
    if (ok) {
      switch (bits_per_sample_) {
        case 8:
          ok = WriteSamples<char>(fp);
          break;
        case 16:
          ok = WriteSamples<int16_t>(fp);
          break;
        case 32:
          ok = WriteSamples<int32_t>(fp);
          break;
        default:
          break;  // unsupported depth: header only
      }
    }
    if (0 != fclose(fp)) {
      ok = false;
    }
    if (!ok) {
      std::cout << "Error in write " << filename;
    }
  }

 private:
  /// Converts @p value to IntT by truncation, saturating at the limits of
  /// IntT instead of invoking undefined behaviour; NaN becomes 0.
  template <typename IntT>
  static IntT Saturate(float value) {
    const double v = static_cast<double>(value);
    if (v != v) {
      return 0;
    }
    if (v <= static_cast<double>(std::numeric_limits<IntT>::min())) {
      return std::numeric_limits<IntT>::min();
    }
    if (v >= static_cast<double>(std::numeric_limits<IntT>::max())) {
      return std::numeric_limits<IntT>::max();
    }
    return static_cast<IntT>(value);
  }

  /// Converts every sample to IntT and writes them with one fwrite call.
  template <typename IntT>
  bool WriteSamples(FILE* fp) const {
    size_t total = 0;
    if (num_samples_ > 0 && num_channel_ > 0) {
      total = static_cast<size_t>(num_samples_) *
              static_cast<size_t>(num_channel_);
    }
    std::vector<IntT> pcm(total);
    for (size_t i = 0; i < total; ++i) {
      pcm[i] = Saturate<IntT>(data_[i]);
    }
    return pcm.empty() ||
           fwrite(pcm.data(), sizeof(IntT), pcm.size(), fp) == pcm.size();
  }

  const float* data_;
  int num_samples_;  // sample points per channel
  int num_channel_;
  int sample_rate_;
  int bits_per_sample_;
};

}  // namespace wav

#endif  // FRONTEND_WAV_H_