Path: blob/master/examples/cppwin/TensorflowTTSCppInference/ext/AudioFile.hpp
1564 views
//=======================================================================1/** @file AudioFile.h2* @author Adam Stark3* @copyright Copyright (C) 2017 Adam Stark4*5* This file is part of the 'AudioFile' library6*7* This program is free software: you can redistribute it and/or modify8* it under the terms of the GNU General Public License as published by9* the Free Software Foundation, either version 3 of the License, or10* (at your option) any later version.11*12* This program is distributed in the hope that it will be useful,13* but WITHOUT ANY WARRANTY; without even the implied warranty of14* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the15* GNU General Public License for more details.16*17* You should have received a copy of the GNU General Public License18* along with this program. If not, see <http://www.gnu.org/licenses/>.19*/20//=======================================================================2122#ifndef _AS_AudioFile_h23#define _AS_AudioFile_h2425#include <iostream>26#include <vector>27#include <assert.h>28#include <string>29#include <fstream>30#include <unordered_map>31#include <iterator>32#include <algorithm>3334// disable some warnings on Windows35#if defined (_MSC_VER)36__pragma(warning (push))37__pragma(warning (disable : 4244))38__pragma(warning (disable : 4457))39__pragma(warning (disable : 4458))40__pragma(warning (disable : 4389))41__pragma(warning (disable : 4996))42#elif defined (__GNUC__)43_Pragma("GCC diagnostic push")44_Pragma("GCC diagnostic ignored \"-Wconversion\"")45_Pragma("GCC diagnostic ignored \"-Wsign-compare\"")46_Pragma("GCC diagnostic ignored \"-Wshadow\"")47#endif4849//=============================================================50/** The different types of audio file, plus some other types to51* indicate a failure to load a file, or that one hasn't been52* loaded yet53*/54enum class AudioFileFormat55{56Error,57NotLoaded,58Wave,59Aiff60};6162//=============================================================63template <class T>64class AudioFile65{66public:6768//=============================================================69typedef std::vector<std::vector<T> > AudioBuffer;7071//=============================================================72/** Constructor */73AudioFile();7475//=============================================================76/** Loads an audio file from a given file path.77* @Returns true if the file was successfully loaded78*/79bool load (std::string filePath);8081/** Saves an audio file to a given file path.82* @Returns true if the file was successfully saved83*/84bool save (std::string filePath, AudioFileFormat format = AudioFileFormat::Wave);8586//=============================================================87/** @Returns the sample rate */88uint32_t getSampleRate() const;8990/** @Returns the number of audio channels in the buffer */91int getNumChannels() const;9293/** @Returns true if the audio file is mono */94bool isMono() const;9596/** @Returns true if the audio file is stereo */97bool isStereo() const;9899/** @Returns the bit depth of each sample */100int getBitDepth() const;101102/** @Returns the number of samples per channel */103int getNumSamplesPerChannel() const;104105/** @Returns the length in seconds of the audio file based on the number of samples and sample rate */106double getLengthInSeconds() const;107108/** Prints a summary of the audio file to the console */109void printSummary() const;110111//=============================================================112113/** Set the audio buffer for this AudioFile by copying samples from another buffer.114* @Returns true if the buffer was copied successfully.115*/116bool setAudioBuffer (AudioBuffer& newBuffer);117118/** Sets the audio buffer to a given number of channels and number of samples per channel. This will try to preserve119* the existing audio, adding zeros to any new channels or new samples in a given channel.120*/121void setAudioBufferSize (int numChannels, int numSamples);122123/** Sets the number of samples per channel in the audio buffer. This will try to preserve124* the existing audio, adding zeros to new samples in a given channel if the number of samples is increased.125*/126void setNumSamplesPerChannel (int numSamples);127128/** Sets the number of channels. New channels will have the correct number of samples and be initialised to zero */129void setNumChannels (int numChannels);130131/** Sets the bit depth for the audio file. If you use the save() function, this bit depth rate will be used */132void setBitDepth (int numBitsPerSample);133134/** Sets the sample rate for the audio file. If you use the save() function, this sample rate will be used */135void setSampleRate (uint32_t newSampleRate);136137//=============================================================138/** Sets whether the library should log error messages to the console. By default this is true */139void shouldLogErrorsToConsole (bool logErrors);140141//=============================================================142/** A vector of vectors holding the audio samples for the AudioFile. You can143* access the samples by channel and then by sample index, i.e:144*145* samples[channel][sampleIndex]146*/147AudioBuffer samples;148149//=============================================================150/** An optional iXML chunk that can be added to the AudioFile.151*/152std::string iXMLChunk;153154private:155156//=============================================================157enum class Endianness158{159LittleEndian,160BigEndian161};162163//=============================================================164AudioFileFormat determineAudioFileFormat (std::vector<uint8_t>& fileData);165bool decodeWaveFile (std::vector<uint8_t>& fileData);166bool decodeAiffFile (std::vector<uint8_t>& fileData);167168//=============================================================169bool saveToWaveFile (std::string filePath);170bool saveToAiffFile (std::string filePath);171172//=============================================================173void clearAudioBuffer();174175//=============================================================176int32_t fourBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness = Endianness::LittleEndian);177int16_t twoBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness = Endianness::LittleEndian);178int getIndexOfString (std::vector<uint8_t>& source, std::string s);179int getIndexOfChunk (std::vector<uint8_t>& source, const std::string& chunkHeaderID, int startIndex, Endianness endianness = Endianness::LittleEndian);180181//=============================================================182T sixteenBitIntToSample (int16_t sample);183int16_t sampleToSixteenBitInt (T sample);184185//=============================================================186uint8_t sampleToSingleByte (T sample);187T singleByteToSample (uint8_t sample);188189uint32_t getAiffSampleRate (std::vector<uint8_t>& fileData, int sampleRateStartIndex);190bool tenByteMatch (std::vector<uint8_t>& v1, int startIndex1, std::vector<uint8_t>& v2, int startIndex2);191void addSampleRateToAiffData (std::vector<uint8_t>& fileData, uint32_t sampleRate);192T clamp (T v1, T minValue, T maxValue);193194//=============================================================195void addStringToFileData (std::vector<uint8_t>& fileData, std::string s);196void addInt32ToFileData (std::vector<uint8_t>& fileData, int32_t i, Endianness endianness = Endianness::LittleEndian);197void addInt16ToFileData (std::vector<uint8_t>& fileData, int16_t i, Endianness endianness = Endianness::LittleEndian);198199//=============================================================200bool writeDataToFile (std::vector<uint8_t>& fileData, std::string filePath);201202//=============================================================203void reportError (std::string errorMessage);204205//=============================================================206AudioFileFormat audioFileFormat;207uint32_t sampleRate;208int bitDepth;209bool logErrorsToConsole {true};210};211212213//=============================================================214// Pre-defined 10-byte representations of common sample rates215static std::unordered_map <uint32_t, std::vector<uint8_t>> aiffSampleRateTable = {216{8000, {64, 11, 250, 0, 0, 0, 0, 0, 0, 0}},217{11025, {64, 12, 172, 68, 0, 0, 0, 0, 0, 0}},218{16000, {64, 12, 250, 0, 0, 0, 0, 0, 0, 0}},219{22050, {64, 13, 172, 68, 0, 0, 0, 0, 0, 0}},220{32000, {64, 13, 250, 0, 0, 0, 0, 0, 0, 0}},221{37800, {64, 14, 147, 168, 0, 0, 0, 0, 0, 0}},222{44056, {64, 14, 172, 24, 0, 0, 0, 0, 0, 0}},223{44100, {64, 14, 172, 68, 0, 0, 0, 0, 0, 0}},224{47250, {64, 14, 184, 146, 0, 0, 0, 0, 0, 0}},225{48000, {64, 14, 187, 128, 0, 0, 0, 0, 0, 0}},226{50000, {64, 14, 195, 80, 0, 0, 0, 0, 0, 0}},227{50400, {64, 14, 196, 224, 0, 0, 0, 0, 0, 0}},228{88200, {64, 15, 172, 68, 0, 0, 0, 0, 0, 0}},229{96000, {64, 15, 187, 128, 0, 0, 0, 0, 0, 0}},230{176400, {64, 16, 172, 68, 0, 0, 0, 0, 0, 0}},231{192000, {64, 16, 187, 128, 0, 0, 0, 0, 0, 0}},232{352800, {64, 17, 172, 68, 0, 0, 0, 0, 0, 0}},233{2822400, {64, 20, 172, 68, 0, 0, 0, 0, 0, 0}},234{5644800, {64, 21, 172, 68, 0, 0, 0, 0, 0, 0}}235};236237//=============================================================238enum WavAudioFormat239{240PCM = 0x0001,241IEEEFloat = 0x0003,242ALaw = 0x0006,243MULaw = 0x0007,244Extensible = 0xFFFE245};246247//=============================================================248enum AIFFAudioFormat249{250Uncompressed,251Compressed,252Error253};254255//=============================================================256/* IMPLEMENTATION */257//=============================================================258259//=============================================================260template <class T>261AudioFile<T>::AudioFile()262{263static_assert(std::is_floating_point<T>::value, "ERROR: This version of AudioFile only supports floating point sample formats");264265bitDepth = 16;266sampleRate = 44100;267samples.resize (1);268samples[0].resize (0);269audioFileFormat = AudioFileFormat::NotLoaded;270}271272//=============================================================273template <class T>274uint32_t AudioFile<T>::getSampleRate() const275{276return sampleRate;277}278279//=============================================================280template <class T>281int AudioFile<T>::getNumChannels() const282{283return (int)samples.size();284}285286//=============================================================287template <class T>288bool AudioFile<T>::isMono() const289{290return getNumChannels() == 1;291}292293//=============================================================294template <class T>295bool AudioFile<T>::isStereo() const296{297return getNumChannels() == 2;298}299300//=============================================================301template <class T>302int AudioFile<T>::getBitDepth() const303{304return bitDepth;305}306307//=============================================================308template <class T>309int AudioFile<T>::getNumSamplesPerChannel() const310{311if (samples.size() > 0)312return (int) samples[0].size();313else314return 0;315}316317//=============================================================318template <class T>319double AudioFile<T>::getLengthInSeconds() const320{321return (double)getNumSamplesPerChannel() / (double)sampleRate;322}323324//=============================================================325template <class T>326void AudioFile<T>::printSummary() const327{328std::cout << "|======================================|" << std::endl;329std::cout << "Num Channels: " << getNumChannels() << std::endl;330std::cout << "Num Samples Per Channel: " << getNumSamplesPerChannel() << std::endl;331std::cout << "Sample Rate: " << sampleRate << std::endl;332std::cout << "Bit Depth: " << bitDepth << std::endl;333std::cout << "Length in Seconds: " << getLengthInSeconds() << std::endl;334std::cout << "|======================================|" << std::endl;335}336337//=============================================================338template <class T>339bool AudioFile<T>::setAudioBuffer (AudioBuffer& newBuffer)340{341int numChannels = (int)newBuffer.size();342343if (numChannels <= 0)344{345assert (false && "The buffer your are trying to use has no channels");346return false;347}348349size_t numSamples = newBuffer[0].size();350351// set the number of channels352samples.resize (newBuffer.size());353354for (int k = 0; k < getNumChannels(); k++)355{356assert (newBuffer[k].size() == numSamples);357358samples[k].resize (numSamples);359360for (size_t i = 0; i < numSamples; i++)361{362samples[k][i] = newBuffer[k][i];363}364}365366return true;367}368369//=============================================================370template <class T>371void AudioFile<T>::setAudioBufferSize (int numChannels, int numSamples)372{373samples.resize (numChannels);374setNumSamplesPerChannel (numSamples);375}376377//=============================================================378template <class T>379void AudioFile<T>::setNumSamplesPerChannel (int numSamples)380{381int originalSize = getNumSamplesPerChannel();382383for (int i = 0; i < getNumChannels();i++)384{385samples[i].resize (numSamples);386387// set any new samples to zero388if (numSamples > originalSize)389std::fill (samples[i].begin() + originalSize, samples[i].end(), (T)0.);390}391}392393//=============================================================394template <class T>395void AudioFile<T>::setNumChannels (int numChannels)396{397int originalNumChannels = getNumChannels();398int originalNumSamplesPerChannel = getNumSamplesPerChannel();399400samples.resize (numChannels);401402// make sure any new channels are set to the right size403// and filled with zeros404if (numChannels > originalNumChannels)405{406for (int i = originalNumChannels; i < numChannels; i++)407{408samples[i].resize (originalNumSamplesPerChannel);409std::fill (samples[i].begin(), samples[i].end(), (T)0.);410}411}412}413414//=============================================================415template <class T>416void AudioFile<T>::setBitDepth (int numBitsPerSample)417{418bitDepth = numBitsPerSample;419}420421//=============================================================422template <class T>423void AudioFile<T>::setSampleRate (uint32_t newSampleRate)424{425sampleRate = newSampleRate;426}427428//=============================================================429template <class T>430void AudioFile<T>::shouldLogErrorsToConsole (bool logErrors)431{432logErrorsToConsole = logErrors;433}434435//=============================================================436template <class T>437bool AudioFile<T>::load (std::string filePath)438{439std::ifstream file (filePath, std::ios::binary);440441// check the file exists442if (! file.good())443{444reportError ("ERROR: File doesn't exist or otherwise can't load file\n" + filePath);445return false;446}447448file.unsetf (std::ios::skipws);449std::istream_iterator<uint8_t> begin (file), end;450std::vector<uint8_t> fileData (begin, end);451452// get audio file format453audioFileFormat = determineAudioFileFormat (fileData);454455if (audioFileFormat == AudioFileFormat::Wave)456{457return decodeWaveFile (fileData);458}459else if (audioFileFormat == AudioFileFormat::Aiff)460{461return decodeAiffFile (fileData);462}463else464{465reportError ("Audio File Type: Error");466return false;467}468}469470//=============================================================471template <class T>472bool AudioFile<T>::decodeWaveFile (std::vector<uint8_t>& fileData)473{474// -----------------------------------------------------------475// HEADER CHUNK476std::string headerChunkID (fileData.begin(), fileData.begin() + 4);477//int32_t fileSizeInBytes = fourBytesToInt (fileData, 4) + 8;478std::string format (fileData.begin() + 8, fileData.begin() + 12);479480// -----------------------------------------------------------481// try and find the start points of key chunks482int indexOfDataChunk = getIndexOfChunk (fileData, "data", 12);483int indexOfFormatChunk = getIndexOfChunk (fileData, "fmt ", 12);484int indexOfXMLChunk = getIndexOfChunk (fileData, "iXML", 12);485486// if we can't find the data or format chunks, or the IDs/formats don't seem to be as expected487// then it is unlikely we'll able to read this file, so abort488if (indexOfDataChunk == -1 || indexOfFormatChunk == -1 || headerChunkID != "RIFF" || format != "WAVE")489{490reportError ("ERROR: this doesn't seem to be a valid .WAV file");491return false;492}493494// -----------------------------------------------------------495// FORMAT CHUNK496int f = indexOfFormatChunk;497std::string formatChunkID (fileData.begin() + f, fileData.begin() + f + 4);498//int32_t formatChunkSize = fourBytesToInt (fileData, f + 4);499int16_t audioFormat = twoBytesToInt (fileData, f + 8);500int16_t numChannels = twoBytesToInt (fileData, f + 10);501sampleRate = (uint32_t) fourBytesToInt (fileData, f + 12);502int32_t numBytesPerSecond = fourBytesToInt (fileData, f + 16);503int16_t numBytesPerBlock = twoBytesToInt (fileData, f + 20);504bitDepth = (int) twoBytesToInt (fileData, f + 22);505506int numBytesPerSample = bitDepth / 8;507508// check that the audio format is PCM or Float509if (audioFormat != WavAudioFormat::PCM && audioFormat != WavAudioFormat::IEEEFloat)510{511reportError ("ERROR: this .WAV file is encoded in a format that this library does not support at present");512return false;513}514515// check the number of channels is mono or stereo516if (numChannels < 1 || numChannels > 128)517{518reportError ("ERROR: this WAV file seems to be an invalid number of channels (or corrupted?)");519return false;520}521522// check header data is consistent523if ((numBytesPerSecond != (numChannels * sampleRate * bitDepth) / 8) || (numBytesPerBlock != (numChannels * numBytesPerSample)))524{525reportError ("ERROR: the header data in this WAV file seems to be inconsistent");526return false;527}528529// check bit depth is either 8, 16, 24 or 32 bit530if (bitDepth != 8 && bitDepth != 16 && bitDepth != 24 && bitDepth != 32)531{532reportError ("ERROR: this file has a bit depth that is not 8, 16, 24 or 32 bits");533return false;534}535536// -----------------------------------------------------------537// DATA CHUNK538int d = indexOfDataChunk;539std::string dataChunkID (fileData.begin() + d, fileData.begin() + d + 4);540int32_t dataChunkSize = fourBytesToInt (fileData, d + 4);541542int numSamples = dataChunkSize / (numChannels * bitDepth / 8);543int samplesStartIndex = indexOfDataChunk + 8;544545clearAudioBuffer();546samples.resize (numChannels);547548for (int i = 0; i < numSamples; i++)549{550for (int channel = 0; channel < numChannels; channel++)551{552int sampleIndex = samplesStartIndex + (numBytesPerBlock * i) + channel * numBytesPerSample;553554if (bitDepth == 8)555{556T sample = singleByteToSample (fileData[sampleIndex]);557samples[channel].push_back (sample);558}559else if (bitDepth == 16)560{561int16_t sampleAsInt = twoBytesToInt (fileData, sampleIndex);562T sample = sixteenBitIntToSample (sampleAsInt);563samples[channel].push_back (sample);564}565else if (bitDepth == 24)566{567int32_t sampleAsInt = 0;568sampleAsInt = (fileData[sampleIndex + 2] << 16) | (fileData[sampleIndex + 1] << 8) | fileData[sampleIndex];569570if (sampleAsInt & 0x800000) // if the 24th bit is set, this is a negative number in 24-bit world571sampleAsInt = sampleAsInt | ~0xFFFFFF; // so make sure sign is extended to the 32 bit float572573T sample = (T)sampleAsInt / (T)8388608.;574samples[channel].push_back (sample);575}576else if (bitDepth == 32)577{578int32_t sampleAsInt = fourBytesToInt (fileData, sampleIndex);579T sample;580581if (audioFormat == WavAudioFormat::IEEEFloat)582sample = (T)reinterpret_cast<float&> (sampleAsInt);583else // assume PCM584sample = (T) sampleAsInt / static_cast<float> (std::numeric_limits<std::int32_t>::max());585586samples[channel].push_back (sample);587}588else589{590assert (false);591}592}593}594595// -----------------------------------------------------------596// iXML CHUNK597if (indexOfXMLChunk != -1)598{599int32_t chunkSize = fourBytesToInt (fileData, indexOfXMLChunk + 4);600iXMLChunk = std::string ((const char*) &fileData[indexOfXMLChunk + 8], chunkSize);601}602603return true;604}605606//=============================================================607template <class T>608bool AudioFile<T>::decodeAiffFile (std::vector<uint8_t>& fileData)609{610// -----------------------------------------------------------611// HEADER CHUNK612std::string headerChunkID (fileData.begin(), fileData.begin() + 4);613//int32_t fileSizeInBytes = fourBytesToInt (fileData, 4, Endianness::BigEndian) + 8;614std::string format (fileData.begin() + 8, fileData.begin() + 12);615616int audioFormat = format == "AIFF" ? AIFFAudioFormat::Uncompressed : format == "AIFC" ? AIFFAudioFormat::Compressed : AIFFAudioFormat::Error;617618// -----------------------------------------------------------619// try and find the start points of key chunks620int indexOfCommChunk = getIndexOfChunk (fileData, "COMM", 12, Endianness::BigEndian);621int indexOfSoundDataChunk = getIndexOfChunk (fileData, "SSND", 12, Endianness::BigEndian);622int indexOfXMLChunk = getIndexOfChunk (fileData, "iXML", 12, Endianness::BigEndian);623624// if we can't find the data or format chunks, or the IDs/formats don't seem to be as expected625// then it is unlikely we'll able to read this file, so abort626if (indexOfSoundDataChunk == -1 || indexOfCommChunk == -1 || headerChunkID != "FORM" || audioFormat == AIFFAudioFormat::Error)627{628reportError ("ERROR: this doesn't seem to be a valid AIFF file");629return false;630}631632// -----------------------------------------------------------633// COMM CHUNK634int p = indexOfCommChunk;635std::string commChunkID (fileData.begin() + p, fileData.begin() + p + 4);636//int32_t commChunkSize = fourBytesToInt (fileData, p + 4, Endianness::BigEndian);637int16_t numChannels = twoBytesToInt (fileData, p + 8, Endianness::BigEndian);638int32_t numSamplesPerChannel = fourBytesToInt (fileData, p + 10, Endianness::BigEndian);639bitDepth = (int) twoBytesToInt (fileData, p + 14, Endianness::BigEndian);640sampleRate = getAiffSampleRate (fileData, p + 16);641642// check the sample rate was properly decoded643if (sampleRate == 0)644{645reportError ("ERROR: this AIFF file has an unsupported sample rate");646return false;647}648649// check the number of channels is mono or stereo650if (numChannels < 1 ||numChannels > 2)651{652reportError ("ERROR: this AIFF file seems to be neither mono nor stereo (perhaps multi-track, or corrupted?)");653return false;654}655656// check bit depth is either 8, 16, 24 or 32-bit657if (bitDepth != 8 && bitDepth != 16 && bitDepth != 24 && bitDepth != 32)658{659reportError ("ERROR: this file has a bit depth that is not 8, 16, 24 or 32 bits");660return false;661}662663// -----------------------------------------------------------664// SSND CHUNK665int s = indexOfSoundDataChunk;666std::string soundDataChunkID (fileData.begin() + s, fileData.begin() + s + 4);667int32_t soundDataChunkSize = fourBytesToInt (fileData, s + 4, Endianness::BigEndian);668int32_t offset = fourBytesToInt (fileData, s + 8, Endianness::BigEndian);669//int32_t blockSize = fourBytesToInt (fileData, s + 12, Endianness::BigEndian);670671int numBytesPerSample = bitDepth / 8;672int numBytesPerFrame = numBytesPerSample * numChannels;673int totalNumAudioSampleBytes = numSamplesPerChannel * numBytesPerFrame;674int samplesStartIndex = s + 16 + (int)offset;675676// sanity check the data677if ((soundDataChunkSize - 8) != totalNumAudioSampleBytes || totalNumAudioSampleBytes > static_cast<long>(fileData.size() - samplesStartIndex))678{679reportError ("ERROR: the metadatafor this file doesn't seem right");680return false;681}682683clearAudioBuffer();684samples.resize (numChannels);685686for (int i = 0; i < numSamplesPerChannel; i++)687{688for (int channel = 0; channel < numChannels; channel++)689{690int sampleIndex = samplesStartIndex + (numBytesPerFrame * i) + channel * numBytesPerSample;691692if (bitDepth == 8)693{694int8_t sampleAsSigned8Bit = (int8_t)fileData[sampleIndex];695T sample = (T)sampleAsSigned8Bit / (T)128.;696samples[channel].push_back (sample);697}698else if (bitDepth == 16)699{700int16_t sampleAsInt = twoBytesToInt (fileData, sampleIndex, Endianness::BigEndian);701T sample = sixteenBitIntToSample (sampleAsInt);702samples[channel].push_back (sample);703}704else if (bitDepth == 24)705{706int32_t sampleAsInt = 0;707sampleAsInt = (fileData[sampleIndex] << 16) | (fileData[sampleIndex + 1] << 8) | fileData[sampleIndex + 2];708709if (sampleAsInt & 0x800000) // if the 24th bit is set, this is a negative number in 24-bit world710sampleAsInt = sampleAsInt | ~0xFFFFFF; // so make sure sign is extended to the 32 bit float711712T sample = (T)sampleAsInt / (T)8388608.;713samples[channel].push_back (sample);714}715else if (bitDepth == 32)716{717int32_t sampleAsInt = fourBytesToInt (fileData, sampleIndex, Endianness::BigEndian);718T sample;719720if (audioFormat == AIFFAudioFormat::Compressed)721sample = (T)reinterpret_cast<float&> (sampleAsInt);722else // assume uncompressed723sample = (T) sampleAsInt / static_cast<float> (std::numeric_limits<std::int32_t>::max());724725samples[channel].push_back (sample);726}727else728{729assert (false);730}731}732}733734// -----------------------------------------------------------735// iXML CHUNK736if (indexOfXMLChunk != -1)737{738int32_t chunkSize = fourBytesToInt (fileData, indexOfXMLChunk + 4);739iXMLChunk = std::string ((const char*) &fileData[indexOfXMLChunk + 8], chunkSize);740}741742return true;743}744745//=============================================================746template <class T>747uint32_t AudioFile<T>::getAiffSampleRate (std::vector<uint8_t>& fileData, int sampleRateStartIndex)748{749for (auto it : aiffSampleRateTable)750{751if (tenByteMatch (fileData, sampleRateStartIndex, it.second, 0))752return it.first;753}754755return 0;756}757758//=============================================================759template <class T>760bool AudioFile<T>::tenByteMatch (std::vector<uint8_t>& v1, int startIndex1, std::vector<uint8_t>& v2, int startIndex2)761{762for (int i = 0; i < 10; i++)763{764if (v1[startIndex1 + i] != v2[startIndex2 + i])765return false;766}767768return true;769}770771//=============================================================772template <class T>773void AudioFile<T>::addSampleRateToAiffData (std::vector<uint8_t>& fileData, uint32_t sampleRate)774{775if (aiffSampleRateTable.count (sampleRate) > 0)776{777for (int i = 0; i < 10; i++)778fileData.push_back (aiffSampleRateTable[sampleRate][i]);779}780}781782//=============================================================783template <class T>784bool AudioFile<T>::save (std::string filePath, AudioFileFormat format)785{786if (format == AudioFileFormat::Wave)787{788return saveToWaveFile (filePath);789}790else if (format == AudioFileFormat::Aiff)791{792return saveToAiffFile (filePath);793}794795return false;796}797798//=============================================================799template <class T>800bool AudioFile<T>::saveToWaveFile (std::string filePath)801{802std::vector<uint8_t> fileData;803804int32_t dataChunkSize = getNumSamplesPerChannel() * (getNumChannels() * bitDepth / 8);805int16_t audioFormat = bitDepth == 32 ? WavAudioFormat::IEEEFloat : WavAudioFormat::PCM;806int32_t formatChunkSize = audioFormat == WavAudioFormat::PCM ? 16 : 18;807int32_t iXMLChunkSize = static_cast<int32_t> (iXMLChunk.size());808809// -----------------------------------------------------------810// HEADER CHUNK811addStringToFileData (fileData, "RIFF");812813// The file size in bytes is the header chunk size (4, not counting RIFF and WAVE) + the format814// chunk size (24) + the metadata part of the data chunk plus the actual data chunk size815int32_t fileSizeInBytes = 4 + formatChunkSize + 8 + 8 + dataChunkSize;816if (iXMLChunkSize > 0)817{818fileSizeInBytes += (8 + iXMLChunkSize);819}820821addInt32ToFileData (fileData, fileSizeInBytes);822823addStringToFileData (fileData, "WAVE");824825// -----------------------------------------------------------826// FORMAT CHUNK827addStringToFileData (fileData, "fmt ");828addInt32ToFileData (fileData, formatChunkSize); // format chunk size (16 for PCM)829addInt16ToFileData (fileData, audioFormat); // audio format830addInt16ToFileData (fileData, (int16_t)getNumChannels()); // num channels831addInt32ToFileData (fileData, (int32_t)sampleRate); // sample rate832833int32_t numBytesPerSecond = (int32_t) ((getNumChannels() * sampleRate * bitDepth) / 8);834addInt32ToFileData (fileData, numBytesPerSecond);835836int16_t numBytesPerBlock = getNumChannels() * (bitDepth / 8);837addInt16ToFileData (fileData, numBytesPerBlock);838839addInt16ToFileData (fileData, (int16_t)bitDepth);840841if (audioFormat == WavAudioFormat::IEEEFloat)842addInt16ToFileData (fileData, 0); // extension size843844// -----------------------------------------------------------845// DATA CHUNK846addStringToFileData (fileData, "data");847addInt32ToFileData (fileData, dataChunkSize);848849for (int i = 0; i < getNumSamplesPerChannel(); i++)850{851for (int channel = 0; channel < getNumChannels(); channel++)852{853if (bitDepth == 8)854{855uint8_t byte = sampleToSingleByte (samples[channel][i]);856fileData.push_back (byte);857}858else if (bitDepth == 16)859{860int16_t sampleAsInt = sampleToSixteenBitInt (samples[channel][i]);861addInt16ToFileData (fileData, sampleAsInt);862}863else if (bitDepth == 24)864{865int32_t sampleAsIntAgain = (int32_t) (samples[channel][i] * (T)8388608.);866867uint8_t bytes[3];868bytes[2] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF;869bytes[1] = (uint8_t) (sampleAsIntAgain >> 8) & 0xFF;870bytes[0] = (uint8_t) sampleAsIntAgain & 0xFF;871872fileData.push_back (bytes[0]);873fileData.push_back (bytes[1]);874fileData.push_back (bytes[2]);875}876else if (bitDepth == 32)877{878int32_t sampleAsInt;879880if (audioFormat == WavAudioFormat::IEEEFloat)881sampleAsInt = (int32_t) reinterpret_cast<int32_t&> (samples[channel][i]);882else // assume PCM883sampleAsInt = (int32_t) (samples[channel][i] * std::numeric_limits<int32_t>::max());884885addInt32ToFileData (fileData, sampleAsInt, Endianness::LittleEndian);886}887else888{889assert (false && "Trying to write a file with unsupported bit depth");890return false;891}892}893}894895// -----------------------------------------------------------896// iXML CHUNK897if (iXMLChunkSize > 0)898{899addStringToFileData (fileData, "iXML");900addInt32ToFileData (fileData, iXMLChunkSize);901addStringToFileData (fileData, iXMLChunk);902}903904// check that the various sizes we put in the metadata are correct905if (fileSizeInBytes != static_cast<int32_t> (fileData.size() - 8) || dataChunkSize != (getNumSamplesPerChannel() * getNumChannels() * (bitDepth / 8)))906{907reportError ("ERROR: couldn't save file to " + filePath);908return false;909}910911// try to write the file912return writeDataToFile (fileData, filePath);913}914915//=============================================================916template <class T>917bool AudioFile<T>::saveToAiffFile (std::string filePath)918{919std::vector<uint8_t> fileData;920921int32_t numBytesPerSample = bitDepth / 8;922int32_t numBytesPerFrame = numBytesPerSample * getNumChannels();923int32_t totalNumAudioSampleBytes = getNumSamplesPerChannel() * numBytesPerFrame;924int32_t soundDataChunkSize = totalNumAudioSampleBytes + 8;925int32_t iXMLChunkSize = static_cast<int32_t> (iXMLChunk.size());926927// -----------------------------------------------------------928// HEADER CHUNK929addStringToFileData (fileData, "FORM");930931// The file size in bytes is the header chunk size (4, not counting FORM and AIFF) + the COMM932// chunk size (26) + the metadata part of the SSND chunk plus the actual data chunk size933int32_t fileSizeInBytes = 4 + 26 + 16 + totalNumAudioSampleBytes;934if (iXMLChunkSize > 0)935{936fileSizeInBytes += (8 + iXMLChunkSize);937}938939addInt32ToFileData (fileData, fileSizeInBytes, Endianness::BigEndian);940941addStringToFileData (fileData, "AIFF");942943// -----------------------------------------------------------944// COMM CHUNK945addStringToFileData (fileData, "COMM");946addInt32ToFileData (fileData, 18, Endianness::BigEndian); // commChunkSize947addInt16ToFileData (fileData, getNumChannels(), Endianness::BigEndian); // num channels948addInt32ToFileData (fileData, getNumSamplesPerChannel(), Endianness::BigEndian); // num samples per channel949addInt16ToFileData (fileData, bitDepth, Endianness::BigEndian); // bit depth950addSampleRateToAiffData (fileData, sampleRate);951952// -----------------------------------------------------------953// SSND CHUNK954addStringToFileData (fileData, "SSND");955addInt32ToFileData (fileData, soundDataChunkSize, Endianness::BigEndian);956addInt32ToFileData (fileData, 0, Endianness::BigEndian); // offset957addInt32ToFileData (fileData, 0, Endianness::BigEndian); // block size958959for (int i = 0; i < getNumSamplesPerChannel(); i++)960{961for (int channel = 0; channel < getNumChannels(); channel++)962{963if (bitDepth == 8)964{965uint8_t byte = sampleToSingleByte (samples[channel][i]);966fileData.push_back (byte);967}968else if (bitDepth == 16)969{970int16_t sampleAsInt = sampleToSixteenBitInt (samples[channel][i]);971addInt16ToFileData (fileData, sampleAsInt, Endianness::BigEndian);972}973else if (bitDepth == 24)974{975int32_t sampleAsIntAgain = (int32_t) (samples[channel][i] * (T)8388608.);976977uint8_t bytes[3];978bytes[0] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF;979bytes[1] = (uint8_t) (sampleAsIntAgain >> 8) & 0xFF;980bytes[2] = (uint8_t) sampleAsIntAgain & 0xFF;981982fileData.push_back (bytes[0]);983fileData.push_back (bytes[1]);984fileData.push_back (bytes[2]);985}986else if (bitDepth == 32)987{988// write samples as signed integers (no implementation yet for floating point, but looking at WAV implementation should help)989int32_t sampleAsInt = (int32_t) (samples[channel][i] * std::numeric_limits<int32_t>::max());990addInt32ToFileData (fileData, sampleAsInt, Endianness::BigEndian);991}992else993{994assert (false && "Trying to write a file with unsupported bit depth");995return false;996}997}998}9991000// -----------------------------------------------------------1001// iXML CHUNK1002if (iXMLChunkSize > 0)1003{1004addStringToFileData (fileData, "iXML");1005addInt32ToFileData (fileData, iXMLChunkSize);1006addStringToFileData (fileData, iXMLChunk);1007}10081009// check that the various sizes we put in the metadata are correct1010if (fileSizeInBytes != static_cast<int32_t> (fileData.size() - 8) || soundDataChunkSize != getNumSamplesPerChannel() * numBytesPerFrame + 8)1011{1012reportError ("ERROR: couldn't save file to " + filePath);1013return false;1014}10151016// try to write the file1017return writeDataToFile (fileData, filePath);1018}10191020//=============================================================1021template <class T>1022bool AudioFile<T>::writeDataToFile (std::vector<uint8_t>& fileData, std::string filePath)1023{1024std::ofstream outputFile (filePath, std::ios::binary);10251026if (outputFile.is_open())1027{1028for (size_t i = 0; i < fileData.size(); i++)1029{1030char value = (char) fileData[i];1031outputFile.write (&value, sizeof (char));1032}10331034outputFile.close();10351036return true;1037}10381039return false;1040}10411042//=============================================================1043template <class T>1044void AudioFile<T>::addStringToFileData (std::vector<uint8_t>& fileData, std::string s)1045{1046for (size_t i = 0; i < s.length();i++)1047fileData.push_back ((uint8_t) s[i]);1048}10491050//=============================================================1051template <class T>1052void AudioFile<T>::addInt32ToFileData (std::vector<uint8_t>& fileData, int32_t i, Endianness endianness)1053{1054uint8_t bytes[4];10551056if (endianness == Endianness::LittleEndian)1057{1058bytes[3] = (i >> 24) & 0xFF;1059bytes[2] = (i >> 16) & 0xFF;1060bytes[1] = (i >> 8) & 0xFF;1061bytes[0] = i & 0xFF;1062}1063else1064{1065bytes[0] = (i >> 24) & 0xFF;1066bytes[1] = (i >> 16) & 0xFF;1067bytes[2] = (i >> 8) & 0xFF;1068bytes[3] = i & 0xFF;1069}10701071for (int i = 0; i < 4; i++)1072fileData.push_back (bytes[i]);1073}10741075//=============================================================1076template <class T>1077void AudioFile<T>::addInt16ToFileData (std::vector<uint8_t>& fileData, int16_t i, Endianness endianness)1078{1079uint8_t bytes[2];10801081if (endianness == Endianness::LittleEndian)1082{1083bytes[1] = (i >> 8) & 0xFF;1084bytes[0] = i & 0xFF;1085}1086else1087{1088bytes[0] = (i >> 8) & 0xFF;1089bytes[1] = i & 0xFF;1090}10911092fileData.push_back (bytes[0]);1093fileData.push_back (bytes[1]);1094}10951096//=============================================================1097template <class T>1098void AudioFile<T>::clearAudioBuffer()1099{1100for (size_t i = 0; i < samples.size();i++)1101{1102samples[i].clear();1103}11041105samples.clear();1106}11071108//=============================================================1109template <class T>1110AudioFileFormat AudioFile<T>::determineAudioFileFormat (std::vector<uint8_t>& fileData)1111{1112std::string header (fileData.begin(), fileData.begin() + 4);11131114if (header == "RIFF")1115return AudioFileFormat::Wave;1116else if (header == "FORM")1117return AudioFileFormat::Aiff;1118else1119return AudioFileFormat::Error;1120}11211122//=============================================================1123template <class T>1124int32_t AudioFile<T>::fourBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness)1125{1126int32_t result;11271128if (endianness == Endianness::LittleEndian)1129result = (source[startIndex + 3] << 24) | (source[startIndex + 2] << 16) | (source[startIndex + 1] << 8) | source[startIndex];1130else1131result = (source[startIndex] << 24) | (source[startIndex + 1] << 16) | (source[startIndex + 2] << 8) | source[startIndex + 3];11321133return result;1134}11351136//=============================================================1137template <class T>1138int16_t AudioFile<T>::twoBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness)1139{1140int16_t result;11411142if (endianness == Endianness::LittleEndian)1143result = (source[startIndex + 1] << 8) | source[startIndex];1144else1145result = (source[startIndex] << 8) | source[startIndex + 1];11461147return result;1148}11491150//=============================================================1151template <class T>1152int AudioFile<T>::getIndexOfString (std::vector<uint8_t>& source, std::string stringToSearchFor)1153{1154int index = -1;1155int stringLength = (int)stringToSearchFor.length();11561157for (size_t i = 0; i < source.size() - stringLength;i++)1158{1159std::string section (source.begin() + i, source.begin() + i + stringLength);11601161if (section == stringToSearchFor)1162{1163index = static_cast<int> (i);1164break;1165}1166}11671168return index;1169}11701171//=============================================================1172template <class T>1173int AudioFile<T>::getIndexOfChunk (std::vector<uint8_t>& source, const std::string& chunkHeaderID, int startIndex, Endianness endianness)1174{1175constexpr int dataLen = 4;1176if (chunkHeaderID.size() != dataLen)1177{1178assert (false && "Invalid chunk header ID string");1179return -1;1180}11811182int i = startIndex;1183while (i < source.size() - dataLen)1184{1185if (memcmp (&source[i], chunkHeaderID.data(), dataLen) == 0)1186{1187return i;1188}11891190i += dataLen;1191auto chunkSize = fourBytesToInt (source, i, endianness);1192i += (dataLen + chunkSize);1193}11941195return -1;1196}11971198//=============================================================1199template <class T>1200T AudioFile<T>::sixteenBitIntToSample (int16_t sample)1201{1202return static_cast<T> (sample) / static_cast<T> (32768.);1203}12041205//=============================================================1206template <class T>1207int16_t AudioFile<T>::sampleToSixteenBitInt (T sample)1208{1209sample = clamp (sample, -1., 1.);1210return static_cast<int16_t> (sample * 32767.);1211}12121213//=============================================================1214template <class T>1215uint8_t AudioFile<T>::sampleToSingleByte (T sample)1216{1217sample = clamp (sample, -1., 1.);1218sample = (sample + 1.) / 2.;1219return static_cast<uint8_t> (sample * 255.);1220}12211222//=============================================================1223template <class T>1224T AudioFile<T>::singleByteToSample (uint8_t sample)1225{1226return static_cast<T> (sample - 128) / static_cast<T> (128.);1227}12281229//=============================================================1230template <class T>1231T AudioFile<T>::clamp (T value, T minValue, T maxValue)1232{1233value = std::min (value, maxValue);1234value = std::max (value, minValue);1235return value;1236}12371238//=============================================================1239template <class T>1240void AudioFile<T>::reportError (std::string errorMessage)1241{1242if (logErrorsToConsole)1243std::cout << errorMessage << std::endl;1244}12451246#if defined (_MSC_VER)1247__pragma(warning (pop))1248#elif defined (__GNUC__)1249_Pragma("GCC diagnostic pop")1250#endif12511252#endif /* AudioFile_h */125312541255