Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
TensorSpeech
GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/examples/cppwin/TensorflowTTSCppInference/ext/AudioFile.hpp
1564 views
1
//=======================================================================
2
/** @file AudioFile.h
3
* @author Adam Stark
4
* @copyright Copyright (C) 2017 Adam Stark
5
*
6
* This file is part of the 'AudioFile' library
7
*
8
* This program is free software: you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License as published by
10
* the Free Software Foundation, either version 3 of the License, or
11
* (at your option) any later version.
12
*
13
* This program is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
* GNU General Public License for more details.
17
*
18
* You should have received a copy of the GNU General Public License
19
* along with this program. If not, see <http://www.gnu.org/licenses/>.
20
*/
21
//=======================================================================
22
23
#ifndef _AS_AudioFile_h
24
#define _AS_AudioFile_h
25
26
#include <assert.h>

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <limits>
#include <string>
#include <unordered_map>
#include <vector>
34
35
// disable some warnings on Windows
36
#if defined (_MSC_VER)
37
__pragma(warning (push))
38
__pragma(warning (disable : 4244))
39
__pragma(warning (disable : 4457))
40
__pragma(warning (disable : 4458))
41
__pragma(warning (disable : 4389))
42
__pragma(warning (disable : 4996))
43
#elif defined (__GNUC__)
44
_Pragma("GCC diagnostic push")
45
_Pragma("GCC diagnostic ignored \"-Wconversion\"")
46
_Pragma("GCC diagnostic ignored \"-Wsign-compare\"")
47
_Pragma("GCC diagnostic ignored \"-Wshadow\"")
48
#endif
49
50
//=============================================================
51
/** Identifies the kind of audio file an AudioFile holds. Besides the
 * supported file types there are two extra values: one for a failed
 * load and one for the state before any file has been loaded.
 */
enum class AudioFileFormat
{
    Error,      // a file could not be loaded
    NotLoaded,  // no file has been loaded yet
    Wave,       // WAV file
    Aiff        // AIFF file
};
62
63
//=============================================================
64
template <class T>
65
class AudioFile
66
{
67
public:
68
69
//=============================================================
70
typedef std::vector<std::vector<T> > AudioBuffer;
71
72
//=============================================================
73
/** Constructor */
74
AudioFile();
75
76
//=============================================================
77
/** Loads an audio file from a given file path.
78
* @Returns true if the file was successfully loaded
79
*/
80
bool load (std::string filePath);
81
82
/** Saves an audio file to a given file path.
83
* @Returns true if the file was successfully saved
84
*/
85
bool save (std::string filePath, AudioFileFormat format = AudioFileFormat::Wave);
86
87
//=============================================================
88
/** @Returns the sample rate */
89
uint32_t getSampleRate() const;
90
91
/** @Returns the number of audio channels in the buffer */
92
int getNumChannels() const;
93
94
/** @Returns true if the audio file is mono */
95
bool isMono() const;
96
97
/** @Returns true if the audio file is stereo */
98
bool isStereo() const;
99
100
/** @Returns the bit depth of each sample */
101
int getBitDepth() const;
102
103
/** @Returns the number of samples per channel */
104
int getNumSamplesPerChannel() const;
105
106
/** @Returns the length in seconds of the audio file based on the number of samples and sample rate */
107
double getLengthInSeconds() const;
108
109
/** Prints a summary of the audio file to the console */
110
void printSummary() const;
111
112
//=============================================================
113
114
/** Set the audio buffer for this AudioFile by copying samples from another buffer.
115
* @Returns true if the buffer was copied successfully.
116
*/
117
bool setAudioBuffer (AudioBuffer& newBuffer);
118
119
/** Sets the audio buffer to a given number of channels and number of samples per channel. This will try to preserve
120
* the existing audio, adding zeros to any new channels or new samples in a given channel.
121
*/
122
void setAudioBufferSize (int numChannels, int numSamples);
123
124
/** Sets the number of samples per channel in the audio buffer. This will try to preserve
125
* the existing audio, adding zeros to new samples in a given channel if the number of samples is increased.
126
*/
127
void setNumSamplesPerChannel (int numSamples);
128
129
/** Sets the number of channels. New channels will have the correct number of samples and be initialised to zero */
130
void setNumChannels (int numChannels);
131
132
/** Sets the bit depth for the audio file. If you use the save() function, this bit depth rate will be used */
133
void setBitDepth (int numBitsPerSample);
134
135
/** Sets the sample rate for the audio file. If you use the save() function, this sample rate will be used */
136
void setSampleRate (uint32_t newSampleRate);
137
138
//=============================================================
139
/** Sets whether the library should log error messages to the console. By default this is true */
140
void shouldLogErrorsToConsole (bool logErrors);
141
142
//=============================================================
143
/** A vector of vectors holding the audio samples for the AudioFile. You can
144
* access the samples by channel and then by sample index, i.e:
145
*
146
* samples[channel][sampleIndex]
147
*/
148
AudioBuffer samples;
149
150
//=============================================================
151
/** An optional iXML chunk that can be added to the AudioFile.
152
*/
153
std::string iXMLChunk;
154
155
private:
156
157
//=============================================================
158
enum class Endianness
159
{
160
LittleEndian,
161
BigEndian
162
};
163
164
//=============================================================
165
AudioFileFormat determineAudioFileFormat (std::vector<uint8_t>& fileData);
166
bool decodeWaveFile (std::vector<uint8_t>& fileData);
167
bool decodeAiffFile (std::vector<uint8_t>& fileData);
168
169
//=============================================================
170
bool saveToWaveFile (std::string filePath);
171
bool saveToAiffFile (std::string filePath);
172
173
//=============================================================
174
void clearAudioBuffer();
175
176
//=============================================================
177
int32_t fourBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness = Endianness::LittleEndian);
178
int16_t twoBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness = Endianness::LittleEndian);
179
int getIndexOfString (std::vector<uint8_t>& source, std::string s);
180
int getIndexOfChunk (std::vector<uint8_t>& source, const std::string& chunkHeaderID, int startIndex, Endianness endianness = Endianness::LittleEndian);
181
182
//=============================================================
183
T sixteenBitIntToSample (int16_t sample);
184
int16_t sampleToSixteenBitInt (T sample);
185
186
//=============================================================
187
uint8_t sampleToSingleByte (T sample);
188
T singleByteToSample (uint8_t sample);
189
190
uint32_t getAiffSampleRate (std::vector<uint8_t>& fileData, int sampleRateStartIndex);
191
bool tenByteMatch (std::vector<uint8_t>& v1, int startIndex1, std::vector<uint8_t>& v2, int startIndex2);
192
void addSampleRateToAiffData (std::vector<uint8_t>& fileData, uint32_t sampleRate);
193
T clamp (T v1, T minValue, T maxValue);
194
195
//=============================================================
196
void addStringToFileData (std::vector<uint8_t>& fileData, std::string s);
197
void addInt32ToFileData (std::vector<uint8_t>& fileData, int32_t i, Endianness endianness = Endianness::LittleEndian);
198
void addInt16ToFileData (std::vector<uint8_t>& fileData, int16_t i, Endianness endianness = Endianness::LittleEndian);
199
200
//=============================================================
201
bool writeDataToFile (std::vector<uint8_t>& fileData, std::string filePath);
202
203
//=============================================================
204
void reportError (std::string errorMessage);
205
206
//=============================================================
207
AudioFileFormat audioFileFormat;
208
uint32_t sampleRate;
209
int bitDepth;
210
bool logErrorsToConsole {true};
211
};
212
213
214
//=============================================================
215
// Lookup table from a sample rate in Hz to its pre-computed 10-byte
// representation. The AIFF code compares these bytes against the sample
// rate field of a COMM chunk when reading and appends them when writing.
static std::unordered_map<uint32_t, std::vector<uint8_t> > aiffSampleRateTable =
{
    {    8000, {64, 11, 250,   0, 0, 0, 0, 0, 0, 0}},
    {   11025, {64, 12, 172,  68, 0, 0, 0, 0, 0, 0}},
    {   16000, {64, 12, 250,   0, 0, 0, 0, 0, 0, 0}},
    {   22050, {64, 13, 172,  68, 0, 0, 0, 0, 0, 0}},
    {   32000, {64, 13, 250,   0, 0, 0, 0, 0, 0, 0}},
    {   37800, {64, 14, 147, 168, 0, 0, 0, 0, 0, 0}},
    {   44056, {64, 14, 172,  24, 0, 0, 0, 0, 0, 0}},
    {   44100, {64, 14, 172,  68, 0, 0, 0, 0, 0, 0}},
    {   47250, {64, 14, 184, 146, 0, 0, 0, 0, 0, 0}},
    {   48000, {64, 14, 187, 128, 0, 0, 0, 0, 0, 0}},
    {   50000, {64, 14, 195,  80, 0, 0, 0, 0, 0, 0}},
    {   50400, {64, 14, 196, 224, 0, 0, 0, 0, 0, 0}},
    {   88200, {64, 15, 172,  68, 0, 0, 0, 0, 0, 0}},
    {   96000, {64, 15, 187, 128, 0, 0, 0, 0, 0, 0}},
    {  176400, {64, 16, 172,  68, 0, 0, 0, 0, 0, 0}},
    {  192000, {64, 16, 187, 128, 0, 0, 0, 0, 0, 0}},
    {  352800, {64, 17, 172,  68, 0, 0, 0, 0, 0, 0}},
    { 2822400, {64, 20, 172,  68, 0, 0, 0, 0, 0, 0}},
    { 5644800, {64, 21, 172,  68, 0, 0, 0, 0, 0, 0}}
};
237
238
//=============================================================
239
/** Values of the audio format field in a WAV file's format chunk.
 * The WAV decoder in this file only accepts PCM and IEEEFloat.
 */
enum WavAudioFormat
{
    PCM        = 0x0001,
    IEEEFloat  = 0x0003,
    ALaw       = 0x0006,
    MULaw      = 0x0007,
    Extensible = 0xFFFE
};
247
248
//=============================================================
249
/** Classification of an AIFF file, derived by the AIFF decoder from the
 * form type in the file header ("AIFF", "AIFC", or anything else).
 */
enum AIFFAudioFormat
{
    Uncompressed = 0,   // form type "AIFF"
    Compressed   = 1,   // form type "AIFC"
    Error        = 2    // unrecognised form type
};
255
256
//=============================================================
257
/* IMPLEMENTATION */
258
//=============================================================
259
260
//=============================================================
261
template <class T>
262
AudioFile<T>::AudioFile()
263
{
264
static_assert(std::is_floating_point<T>::value, "ERROR: This version of AudioFile only supports floating point sample formats");
265
266
bitDepth = 16;
267
sampleRate = 44100;
268
samples.resize (1);
269
samples[0].resize (0);
270
audioFileFormat = AudioFileFormat::NotLoaded;
271
}
272
273
//=============================================================
274
template <class T>
275
uint32_t AudioFile<T>::getSampleRate() const
276
{
277
return sampleRate;
278
}
279
280
//=============================================================
281
template <class T>
282
int AudioFile<T>::getNumChannels() const
283
{
284
return (int)samples.size();
285
}
286
287
//=============================================================
288
template <class T>
289
bool AudioFile<T>::isMono() const
290
{
291
return getNumChannels() == 1;
292
}
293
294
//=============================================================
295
template <class T>
296
bool AudioFile<T>::isStereo() const
297
{
298
return getNumChannels() == 2;
299
}
300
301
//=============================================================
302
template <class T>
303
int AudioFile<T>::getBitDepth() const
304
{
305
return bitDepth;
306
}
307
308
//=============================================================
309
template <class T>
310
int AudioFile<T>::getNumSamplesPerChannel() const
311
{
312
if (samples.size() > 0)
313
return (int) samples[0].size();
314
else
315
return 0;
316
}
317
318
//=============================================================
319
template <class T>
320
double AudioFile<T>::getLengthInSeconds() const
321
{
322
return (double)getNumSamplesPerChannel() / (double)sampleRate;
323
}
324
325
//=============================================================
326
template <class T>
327
void AudioFile<T>::printSummary() const
328
{
329
std::cout << "|======================================|" << std::endl;
330
std::cout << "Num Channels: " << getNumChannels() << std::endl;
331
std::cout << "Num Samples Per Channel: " << getNumSamplesPerChannel() << std::endl;
332
std::cout << "Sample Rate: " << sampleRate << std::endl;
333
std::cout << "Bit Depth: " << bitDepth << std::endl;
334
std::cout << "Length in Seconds: " << getLengthInSeconds() << std::endl;
335
std::cout << "|======================================|" << std::endl;
336
}
337
338
//=============================================================
339
template <class T>
340
bool AudioFile<T>::setAudioBuffer (AudioBuffer& newBuffer)
341
{
342
int numChannels = (int)newBuffer.size();
343
344
if (numChannels <= 0)
345
{
346
assert (false && "The buffer your are trying to use has no channels");
347
return false;
348
}
349
350
size_t numSamples = newBuffer[0].size();
351
352
// set the number of channels
353
samples.resize (newBuffer.size());
354
355
for (int k = 0; k < getNumChannels(); k++)
356
{
357
assert (newBuffer[k].size() == numSamples);
358
359
samples[k].resize (numSamples);
360
361
for (size_t i = 0; i < numSamples; i++)
362
{
363
samples[k][i] = newBuffer[k][i];
364
}
365
}
366
367
return true;
368
}
369
370
//=============================================================
371
template <class T>
372
void AudioFile<T>::setAudioBufferSize (int numChannels, int numSamples)
373
{
374
samples.resize (numChannels);
375
setNumSamplesPerChannel (numSamples);
376
}
377
378
//=============================================================
379
template <class T>
380
void AudioFile<T>::setNumSamplesPerChannel (int numSamples)
381
{
382
int originalSize = getNumSamplesPerChannel();
383
384
for (int i = 0; i < getNumChannels();i++)
385
{
386
samples[i].resize (numSamples);
387
388
// set any new samples to zero
389
if (numSamples > originalSize)
390
std::fill (samples[i].begin() + originalSize, samples[i].end(), (T)0.);
391
}
392
}
393
394
//=============================================================
395
template <class T>
396
void AudioFile<T>::setNumChannels (int numChannels)
397
{
398
int originalNumChannels = getNumChannels();
399
int originalNumSamplesPerChannel = getNumSamplesPerChannel();
400
401
samples.resize (numChannels);
402
403
// make sure any new channels are set to the right size
404
// and filled with zeros
405
if (numChannels > originalNumChannels)
406
{
407
for (int i = originalNumChannels; i < numChannels; i++)
408
{
409
samples[i].resize (originalNumSamplesPerChannel);
410
std::fill (samples[i].begin(), samples[i].end(), (T)0.);
411
}
412
}
413
}
414
415
//=============================================================
416
template <class T>
417
void AudioFile<T>::setBitDepth (int numBitsPerSample)
418
{
419
bitDepth = numBitsPerSample;
420
}
421
422
//=============================================================
423
template <class T>
424
void AudioFile<T>::setSampleRate (uint32_t newSampleRate)
425
{
426
sampleRate = newSampleRate;
427
}
428
429
//=============================================================
430
template <class T>
431
void AudioFile<T>::shouldLogErrorsToConsole (bool logErrors)
432
{
433
logErrorsToConsole = logErrors;
434
}
435
436
//=============================================================
437
template <class T>
438
bool AudioFile<T>::load (std::string filePath)
439
{
440
std::ifstream file (filePath, std::ios::binary);
441
442
// check the file exists
443
if (! file.good())
444
{
445
reportError ("ERROR: File doesn't exist or otherwise can't load file\n" + filePath);
446
return false;
447
}
448
449
file.unsetf (std::ios::skipws);
450
std::istream_iterator<uint8_t> begin (file), end;
451
std::vector<uint8_t> fileData (begin, end);
452
453
// get audio file format
454
audioFileFormat = determineAudioFileFormat (fileData);
455
456
if (audioFileFormat == AudioFileFormat::Wave)
457
{
458
return decodeWaveFile (fileData);
459
}
460
else if (audioFileFormat == AudioFileFormat::Aiff)
461
{
462
return decodeAiffFile (fileData);
463
}
464
else
465
{
466
reportError ("Audio File Type: Error");
467
return false;
468
}
469
}
470
471
//=============================================================
472
template <class T>
473
bool AudioFile<T>::decodeWaveFile (std::vector<uint8_t>& fileData)
474
{
475
// -----------------------------------------------------------
476
// HEADER CHUNK
477
std::string headerChunkID (fileData.begin(), fileData.begin() + 4);
478
//int32_t fileSizeInBytes = fourBytesToInt (fileData, 4) + 8;
479
std::string format (fileData.begin() + 8, fileData.begin() + 12);
480
481
// -----------------------------------------------------------
482
// try and find the start points of key chunks
483
int indexOfDataChunk = getIndexOfChunk (fileData, "data", 12);
484
int indexOfFormatChunk = getIndexOfChunk (fileData, "fmt ", 12);
485
int indexOfXMLChunk = getIndexOfChunk (fileData, "iXML", 12);
486
487
// if we can't find the data or format chunks, or the IDs/formats don't seem to be as expected
488
// then it is unlikely we'll able to read this file, so abort
489
if (indexOfDataChunk == -1 || indexOfFormatChunk == -1 || headerChunkID != "RIFF" || format != "WAVE")
490
{
491
reportError ("ERROR: this doesn't seem to be a valid .WAV file");
492
return false;
493
}
494
495
// -----------------------------------------------------------
496
// FORMAT CHUNK
497
int f = indexOfFormatChunk;
498
std::string formatChunkID (fileData.begin() + f, fileData.begin() + f + 4);
499
//int32_t formatChunkSize = fourBytesToInt (fileData, f + 4);
500
int16_t audioFormat = twoBytesToInt (fileData, f + 8);
501
int16_t numChannels = twoBytesToInt (fileData, f + 10);
502
sampleRate = (uint32_t) fourBytesToInt (fileData, f + 12);
503
int32_t numBytesPerSecond = fourBytesToInt (fileData, f + 16);
504
int16_t numBytesPerBlock = twoBytesToInt (fileData, f + 20);
505
bitDepth = (int) twoBytesToInt (fileData, f + 22);
506
507
int numBytesPerSample = bitDepth / 8;
508
509
// check that the audio format is PCM or Float
510
if (audioFormat != WavAudioFormat::PCM && audioFormat != WavAudioFormat::IEEEFloat)
511
{
512
reportError ("ERROR: this .WAV file is encoded in a format that this library does not support at present");
513
return false;
514
}
515
516
// check the number of channels is mono or stereo
517
if (numChannels < 1 || numChannels > 128)
518
{
519
reportError ("ERROR: this WAV file seems to be an invalid number of channels (or corrupted?)");
520
return false;
521
}
522
523
// check header data is consistent
524
if ((numBytesPerSecond != (numChannels * sampleRate * bitDepth) / 8) || (numBytesPerBlock != (numChannels * numBytesPerSample)))
525
{
526
reportError ("ERROR: the header data in this WAV file seems to be inconsistent");
527
return false;
528
}
529
530
// check bit depth is either 8, 16, 24 or 32 bit
531
if (bitDepth != 8 && bitDepth != 16 && bitDepth != 24 && bitDepth != 32)
532
{
533
reportError ("ERROR: this file has a bit depth that is not 8, 16, 24 or 32 bits");
534
return false;
535
}
536
537
// -----------------------------------------------------------
538
// DATA CHUNK
539
int d = indexOfDataChunk;
540
std::string dataChunkID (fileData.begin() + d, fileData.begin() + d + 4);
541
int32_t dataChunkSize = fourBytesToInt (fileData, d + 4);
542
543
int numSamples = dataChunkSize / (numChannels * bitDepth / 8);
544
int samplesStartIndex = indexOfDataChunk + 8;
545
546
clearAudioBuffer();
547
samples.resize (numChannels);
548
549
for (int i = 0; i < numSamples; i++)
550
{
551
for (int channel = 0; channel < numChannels; channel++)
552
{
553
int sampleIndex = samplesStartIndex + (numBytesPerBlock * i) + channel * numBytesPerSample;
554
555
if (bitDepth == 8)
556
{
557
T sample = singleByteToSample (fileData[sampleIndex]);
558
samples[channel].push_back (sample);
559
}
560
else if (bitDepth == 16)
561
{
562
int16_t sampleAsInt = twoBytesToInt (fileData, sampleIndex);
563
T sample = sixteenBitIntToSample (sampleAsInt);
564
samples[channel].push_back (sample);
565
}
566
else if (bitDepth == 24)
567
{
568
int32_t sampleAsInt = 0;
569
sampleAsInt = (fileData[sampleIndex + 2] << 16) | (fileData[sampleIndex + 1] << 8) | fileData[sampleIndex];
570
571
if (sampleAsInt & 0x800000) // if the 24th bit is set, this is a negative number in 24-bit world
572
sampleAsInt = sampleAsInt | ~0xFFFFFF; // so make sure sign is extended to the 32 bit float
573
574
T sample = (T)sampleAsInt / (T)8388608.;
575
samples[channel].push_back (sample);
576
}
577
else if (bitDepth == 32)
578
{
579
int32_t sampleAsInt = fourBytesToInt (fileData, sampleIndex);
580
T sample;
581
582
if (audioFormat == WavAudioFormat::IEEEFloat)
583
sample = (T)reinterpret_cast<float&> (sampleAsInt);
584
else // assume PCM
585
sample = (T) sampleAsInt / static_cast<float> (std::numeric_limits<std::int32_t>::max());
586
587
samples[channel].push_back (sample);
588
}
589
else
590
{
591
assert (false);
592
}
593
}
594
}
595
596
// -----------------------------------------------------------
597
// iXML CHUNK
598
if (indexOfXMLChunk != -1)
599
{
600
int32_t chunkSize = fourBytesToInt (fileData, indexOfXMLChunk + 4);
601
iXMLChunk = std::string ((const char*) &fileData[indexOfXMLChunk + 8], chunkSize);
602
}
603
604
return true;
605
}
606
607
//=============================================================
608
template <class T>
609
bool AudioFile<T>::decodeAiffFile (std::vector<uint8_t>& fileData)
610
{
611
// -----------------------------------------------------------
612
// HEADER CHUNK
613
std::string headerChunkID (fileData.begin(), fileData.begin() + 4);
614
//int32_t fileSizeInBytes = fourBytesToInt (fileData, 4, Endianness::BigEndian) + 8;
615
std::string format (fileData.begin() + 8, fileData.begin() + 12);
616
617
int audioFormat = format == "AIFF" ? AIFFAudioFormat::Uncompressed : format == "AIFC" ? AIFFAudioFormat::Compressed : AIFFAudioFormat::Error;
618
619
// -----------------------------------------------------------
620
// try and find the start points of key chunks
621
int indexOfCommChunk = getIndexOfChunk (fileData, "COMM", 12, Endianness::BigEndian);
622
int indexOfSoundDataChunk = getIndexOfChunk (fileData, "SSND", 12, Endianness::BigEndian);
623
int indexOfXMLChunk = getIndexOfChunk (fileData, "iXML", 12, Endianness::BigEndian);
624
625
// if we can't find the data or format chunks, or the IDs/formats don't seem to be as expected
626
// then it is unlikely we'll able to read this file, so abort
627
if (indexOfSoundDataChunk == -1 || indexOfCommChunk == -1 || headerChunkID != "FORM" || audioFormat == AIFFAudioFormat::Error)
628
{
629
reportError ("ERROR: this doesn't seem to be a valid AIFF file");
630
return false;
631
}
632
633
// -----------------------------------------------------------
634
// COMM CHUNK
635
int p = indexOfCommChunk;
636
std::string commChunkID (fileData.begin() + p, fileData.begin() + p + 4);
637
//int32_t commChunkSize = fourBytesToInt (fileData, p + 4, Endianness::BigEndian);
638
int16_t numChannels = twoBytesToInt (fileData, p + 8, Endianness::BigEndian);
639
int32_t numSamplesPerChannel = fourBytesToInt (fileData, p + 10, Endianness::BigEndian);
640
bitDepth = (int) twoBytesToInt (fileData, p + 14, Endianness::BigEndian);
641
sampleRate = getAiffSampleRate (fileData, p + 16);
642
643
// check the sample rate was properly decoded
644
if (sampleRate == 0)
645
{
646
reportError ("ERROR: this AIFF file has an unsupported sample rate");
647
return false;
648
}
649
650
// check the number of channels is mono or stereo
651
if (numChannels < 1 ||numChannels > 2)
652
{
653
reportError ("ERROR: this AIFF file seems to be neither mono nor stereo (perhaps multi-track, or corrupted?)");
654
return false;
655
}
656
657
// check bit depth is either 8, 16, 24 or 32-bit
658
if (bitDepth != 8 && bitDepth != 16 && bitDepth != 24 && bitDepth != 32)
659
{
660
reportError ("ERROR: this file has a bit depth that is not 8, 16, 24 or 32 bits");
661
return false;
662
}
663
664
// -----------------------------------------------------------
665
// SSND CHUNK
666
int s = indexOfSoundDataChunk;
667
std::string soundDataChunkID (fileData.begin() + s, fileData.begin() + s + 4);
668
int32_t soundDataChunkSize = fourBytesToInt (fileData, s + 4, Endianness::BigEndian);
669
int32_t offset = fourBytesToInt (fileData, s + 8, Endianness::BigEndian);
670
//int32_t blockSize = fourBytesToInt (fileData, s + 12, Endianness::BigEndian);
671
672
int numBytesPerSample = bitDepth / 8;
673
int numBytesPerFrame = numBytesPerSample * numChannels;
674
int totalNumAudioSampleBytes = numSamplesPerChannel * numBytesPerFrame;
675
int samplesStartIndex = s + 16 + (int)offset;
676
677
// sanity check the data
678
if ((soundDataChunkSize - 8) != totalNumAudioSampleBytes || totalNumAudioSampleBytes > static_cast<long>(fileData.size() - samplesStartIndex))
679
{
680
reportError ("ERROR: the metadatafor this file doesn't seem right");
681
return false;
682
}
683
684
clearAudioBuffer();
685
samples.resize (numChannels);
686
687
for (int i = 0; i < numSamplesPerChannel; i++)
688
{
689
for (int channel = 0; channel < numChannels; channel++)
690
{
691
int sampleIndex = samplesStartIndex + (numBytesPerFrame * i) + channel * numBytesPerSample;
692
693
if (bitDepth == 8)
694
{
695
int8_t sampleAsSigned8Bit = (int8_t)fileData[sampleIndex];
696
T sample = (T)sampleAsSigned8Bit / (T)128.;
697
samples[channel].push_back (sample);
698
}
699
else if (bitDepth == 16)
700
{
701
int16_t sampleAsInt = twoBytesToInt (fileData, sampleIndex, Endianness::BigEndian);
702
T sample = sixteenBitIntToSample (sampleAsInt);
703
samples[channel].push_back (sample);
704
}
705
else if (bitDepth == 24)
706
{
707
int32_t sampleAsInt = 0;
708
sampleAsInt = (fileData[sampleIndex] << 16) | (fileData[sampleIndex + 1] << 8) | fileData[sampleIndex + 2];
709
710
if (sampleAsInt & 0x800000) // if the 24th bit is set, this is a negative number in 24-bit world
711
sampleAsInt = sampleAsInt | ~0xFFFFFF; // so make sure sign is extended to the 32 bit float
712
713
T sample = (T)sampleAsInt / (T)8388608.;
714
samples[channel].push_back (sample);
715
}
716
else if (bitDepth == 32)
717
{
718
int32_t sampleAsInt = fourBytesToInt (fileData, sampleIndex, Endianness::BigEndian);
719
T sample;
720
721
if (audioFormat == AIFFAudioFormat::Compressed)
722
sample = (T)reinterpret_cast<float&> (sampleAsInt);
723
else // assume uncompressed
724
sample = (T) sampleAsInt / static_cast<float> (std::numeric_limits<std::int32_t>::max());
725
726
samples[channel].push_back (sample);
727
}
728
else
729
{
730
assert (false);
731
}
732
}
733
}
734
735
// -----------------------------------------------------------
736
// iXML CHUNK
737
if (indexOfXMLChunk != -1)
738
{
739
int32_t chunkSize = fourBytesToInt (fileData, indexOfXMLChunk + 4);
740
iXMLChunk = std::string ((const char*) &fileData[indexOfXMLChunk + 8], chunkSize);
741
}
742
743
return true;
744
}
745
746
//=============================================================
747
template <class T>
748
uint32_t AudioFile<T>::getAiffSampleRate (std::vector<uint8_t>& fileData, int sampleRateStartIndex)
749
{
750
for (auto it : aiffSampleRateTable)
751
{
752
if (tenByteMatch (fileData, sampleRateStartIndex, it.second, 0))
753
return it.first;
754
}
755
756
return 0;
757
}
758
759
//=============================================================
760
template <class T>
761
bool AudioFile<T>::tenByteMatch (std::vector<uint8_t>& v1, int startIndex1, std::vector<uint8_t>& v2, int startIndex2)
762
{
763
for (int i = 0; i < 10; i++)
764
{
765
if (v1[startIndex1 + i] != v2[startIndex2 + i])
766
return false;
767
}
768
769
return true;
770
}
771
772
//=============================================================
773
template <class T>
774
void AudioFile<T>::addSampleRateToAiffData (std::vector<uint8_t>& fileData, uint32_t sampleRate)
775
{
776
if (aiffSampleRateTable.count (sampleRate) > 0)
777
{
778
for (int i = 0; i < 10; i++)
779
fileData.push_back (aiffSampleRateTable[sampleRate][i]);
780
}
781
}
782
783
//=============================================================
784
template <class T>
785
bool AudioFile<T>::save (std::string filePath, AudioFileFormat format)
786
{
787
if (format == AudioFileFormat::Wave)
788
{
789
return saveToWaveFile (filePath);
790
}
791
else if (format == AudioFileFormat::Aiff)
792
{
793
return saveToAiffFile (filePath);
794
}
795
796
return false;
797
}
798
799
//=============================================================
800
template <class T>
801
bool AudioFile<T>::saveToWaveFile (std::string filePath)
802
{
803
std::vector<uint8_t> fileData;
804
805
int32_t dataChunkSize = getNumSamplesPerChannel() * (getNumChannels() * bitDepth / 8);
806
int16_t audioFormat = bitDepth == 32 ? WavAudioFormat::IEEEFloat : WavAudioFormat::PCM;
807
int32_t formatChunkSize = audioFormat == WavAudioFormat::PCM ? 16 : 18;
808
int32_t iXMLChunkSize = static_cast<int32_t> (iXMLChunk.size());
809
810
// -----------------------------------------------------------
811
// HEADER CHUNK
812
addStringToFileData (fileData, "RIFF");
813
814
// The file size in bytes is the header chunk size (4, not counting RIFF and WAVE) + the format
815
// chunk size (24) + the metadata part of the data chunk plus the actual data chunk size
816
int32_t fileSizeInBytes = 4 + formatChunkSize + 8 + 8 + dataChunkSize;
817
if (iXMLChunkSize > 0)
818
{
819
fileSizeInBytes += (8 + iXMLChunkSize);
820
}
821
822
addInt32ToFileData (fileData, fileSizeInBytes);
823
824
addStringToFileData (fileData, "WAVE");
825
826
// -----------------------------------------------------------
827
// FORMAT CHUNK
828
addStringToFileData (fileData, "fmt ");
829
addInt32ToFileData (fileData, formatChunkSize); // format chunk size (16 for PCM)
830
addInt16ToFileData (fileData, audioFormat); // audio format
831
addInt16ToFileData (fileData, (int16_t)getNumChannels()); // num channels
832
addInt32ToFileData (fileData, (int32_t)sampleRate); // sample rate
833
834
int32_t numBytesPerSecond = (int32_t) ((getNumChannels() * sampleRate * bitDepth) / 8);
835
addInt32ToFileData (fileData, numBytesPerSecond);
836
837
int16_t numBytesPerBlock = getNumChannels() * (bitDepth / 8);
838
addInt16ToFileData (fileData, numBytesPerBlock);
839
840
addInt16ToFileData (fileData, (int16_t)bitDepth);
841
842
if (audioFormat == WavAudioFormat::IEEEFloat)
843
addInt16ToFileData (fileData, 0); // extension size
844
845
// -----------------------------------------------------------
846
// DATA CHUNK
847
addStringToFileData (fileData, "data");
848
addInt32ToFileData (fileData, dataChunkSize);
849
850
for (int i = 0; i < getNumSamplesPerChannel(); i++)
851
{
852
for (int channel = 0; channel < getNumChannels(); channel++)
853
{
854
if (bitDepth == 8)
855
{
856
uint8_t byte = sampleToSingleByte (samples[channel][i]);
857
fileData.push_back (byte);
858
}
859
else if (bitDepth == 16)
860
{
861
int16_t sampleAsInt = sampleToSixteenBitInt (samples[channel][i]);
862
addInt16ToFileData (fileData, sampleAsInt);
863
}
864
else if (bitDepth == 24)
865
{
866
int32_t sampleAsIntAgain = (int32_t) (samples[channel][i] * (T)8388608.);
867
868
uint8_t bytes[3];
869
bytes[2] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF;
870
bytes[1] = (uint8_t) (sampleAsIntAgain >> 8) & 0xFF;
871
bytes[0] = (uint8_t) sampleAsIntAgain & 0xFF;
872
873
fileData.push_back (bytes[0]);
874
fileData.push_back (bytes[1]);
875
fileData.push_back (bytes[2]);
876
}
877
else if (bitDepth == 32)
878
{
879
int32_t sampleAsInt;
880
881
if (audioFormat == WavAudioFormat::IEEEFloat)
882
sampleAsInt = (int32_t) reinterpret_cast<int32_t&> (samples[channel][i]);
883
else // assume PCM
884
sampleAsInt = (int32_t) (samples[channel][i] * std::numeric_limits<int32_t>::max());
885
886
addInt32ToFileData (fileData, sampleAsInt, Endianness::LittleEndian);
887
}
888
else
889
{
890
assert (false && "Trying to write a file with unsupported bit depth");
891
return false;
892
}
893
}
894
}
895
896
// -----------------------------------------------------------
897
// iXML CHUNK
898
if (iXMLChunkSize > 0)
899
{
900
addStringToFileData (fileData, "iXML");
901
addInt32ToFileData (fileData, iXMLChunkSize);
902
addStringToFileData (fileData, iXMLChunk);
903
}
904
905
// check that the various sizes we put in the metadata are correct
906
if (fileSizeInBytes != static_cast<int32_t> (fileData.size() - 8) || dataChunkSize != (getNumSamplesPerChannel() * getNumChannels() * (bitDepth / 8)))
907
{
908
reportError ("ERROR: couldn't save file to " + filePath);
909
return false;
910
}
911
912
// try to write the file
913
return writeDataToFile (fileData, filePath);
914
}
915
916
//=============================================================
917
template <class T>
918
bool AudioFile<T>::saveToAiffFile (std::string filePath)
919
{
920
std::vector<uint8_t> fileData;
921
922
int32_t numBytesPerSample = bitDepth / 8;
923
int32_t numBytesPerFrame = numBytesPerSample * getNumChannels();
924
int32_t totalNumAudioSampleBytes = getNumSamplesPerChannel() * numBytesPerFrame;
925
int32_t soundDataChunkSize = totalNumAudioSampleBytes + 8;
926
int32_t iXMLChunkSize = static_cast<int32_t> (iXMLChunk.size());
927
928
// -----------------------------------------------------------
929
// HEADER CHUNK
930
addStringToFileData (fileData, "FORM");
931
932
// The file size in bytes is the header chunk size (4, not counting FORM and AIFF) + the COMM
933
// chunk size (26) + the metadata part of the SSND chunk plus the actual data chunk size
934
int32_t fileSizeInBytes = 4 + 26 + 16 + totalNumAudioSampleBytes;
935
if (iXMLChunkSize > 0)
936
{
937
fileSizeInBytes += (8 + iXMLChunkSize);
938
}
939
940
addInt32ToFileData (fileData, fileSizeInBytes, Endianness::BigEndian);
941
942
addStringToFileData (fileData, "AIFF");
943
944
// -----------------------------------------------------------
945
// COMM CHUNK
946
addStringToFileData (fileData, "COMM");
947
addInt32ToFileData (fileData, 18, Endianness::BigEndian); // commChunkSize
948
addInt16ToFileData (fileData, getNumChannels(), Endianness::BigEndian); // num channels
949
addInt32ToFileData (fileData, getNumSamplesPerChannel(), Endianness::BigEndian); // num samples per channel
950
addInt16ToFileData (fileData, bitDepth, Endianness::BigEndian); // bit depth
951
addSampleRateToAiffData (fileData, sampleRate);
952
953
// -----------------------------------------------------------
954
// SSND CHUNK
955
addStringToFileData (fileData, "SSND");
956
addInt32ToFileData (fileData, soundDataChunkSize, Endianness::BigEndian);
957
addInt32ToFileData (fileData, 0, Endianness::BigEndian); // offset
958
addInt32ToFileData (fileData, 0, Endianness::BigEndian); // block size
959
960
for (int i = 0; i < getNumSamplesPerChannel(); i++)
961
{
962
for (int channel = 0; channel < getNumChannels(); channel++)
963
{
964
if (bitDepth == 8)
965
{
966
uint8_t byte = sampleToSingleByte (samples[channel][i]);
967
fileData.push_back (byte);
968
}
969
else if (bitDepth == 16)
970
{
971
int16_t sampleAsInt = sampleToSixteenBitInt (samples[channel][i]);
972
addInt16ToFileData (fileData, sampleAsInt, Endianness::BigEndian);
973
}
974
else if (bitDepth == 24)
975
{
976
int32_t sampleAsIntAgain = (int32_t) (samples[channel][i] * (T)8388608.);
977
978
uint8_t bytes[3];
979
bytes[0] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF;
980
bytes[1] = (uint8_t) (sampleAsIntAgain >> 8) & 0xFF;
981
bytes[2] = (uint8_t) sampleAsIntAgain & 0xFF;
982
983
fileData.push_back (bytes[0]);
984
fileData.push_back (bytes[1]);
985
fileData.push_back (bytes[2]);
986
}
987
else if (bitDepth == 32)
988
{
989
// write samples as signed integers (no implementation yet for floating point, but looking at WAV implementation should help)
990
int32_t sampleAsInt = (int32_t) (samples[channel][i] * std::numeric_limits<int32_t>::max());
991
addInt32ToFileData (fileData, sampleAsInt, Endianness::BigEndian);
992
}
993
else
994
{
995
assert (false && "Trying to write a file with unsupported bit depth");
996
return false;
997
}
998
}
999
}
1000
1001
// -----------------------------------------------------------
1002
// iXML CHUNK
1003
if (iXMLChunkSize > 0)
1004
{
1005
addStringToFileData (fileData, "iXML");
1006
addInt32ToFileData (fileData, iXMLChunkSize);
1007
addStringToFileData (fileData, iXMLChunk);
1008
}
1009
1010
// check that the various sizes we put in the metadata are correct
1011
if (fileSizeInBytes != static_cast<int32_t> (fileData.size() - 8) || soundDataChunkSize != getNumSamplesPerChannel() * numBytesPerFrame + 8)
1012
{
1013
reportError ("ERROR: couldn't save file to " + filePath);
1014
return false;
1015
}
1016
1017
// try to write the file
1018
return writeDataToFile (fileData, filePath);
1019
}
1020
1021
//=============================================================
1022
template <class T>
1023
bool AudioFile<T>::writeDataToFile (std::vector<uint8_t>& fileData, std::string filePath)
1024
{
1025
std::ofstream outputFile (filePath, std::ios::binary);
1026
1027
if (outputFile.is_open())
1028
{
1029
for (size_t i = 0; i < fileData.size(); i++)
1030
{
1031
char value = (char) fileData[i];
1032
outputFile.write (&value, sizeof (char));
1033
}
1034
1035
outputFile.close();
1036
1037
return true;
1038
}
1039
1040
return false;
1041
}
1042
1043
//=============================================================
1044
template <class T>
1045
void AudioFile<T>::addStringToFileData (std::vector<uint8_t>& fileData, std::string s)
1046
{
1047
for (size_t i = 0; i < s.length();i++)
1048
fileData.push_back ((uint8_t) s[i]);
1049
}
1050
1051
//=============================================================
1052
template <class T>
1053
void AudioFile<T>::addInt32ToFileData (std::vector<uint8_t>& fileData, int32_t i, Endianness endianness)
1054
{
1055
uint8_t bytes[4];
1056
1057
if (endianness == Endianness::LittleEndian)
1058
{
1059
bytes[3] = (i >> 24) & 0xFF;
1060
bytes[2] = (i >> 16) & 0xFF;
1061
bytes[1] = (i >> 8) & 0xFF;
1062
bytes[0] = i & 0xFF;
1063
}
1064
else
1065
{
1066
bytes[0] = (i >> 24) & 0xFF;
1067
bytes[1] = (i >> 16) & 0xFF;
1068
bytes[2] = (i >> 8) & 0xFF;
1069
bytes[3] = i & 0xFF;
1070
}
1071
1072
for (int i = 0; i < 4; i++)
1073
fileData.push_back (bytes[i]);
1074
}
1075
1076
//=============================================================
1077
template <class T>
1078
void AudioFile<T>::addInt16ToFileData (std::vector<uint8_t>& fileData, int16_t i, Endianness endianness)
1079
{
1080
uint8_t bytes[2];
1081
1082
if (endianness == Endianness::LittleEndian)
1083
{
1084
bytes[1] = (i >> 8) & 0xFF;
1085
bytes[0] = i & 0xFF;
1086
}
1087
else
1088
{
1089
bytes[0] = (i >> 8) & 0xFF;
1090
bytes[1] = i & 0xFF;
1091
}
1092
1093
fileData.push_back (bytes[0]);
1094
fileData.push_back (bytes[1]);
1095
}
1096
1097
//=============================================================
1098
template <class T>
1099
void AudioFile<T>::clearAudioBuffer()
1100
{
1101
for (size_t i = 0; i < samples.size();i++)
1102
{
1103
samples[i].clear();
1104
}
1105
1106
samples.clear();
1107
}
1108
1109
//=============================================================
1110
template <class T>
1111
AudioFileFormat AudioFile<T>::determineAudioFileFormat (std::vector<uint8_t>& fileData)
1112
{
1113
std::string header (fileData.begin(), fileData.begin() + 4);
1114
1115
if (header == "RIFF")
1116
return AudioFileFormat::Wave;
1117
else if (header == "FORM")
1118
return AudioFileFormat::Aiff;
1119
else
1120
return AudioFileFormat::Error;
1121
}
1122
1123
//=============================================================
1124
template <class T>
1125
int32_t AudioFile<T>::fourBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness)
1126
{
1127
int32_t result;
1128
1129
if (endianness == Endianness::LittleEndian)
1130
result = (source[startIndex + 3] << 24) | (source[startIndex + 2] << 16) | (source[startIndex + 1] << 8) | source[startIndex];
1131
else
1132
result = (source[startIndex] << 24) | (source[startIndex + 1] << 16) | (source[startIndex + 2] << 8) | source[startIndex + 3];
1133
1134
return result;
1135
}
1136
1137
//=============================================================
1138
template <class T>
1139
int16_t AudioFile<T>::twoBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness)
1140
{
1141
int16_t result;
1142
1143
if (endianness == Endianness::LittleEndian)
1144
result = (source[startIndex + 1] << 8) | source[startIndex];
1145
else
1146
result = (source[startIndex] << 8) | source[startIndex + 1];
1147
1148
return result;
1149
}
1150
1151
//=============================================================
1152
template <class T>
1153
int AudioFile<T>::getIndexOfString (std::vector<uint8_t>& source, std::string stringToSearchFor)
1154
{
1155
int index = -1;
1156
int stringLength = (int)stringToSearchFor.length();
1157
1158
for (size_t i = 0; i < source.size() - stringLength;i++)
1159
{
1160
std::string section (source.begin() + i, source.begin() + i + stringLength);
1161
1162
if (section == stringToSearchFor)
1163
{
1164
index = static_cast<int> (i);
1165
break;
1166
}
1167
}
1168
1169
return index;
1170
}
1171
1172
//=============================================================
1173
template <class T>
1174
int AudioFile<T>::getIndexOfChunk (std::vector<uint8_t>& source, const std::string& chunkHeaderID, int startIndex, Endianness endianness)
1175
{
1176
constexpr int dataLen = 4;
1177
if (chunkHeaderID.size() != dataLen)
1178
{
1179
assert (false && "Invalid chunk header ID string");
1180
return -1;
1181
}
1182
1183
int i = startIndex;
1184
while (i < source.size() - dataLen)
1185
{
1186
if (memcmp (&source[i], chunkHeaderID.data(), dataLen) == 0)
1187
{
1188
return i;
1189
}
1190
1191
i += dataLen;
1192
auto chunkSize = fourBytesToInt (source, i, endianness);
1193
i += (dataLen + chunkSize);
1194
}
1195
1196
return -1;
1197
}
1198
1199
//=============================================================
1200
template <class T>
1201
T AudioFile<T>::sixteenBitIntToSample (int16_t sample)
1202
{
1203
return static_cast<T> (sample) / static_cast<T> (32768.);
1204
}
1205
1206
//=============================================================
1207
template <class T>
1208
int16_t AudioFile<T>::sampleToSixteenBitInt (T sample)
1209
{
1210
sample = clamp (sample, -1., 1.);
1211
return static_cast<int16_t> (sample * 32767.);
1212
}
1213
1214
//=============================================================
1215
template <class T>
1216
uint8_t AudioFile<T>::sampleToSingleByte (T sample)
1217
{
1218
sample = clamp (sample, -1., 1.);
1219
sample = (sample + 1.) / 2.;
1220
return static_cast<uint8_t> (sample * 255.);
1221
}
1222
1223
//=============================================================
1224
template <class T>
1225
T AudioFile<T>::singleByteToSample (uint8_t sample)
1226
{
1227
return static_cast<T> (sample - 128) / static_cast<T> (128.);
1228
}
1229
1230
//=============================================================
1231
template <class T>
1232
T AudioFile<T>::clamp (T value, T minValue, T maxValue)
1233
{
1234
value = std::min (value, maxValue);
1235
value = std::max (value, minValue);
1236
return value;
1237
}
1238
1239
//=============================================================
1240
template <class T>
1241
void AudioFile<T>::reportError (std::string errorMessage)
1242
{
1243
if (logErrorsToConsole)
1244
std::cout << errorMessage << std::endl;
1245
}
1246
1247
#if defined (_MSC_VER)
1248
__pragma(warning (pop))
1249
#elif defined (__GNUC__)
1250
_Pragma("GCC diagnostic pop")
1251
#endif
1252
1253
#endif /* AudioFile_h */
1254
1255