CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/ext/minimp3/minimp3.h
Views: 1401
#ifndef MINIMP3_H1#define MINIMP3_H2/*3https://github.com/lieff/minimp34To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.5This software is distributed without any warranty.6See <http://creativecommons.org/publicdomain/zero/1.0/>.7*/8#include <stdint.h>910#define MINIMP3_MAX_SAMPLES_PER_FRAME (1152*2)1112typedef struct13{14int frame_bytes, frame_offset, channels, hz, layer, bitrate_kbps;15} mp3dec_frame_info_t;1617typedef struct18{19float mdct_overlap[2][9*32], qmf_state[15*2*32];20int reserv, free_format_bytes;21unsigned char header[4], reserv_buf[511];22} mp3dec_t;2324#ifdef __cplusplus25extern "C" {26#endif /* __cplusplus */2728void mp3dec_init(mp3dec_t *dec);29#ifndef MINIMP3_FLOAT_OUTPUT30typedef int16_t mp3d_sample_t;31#else /* MINIMP3_FLOAT_OUTPUT */32typedef float mp3d_sample_t;33void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples);34#endif /* MINIMP3_FLOAT_OUTPUT */35int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info);3637#ifdef __cplusplus38}39#endif /* __cplusplus */4041#endif /* MINIMP3_H */42#if defined(MINIMP3_IMPLEMENTATION) && !defined(_MINIMP3_IMPLEMENTATION_GUARD)43#define _MINIMP3_IMPLEMENTATION_GUARD4445#include <stdlib.h>46#include <string.h>4748#define MAX_FREE_FORMAT_FRAME_SIZE 2304 /* more than ISO spec's */49#ifndef MAX_FRAME_SYNC_MATCHES50#define MAX_FRAME_SYNC_MATCHES 1051#endif /* MAX_FRAME_SYNC_MATCHES */5253#define MAX_L3_FRAME_PAYLOAD_BYTES MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */5455#define MAX_BITRESERVOIR_BYTES 51156#define SHORT_BLOCK_TYPE 257#define STOP_BLOCK_TYPE 358#define MODE_MONO 359#define MODE_JOINT_STEREO 160#define HDR_SIZE 461#define HDR_IS_MONO(h) (((h[3]) & 0xC0) == 0xC0)62#define HDR_IS_MS_STEREO(h) (((h[3]) & 0xE0) == 0x60)63#define HDR_IS_FREE_FORMAT(h) (((h[2]) & 0xF0) == 0)64#define 
HDR_IS_CRC(h) (!((h[1]) & 1))65#define HDR_TEST_PADDING(h) ((h[2]) & 0x2)66#define HDR_TEST_MPEG1(h) ((h[1]) & 0x8)67#define HDR_TEST_NOT_MPEG25(h) ((h[1]) & 0x10)68#define HDR_TEST_I_STEREO(h) ((h[3]) & 0x10)69#define HDR_TEST_MS_STEREO(h) ((h[3]) & 0x20)70#define HDR_GET_STEREO_MODE(h) (((h[3]) >> 6) & 3)71#define HDR_GET_STEREO_MODE_EXT(h) (((h[3]) >> 4) & 3)72#define HDR_GET_LAYER(h) (((h[1]) >> 1) & 3)73#define HDR_GET_BITRATE(h) ((h[2]) >> 4)74#define HDR_GET_SAMPLE_RATE(h) (((h[2]) >> 2) & 3)75#define HDR_GET_MY_SAMPLE_RATE(h) (HDR_GET_SAMPLE_RATE(h) + (((h[1] >> 3) & 1) + ((h[1] >> 4) & 1))*3)76#define HDR_IS_FRAME_576(h) ((h[1] & 14) == 2)77#define HDR_IS_LAYER_1(h) ((h[1] & 6) == 6)7879#define BITS_DEQUANTIZER_OUT -180#define MAX_SCF (255 + BITS_DEQUANTIZER_OUT*4 - 210)81#define MAX_SCFI ((MAX_SCF + 3) & ~3)8283#define MINIMP3_MIN(a, b) ((a) > (b) ? (b) : (a))84#define MINIMP3_MAX(a, b) ((a) < (b) ? (b) : (a))8586#if !defined(MINIMP3_NO_SIMD)8788#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__) || defined(_M_ARM64))89/* x64 always have SSE2, arm64 always have neon, no need for generic code */90#define MINIMP3_ONLY_SIMD91#endif /* SIMD checks... 
*/9293#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))94#if defined(_MSC_VER)95#include <intrin.h>96#endif /* defined(_MSC_VER) */97#include <immintrin.h>98#define HAVE_SSE 199#define HAVE_SIMD 1100#define VSTORE _mm_storeu_ps101#define VLD _mm_loadu_ps102#define VSET _mm_set1_ps103#define VADD _mm_add_ps104#define VSUB _mm_sub_ps105#define VMUL _mm_mul_ps106#define VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y))107#define VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y))108#define VMUL_S(x, s) _mm_mul_ps(x, _mm_set1_ps(s))109#define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3))110typedef __m128 f4;111#if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD)112#define minimp3_cpuid __cpuid113#else /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */114static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType)115{116#if defined(__PIC__)117__asm__ __volatile__(118#if defined(__x86_64__)119"push %%rbx\n"120"cpuid\n"121"xchgl %%ebx, %1\n"122"pop %%rbx\n"123#else /* defined(__x86_64__) */124"xchgl %%ebx, %1\n"125"cpuid\n"126"xchgl %%ebx, %1\n"127#endif /* defined(__x86_64__) */128: "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])129: "a" (InfoType));130#else /* defined(__PIC__) */131__asm__ __volatile__(132"cpuid"133: "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])134: "a" (InfoType));135#endif /* defined(__PIC__)*/136}137#endif /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */138static int have_simd(void)139{140#ifdef MINIMP3_ONLY_SIMD141return 1;142#else /* MINIMP3_ONLY_SIMD */143static int g_have_simd;144int CPUInfo[4];145#ifdef MINIMP3_TEST146static int g_counter;147if (g_counter++ > 100)148return 0;149#endif /* MINIMP3_TEST */150if (g_have_simd)151goto end;152minimp3_cpuid(CPUInfo, 0);153g_have_simd = 1;154if (CPUInfo[0] > 0)155{156minimp3_cpuid(CPUInfo, 1);157g_have_simd = (CPUInfo[3] 
& (1 << 26)) + 1; /* SSE2 */158}159end:160return g_have_simd - 1;161#endif /* MINIMP3_ONLY_SIMD */162}163#elif defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)164#include <arm_neon.h>165#define HAVE_SSE 0166#define HAVE_SIMD 1167#define VSTORE vst1q_f32168#define VLD vld1q_f32169#define VSET vmovq_n_f32170#define VADD vaddq_f32171#define VSUB vsubq_f32172#define VMUL vmulq_f32173#define VMAC(a, x, y) vmlaq_f32(a, x, y)174#define VMSB(a, x, y) vmlsq_f32(a, x, y)175#define VMUL_S(x, s) vmulq_f32(x, vmovq_n_f32(s))176#define VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x)))177typedef float32x4_t f4;178static int have_simd()179{ /* TODO: detect neon for !MINIMP3_ONLY_SIMD */180return 1;181}182#else /* SIMD checks... */183#define HAVE_SSE 0184#define HAVE_SIMD 0185#ifdef MINIMP3_ONLY_SIMD186#error MINIMP3_ONLY_SIMD used, but SSE/NEON not enabled187#endif /* MINIMP3_ONLY_SIMD */188#endif /* SIMD checks... */189#else /* !defined(MINIMP3_NO_SIMD) */190#define HAVE_SIMD 0191#endif /* !defined(MINIMP3_NO_SIMD) */192193#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64)194#define HAVE_ARMV6 1195static __inline__ __attribute__((always_inline)) int32_t minimp3_clip_int16_arm(int32_t a)196{197int32_t x = 0;198__asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));199return x;200}201#else202#define HAVE_ARMV6 0203#endif204205typedef struct206{207const uint8_t *buf;208int pos, limit;209} bs_t;210211typedef struct212{213float scf[3*64];214uint8_t total_bands, stereo_bands, bitalloc[64], scfcod[64];215} L12_scale_info;216217typedef struct218{219uint8_t tab_offset, code_tab_width, band_count;220} L12_subband_alloc_t;221222typedef struct223{224const uint8_t *sfbtab;225uint16_t part_23_length, big_values, scalefac_compress;226uint8_t global_gain, block_type, mixed_block_flag, n_long_sfb, n_short_sfb;227uint8_t table_select[3], region_count[3], subblock_gain[3];228uint8_t preflag, scalefac_scale, 
count1_table, scfsi;229} L3_gr_info_t;230231typedef struct232{233bs_t bs;234uint8_t maindata[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES];235L3_gr_info_t gr_info[4];236float grbuf[2][576], scf[40], syn[18 + 15][2*32];237uint8_t ist_pos[2][39];238} mp3dec_scratch_t;239240static void bs_init(bs_t *bs, const uint8_t *data, int bytes)241{242bs->buf = data;243bs->pos = 0;244bs->limit = bytes*8;245}246247static uint32_t get_bits(bs_t *bs, int n)248{249uint32_t next, cache = 0, s = bs->pos & 7;250int shl = n + s;251const uint8_t *p = bs->buf + (bs->pos >> 3);252if ((bs->pos += n) > bs->limit)253return 0;254next = *p++ & (255 >> s);255while ((shl -= 8) > 0)256{257cache |= next << shl;258next = *p++;259}260return cache | (next >> -shl);261}262263static int hdr_valid(const uint8_t *h)264{265return h[0] == 0xff &&266((h[1] & 0xF0) == 0xf0 || (h[1] & 0xFE) == 0xe2) &&267(HDR_GET_LAYER(h) != 0) &&268(HDR_GET_BITRATE(h) != 15) &&269(HDR_GET_SAMPLE_RATE(h) != 3);270}271272static int hdr_compare(const uint8_t *h1, const uint8_t *h2)273{274return hdr_valid(h2) &&275((h1[1] ^ h2[1]) & 0xFE) == 0 &&276((h1[2] ^ h2[2]) & 0x0C) == 0 &&277!(HDR_IS_FREE_FORMAT(h1) ^ HDR_IS_FREE_FORMAT(h2));278}279280static unsigned hdr_bitrate_kbps(const uint8_t *h)281{282static const uint8_t halfrate[2][3][15] = {283{ { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } },284{ { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } },285};286return 2*halfrate[!!HDR_TEST_MPEG1(h)][HDR_GET_LAYER(h) - 1][HDR_GET_BITRATE(h)];287}288289static unsigned hdr_sample_rate_hz(const uint8_t *h)290{291static const unsigned g_hz[3] = { 44100, 48000, 32000 };292return g_hz[HDR_GET_SAMPLE_RATE(h)] >> (int)!HDR_TEST_MPEG1(h) >> (int)!HDR_TEST_NOT_MPEG25(h);293}294295static unsigned hdr_frame_samples(const uint8_t 
*h)296{297return HDR_IS_LAYER_1(h) ? 384 : (1152 >> (int)HDR_IS_FRAME_576(h));298}299300static int hdr_frame_bytes(const uint8_t *h, int free_format_size)301{302int frame_bytes = hdr_frame_samples(h)*hdr_bitrate_kbps(h)*125/hdr_sample_rate_hz(h);303if (HDR_IS_LAYER_1(h))304{305frame_bytes &= ~3; /* slot align */306}307return frame_bytes ? frame_bytes : free_format_size;308}309310static int hdr_padding(const uint8_t *h)311{312return HDR_TEST_PADDING(h) ? (HDR_IS_LAYER_1(h) ? 4 : 1) : 0;313}314315#ifndef MINIMP3_ONLY_MP3316static const L12_subband_alloc_t *L12_subband_alloc_table(const uint8_t *hdr, L12_scale_info *sci)317{318const L12_subband_alloc_t *alloc;319int mode = HDR_GET_STEREO_MODE(hdr);320int nbands, stereo_bands = (mode == MODE_MONO) ? 0 : (mode == MODE_JOINT_STEREO) ? (HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32;321322if (HDR_IS_LAYER_1(hdr))323{324static const L12_subband_alloc_t g_alloc_L1[] = { { 76, 4, 32 } };325alloc = g_alloc_L1;326nbands = 32;327} else if (!HDR_TEST_MPEG1(hdr))328{329static const L12_subband_alloc_t g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } };330alloc = g_alloc_L2M2;331nbands = 30;332} else333{334static const L12_subband_alloc_t g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } };335int sample_rate_idx = HDR_GET_SAMPLE_RATE(hdr);336unsigned kbps = hdr_bitrate_kbps(hdr) >> (int)(mode != MODE_MONO);337if (!kbps) /* free-format */338{339kbps = 192;340}341342alloc = g_alloc_L2M1;343nbands = 27;344if (kbps < 56)345{346static const L12_subband_alloc_t g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } };347alloc = g_alloc_L2M1_lowrate;348nbands = sample_rate_idx == 2 ? 
12 : 8;349} else if (kbps >= 96 && sample_rate_idx != 1)350{351nbands = 30;352}353}354355sci->total_bands = (uint8_t)nbands;356sci->stereo_bands = (uint8_t)MINIMP3_MIN(stereo_bands, nbands);357358return alloc;359}360361static void L12_read_scalefactors(bs_t *bs, uint8_t *pba, uint8_t *scfcod, int bands, float *scf)362{363static const float g_deq_L12[18*3] = {364#define DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x365DQ(3),DQ(7),DQ(15),DQ(31),DQ(63),DQ(127),DQ(255),DQ(511),DQ(1023),DQ(2047),DQ(4095),DQ(8191),DQ(16383),DQ(32767),DQ(65535),DQ(3),DQ(5),DQ(9)366};367int i, m;368for (i = 0; i < bands; i++)369{370float s = 0;371int ba = *pba++;372int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0;373for (m = 4; m; m >>= 1)374{375if (mask & m)376{377int b = get_bits(bs, 6);378s = g_deq_L12[ba*3 - 6 + b % 3]*(1 << 21 >> b/3);379}380*scf++ = s;381}382}383}384385static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sci)386{387static const uint8_t g_bitalloc_code_tab[] = {3880,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16,3890,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16,3900,17,18, 3,19,4,5,16,3910,17,18,16,3920,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15,3930,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14,3940, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16395};396const L12_subband_alloc_t *subband_alloc = L12_subband_alloc_table(hdr, sci);397398int i, k = 0, ba_bits = 0;399const uint8_t *ba_code_tab = g_bitalloc_code_tab;400401for (i = 0; i < sci->total_bands; i++)402{403uint8_t ba;404if (i == k)405{406k += subband_alloc->band_count;407ba_bits = subband_alloc->code_tab_width;408ba_code_tab = g_bitalloc_code_tab + subband_alloc->tab_offset;409subband_alloc++;410}411ba = ba_code_tab[get_bits(bs, ba_bits)];412sci->bitalloc[2*i] = ba;413if (i < sci->stereo_bands)414{415ba = ba_code_tab[get_bits(bs, ba_bits)];416}417sci->bitalloc[2*i + 1] = sci->stereo_bands ? ba : 0;418}419420for (i = 0; i < 2*sci->total_bands; i++)421{422sci->scfcod[i] = sci->bitalloc[i] ? 
HDR_IS_LAYER_1(hdr) ? 2 : get_bits(bs, 2) : 6;423}424425L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf);426427for (i = sci->stereo_bands; i < sci->total_bands; i++)428{429sci->bitalloc[2*i + 1] = 0;430}431}432433static int L12_dequantize_granule(float *grbuf, bs_t *bs, L12_scale_info *sci, int group_size)434{435int i, j, k, choff = 576;436for (j = 0; j < 4; j++)437{438float *dst = grbuf + group_size*j;439for (i = 0; i < 2*sci->total_bands; i++)440{441int ba = sci->bitalloc[i];442if (ba != 0)443{444if (ba < 17)445{446int half = (1 << (ba - 1)) - 1;447for (k = 0; k < group_size; k++)448{449dst[k] = (float)((int)get_bits(bs, ba) - half);450}451} else452{453unsigned mod = (2 << (ba - 17)) + 1; /* 3, 5, 9 */454unsigned code = get_bits(bs, mod + 2 - (mod >> 3)); /* 5, 7, 10 */455for (k = 0; k < group_size; k++, code /= mod)456{457dst[k] = (float)((int)(code % mod - mod/2));458}459}460}461dst += choff;462choff = 18 - choff;463}464}465return group_size*4;466}467468static void L12_apply_scf_384(L12_scale_info *sci, const float *scf, float *dst)469{470int i, k;471memcpy(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float));472for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6)473{474for (k = 0; k < 12; k++)475{476dst[k + 0] *= scf[0];477dst[k + 576] *= scf[3];478}479}480}481#endif /* MINIMP3_ONLY_MP3 */482483static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr)484{485static const uint8_t g_scf_long[8][23] = {486{ 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },487{ 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 },488{ 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },489{ 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 },490{ 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },491{ 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 },492{ 
4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 },493{ 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 }494};495static const uint8_t g_scf_short[8][40] = {496{ 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },497{ 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },498{ 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },499{ 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },500{ 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },501{ 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },502{ 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },503{ 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }504};505static const uint8_t g_scf_mixed[8][40] = {506{ 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },507{ 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },508{ 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },509{ 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },510{ 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },511{ 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },512{ 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },513{ 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }514};515516unsigned tables, scfsi = 0;517int 
main_data_begin, part_23_sum = 0;518int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0);519int gr_count = HDR_IS_MONO(hdr) ? 1 : 2;520521if (HDR_TEST_MPEG1(hdr))522{523gr_count *= 2;524main_data_begin = get_bits(bs, 9);525scfsi = get_bits(bs, 7 + gr_count);526} else527{528main_data_begin = get_bits(bs, 8 + gr_count) >> gr_count;529}530531do532{533if (HDR_IS_MONO(hdr))534{535scfsi <<= 4;536}537gr->part_23_length = (uint16_t)get_bits(bs, 12);538part_23_sum += gr->part_23_length;539gr->big_values = (uint16_t)get_bits(bs, 9);540if (gr->big_values > 288)541{542return -1;543}544gr->global_gain = (uint8_t)get_bits(bs, 8);545gr->scalefac_compress = (uint16_t)get_bits(bs, HDR_TEST_MPEG1(hdr) ? 4 : 9);546gr->sfbtab = g_scf_long[sr_idx];547gr->n_long_sfb = 22;548gr->n_short_sfb = 0;549if (get_bits(bs, 1))550{551gr->block_type = (uint8_t)get_bits(bs, 2);552if (!gr->block_type)553{554return -1;555}556gr->mixed_block_flag = (uint8_t)get_bits(bs, 1);557gr->region_count[0] = 7;558gr->region_count[1] = 255;559if (gr->block_type == SHORT_BLOCK_TYPE)560{561scfsi &= 0x0F0F;562if (!gr->mixed_block_flag)563{564gr->region_count[0] = 8;565gr->sfbtab = g_scf_short[sr_idx];566gr->n_long_sfb = 0;567gr->n_short_sfb = 39;568} else569{570gr->sfbtab = g_scf_mixed[sr_idx];571gr->n_long_sfb = HDR_TEST_MPEG1(hdr) ? 8 : 6;572gr->n_short_sfb = 30;573}574}575tables = get_bits(bs, 10);576tables <<= 5;577gr->subblock_gain[0] = (uint8_t)get_bits(bs, 3);578gr->subblock_gain[1] = (uint8_t)get_bits(bs, 3);579gr->subblock_gain[2] = (uint8_t)get_bits(bs, 3);580} else581{582gr->block_type = 0;583gr->mixed_block_flag = 0;584tables = get_bits(bs, 15);585gr->region_count[0] = (uint8_t)get_bits(bs, 4);586gr->region_count[1] = (uint8_t)get_bits(bs, 3);587gr->region_count[2] = 255;588}589gr->table_select[0] = (uint8_t)(tables >> 10);590gr->table_select[1] = (uint8_t)((tables >> 5) & 31);591gr->table_select[2] = (uint8_t)((tables) & 31);592gr->preflag = HDR_TEST_MPEG1(hdr) ? 
get_bits(bs, 1) : (gr->scalefac_compress >= 500);593gr->scalefac_scale = (uint8_t)get_bits(bs, 1);594gr->count1_table = (uint8_t)get_bits(bs, 1);595gr->scfsi = (uint8_t)((scfsi >> 12) & 15);596scfsi <<= 4;597gr++;598} while(--gr_count);599600if (part_23_sum + bs->pos > bs->limit + main_data_begin*8)601{602return -1;603}604605return main_data_begin;606}607608static void L3_read_scalefactors(uint8_t *scf, uint8_t *ist_pos, const uint8_t *scf_size, const uint8_t *scf_count, bs_t *bitbuf, int scfsi)609{610int i, k;611for (i = 0; i < 4 && scf_count[i]; i++, scfsi *= 2)612{613int cnt = scf_count[i];614if (scfsi & 8)615{616memcpy(scf, ist_pos, cnt);617} else618{619int bits = scf_size[i];620if (!bits)621{622memset(scf, 0, cnt);623memset(ist_pos, 0, cnt);624} else625{626int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1;627for (k = 0; k < cnt; k++)628{629int s = get_bits(bitbuf, bits);630ist_pos[k] = (s == max_scf ? -1 : s);631scf[k] = s;632}633}634}635ist_pos += cnt;636scf += cnt;637}638scf[0] = scf[1] = scf[2] = 0;639}640641static float L3_ldexp_q2(float y, int exp_q2)642{643static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f };644int e;645do646{647e = MINIMP3_MIN(30*4, exp_q2);648y *= g_expfrac[e & 3]*(1 << 30 >> (e >> 2));649} while ((exp_q2 -= e) > 0);650return y;651}652653static void L3_decode_scalefactors(const uint8_t *hdr, uint8_t *ist_pos, bs_t *bs, const L3_gr_info_t *gr, float *scf, int ch)654{655static const uint8_t g_scf_partitions[3][28] = {656{ 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 },657{ 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 },658{ 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 }659};660const uint8_t *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb];661uint8_t scf_size[4], iscf[40];662int i, scf_shift = gr->scalefac_scale + 1, gain_exp, scfsi = gr->scfsi;663float gain;664665if (HDR_TEST_MPEG1(hdr))666{667static 
const uint8_t g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 };668int part = g_scfc_decode[gr->scalefac_compress];669scf_size[1] = scf_size[0] = (uint8_t)(part >> 2);670scf_size[3] = scf_size[2] = (uint8_t)(part & 3);671} else672{673static const uint8_t g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 };674int k, modprod, sfc, ist = HDR_TEST_I_STEREO(hdr) && ch;675sfc = gr->scalefac_compress >> ist;676for (k = ist*3*4; sfc >= 0; sfc -= modprod, k += 4)677{678for (modprod = 1, i = 3; i >= 0; i--)679{680scf_size[i] = (uint8_t)(sfc / modprod % g_mod[k + i]);681modprod *= g_mod[k + i];682}683}684scf_partition += k;685scfsi = -16;686}687L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi);688689if (gr->n_short_sfb)690{691int sh = 3 - scf_shift;692for (i = 0; i < gr->n_short_sfb; i += 3)693{694iscf[gr->n_long_sfb + i + 0] += gr->subblock_gain[0] << sh;695iscf[gr->n_long_sfb + i + 1] += gr->subblock_gain[1] << sh;696iscf[gr->n_long_sfb + i + 2] += gr->subblock_gain[2] << sh;697}698} else if (gr->preflag)699{700static const uint8_t g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 };701for (i = 0; i < 10; i++)702{703iscf[11 + i] += g_preamp[i];704}705}706707gain_exp = gr->global_gain + BITS_DEQUANTIZER_OUT*4 - 210 - (HDR_IS_MS_STEREO(hdr) ? 
2 : 0);708gain = L3_ldexp_q2(1 << (MAX_SCFI/4), MAX_SCFI - gain_exp);709for (i = 0; i < (int)(gr->n_long_sfb + gr->n_short_sfb); i++)710{711scf[i] = L3_ldexp_q2(gain, iscf[i] << scf_shift);712}713}714715static const float g_pow43[129 + 16] = {7160,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f,7170,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f718};719720static float L3_pow_43(int x)721{722float frac;723int sign, mult = 
256;724725if (x < 129)726{727return g_pow43[16 + x];728}729730if (x < 1024)731{732mult = 16;733x <<= 3;734}735736sign = 2*x & 64;737frac = (float)((x & 63) - sign) / ((x & ~63) + sign);738return g_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult;739}740741static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const float *scf, int layer3gr_limit)742{743static const int16_t tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,744785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,745-255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288,746-255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288,747-253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258,748-254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259,749-252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258,750-252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258,751-253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,104
2,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259,752-251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258,753-251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290,754-252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259,755-250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1
610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258,756-250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503
,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259,757-251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859
,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258,758-253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,
1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 };759static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205 };760static const uint8_t tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 };761static const int16_t tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 };762static const uint8_t g_linbits[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 };763764#define PEEK_BITS(n) (bs_cache >> (32 - n))765#define FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); }766#define CHECK_BITS while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; }767#define BSPOS ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh)768769float one = 0.0f;770int ireg = 0, big_val_cnt = gr_info->big_values;771const uint8_t *sfb = gr_info->sfbtab;772const uint8_t *bs_next_ptr = bs->buf + bs->pos/8;773uint32_t bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7);774int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8;775bs_next_ptr += 4;776777while (big_val_cnt > 0)778{779int tab_num = gr_info->table_select[ireg];780int sfb_cnt = gr_info->region_count[ireg++];781const int16_t *codebook = tabs + tabindex[tab_num];782int linbits = g_linbits[tab_num];783if (linbits)784{785do786{787np = *sfb++ / 2;788pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);789one = *scf++;790do791{792int j, w = 5;793int leaf = codebook[PEEK_BITS(w)];794while (leaf < 0)795{796FLUSH_BITS(w);797w = leaf & 7;798leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];799}800FLUSH_BITS(leaf >> 8);801802for (j = 0; j < 2; j++, dst++, leaf >>= 4)803{804int lsb = leaf & 0x0F;805if (lsb == 
15)806{807lsb += PEEK_BITS(linbits);808FLUSH_BITS(linbits);809CHECK_BITS;810*dst = one*L3_pow_43(lsb)*((int32_t)bs_cache < 0 ? -1: 1);811} else812{813*dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;814}815FLUSH_BITS(lsb ? 1 : 0);816}817CHECK_BITS;818} while (--pairs_to_decode);819} while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);820} else821{822do823{824np = *sfb++ / 2;825pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);826one = *scf++;827do828{829int j, w = 5;830int leaf = codebook[PEEK_BITS(w)];831while (leaf < 0)832{833FLUSH_BITS(w);834w = leaf & 7;835leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];836}837FLUSH_BITS(leaf >> 8);838839for (j = 0; j < 2; j++, dst++, leaf >>= 4)840{841int lsb = leaf & 0x0F;842*dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;843FLUSH_BITS(lsb ? 1 : 0);844}845CHECK_BITS;846} while (--pairs_to_decode);847} while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);848}849}850851for (np = 1 - big_val_cnt;; dst += 4)852{853const uint8_t *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32;854int leaf = codebook_count1[PEEK_BITS(4)];855if (!(leaf & 8))856{857leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))];858}859FLUSH_BITS(leaf & 7);860if (BSPOS > layer3gr_limit)861{862break;863}864#define RELOAD_SCALEFACTOR if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; }865#define DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((int32_t)bs_cache < 0) ? 
-one : one; FLUSH_BITS(1) }866RELOAD_SCALEFACTOR;867DEQ_COUNT1(0);868DEQ_COUNT1(1);869RELOAD_SCALEFACTOR;870DEQ_COUNT1(2);871DEQ_COUNT1(3);872CHECK_BITS;873}874875bs->pos = layer3gr_limit;876}877878static void L3_midside_stereo(float *left, int n)879{880int i = 0;881float *right = left + 576;882#if HAVE_SIMD883if (have_simd())884{885for (; i < n - 3; i += 4)886{887f4 vl = VLD(left + i);888f4 vr = VLD(right + i);889VSTORE(left + i, VADD(vl, vr));890VSTORE(right + i, VSUB(vl, vr));891}892#ifdef __GNUC__893/* Workaround for spurious -Waggressive-loop-optimizations warning from gcc.894* For more info see: https://github.com/lieff/minimp3/issues/88895*/896if (__builtin_constant_p(n % 4 == 0) && n % 4 == 0)897return;898#endif899}900#endif /* HAVE_SIMD */901for (; i < n; i++)902{903float a = left[i];904float b = right[i];905left[i] = a + b;906right[i] = a - b;907}908}909910static void L3_intensity_stereo_band(float *left, int n, float kl, float kr)911{912int i;913for (i = 0; i < n; i++)914{915left[i + 576] = left[i]*kr;916left[i] = left[i]*kl;917}918}919920static void L3_stereo_top_band(const float *right, const uint8_t *sfb, int nbands, int max_band[3])921{922int i, k;923924max_band[0] = max_band[1] = max_band[2] = -1;925926for (i = 0; i < nbands; i++)927{928for (k = 0; k < sfb[i]; k += 2)929{930if (right[k] != 0 || right[k + 1] != 0)931{932max_band[i % 3] = i;933break;934}935}936right += sfb[i];937}938}939940static void L3_stereo_process(float *left, const uint8_t *ist_pos, const uint8_t *sfb, const uint8_t *hdr, int max_band[3], int mpeg2_sh)941{942static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 };943unsigned i, max_pos = HDR_TEST_MPEG1(hdr) ? 7 : 64;944945for (i = 0; sfb[i]; i++)946{947unsigned ipos = ist_pos[i];948if ((int)i > max_band[i % 3] && ipos < max_pos)949{950float kl, kr, s = HDR_TEST_MS_STEREO(hdr) ? 
1.41421356f : 1;951if (HDR_TEST_MPEG1(hdr))952{953kl = g_pan[2*ipos];954kr = g_pan[2*ipos + 1];955} else956{957kl = 1;958kr = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);959if (ipos & 1)960{961kl = kr;962kr = 1;963}964}965L3_intensity_stereo_band(left, sfb[i], kl*s, kr*s);966} else if (HDR_TEST_MS_STEREO(hdr))967{968L3_midside_stereo(left, sfb[i]);969}970left += sfb[i];971}972}973974static void L3_intensity_stereo(float *left, uint8_t *ist_pos, const L3_gr_info_t *gr, const uint8_t *hdr)975{976int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb;977int i, max_blocks = gr->n_short_sfb ? 3 : 1;978979L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band);980if (gr->n_long_sfb)981{982max_band[0] = max_band[1] = max_band[2] = MINIMP3_MAX(MINIMP3_MAX(max_band[0], max_band[1]), max_band[2]);983}984for (i = 0; i < max_blocks; i++)985{986int default_pos = HDR_TEST_MPEG1(hdr) ? 3 : 0;987int itop = n_sfb - max_blocks + i;988int prev = itop - max_blocks;989ist_pos[itop] = max_band[i] >= prev ? 
default_pos : ist_pos[prev];990}991L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1);992}993994static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb)995{996int i, len;997float *src = grbuf, *dst = scratch;998999for (;0 != (len = *sfb); sfb += 3, src += 2*len)1000{1001for (i = 0; i < len; i++, src++)1002{1003*dst++ = src[0*len];1004*dst++ = src[1*len];1005*dst++ = src[2*len];1006}1007}1008memcpy(grbuf, scratch, (dst - scratch)*sizeof(float));1009}10101011static void L3_antialias(float *grbuf, int nbands)1012{1013static const float g_aa[2][8] = {1014{0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f},1015{0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f}1016};10171018for (; nbands > 0; nbands--, grbuf += 18)1019{1020int i = 0;1021#if HAVE_SIMD1022if (have_simd()) for (; i < 8; i += 4)1023{1024f4 vu = VLD(grbuf + 18 + i);1025f4 vd = VLD(grbuf + 14 - i);1026f4 vc0 = VLD(g_aa[0] + i);1027f4 vc1 = VLD(g_aa[1] + i);1028vd = VREV(vd);1029VSTORE(grbuf + 18 + i, VSUB(VMUL(vu, vc0), VMUL(vd, vc1)));1030vd = VADD(VMUL(vu, vc1), VMUL(vd, vc0));1031VSTORE(grbuf + 14 - i, VREV(vd));1032}1033#endif /* HAVE_SIMD */1034#ifndef MINIMP3_ONLY_SIMD1035for(; i < 8; i++)1036{1037float u = grbuf[18 + i];1038float d = grbuf[17 - i];1039grbuf[18 + i] = u*g_aa[0][i] - d*g_aa[1][i];1040grbuf[17 - i] = u*g_aa[1][i] + d*g_aa[0][i];1041}1042#endif /* MINIMP3_ONLY_SIMD */1043}1044}10451046static void L3_dct3_9(float *y)1047{1048float s0, s1, s2, s3, s4, s5, s6, s7, s8, t0, t2, t4;10491050s0 = y[0]; s2 = y[2]; s4 = y[4]; s6 = y[6]; s8 = y[8];1051t0 = s0 + s6*0.5f;1052s0 -= s6;1053t4 = (s4 + s2)*0.93969262f;1054t2 = (s8 + s2)*0.76604444f;1055s6 = (s4 - s8)*0.17364818f;1056s4 += s8 - s2;10571058s2 = s0 - s4*0.5f;1059y[4] = s4 + s0;1060s8 = t0 - t2 + s6;1061s0 = t0 - t4 + t2;1062s4 = t0 + t4 - s6;10631064s1 = y[1]; s3 = y[3]; s5 = y[5]; s7 = 
y[7];10651066s3 *= 0.86602540f;1067t0 = (s5 + s1)*0.98480775f;1068t4 = (s5 - s7)*0.34202014f;1069t2 = (s1 + s7)*0.64278761f;1070s1 = (s1 - s5 - s7)*0.86602540f;10711072s5 = t0 - s3 - t2;1073s7 = t4 - s3 - t0;1074s3 = t4 + s3 - t2;10751076y[0] = s4 - s7;1077y[1] = s2 + s1;1078y[2] = s0 - s3;1079y[3] = s8 + s5;1080y[5] = s8 - s5;1081y[6] = s0 + s3;1082y[7] = s2 - s1;1083y[8] = s4 + s7;1084}10851086static void L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands)1087{1088int i, j;1089static const float g_twid9[18] = {10900.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f1091};10921093for (j = 0; j < nbands; j++, grbuf += 18, overlap += 9)1094{1095float co[9], si[9];1096co[0] = -grbuf[0];1097si[0] = grbuf[17];1098for (i = 0; i < 4; i++)1099{1100si[8 - 2*i] = grbuf[4*i + 1] - grbuf[4*i + 2];1101co[1 + 2*i] = grbuf[4*i + 1] + grbuf[4*i + 2];1102si[7 - 2*i] = grbuf[4*i + 4] - grbuf[4*i + 3];1103co[2 + 2*i] = -(grbuf[4*i + 3] + grbuf[4*i + 4]);1104}1105L3_dct3_9(co);1106L3_dct3_9(si);11071108si[1] = -si[1];1109si[3] = -si[3];1110si[5] = -si[5];1111si[7] = -si[7];11121113i = 0;11141115#if HAVE_SIMD1116if (have_simd()) for (; i < 8; i += 4)1117{1118f4 vovl = VLD(overlap + i);1119f4 vc = VLD(co + i);1120f4 vs = VLD(si + i);1121f4 vr0 = VLD(g_twid9 + i);1122f4 vr1 = VLD(g_twid9 + 9 + i);1123f4 vw0 = VLD(window + i);1124f4 vw1 = VLD(window + 9 + i);1125f4 vsum = VADD(VMUL(vc, vr1), VMUL(vs, vr0));1126VSTORE(overlap + i, VSUB(VMUL(vc, vr0), VMUL(vs, vr1)));1127VSTORE(grbuf + i, VSUB(VMUL(vovl, vw0), VMUL(vsum, vw1)));1128vsum = VADD(VMUL(vovl, vw1), VMUL(vsum, vw0));1129VSTORE(grbuf + 14 - i, VREV(vsum));1130}1131#endif /* HAVE_SIMD */1132for (; i < 9; i++)1133{1134float ovl = overlap[i];1135float sum = co[i]*g_twid9[9 + i] + si[i]*g_twid9[0 + i];1136overlap[i] = co[i]*g_twid9[0 + i] - 
si[i]*g_twid9[9 + i];1137grbuf[i] = ovl*window[0 + i] - sum*window[9 + i];1138grbuf[17 - i] = ovl*window[9 + i] + sum*window[0 + i];1139}1140}1141}11421143static void L3_idct3(float x0, float x1, float x2, float *dst)1144{1145float m1 = x1*0.86602540f;1146float a1 = x0 - x2*0.5f;1147dst[1] = x0 + x2;1148dst[0] = a1 + m1;1149dst[2] = a1 - m1;1150}11511152static void L3_imdct12(float *x, float *dst, float *overlap)1153{1154static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f };1155float co[3], si[3];1156int i;11571158L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co);1159L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si);1160si[1] = -si[1];11611162for (i = 0; i < 3; i++)1163{1164float ovl = overlap[i];1165float sum = co[i]*g_twid3[3 + i] + si[i]*g_twid3[0 + i];1166overlap[i] = co[i]*g_twid3[0 + i] - si[i]*g_twid3[3 + i];1167dst[i] = ovl*g_twid3[2 - i] - sum*g_twid3[5 - i];1168dst[5 - i] = ovl*g_twid3[5 - i] + sum*g_twid3[2 - i];1169}1170}11711172static void L3_imdct_short(float *grbuf, float *overlap, int nbands)1173{1174for (;nbands > 0; nbands--, overlap += 9, grbuf += 18)1175{1176float tmp[18];1177memcpy(tmp, grbuf, sizeof(tmp));1178memcpy(grbuf, overlap, 6*sizeof(float));1179L3_imdct12(tmp, grbuf + 6, overlap + 6);1180L3_imdct12(tmp + 1, grbuf + 12, overlap + 6);1181L3_imdct12(tmp + 2, overlap, overlap + 6);1182}1183}11841185static void L3_change_sign(float *grbuf)1186{1187int b, i;1188for (b = 0, grbuf += 18; b < 32; b += 2, grbuf += 36)1189for (i = 1; i < 18; i += 2)1190grbuf[i] = -grbuf[i];1191}11921193static void L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands)1194{1195static const float g_mdct_window[2][18] = {1196{ 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f },1197{ 
1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f }1198};1199if (n_long_bands)1200{1201L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands);1202grbuf += 18*n_long_bands;1203overlap += 9*n_long_bands;1204}1205if (block_type == SHORT_BLOCK_TYPE)1206L3_imdct_short(grbuf, overlap, 32 - n_long_bands);1207else1208L3_imdct36(grbuf, overlap, g_mdct_window[block_type == STOP_BLOCK_TYPE], 32 - n_long_bands);1209}12101211static void L3_save_reservoir(mp3dec_t *h, mp3dec_scratch_t *s)1212{1213int pos = (s->bs.pos + 7)/8u;1214int remains = s->bs.limit/8u - pos;1215if (remains > MAX_BITRESERVOIR_BYTES)1216{1217pos += remains - MAX_BITRESERVOIR_BYTES;1218remains = MAX_BITRESERVOIR_BYTES;1219}1220if (remains > 0)1221{1222memmove(h->reserv_buf, s->maindata + pos, remains);1223}1224h->reserv = remains;1225}12261227static int L3_restore_reservoir(mp3dec_t *h, bs_t *bs, mp3dec_scratch_t *s, int main_data_begin)1228{1229int frame_bytes = (bs->limit - bs->pos)/8;1230int bytes_have = MINIMP3_MIN(h->reserv, main_data_begin);1231memcpy(s->maindata, h->reserv_buf + MINIMP3_MAX(0, h->reserv - main_data_begin), MINIMP3_MIN(h->reserv, main_data_begin));1232memcpy(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);1233bs_init(&s->bs, s->maindata, bytes_have + frame_bytes);1234return h->reserv >= main_data_begin;1235}12361237static void L3_decode(mp3dec_t *h, mp3dec_scratch_t *s, L3_gr_info_t *gr_info, int nch)1238{1239int ch;12401241for (ch = 0; ch < nch; ch++)1242{1243int layer3gr_limit = s->bs.pos + gr_info[ch].part_23_length;1244L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr_info + ch, s->scf, ch);1245L3_huffman(s->grbuf[ch], &s->bs, gr_info + ch, s->scf, layer3gr_limit);1246}12471248if (HDR_TEST_I_STEREO(h->header))1249{1250L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header);1251} else if (HDR_IS_MS_STEREO(h->header))1252{1253L3_midside_stereo(s->grbuf[0], 576);1254}12551256for (ch = 0; ch < nch; 
ch++, gr_info++)1257{1258int aa_bands = 31;1259int n_long_bands = (gr_info->mixed_block_flag ? 2 : 0) << (int)(HDR_GET_MY_SAMPLE_RATE(h->header) == 2);12601261if (gr_info->n_short_sfb)1262{1263aa_bands = n_long_bands - 1;1264L3_reorder(s->grbuf[ch] + n_long_bands*18, s->syn[0], gr_info->sfbtab + gr_info->n_long_sfb);1265}12661267L3_antialias(s->grbuf[ch], aa_bands);1268L3_imdct_gr(s->grbuf[ch], h->mdct_overlap[ch], gr_info->block_type, n_long_bands);1269L3_change_sign(s->grbuf[ch]);1270}1271}12721273static void mp3d_DCT_II(float *grbuf, int n)1274{1275static const float g_sec[24] = {127610.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f1277};1278int i, k = 0;1279#if HAVE_SIMD1280if (have_simd()) for (; k < n; k += 4)1281{1282f4 t[4][8], *x;1283float *y = grbuf + k;12841285for (x = t[0], i = 0; i < 8; i++, x++)1286{1287f4 x0 = VLD(&y[i*18]);1288f4 x1 = VLD(&y[(15 - i)*18]);1289f4 x2 = VLD(&y[(16 + i)*18]);1290f4 x3 = VLD(&y[(31 - i)*18]);1291f4 t0 = VADD(x0, x3);1292f4 t1 = VADD(x1, x2);1293f4 t2 = VMUL_S(VSUB(x1, x2), g_sec[3*i + 0]);1294f4 t3 = VMUL_S(VSUB(x0, x3), g_sec[3*i + 1]);1295x[0] = VADD(t0, t1);1296x[8] = VMUL_S(VSUB(t0, t1), g_sec[3*i + 2]);1297x[16] = VADD(t3, t2);1298x[24] = VMUL_S(VSUB(t3, t2), g_sec[3*i + 2]);1299}1300for (x = t[0], i = 0; i < 4; i++, x += 8)1301{1302f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;1303xt = VSUB(x0, x7); x0 = VADD(x0, x7);1304x7 = VSUB(x1, x6); x1 = VADD(x1, x6);1305x6 = VSUB(x2, x5); x2 = VADD(x2, x5);1306x5 = VSUB(x3, x4); x3 = VADD(x3, x4);1307x4 = VSUB(x0, x3); x0 = VADD(x0, x3);1308x3 = VSUB(x1, x2); x1 = VADD(x1, x2);1309x[0] = VADD(x0, x1);1310x[4] = VMUL_S(VSUB(x0, x1), 0.70710677f);1311x5 = VADD(x5, x6);1312x6 = VMUL_S(VADD(x6, 
x7), 0.70710677f);1313x7 = VADD(x7, xt);1314x3 = VMUL_S(VADD(x3, x4), 0.70710677f);1315x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */1316x7 = VADD(x7, VMUL_S(x5, 0.382683432f));1317x5 = VSUB(x5, VMUL_S(x7, 0.198912367f));1318x0 = VSUB(xt, x6); xt = VADD(xt, x6);1319x[1] = VMUL_S(VADD(xt, x7), 0.50979561f);1320x[2] = VMUL_S(VADD(x4, x3), 0.54119611f);1321x[3] = VMUL_S(VSUB(x0, x5), 0.60134488f);1322x[5] = VMUL_S(VADD(x0, x5), 0.89997619f);1323x[6] = VMUL_S(VSUB(x4, x3), 1.30656302f);1324x[7] = VMUL_S(VSUB(xt, x7), 2.56291556f);1325}13261327if (k > n - 3)1328{1329#if HAVE_SSE1330#define VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v)1331#else /* HAVE_SSE */1332#define VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18], vget_low_f32(v))1333#endif /* HAVE_SSE */1334for (i = 0; i < 7; i++, y += 4*18)1335{1336f4 s = VADD(t[3][i], t[3][i + 1]);1337VSAVE2(0, t[0][i]);1338VSAVE2(1, VADD(t[2][i], s));1339VSAVE2(2, VADD(t[1][i], t[1][i + 1]));1340VSAVE2(3, VADD(t[2][1 + i], s));1341}1342VSAVE2(0, t[0][7]);1343VSAVE2(1, VADD(t[2][7], t[3][7]));1344VSAVE2(2, t[1][7]);1345VSAVE2(3, t[3][7]);1346} else1347{1348#define VSAVE4(i, v) VSTORE(&y[i*18], v)1349for (i = 0; i < 7; i++, y += 4*18)1350{1351f4 s = VADD(t[3][i], t[3][i + 1]);1352VSAVE4(0, t[0][i]);1353VSAVE4(1, VADD(t[2][i], s));1354VSAVE4(2, VADD(t[1][i], t[1][i + 1]));1355VSAVE4(3, VADD(t[2][1 + i], s));1356}1357VSAVE4(0, t[0][7]);1358VSAVE4(1, VADD(t[2][7], t[3][7]));1359VSAVE4(2, t[1][7]);1360VSAVE4(3, t[3][7]);1361}1362} else1363#endif /* HAVE_SIMD */1364#ifdef MINIMP3_ONLY_SIMD1365{} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */1366#else /* MINIMP3_ONLY_SIMD */1367for (; k < n; k++)1368{1369float t[4][8], *x, *y = grbuf + k;13701371for (x = t[0], i = 0; i < 8; i++, x++)1372{1373float x0 = y[i*18];1374float x1 = y[(15 - i)*18];1375float x2 = y[(16 + i)*18];1376float x3 = y[(31 - i)*18];1377float t0 = x0 + x3;1378float t1 = x1 + x2;1379float t2 = (x1 - 
x2)*g_sec[3*i + 0];1380float t3 = (x0 - x3)*g_sec[3*i + 1];1381x[0] = t0 + t1;1382x[8] = (t0 - t1)*g_sec[3*i + 2];1383x[16] = t3 + t2;1384x[24] = (t3 - t2)*g_sec[3*i + 2];1385}1386for (x = t[0], i = 0; i < 4; i++, x += 8)1387{1388float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;1389xt = x0 - x7; x0 += x7;1390x7 = x1 - x6; x1 += x6;1391x6 = x2 - x5; x2 += x5;1392x5 = x3 - x4; x3 += x4;1393x4 = x0 - x3; x0 += x3;1394x3 = x1 - x2; x1 += x2;1395x[0] = x0 + x1;1396x[4] = (x0 - x1)*0.70710677f;1397x5 = x5 + x6;1398x6 = (x6 + x7)*0.70710677f;1399x7 = x7 + xt;1400x3 = (x3 + x4)*0.70710677f;1401x5 -= x7*0.198912367f; /* rotate by PI/8 */1402x7 += x5*0.382683432f;1403x5 -= x7*0.198912367f;1404x0 = xt - x6; xt += x6;1405x[1] = (xt + x7)*0.50979561f;1406x[2] = (x4 + x3)*0.54119611f;1407x[3] = (x0 - x5)*0.60134488f;1408x[5] = (x0 + x5)*0.89997619f;1409x[6] = (x4 - x3)*1.30656302f;1410x[7] = (xt - x7)*2.56291556f;14111412}1413for (i = 0; i < 7; i++, y += 4*18)1414{1415y[0*18] = t[0][i];1416y[1*18] = t[2][i] + t[3][i] + t[3][i + 1];1417y[2*18] = t[1][i] + t[1][i + 1];1418y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1];1419}1420y[0*18] = t[0][7];1421y[1*18] = t[2][7] + t[3][7];1422y[2*18] = t[1][7];1423y[3*18] = t[3][7];1424}1425#endif /* MINIMP3_ONLY_SIMD */1426}1427

#ifndef MINIMP3_FLOAT_OUTPUT
/*
 * Converts one synthesized sample to a saturated 16-bit integer.
 *
 * Generic path: values >= 32766.5 yield 32767 and values <= -32767.5 yield
 * -32768; everything else is converted as (int16_t)(sample + .5f) and then
 * decremented by one when the result is negative, which rounds halves away
 * from zero.
 * HAVE_ARMV6 path: the same rounding is done in 32 bits and the clamp is
 * delegated to minimp3_clip_int16_arm().
 */
static int16_t mp3d_scale_pcm(float sample)
{
#if HAVE_ARMV6
    int32_t s32 = (int32_t)(sample + .5f);
    s32 -= (s32 < 0);
    int16_t s = (int16_t)minimp3_clip_int16_arm(s32);
#else
    if (sample >= 32766.5) return (int16_t) 32767;
    if (sample <= -32767.5) return (int16_t)-32768;
    int16_t s = (int16_t)(sample + .5f);
    s -= (s < 0); /* away from zero, to be compliant */
#endif
    return s;
}
#else /* MINIMP3_FLOAT_OUTPUT */
/*
 * Float-output variant: scales the sample by 1/32768 with no clamping.
 */
static float mp3d_scale_pcm(float sample)
{
    return sample*(1.f/32768.f);
}
#endif /* MINIMP3_FLOAT_OUTPUT */

static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const 
float *z)1451{1452float a;1453a = (z[14*64] - z[ 0]) * 29;1454a += (z[ 1*64] + z[13*64]) * 213;1455a += (z[12*64] - z[ 2*64]) * 459;1456a += (z[ 3*64] + z[11*64]) * 2037;1457a += (z[10*64] - z[ 4*64]) * 5153;1458a += (z[ 5*64] + z[ 9*64]) * 6574;1459a += (z[ 8*64] - z[ 6*64]) * 37489;1460a += z[ 7*64] * 75038;1461pcm[0] = mp3d_scale_pcm(a);14621463z += 2;1464a = z[14*64] * 104;1465a += z[12*64] * 1567;1466a += z[10*64] * 9727;1467a += z[ 8*64] * 64019;1468a += z[ 6*64] * -9975;1469a += z[ 4*64] * -45;1470a += z[ 2*64] * 146;1471a += z[ 0*64] * -5;1472pcm[16*nch] = mp3d_scale_pcm(a);1473}14741475static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins)1476{1477int i;1478float *xr = xl + 576*(nch - 1);1479mp3d_sample_t *dstr = dstl + (nch - 1);14801481static const float g_win[] = {1482-1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,1483-1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856,1484-1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630,1485-1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313,1486-1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908,1487-1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415,1488-2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835,1489-2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169,1490-2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420,1491-2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590,1492-3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679,1493-3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692,1494-4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629,1495-4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494,1496-5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540
,11455,-62684,652901497};1498float *zlin = lins + 15*64;1499const float *w = g_win;15001501zlin[4*15] = xl[18*16];1502zlin[4*15 + 1] = xr[18*16];1503zlin[4*15 + 2] = xl[0];1504zlin[4*15 + 3] = xr[0];15051506zlin[4*31] = xl[1 + 18*16];1507zlin[4*31 + 1] = xr[1 + 18*16];1508zlin[4*31 + 2] = xl[1];1509zlin[4*31 + 3] = xr[1];15101511mp3d_synth_pair(dstr, nch, lins + 4*15 + 1);1512mp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1);1513mp3d_synth_pair(dstl, nch, lins + 4*15);1514mp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64);15151516#if HAVE_SIMD1517if (have_simd()) for (i = 14; i >= 0; i--)1518{1519#define VLOAD(k) f4 w0 = VSET(*w++); f4 w1 = VSET(*w++); f4 vz = VLD(&zlin[4*i - 64*k]); f4 vy = VLD(&zlin[4*i - 64*(15 - k)]);1520#define V0(k) { VLOAD(k) b = VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a = VSUB(VMUL(vz, w0), VMUL(vy, w1)); }1521#define V1(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1))); }1522#define V2(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0))); }1523f4 a, b;1524zlin[4*i] = xl[18*(31 - i)];1525zlin[4*i + 1] = xr[18*(31 - i)];1526zlin[4*i + 2] = xl[1 + 18*(31 - i)];1527zlin[4*i + 3] = xr[1 + 18*(31 - i)];1528zlin[4*i + 64] = xl[1 + 18*(1 + i)];1529zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)];1530zlin[4*i - 64 + 2] = xl[18*(1 + i)];1531zlin[4*i - 64 + 3] = xr[18*(1 + i)];15321533V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)15341535{1536#ifndef MINIMP3_FLOAT_OUTPUT1537#if HAVE_SSE1538static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };1539static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };1540__m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),1541_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));1542dstr[(15 - i)*nch] = _mm_extract_epi16(pcm8, 1);1543dstr[(17 + i)*nch] = _mm_extract_epi16(pcm8, 5);1544dstl[(15 - i)*nch] = _mm_extract_epi16(pcm8, 
0);1545dstl[(17 + i)*nch] = _mm_extract_epi16(pcm8, 4);1546dstr[(47 - i)*nch] = _mm_extract_epi16(pcm8, 3);1547dstr[(49 + i)*nch] = _mm_extract_epi16(pcm8, 7);1548dstl[(47 - i)*nch] = _mm_extract_epi16(pcm8, 2);1549dstl[(49 + i)*nch] = _mm_extract_epi16(pcm8, 6);1550#else /* HAVE_SSE */1551int16x4_t pcma, pcmb;1552a = VADD(a, VSET(0.5f));1553b = VADD(b, VSET(0.5f));1554pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));1555pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));1556vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1);1557vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1);1558vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0);1559vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0);1560vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3);1561vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3);1562vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2);1563vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);1564#endif /* HAVE_SSE */15651566#else /* MINIMP3_FLOAT_OUTPUT */15671568static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };1569a = VMUL(a, g_scale);1570b = VMUL(b, g_scale);1571#if HAVE_SSE1572_mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));1573_mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));1574_mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));1575_mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));1576_mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));1577_mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));1578_mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));1579_mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));1580#else /* HAVE_SSE */1581vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);1582vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);1583vst1q_lane_f32(dstl + 
(15 - i)*nch, a, 0);1584vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);1585vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);1586vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);1587vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);1588vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);1589#endif /* HAVE_SSE */1590#endif /* MINIMP3_FLOAT_OUTPUT */1591}1592} else1593#endif /* HAVE_SIMD */1594#ifdef MINIMP3_ONLY_SIMD1595{} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */1596#else /* MINIMP3_ONLY_SIMD */1597for (i = 14; i >= 0; i--)1598{1599#define LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64];1600#define S0(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] = vz[j]*w1 + vy[j]*w0, a[j] = vz[j]*w0 - vy[j]*w1; }1601#define S1(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; }1602#define S2(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; }1603float a[4], b[4];16041605zlin[4*i] = xl[18*(31 - i)];1606zlin[4*i + 1] = xr[18*(31 - i)];1607zlin[4*i + 2] = xl[1 + 18*(31 - i)];1608zlin[4*i + 3] = xr[1 + 18*(31 - i)];1609zlin[4*(i + 16)] = xl[1 + 18*(1 + i)];1610zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)];1611zlin[4*(i - 16) + 2] = xl[18*(1 + i)];1612zlin[4*(i - 16) + 3] = xr[18*(1 + i)];16131614S0(0) S2(1) S1(2) S2(3) S1(4) S2(5) S1(6) S2(7)16151616dstr[(15 - i)*nch] = mp3d_scale_pcm(a[1]);1617dstr[(17 + i)*nch] = mp3d_scale_pcm(b[1]);1618dstl[(15 - i)*nch] = mp3d_scale_pcm(a[0]);1619dstl[(17 + i)*nch] = mp3d_scale_pcm(b[0]);1620dstr[(47 - i)*nch] = mp3d_scale_pcm(a[3]);1621dstr[(49 + i)*nch] = mp3d_scale_pcm(b[3]);1622dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]);1623dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]);1624}1625#endif /* MINIMP3_ONLY_SIMD */1626}16271628static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins)1629{1630int i;1631for (i = 0; i < 
nch; i++)1632{1633mp3d_DCT_II(grbuf + 576*i, nbands);1634}16351636memcpy(lins, qmf_state, sizeof(float)*15*64);16371638for (i = 0; i < nbands; i += 2)1639{1640mp3d_synth(grbuf + i, pcm + 32*nch*i, nch, lins + i*64);1641}1642#ifndef MINIMP3_NONSTANDARD_BUT_LOGICAL1643if (nch == 1)1644{1645for (i = 0; i < 15*64; i += 2)1646{1647qmf_state[i] = lins[nbands*64 + i];1648}1649} else1650#endif /* MINIMP3_NONSTANDARD_BUT_LOGICAL */1651{1652memcpy(qmf_state, lins + nbands*64, sizeof(float)*15*64);1653}1654}16551656static int mp3d_match_frame(const uint8_t *hdr, int mp3_bytes, int frame_bytes)1657{1658int i, nmatch;1659for (i = 0, nmatch = 0; nmatch < MAX_FRAME_SYNC_MATCHES; nmatch++)1660{1661i += hdr_frame_bytes(hdr + i, frame_bytes) + hdr_padding(hdr + i);1662if (i + HDR_SIZE > mp3_bytes)1663return nmatch > 0;1664if (!hdr_compare(hdr, hdr + i))1665return 0;1666}1667return 1;1668}16691670static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes)1671{1672int i, k;1673for (i = 0; i < mp3_bytes - HDR_SIZE; i++, mp3++)1674{1675if (hdr_valid(mp3))1676{1677int frame_bytes = hdr_frame_bytes(mp3, *free_format_bytes);1678int frame_and_padding = frame_bytes + hdr_padding(mp3);16791680for (k = HDR_SIZE; !frame_bytes && k < MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - HDR_SIZE; k++)1681{1682if (hdr_compare(mp3, mp3 + k))1683{1684int fb = k - hdr_padding(mp3);1685int nextfb = fb + hdr_padding(mp3 + k);1686if (i + k + nextfb + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + k + nextfb))1687continue;1688frame_and_padding = k;1689frame_bytes = fb;1690*free_format_bytes = fb;1691}1692}1693if ((frame_bytes && i + frame_and_padding <= mp3_bytes &&1694mp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) ||1695(!i && frame_and_padding == mp3_bytes))1696{1697*ptr_frame_bytes = frame_and_padding;1698return i;1699}1700*free_format_bytes = 0;1701}1702}1703*ptr_frame_bytes = 0;1704return mp3_bytes;1705}17061707void mp3dec_init(mp3dec_t 
*dec)1708{1709dec->header[0] = 0;1710}17111712int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info)1713{1714int i = 0, igr, frame_size = 0, success = 1;1715const uint8_t *hdr;1716bs_t bs_frame[1];1717mp3dec_scratch_t scratch;17181719if (mp3_bytes > 4 && dec->header[0] == 0xff && hdr_compare(dec->header, mp3))1720{1721frame_size = hdr_frame_bytes(mp3, dec->free_format_bytes) + hdr_padding(mp3);1722if (frame_size != mp3_bytes && (frame_size + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + frame_size)))1723{1724frame_size = 0;1725}1726}1727if (!frame_size)1728{1729memset(dec, 0, sizeof(mp3dec_t));1730i = mp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);1731if (!frame_size || i + frame_size > mp3_bytes)1732{1733info->frame_bytes = i;1734return 0;1735}1736}17371738hdr = mp3 + i;1739memcpy(dec->header, hdr, HDR_SIZE);1740info->frame_bytes = i + frame_size;1741info->frame_offset = i;1742info->channels = HDR_IS_MONO(hdr) ? 1 : 2;1743info->hz = hdr_sample_rate_hz(hdr);1744info->layer = 4 - HDR_GET_LAYER(hdr);1745info->bitrate_kbps = hdr_bitrate_kbps(hdr);17461747if (!pcm)1748{1749return hdr_frame_samples(hdr);1750}17511752bs_init(bs_frame, hdr + HDR_SIZE, frame_size - HDR_SIZE);1753if (HDR_IS_CRC(hdr))1754{1755get_bits(bs_frame, 16);1756}17571758if (info->layer == 3)1759{1760int main_data_begin = L3_read_side_info(bs_frame, scratch.gr_info, hdr);1761if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit)1762{1763mp3dec_init(dec);1764return 0;1765}1766success = L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);1767if (success)1768{1769for (igr = 0; igr < (HDR_TEST_MPEG1(hdr) ? 
2 : 1); igr++, pcm += 576*info->channels)1770{1771memset(scratch.grbuf[0], 0, 576*2*sizeof(float));1772L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);1773mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, pcm, scratch.syn[0]);1774}1775}1776L3_save_reservoir(dec, &scratch);1777} else1778{1779#ifdef MINIMP3_ONLY_MP31780return 0;1781#else /* MINIMP3_ONLY_MP3 */1782L12_scale_info sci[1];1783L12_read_scale_info(hdr, bs_frame, sci);17841785memset(scratch.grbuf[0], 0, 576*2*sizeof(float));1786for (i = 0, igr = 0; igr < 3; igr++)1787{1788if (12 == (i += L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))1789{1790i = 0;1791L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);1792mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, pcm, scratch.syn[0]);1793memset(scratch.grbuf[0], 0, 576*2*sizeof(float));1794pcm += 384*info->channels;1795}1796if (bs_frame->pos > bs_frame->limit)1797{1798mp3dec_init(dec);1799return 0;1800}1801}1802#endif /* MINIMP3_ONLY_MP3 */1803}1804return success*hdr_frame_samples(dec->header);1805}18061807#ifdef MINIMP3_FLOAT_OUTPUT1808void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples)1809{1810int i = 0;1811#if HAVE_SIMD1812int aligned_count = num_samples & ~7;1813for(; i < aligned_count; i += 8)1814{1815static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f };1816f4 a = VMUL(VLD(&in[i ]), g_scale);1817f4 b = VMUL(VLD(&in[i+4]), g_scale);1818#if HAVE_SSE1819static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };1820static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };1821__m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),1822_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));1823out[i ] = _mm_extract_epi16(pcm8, 0);1824out[i+1] = _mm_extract_epi16(pcm8, 1);1825out[i+2] = _mm_extract_epi16(pcm8, 2);1826out[i+3] = _mm_extract_epi16(pcm8, 
3);1827out[i+4] = _mm_extract_epi16(pcm8, 4);1828out[i+5] = _mm_extract_epi16(pcm8, 5);1829out[i+6] = _mm_extract_epi16(pcm8, 6);1830out[i+7] = _mm_extract_epi16(pcm8, 7);1831#else /* HAVE_SSE */1832int16x4_t pcma, pcmb;1833a = VADD(a, VSET(0.5f));1834b = VADD(b, VSET(0.5f));1835pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));1836pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));1837vst1_lane_s16(out+i , pcma, 0);1838vst1_lane_s16(out+i+1, pcma, 1);1839vst1_lane_s16(out+i+2, pcma, 2);1840vst1_lane_s16(out+i+3, pcma, 3);1841vst1_lane_s16(out+i+4, pcmb, 0);1842vst1_lane_s16(out+i+5, pcmb, 1);1843vst1_lane_s16(out+i+6, pcmb, 2);1844vst1_lane_s16(out+i+7, pcmb, 3);1845#endif /* HAVE_SSE */1846}1847#endif /* HAVE_SIMD */1848for(; i < num_samples; i++)1849{1850float sample = in[i] * 32768.0f;1851if (sample >= 32766.5)1852out[i] = (int16_t) 32767;1853else if (sample <= -32767.5)1854out[i] = (int16_t)-32768;1855else1856{1857int16_t s = (int16_t)(sample + .5f);1858s -= (s < 0); /* away from zero, to be compliant */1859out[i] = s;1860}1861}1862}1863#endif /* MINIMP3_FLOAT_OUTPUT */1864#endif /* MINIMP3_IMPLEMENTATION && !_MINIMP3_IMPLEMENTATION_GUARD */186518661867