Path: blob/master/libs/mpg123/src/libmpg123/optimize.c
4394 views
/*1optimize: get a grip on the different optimizations23copyright 2006-21 by the mpg123 project - free software under the terms of the LGPL 2.14see COPYING and AUTHORS files in distribution or http://mpg123.org5initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]67Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect.8*/910#define I_AM_OPTIMIZE11#define WANT_GETCPUFLAGS12#include "mpg123lib_intern.h" /* includes optimize.h */13#include "getcpuflags.h"14#include "../common/debug.h"151617/* Ugly macros to build conditional synth function array values. */1819#ifndef NO_8BIT20#define IF8(synth) synth,21#else22#define IF8(synth)23#endif2425#ifndef NO_SYNTH322627#ifndef NO_REAL28#define IFREAL(synth) synth,29#else30#define IFREAL(synth)31#endif3233#ifndef NO_32BIT34#define IF32(synth) synth35#else36#define IF32(synth)37#endif3839#else4041#define IFREAL(synth)42#define IF32(synth)4344#endif4546#ifndef NO_16BIT47# define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }48#else49# define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }50#endif5152/* The call of left and right plain synth, wrapped.53This may be replaced by a direct stereo optimized synth. */54static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)55{56int clip;57clip = (fr->synth)(bandPtr_l, 0, fr, 0);58clip += (fr->synth)(bandPtr_r, 1, fr, 1);59return clip;60}6162static const struct synth_s synth_base =63{64{ /* plain */65OUT_SYNTHS(INT123_synth_1to1, INT123_synth_1to1_8bit, INT123_synth_1to1_real, INT123_synth_1to1_s32)66# ifndef NO_DOWNSAMPLE67,OUT_SYNTHS(INT123_synth_2to1, INT123_synth_2to1_8bit, INT123_synth_2to1_real, INT123_synth_2to1_s32)68,OUT_SYNTHS(INT123_synth_4to1, INT123_synth_4to1_8bit, INT123_synth_4to1_real, INT123_synth_4to1_s32)69# endif70# ifndef NO_NTOM71,OUT_SYNTHS(INT123_synth_ntom, INT123_synth_ntom_8bit, INT123_synth_ntom_real, INT123_synth_ntom_s32)72# endif73},74{ /* stereo, by default only wrappers over plain synth */75OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)76# ifndef NO_DOWNSAMPLE77,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)78,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)79# endif80# ifndef NO_NTOM81,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)82# endif83},84{ /* mono2stereo */85OUT_SYNTHS(INT123_synth_1to1_m2s, INT123_synth_1to1_8bit_m2s, INT123_synth_1to1_real_m2s, INT123_synth_1to1_s32_m2s)86# ifndef NO_DOWNSAMPLE87,OUT_SYNTHS(INT123_synth_2to1_m2s, INT123_synth_2to1_8bit_m2s, INT123_synth_2to1_real_m2s, INT123_synth_2to1_s32_m2s)88,OUT_SYNTHS(INT123_synth_4to1_m2s, INT123_synth_4to1_8bit_m2s, INT123_synth_4to1_real_m2s, INT123_synth_4to1_s32_m2s)89# endif90# ifndef NO_NTOM91,OUT_SYNTHS(INT123_synth_ntom_m2s, INT123_synth_ntom_8bit_m2s, INT123_synth_ntom_real_m2s, INT123_synth_ntom_s32_m2s)92# endif93},94{ /* mono*/95OUT_SYNTHS(INT123_synth_1to1_mono, INT123_synth_1to1_8bit_mono, INT123_synth_1to1_real_mono, INT123_synth_1to1_s32_mono)96# ifndef NO_DOWNSAMPLE97,OUT_SYNTHS(INT123_synth_2to1_mono, INT123_synth_2to1_8bit_mono, INT123_synth_2to1_real_mono, INT123_synth_2to1_s32_mono)98,OUT_SYNTHS(INT123_synth_4to1_mono, INT123_synth_4to1_8bit_mono, INT123_synth_4to1_real_mono, INT123_synth_4to1_s32_mono)99# endif100# ifndef NO_NTOM101,OUT_SYNTHS(INT123_synth_ntom_mono, INT123_synth_ntom_8bit_mono, INT123_synth_ntom_real_mono, INT123_synth_ntom_s32_mono)102#endif103}104};105106#ifdef OPT_X86107/* More plain synths for i386 */108const func_synth plain_i386[r_limit][f_limit] =109{ /* plain */110OUT_SYNTHS(INT123_synth_1to1_i386, INT123_synth_1to1_8bit_i386, INT123_synth_1to1_real_i386, INT123_synth_1to1_s32_i386)111# ifndef NO_DOWNSAMPLE112,OUT_SYNTHS(INT123_synth_2to1_i386, INT123_synth_2to1_8bit_i386, INT123_synth_2to1_real_i386, INT123_synth_2to1_s32_i386)113,OUT_SYNTHS(INT123_synth_4to1_i386, INT123_synth_4to1_8bit_i386, INT123_synth_4to1_real_i386, INT123_synth_4to1_s32_i386)114# endif115# ifndef NO_NTOM116,OUT_SYNTHS(INT123_synth_ntom, INT123_synth_ntom_8bit, INT123_synth_ntom_real, INT123_synth_ntom_s32)117# endif118};119#endif120121122enum optdec INT123_defdec(void){ return defopt; }123124enum optcla INT123_decclass(const enum optdec type)125{126return127(128type == mmx129|| type == sse130|| type == sse_vintage131|| type == dreidnowext132|| type == dreidnowext_vintage133|| type == x86_64134|| type == neon135|| type == neon64136|| type == avx137) ? mmxsse : normal;138}139140static int find_synth(func_synth synth, const func_synth synths[r_limit][f_limit])141{142enum synth_resample ri;143enum synth_format fi;144for(ri=0; ri<r_limit; ++ri)145for(fi=0; fi<f_limit; ++fi)146if(synth == synths[ri][fi])147return TRUE;148149return FALSE;150}151152153#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)154/* After knowing that it is either vintage or current SSE,155this separates the two. In case of non-OPT_MULTI, only one156of OPT_SSE and OPT_SSE_VINTAGE is active. */157static enum optdec sse_or_vintage(mpg123_handle *fr)158{159enum optdec type;160type = sse_vintage;161# ifdef OPT_SSE162# ifdef OPT_THE_DCT36163if(INT123_dct36_match(fr, sse))164# endif165type = sse;166# endif167return type;168}169#endif170171/* Determine what kind of decoder is actually active172This depends on runtime choices which may cause fallback to i386 or generic code. */173static int find_dectype(mpg123_handle *fr)174{175enum optdec type = nodec;176/* Direct and indirect usage, 1to1 stereo decoding.177Concentrating on the plain stereo synth should be fine, mono stuff is derived. */178func_synth basic_synth = fr->synth;179#ifndef NO_8BIT180#ifndef NO_16BIT181if(basic_synth == INT123_synth_1to1_8bit_wrap)182basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */183#endif184#endif185186if(FALSE) ; /* Just to initialize the else if ladder. */187#ifndef NO_16BIT188#if defined(OPT_3DNOWEXT) || defined(OPT_3DNOWEXT_VINTAGE)189else if(basic_synth == INT123_synth_1to1_3dnowext)190{191type = dreidnowext;192# ifdef OPT_3DNOWEXT_VINTAGE193# ifdef OPT_MULTI194if(INT123_dct36_match(fr, dreidnowext_vintage))195# endif196type = dreidnowext_vintage;197# endif198}199#endif200#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)201else if(basic_synth == INT123_synth_1to1_sse)202{203type = sse_or_vintage(fr);204}205#endif206#if defined(OPT_3DNOW) || defined(OPT_3DNOW_VINTAGE)207else if(basic_synth == INT123_synth_1to1_3dnow)208{209type = dreidnow;210# ifdef OPT_3DNOW_VINTAGE211# ifdef OPT_MULTI212if(INT123_dct36_match(fr, dreidnow_vintage))213# endif214type = dreidnow_vintage;215# endif216}217#endif218#ifdef OPT_MMX219else if(basic_synth == INT123_synth_1to1_mmx) type = mmx;220#endif221#ifdef OPT_I586_DITHER222else if(basic_synth == INT123_synth_1to1_i586_dither) type = ifuenf_dither;223#endif224#ifdef OPT_I586225else if(basic_synth == INT123_synth_1to1_i586) type = ifuenf;226#endif227#ifdef OPT_ALTIVEC228else if(basic_synth == INT123_synth_1to1_altivec) type = altivec;229#endif230#ifdef OPT_X86_64231else if(basic_synth == INT123_synth_1to1_x86_64) type = x86_64;232#endif233#ifdef OPT_AVX234else if(basic_synth == INT123_synth_1to1_avx) type = avx;235#endif236#ifdef OPT_ARM237else if(basic_synth == INT123_synth_1to1_arm) type = arm;238#endif239#ifdef OPT_NEON240else if(basic_synth == INT123_synth_1to1_neon) type = neon;241#endif242#ifdef OPT_NEON64243else if(basic_synth == INT123_synth_1to1_neon64) type = neon64;244#endif245#ifdef OPT_GENERIC_DITHER246else if(basic_synth == INT123_synth_1to1_dither) type = generic_dither;247#endif248#ifdef OPT_DITHER /* either i586 or generic! */249#ifndef NO_DOWNSAMPLE250else if251(252basic_synth == INT123_synth_2to1_dither253|| basic_synth == INT123_synth_4to1_dither254) type = generic_dither;255#endif256#endif257#endif /* 16bit */258259#ifndef NO_SYNTH32260261#ifndef NO_REAL262#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)263else if(basic_synth == INT123_synth_1to1_real_sse)264{265type = sse_or_vintage(fr);266}267#endif268#ifdef OPT_X86_64269else if(basic_synth == INT123_synth_1to1_real_x86_64) type = x86_64;270#endif271#ifdef OPT_AVX272else if(basic_synth == INT123_synth_1to1_real_avx) type = avx;273#endif274#ifdef OPT_ALTIVEC275else if(basic_synth == INT123_synth_1to1_real_altivec) type = altivec;276#endif277#ifdef OPT_NEON278else if(basic_synth == INT123_synth_1to1_real_neon) type = neon;279#endif280#ifdef OPT_NEON64281else if(basic_synth == INT123_synth_1to1_real_neon64) type = neon64;282#endif283284#endif /* real */285286#ifndef NO_32BIT287#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)288else if(basic_synth == INT123_synth_1to1_s32_sse)289{290type = sse_or_vintage(fr);291}292#endif293#ifdef OPT_X86_64294else if(basic_synth == INT123_synth_1to1_s32_x86_64) type = x86_64;295#endif296#ifdef OPT_AVX297else if(basic_synth == INT123_synth_1to1_s32_avx) type = avx;298#endif299#ifdef OPT_ALTIVEC300else if(basic_synth == INT123_synth_1to1_s32_altivec) type = altivec;301#endif302#ifdef OPT_NEON303else if(basic_synth == INT123_synth_1to1_s32_neon) type = neon;304#endif305#ifdef OPT_NEON64306else if(basic_synth == INT123_synth_1to1_s32_neon64) type = neon64;307#endif308#endif /* 32bit */309310#endif /* any 32 bit synth */311312#ifdef OPT_X86313else if(find_synth(basic_synth, plain_i386))314type = idrei;315#endif316317else if(find_synth(basic_synth, synth_base.plain))318type = generic;319320321322#ifdef OPT_I486323/* i486 is special ... the specific code is in use for 16bit 1to1 stereo324otherwise we have i386 active... but still, the distinction doesn't matter*/325type = ivier;326#endif327328if(type != nodec)329{330fr->cpu_opts.type = type;331fr->cpu_opts.class = INT123_decclass(type);332333debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class);334return MPG123_OK;335}336else337{338if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!");339340fr->err = MPG123_BAD_DECODER_SETUP;341return MPG123_ERR;342}343}344345/* set synth functions for current frame, optimizations handled by opt_* macros */346int INT123_set_synth_functions(mpg123_handle *fr)347{348enum synth_resample resample = r_none;349enum synth_format basic_format = f_none; /* Default is always 16bit, or whatever. */350351/* Select the basic output format, different from 16bit: 8bit, real. */352if(FALSE){}353#ifndef NO_16BIT354else if(fr->af.dec_enc & MPG123_ENC_16)355basic_format = f_16;356#endif357#ifndef NO_8BIT358else if(fr->af.dec_enc & MPG123_ENC_8)359basic_format = f_8;360#endif361#ifndef NO_REAL362else if(fr->af.dec_enc & MPG123_ENC_FLOAT)363basic_format = f_real;364#endif365#ifndef NO_32BIT366/* 24 bit integer means decoding to 32 bit first. */367else if(fr->af.dec_enc & MPG123_ENC_32 || fr->af.dec_enc & MPG123_ENC_24)368basic_format = f_32;369#endif370371/* Make sure the chosen format is compiled into this lib. */372if(basic_format == f_none)373{374if(NOQUIET) error("INT123_set_synth_functions: This output format is disabled in this build!");375376return -1;377}378379/* Be explicit about downsampling variant. */380switch(fr->down_sample)381{382case 0: resample = r_1to1; break;383#ifndef NO_DOWNSAMPLE384case 1: resample = r_2to1; break;385case 2: resample = r_4to1; break;386#endif387#ifndef NO_NTOM388case 3: resample = r_ntom; break;389#endif390}391392if(resample == r_none)393{394if(NOQUIET) error("INT123_set_synth_functions: This resampling mode is not supported in this build!");395396return -1;397}398399debug2("selecting synth: resample=%i format=%i", resample, basic_format);400/* Finally selecting the synth functions for stereo / mono. */401fr->synth = fr->synths.plain[resample][basic_format];402fr->synth_stereo = fr->synths.stereo[resample][basic_format];403fr->synth_mono = fr->af.channels==2404? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */405: fr->synths.mono[resample][basic_format]; /* Mono MPEG file decoded to mono. */406407if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */408{409fr->err = MPG123_BAD_DECODER_SETUP;410return MPG123_ERR;411}412413if(INT123_frame_buffers(fr) != 0)414{415fr->err = MPG123_NO_BUFFERS;416if(NOQUIET) error("Failed to set up decoder buffers!");417418return MPG123_ERR;419}420421#ifndef NO_8BIT422if(basic_format == f_8)423{424if(INT123_make_conv16to8_table(fr) != 0)425{426if(NOQUIET) error("Failed to set up conv16to8 table!");427/* it's a bit more work to get proper error propagation up */428return -1;429}430}431#endif432433#ifdef OPT_MMXORSSE434/* Special treatment for MMX, SSE and 3DNowExt stuff.435The real-decoding SSE for x86-64 uses normal tables! */436if(fr->cpu_opts.class == mmxsse437# ifndef NO_REAL438&& basic_format != f_real439# endif440# ifndef NO_32BIT441&& basic_format != f_32442# endif443# ifdef ACCURATE_ROUNDING444&& fr->cpu_opts.type != sse445&& fr->cpu_opts.type != sse_vintage446&& fr->cpu_opts.type != x86_64447&& fr->cpu_opts.type != neon448&& fr->cpu_opts.type != neon64449&& fr->cpu_opts.type != avx450# endif451)452{453#ifndef NO_LAYER3454INT123_init_layer3_stuff(fr, INT123_init_layer3_gainpow2_mmx);455#endif456#ifndef NO_LAYER12457INT123_init_layer12_stuff(fr, INT123_init_layer12_table_mmx);458#endif459fr->INT123_make_decode_tables = INT123_make_decode_tables_mmx;460}461else462#endif463{464#ifndef NO_LAYER3465INT123_init_layer3_stuff(fr, INT123_init_layer3_gainpow2);466#endif467#ifndef NO_LAYER12468INT123_init_layer12_stuff(fr, INT123_init_layer12_table);469#endif470fr->INT123_make_decode_tables = INT123_make_decode_tables;471}472473/* We allocated the table buffers just now, so (re)create the tables. */474fr->INT123_make_decode_tables(fr);475476return 0;477}478479int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)480{481const char* chosen = ""; /* the chosen decoder opt as string */482enum optdec want_dec = nodec;483int done = 0;484int auto_choose = 0;485#ifdef OPT_DITHER486int dithered = FALSE; /* If some dithered decoder is chosen. */487#endif488489want_dec = INT123_dectype(cpu);490auto_choose = want_dec == autodec;491/* Fill whole array of synth functions with generic code first. */492fr->synths = synth_base;493494#ifndef OPT_MULTI495{496if(!auto_choose && want_dec != defopt)497{498if(NOQUIET) error2("you wanted decoder type %i, I only have %i", want_dec, defopt);499}500auto_choose = TRUE; /* There will be only one choice anyway. */501}502#endif503504fr->cpu_opts.type = nodec;505/* covers any i386+ cpu; they actually differ only in the INT123_synth_1to1 function, mostly... */506#ifdef OPT_X86507if(cpu_i586(fr->cpu_flags))508{509# ifdef OPT_MULTI510debug2("standard flags: 0x%08x\textended flags: 0x%08x", fr->cpu_flags.std, fr->cpu_flags.ext);511# endif512# ifdef OPT_SSE513if( !done && (auto_choose || want_dec == sse)514&& cpu_sse(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )515{516chosen = dn_sse;517fr->cpu_opts.type = sse;518# ifndef NO_16BIT519fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_sse;520# ifdef ACCURATE_ROUNDING521fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_sse;522# endif523# endif524# ifndef NO_REAL525fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_sse;526fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_sse;527# endif528# ifndef NO_32BIT529fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_sse;530fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_sse;531# endif532done = 1;533}534# endif535# ifdef OPT_SSE_VINTAGE536if( !done && (auto_choose || want_dec == sse_vintage)537&& cpu_sse(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )538{539chosen = dn_sse_vintage;540fr->cpu_opts.type = sse_vintage;541# ifndef NO_16BIT542fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_sse;543# ifdef ACCURATE_ROUNDING544fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_sse;545# endif546# endif547# ifndef NO_REAL548fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_sse;549fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_sse;550# endif551# ifndef NO_32BIT552fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_sse;553fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_sse;554# endif555done = 1;556}557# endif558# ifdef OPT_3DNOWEXT559if( !done && (auto_choose || want_dec == dreidnowext)560&& cpu_3dnow(fr->cpu_flags)561&& cpu_3dnowext(fr->cpu_flags)562&& cpu_mmx(fr->cpu_flags) )563{564chosen = dn_dreidnowext;565fr->cpu_opts.type = dreidnowext;566# ifndef NO_16BIT567fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnowext;568# endif569done = 1;570}571# endif572# ifdef OPT_3DNOWEXT_VINTAGE573if( !done && (auto_choose || want_dec == dreidnowext_vintage)574&& cpu_3dnow(fr->cpu_flags)575&& cpu_3dnowext(fr->cpu_flags)576&& cpu_mmx(fr->cpu_flags) )577{578chosen = dn_dreidnowext_vintage;579fr->cpu_opts.type = dreidnowext_vintage;580# ifndef NO_16BIT581fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnowext;582# endif583done = 1;584}585# endif586# ifdef OPT_3DNOW587if( !done && (auto_choose || want_dec == dreidnow)588&& cpu_3dnow(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )589{590chosen = dn_dreidnow;591fr->cpu_opts.type = dreidnow;592# ifndef NO_16BIT593fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnow;594# endif595done = 1;596}597# endif598# ifdef OPT_3DNOW_VINTAGE599if( !done && (auto_choose || want_dec == dreidnow_vintage)600&& cpu_3dnow(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )601{602chosen = dn_dreidnow_vintage;603fr->cpu_opts.type = dreidnow_vintage;604# ifndef NO_16BIT605fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnow;606# endif607done = 1;608}609# endif610#ifdef OPT_MMX611if( !done && (auto_choose || want_dec == mmx)612&& cpu_mmx(fr->cpu_flags) )613{614chosen = dn_mmx;615fr->cpu_opts.type = mmx;616# ifndef NO_16BIT617fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_mmx;618# endif619done = 1;620}621#endif622#ifdef OPT_I586623if(!done && (auto_choose || want_dec == ifuenf))624{625chosen = "i586/pentium";626fr->cpu_opts.type = ifuenf;627# ifndef NO_16BIT628fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_i586;629# endif630done = 1;631}632#endif633#ifdef OPT_I586_DITHER634if(!done && (auto_choose || want_dec == ifuenf_dither))635{636chosen = "dithered i586/pentium";637fr->cpu_opts.type = ifuenf_dither;638dithered = TRUE;639# ifndef NO_16BIT640fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_i586_dither;641# ifndef NO_DOWNSAMPLE642fr->synths.plain[r_2to1][f_16] = INT123_synth_2to1_dither;643fr->synths.plain[r_4to1][f_16] = INT123_synth_4to1_dither;644# endif645# endif646done = 1;647}648#endif649}650#ifdef OPT_I486651/* That won't cooperate in multi opt mode - forcing i486 in layer3.c652But still... here it is... maybe for real use in future. */653if(!done && (auto_choose || want_dec == ivier))654{655chosen = dn_ivier;656fr->cpu_opts.type = ivier;657done = 1;658}659#endif660#ifdef OPT_I386661if(!done && (auto_choose || want_dec == idrei))662{663chosen = dn_idrei;664fr->cpu_opts.type = idrei;665done = 1;666}667#endif668669if(done)670{671/*672We have chosen some x86 decoder... fillup some i386 stuff.673There is an open question about using dithered INT123_synth_1to1 for 8bit wrappers.674For quality it won't make sense, but wrapped i586_dither wrapped may still be faster...675*/676enum synth_resample ri;677enum synth_format fi;678# ifndef NO_8BIT679# ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */680if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16])681{682fr->synths.plain[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap;683fr->synths.mono[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_mono;684fr->synths.mono2stereo[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_m2s;685}686# endif687# endif688for(ri=0; ri<r_limit; ++ri)689for(fi=0; fi<f_limit; ++fi)690{691if(fr->synths.plain[ri][fi] == synth_base.plain[ri][fi])692fr->synths.plain[ri][fi] = plain_i386[ri][fi];693}694}695696#endif /* OPT_X86 */697698#ifdef OPT_AVX699if(!done && (auto_choose || want_dec == avx) && cpu_avx(fr->cpu_flags))700{701chosen = "x86-64 (AVX)";702fr->cpu_opts.type = avx;703# ifndef NO_16BIT704fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_avx;705fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_avx;706# endif707# ifndef NO_REAL708fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_avx;709fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_fltst_avx;710# endif711# ifndef NO_32BIT712fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_avx;713fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_avx;714# endif715done = 1;716}717#endif718719#ifdef OPT_X86_64720if(!done && (auto_choose || want_dec == x86_64))721{722chosen = "x86-64 (SSE)";723fr->cpu_opts.type = x86_64;724# ifndef NO_16BIT725fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_x86_64;726fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_x86_64;727# endif728# ifndef NO_REAL729fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_x86_64;730fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_x86_64;731# endif732# ifndef NO_32BIT733fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_x86_64;734fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_x86_64;735# endif736done = 1;737}738#endif739740# ifdef OPT_ALTIVEC741if(!done && (auto_choose || want_dec == altivec))742{743chosen = dn_altivec;744fr->cpu_opts.type = altivec;745# ifndef NO_16BIT746fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_altivec;747fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_altivec;748# endif749# ifndef NO_REAL750fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_altivec;751fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_fltst_altivec;752# endif753# ifndef NO_32BIT754fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_altivec;755fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_altivec;756# endif757done = 1;758}759# endif760761# ifdef OPT_NEON762if(!done && (auto_choose || want_dec == neon) && cpu_neon(fr->cpu_flags))763{764chosen = dn_neon;765fr->cpu_opts.type = neon;766# ifndef NO_16BIT767fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_neon;768fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_neon;769# endif770# ifndef NO_REAL771fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_neon;772fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_neon;773# endif774# ifndef NO_32BIT775fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_neon;776fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_neon;777# endif778done = 1;779}780# endif781782# ifdef OPT_ARM783if(!done && (auto_choose || want_dec == arm))784{785chosen = dn_arm;786fr->cpu_opts.type = arm;787# ifndef NO_16BIT788fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_arm;789# endif790done = 1;791}792# endif793794# ifdef OPT_NEON64795if(!done && (auto_choose || want_dec == neon64) && cpu_neon(fr->cpu_flags))796{797chosen = dn_neon64;798fr->cpu_opts.type = neon64;799# ifndef NO_16BIT800fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_neon64;801fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_neon64;802# endif803# ifndef NO_REAL804fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_neon64;805fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_fltst_neon64;806# endif807# ifndef NO_32BIT808fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_neon64;809fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32st_neon64;810# endif811done = 1;812}813# endif814815# ifdef OPT_GENERIC816if(!done && (auto_choose || want_dec == generic))817{818chosen = dn_generic;819fr->cpu_opts.type = generic;820done = 1;821}822# endif823824#ifdef OPT_GENERIC_DITHER825if(!done && (auto_choose || want_dec == generic_dither))826{827chosen = "dithered generic";828fr->cpu_opts.type = generic_dither;829dithered = TRUE;830# ifndef NO_16BIT831fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_dither;832# ifndef NO_DOWNSAMPLE833fr->synths.plain[r_2to1][f_16] = INT123_synth_2to1_dither;834fr->synths.plain[r_4to1][f_16] = INT123_synth_4to1_dither;835# endif836# endif837done = 1;838}839#endif840841fr->cpu_opts.class = INT123_decclass(fr->cpu_opts.type);842843# ifndef NO_8BIT844# ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */845/* Last chance to use some optimized routine via generic wrappers (for 8bit). */846if( fr->cpu_opts.type != ifuenf_dither847&& fr->cpu_opts.type != generic_dither848&& fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] )849{850fr->synths.plain[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap;851fr->synths.mono[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_mono;852fr->synths.mono2stereo[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_m2s;853}854# endif855# endif856857#ifdef OPT_THE_DCT36858INT123_dct36_choose(fr);859#endif860861#ifdef OPT_DITHER862if(done && dithered)863{864/* run-time dither noise table generation */865if(!INT123_frame_dither_init(fr))866{867if(NOQUIET) error("Dither noise setup failed!");868return 0;869}870}871#endif872873if(done)874{875if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen);876return 1;877}878else879{880if(NOQUIET) error("Could not set optimization!");881return 0;882}883}884885enum optdec INT123_dectype(const char* decoder)886{887enum optdec dt;888if( (decoder == NULL)889|| (decoder[0] == 0) )890return autodec;891892for(dt=autodec; dt<nodec; ++dt)893if(!strcasecmp(decoder, decname[dt])) return dt;894895return nodec; /* If we found nothing... */896}897898#ifdef OPT_MULTI899900/* same number of entries as full list, but empty at beginning */901static const char *mpg123_supported_decoder_list[] =902{903#ifdef OPT_SSE904NULL,905#endif906#ifdef OPT_SSE_VINTAGE907NULL,908#endif909#ifdef OPT_3DNOWEXT910NULL,911#endif912#ifdef OPT_3DNOWEXT_VINTAGE913NULL,914#endif915#ifdef OPT_3DNOW916NULL,917#endif918#ifdef OPT_3DNOW_VINTAGE919NULL,920#endif921#ifdef OPT_MMX922NULL,923#endif924#ifdef OPT_I586925NULL,926#endif927#ifdef OPT_I586_DITHER928NULL,929#endif930#ifdef OPT_I486931NULL,932#endif933#ifdef OPT_I386934NULL,935#endif936#ifdef OPT_ALTIVEC937NULL,938#endif939#ifdef OPT_AVX940NULL,941#endif942#ifdef OPT_X86_64943NULL,944#endif945#ifdef OPT_ARM946NULL,947#endif948#ifdef OPT_NEON949NULL,950#endif951#ifdef OPT_NEON64952NULL,953#endif954#ifdef OPT_GENERIC_FLOAT955NULL,956#endif957# ifdef OPT_GENERIC958NULL,959# endif960# ifdef OPT_GENERIC_DITHER961NULL,962# endif963NULL964};965#endif966967static const char *mpg123_decoder_list[] =968{969#ifdef OPT_SSE970dn_sse,971#endif972#ifdef OPT_SSE_VINTAGE973dn_sse_vintage,974#endif975#ifdef OPT_3DNOWEXT976dn_dreidnowext,977#endif978#ifdef OPT_3DNOWEXT_VINTAGE979dn_dreidnowext_vintage,980#endif981#ifdef OPT_3DNOW982dn_dreidnow,983#endif984#ifdef OPT_3DNOW_VINTAGE985dn_dreidnow_vintage,986#endif987#ifdef OPT_MMX988dn_mmx,989#endif990#ifdef OPT_I586991dn_ifuenf,992#endif993#ifdef OPT_I586_DITHER994dn_ifuenf_dither,995#endif996#ifdef OPT_I486997dn_ivier,998#endif999#ifdef OPT_I3861000dn_idrei,1001#endif1002#ifdef OPT_ALTIVEC1003dn_altivec,1004#endif1005#ifdef OPT_AVX1006dn_avx,1007#endif1008#ifdef OPT_X86_641009dn_x86_64,1010#endif1011#ifdef OPT_ARM1012dn_arm,1013#endif1014#ifdef OPT_NEON1015dn_neon,1016#endif1017#ifdef OPT_NEON641018dn_neon64,1019#endif1020#ifdef OPT_GENERIC1021dn_generic,1022#endif1023#ifdef OPT_GENERIC_DITHER1024dn_generic_dither,1025#endif1026NULL1027};10281029static void check_decoders(void)1030{1031#ifndef OPT_MULTI1032/* In non-multi mode, only the full list (one entry) is used. */1033return;1034#else1035const char **d = mpg123_supported_decoder_list;1036#ifdef OPT_CPU_FLAGS1037struct cpuflags cpu_flags;1038wrap_getcpuflags(&cpu_flags);1039#endif1040#ifdef OPT_X861041if(cpu_i586(cpu_flags))1042{1043/* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2");1044if(cpu_sse3(cpu_flags)) printf(" SSE3"); */1045#ifdef OPT_SSE1046if(cpu_sse(cpu_flags)) *(d++) = dn_sse;1047#endif1048#ifdef OPT_SSE_VINTAGE1049if(cpu_sse(cpu_flags)) *(d++) = dn_sse_vintage;1050#endif1051#ifdef OPT_3DNOWEXT1052if(cpu_3dnowext(cpu_flags)) *(d++) = dn_dreidnowext;1053#endif1054#ifdef OPT_3DNOWEXT_VINTAGE1055if(cpu_3dnowext(cpu_flags)) *(d++) = dn_dreidnowext_vintage;1056#endif1057#ifdef OPT_3DNOW1058if(cpu_3dnow(cpu_flags)) *(d++) = dn_dreidnow;1059#endif1060#ifdef OPT_3DNOW_VINTAGE1061if(cpu_3dnow(cpu_flags)) *(d++) = dn_dreidnow_vintage;1062#endif1063#ifdef OPT_MMX1064if(cpu_mmx(cpu_flags)) *(d++) = dn_mmx;1065#endif1066#ifdef OPT_I5861067*(d++) = dn_ifuenf;1068#endif1069#ifdef OPT_I586_DITHER1070*(d++) = dn_ifuenf_dither;1071#endif1072}1073#endif1074/* just assume that the i486 built is run on a i486 cpu... */1075#ifdef OPT_I4861076*(d++) = dn_ivier;1077#endif1078#ifdef OPT_ALTIVEC1079*(d++) = dn_altivec;1080#endif1081/* every supported x86 can do i386, any cpu can do generic */1082#ifdef OPT_I3861083*(d++) = dn_idrei;1084#endif1085#ifdef OPT_AVX1086if(cpu_avx(cpu_flags)) *(d++) = dn_avx;1087#endif1088#ifdef OPT_X86_641089*(d++) = dn_x86_64;1090#endif1091#ifdef OPT_ARM1092*(d++) = dn_arm;1093#endif1094#ifdef OPT_NEON1095if(cpu_neon(cpu_flags)) *(d++) = dn_neon;1096#endif1097#ifdef OPT_NEON641098if(cpu_neon(cpu_flags)) *(d++) = dn_neon64;1099#endif1100#ifdef OPT_GENERIC1101*(d++) = dn_generic;1102#endif1103#ifdef OPT_GENERIC_DITHER1104*(d++) = dn_generic_dither;1105#endif1106#endif /* ndef OPT_MULTI */1107}11081109const char* attribute_align_arg mpg123_current_decoder(mpg123_handle *mh)1110{1111if(mh == NULL) return NULL;11121113return decname[mh->cpu_opts.type];1114}11151116const char attribute_align_arg **mpg123_decoders(void){ return mpg123_decoder_list; }1117const char attribute_align_arg **mpg123_supported_decoders(void)1118{1119check_decoders();1120#ifdef OPT_MULTI1121return mpg123_supported_decoder_list;1122#else1123return mpg123_decoder_list;1124#endif1125}112611271128