Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/mpg123/src/libmpg123/optimize.c
4394 views
1
/*
2
optimize: get a grip on the different optimizations
3
4
copyright 2006-21 by the mpg123 project - free software under the terms of the LGPL 2.1
5
see COPYING and AUTHORS files in distribution or http://mpg123.org
6
initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]
7
8
Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect.
9
*/
10
11
#define I_AM_OPTIMIZE
12
#define WANT_GETCPUFLAGS
13
#include "mpg123lib_intern.h" /* includes optimize.h */
14
#include "getcpuflags.h"
15
#include "../common/debug.h"
16
17
18
/* Ugly macros to build conditional synth function array values. */
19
20
/* IF8: pass the 8 bit synth through as an initializer entry (with its
   trailing comma), or drop it when 8 bit output is compiled out. */
#ifdef NO_8BIT
# define IF8(synth)
#else
# define IF8(synth) synth,
#endif

/* IFREAL: same for the floating point synth. It vanishes when either all
   32 bit synths (NO_SYNTH32) or just the real output (NO_REAL) are disabled. */
#if !defined(NO_SYNTH32) && !defined(NO_REAL)
# define IFREAL(synth) synth,
#else
# define IFREAL(synth)
#endif

/* IF32: same for the 32 bit integer synth. This is the last entry of a row,
   hence no trailing comma here. */
#if !defined(NO_SYNTH32) && !defined(NO_32BIT)
# define IF32(synth) synth
#else
# define IF32(synth)
#endif

/* OUT_SYNTHS: build one brace-enclosed table row out of the four format
   variants of a synth, keeping only those formats that are compiled in. */
#ifdef NO_16BIT
# define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) \
	{ IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#else
# define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) \
	{ synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#endif
52
53
/* The call of left and right plain synth, wrapped.
54
This may be replaced by a direct stereo optimized synth. */
55
static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
56
{
57
int clip;
58
clip = (fr->synth)(bandPtr_l, 0, fr, 0);
59
clip += (fr->synth)(bandPtr_r, 1, fr, 1);
60
return clip;
61
}
62
63
static const struct synth_s synth_base =
64
{
65
{ /* plain */
66
OUT_SYNTHS(INT123_synth_1to1, INT123_synth_1to1_8bit, INT123_synth_1to1_real, INT123_synth_1to1_s32)
67
# ifndef NO_DOWNSAMPLE
68
,OUT_SYNTHS(INT123_synth_2to1, INT123_synth_2to1_8bit, INT123_synth_2to1_real, INT123_synth_2to1_s32)
69
,OUT_SYNTHS(INT123_synth_4to1, INT123_synth_4to1_8bit, INT123_synth_4to1_real, INT123_synth_4to1_s32)
70
# endif
71
# ifndef NO_NTOM
72
,OUT_SYNTHS(INT123_synth_ntom, INT123_synth_ntom_8bit, INT123_synth_ntom_real, INT123_synth_ntom_s32)
73
# endif
74
},
75
{ /* stereo, by default only wrappers over plain synth */
76
OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
77
# ifndef NO_DOWNSAMPLE
78
,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
79
,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
80
# endif
81
# ifndef NO_NTOM
82
,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
83
# endif
84
},
85
{ /* mono2stereo */
86
OUT_SYNTHS(INT123_synth_1to1_m2s, INT123_synth_1to1_8bit_m2s, INT123_synth_1to1_real_m2s, INT123_synth_1to1_s32_m2s)
87
# ifndef NO_DOWNSAMPLE
88
,OUT_SYNTHS(INT123_synth_2to1_m2s, INT123_synth_2to1_8bit_m2s, INT123_synth_2to1_real_m2s, INT123_synth_2to1_s32_m2s)
89
,OUT_SYNTHS(INT123_synth_4to1_m2s, INT123_synth_4to1_8bit_m2s, INT123_synth_4to1_real_m2s, INT123_synth_4to1_s32_m2s)
90
# endif
91
# ifndef NO_NTOM
92
,OUT_SYNTHS(INT123_synth_ntom_m2s, INT123_synth_ntom_8bit_m2s, INT123_synth_ntom_real_m2s, INT123_synth_ntom_s32_m2s)
93
# endif
94
},
95
{ /* mono*/
96
OUT_SYNTHS(INT123_synth_1to1_mono, INT123_synth_1to1_8bit_mono, INT123_synth_1to1_real_mono, INT123_synth_1to1_s32_mono)
97
# ifndef NO_DOWNSAMPLE
98
,OUT_SYNTHS(INT123_synth_2to1_mono, INT123_synth_2to1_8bit_mono, INT123_synth_2to1_real_mono, INT123_synth_2to1_s32_mono)
99
,OUT_SYNTHS(INT123_synth_4to1_mono, INT123_synth_4to1_8bit_mono, INT123_synth_4to1_real_mono, INT123_synth_4to1_s32_mono)
100
# endif
101
# ifndef NO_NTOM
102
,OUT_SYNTHS(INT123_synth_ntom_mono, INT123_synth_ntom_8bit_mono, INT123_synth_ntom_real_mono, INT123_synth_ntom_s32_mono)
103
#endif
104
}
105
};
106
107
#ifdef OPT_X86
/* Plain synth table for i386: dedicated routines for 1to1, 2to1 and 4to1,
   while the ntom row reuses the generic routines. */
const func_synth plain_i386[r_limit][f_limit] =
{
	 OUT_SYNTHS( INT123_synth_1to1_i386, INT123_synth_1to1_8bit_i386
	 ,           INT123_synth_1to1_real_i386, INT123_synth_1to1_s32_i386 )
# ifndef NO_DOWNSAMPLE
	,OUT_SYNTHS( INT123_synth_2to1_i386, INT123_synth_2to1_8bit_i386
	 ,           INT123_synth_2to1_real_i386, INT123_synth_2to1_s32_i386 )
	,OUT_SYNTHS( INT123_synth_4to1_i386, INT123_synth_4to1_8bit_i386
	 ,           INT123_synth_4to1_real_i386, INT123_synth_4to1_s32_i386 )
# endif
# ifndef NO_NTOM
	,OUT_SYNTHS( INT123_synth_ntom, INT123_synth_ntom_8bit
	 ,           INT123_synth_ntom_real, INT123_synth_ntom_s32 )
# endif
};
#endif
121
122
123
/* Hand out the default decoder of this build, i.e. the value of defopt. */
enum optdec INT123_defdec(void)
{
	return defopt;
}
124
125
enum optcla INT123_decclass(const enum optdec type)
126
{
127
return
128
(
129
type == mmx
130
|| type == sse
131
|| type == sse_vintage
132
|| type == dreidnowext
133
|| type == dreidnowext_vintage
134
|| type == x86_64
135
|| type == neon
136
|| type == neon64
137
|| type == avx
138
) ? mmxsse : normal;
139
}
140
141
/* Tell if the given synth function occurs anywhere in the given
   resample x format table. Returns TRUE on a hit, FALSE otherwise. */
static int find_synth(func_synth synth, const func_synth synths[r_limit][f_limit])
{
	enum synth_resample row;
	enum synth_format col;

	for(row=0; row<r_limit; ++row)
	{
		for(col=0; col<f_limit; ++col)
		{
			if(synths[row][col] == synth)
				return TRUE;
		}
	}
	return FALSE;
}
152
153
154
#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
/* The caller already knows that an SSE synth is active; decide here whether
   that means current or vintage SSE.
   Without OPT_SSE the answer is always sse_vintage. With OPT_SSE it is sse,
   unless OPT_THE_DCT36 is in effect and INT123_dct36_match() denies the
   match for sse. */
static enum optdec sse_or_vintage(mpg123_handle *fr)
{
# ifdef OPT_SSE
#  ifdef OPT_THE_DCT36
	if(!INT123_dct36_match(fr, sse))
		return sse_vintage;
#  endif
	return sse;
# else
	return sse_vintage;
# endif
}
#endif
171
172
/* Determine what kind of decoder is actually active
173
This depends on runtime choices which may cause fallback to i386 or generic code. */
174
static int find_dectype(mpg123_handle *fr)
175
{
176
enum optdec type = nodec;
177
/* Direct and indirect usage, 1to1 stereo decoding.
178
Concentrating on the plain stereo synth should be fine, mono stuff is derived. */
179
func_synth basic_synth = fr->synth;
180
#ifndef NO_8BIT
181
#ifndef NO_16BIT
182
if(basic_synth == INT123_synth_1to1_8bit_wrap)
183
basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */
184
#endif
185
#endif
186
187
if(FALSE) ; /* Just to initialize the else if ladder. */
188
#ifndef NO_16BIT
189
#if defined(OPT_3DNOWEXT) || defined(OPT_3DNOWEXT_VINTAGE)
190
else if(basic_synth == INT123_synth_1to1_3dnowext)
191
{
192
type = dreidnowext;
193
# ifdef OPT_3DNOWEXT_VINTAGE
194
# ifdef OPT_MULTI
195
if(INT123_dct36_match(fr, dreidnowext_vintage))
196
# endif
197
type = dreidnowext_vintage;
198
# endif
199
}
200
#endif
201
#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
202
else if(basic_synth == INT123_synth_1to1_sse)
203
{
204
type = sse_or_vintage(fr);
205
}
206
#endif
207
#if defined(OPT_3DNOW) || defined(OPT_3DNOW_VINTAGE)
208
else if(basic_synth == INT123_synth_1to1_3dnow)
209
{
210
type = dreidnow;
211
# ifdef OPT_3DNOW_VINTAGE
212
# ifdef OPT_MULTI
213
if(INT123_dct36_match(fr, dreidnow_vintage))
214
# endif
215
type = dreidnow_vintage;
216
# endif
217
}
218
#endif
219
#ifdef OPT_MMX
220
else if(basic_synth == INT123_synth_1to1_mmx) type = mmx;
221
#endif
222
#ifdef OPT_I586_DITHER
223
else if(basic_synth == INT123_synth_1to1_i586_dither) type = ifuenf_dither;
224
#endif
225
#ifdef OPT_I586
226
else if(basic_synth == INT123_synth_1to1_i586) type = ifuenf;
227
#endif
228
#ifdef OPT_ALTIVEC
229
else if(basic_synth == INT123_synth_1to1_altivec) type = altivec;
230
#endif
231
#ifdef OPT_X86_64
232
else if(basic_synth == INT123_synth_1to1_x86_64) type = x86_64;
233
#endif
234
#ifdef OPT_AVX
235
else if(basic_synth == INT123_synth_1to1_avx) type = avx;
236
#endif
237
#ifdef OPT_ARM
238
else if(basic_synth == INT123_synth_1to1_arm) type = arm;
239
#endif
240
#ifdef OPT_NEON
241
else if(basic_synth == INT123_synth_1to1_neon) type = neon;
242
#endif
243
#ifdef OPT_NEON64
244
else if(basic_synth == INT123_synth_1to1_neon64) type = neon64;
245
#endif
246
#ifdef OPT_GENERIC_DITHER
247
else if(basic_synth == INT123_synth_1to1_dither) type = generic_dither;
248
#endif
249
#ifdef OPT_DITHER /* either i586 or generic! */
250
#ifndef NO_DOWNSAMPLE
251
else if
252
(
253
basic_synth == INT123_synth_2to1_dither
254
|| basic_synth == INT123_synth_4to1_dither
255
) type = generic_dither;
256
#endif
257
#endif
258
#endif /* 16bit */
259
260
#ifndef NO_SYNTH32
261
262
#ifndef NO_REAL
263
#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
264
else if(basic_synth == INT123_synth_1to1_real_sse)
265
{
266
type = sse_or_vintage(fr);
267
}
268
#endif
269
#ifdef OPT_X86_64
270
else if(basic_synth == INT123_synth_1to1_real_x86_64) type = x86_64;
271
#endif
272
#ifdef OPT_AVX
273
else if(basic_synth == INT123_synth_1to1_real_avx) type = avx;
274
#endif
275
#ifdef OPT_ALTIVEC
276
else if(basic_synth == INT123_synth_1to1_real_altivec) type = altivec;
277
#endif
278
#ifdef OPT_NEON
279
else if(basic_synth == INT123_synth_1to1_real_neon) type = neon;
280
#endif
281
#ifdef OPT_NEON64
282
else if(basic_synth == INT123_synth_1to1_real_neon64) type = neon64;
283
#endif
284
285
#endif /* real */
286
287
#ifndef NO_32BIT
288
#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
289
else if(basic_synth == INT123_synth_1to1_s32_sse)
290
{
291
type = sse_or_vintage(fr);
292
}
293
#endif
294
#ifdef OPT_X86_64
295
else if(basic_synth == INT123_synth_1to1_s32_x86_64) type = x86_64;
296
#endif
297
#ifdef OPT_AVX
298
else if(basic_synth == INT123_synth_1to1_s32_avx) type = avx;
299
#endif
300
#ifdef OPT_ALTIVEC
301
else if(basic_synth == INT123_synth_1to1_s32_altivec) type = altivec;
302
#endif
303
#ifdef OPT_NEON
304
else if(basic_synth == INT123_synth_1to1_s32_neon) type = neon;
305
#endif
306
#ifdef OPT_NEON64
307
else if(basic_synth == INT123_synth_1to1_s32_neon64) type = neon64;
308
#endif
309
#endif /* 32bit */
310
311
#endif /* any 32 bit synth */
312
313
#ifdef OPT_X86
314
else if(find_synth(basic_synth, plain_i386))
315
type = idrei;
316
#endif
317
318
else if(find_synth(basic_synth, synth_base.plain))
319
type = generic;
320
321
322
323
#ifdef OPT_I486
324
/* i486 is special ... the specific code is in use for 16bit 1to1 stereo
325
otherwise we have i386 active... but still, the distinction doesn't matter*/
326
type = ivier;
327
#endif
328
329
if(type != nodec)
330
{
331
fr->cpu_opts.type = type;
332
fr->cpu_opts.class = INT123_decclass(type);
333
334
debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class);
335
return MPG123_OK;
336
}
337
else
338
{
339
if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!");
340
341
fr->err = MPG123_BAD_DECODER_SETUP;
342
return MPG123_ERR;
343
}
344
}
345
346
/* set synth functions for current frame, optimizations handled by opt_* macros */
347
int INT123_set_synth_functions(mpg123_handle *fr)
348
{
349
enum synth_resample resample = r_none;
350
enum synth_format basic_format = f_none; /* Default is always 16bit, or whatever. */
351
352
/* Select the basic output format, different from 16bit: 8bit, real. */
353
if(FALSE){}
354
#ifndef NO_16BIT
355
else if(fr->af.dec_enc & MPG123_ENC_16)
356
basic_format = f_16;
357
#endif
358
#ifndef NO_8BIT
359
else if(fr->af.dec_enc & MPG123_ENC_8)
360
basic_format = f_8;
361
#endif
362
#ifndef NO_REAL
363
else if(fr->af.dec_enc & MPG123_ENC_FLOAT)
364
basic_format = f_real;
365
#endif
366
#ifndef NO_32BIT
367
/* 24 bit integer means decoding to 32 bit first. */
368
else if(fr->af.dec_enc & MPG123_ENC_32 || fr->af.dec_enc & MPG123_ENC_24)
369
basic_format = f_32;
370
#endif
371
372
/* Make sure the chosen format is compiled into this lib. */
373
if(basic_format == f_none)
374
{
375
if(NOQUIET) error("INT123_set_synth_functions: This output format is disabled in this build!");
376
377
return -1;
378
}
379
380
/* Be explicit about downsampling variant. */
381
switch(fr->down_sample)
382
{
383
case 0: resample = r_1to1; break;
384
#ifndef NO_DOWNSAMPLE
385
case 1: resample = r_2to1; break;
386
case 2: resample = r_4to1; break;
387
#endif
388
#ifndef NO_NTOM
389
case 3: resample = r_ntom; break;
390
#endif
391
}
392
393
if(resample == r_none)
394
{
395
if(NOQUIET) error("INT123_set_synth_functions: This resampling mode is not supported in this build!");
396
397
return -1;
398
}
399
400
debug2("selecting synth: resample=%i format=%i", resample, basic_format);
401
/* Finally selecting the synth functions for stereo / mono. */
402
fr->synth = fr->synths.plain[resample][basic_format];
403
fr->synth_stereo = fr->synths.stereo[resample][basic_format];
404
fr->synth_mono = fr->af.channels==2
405
? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */
406
: fr->synths.mono[resample][basic_format]; /* Mono MPEG file decoded to mono. */
407
408
if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */
409
{
410
fr->err = MPG123_BAD_DECODER_SETUP;
411
return MPG123_ERR;
412
}
413
414
if(INT123_frame_buffers(fr) != 0)
415
{
416
fr->err = MPG123_NO_BUFFERS;
417
if(NOQUIET) error("Failed to set up decoder buffers!");
418
419
return MPG123_ERR;
420
}
421
422
#ifndef NO_8BIT
423
if(basic_format == f_8)
424
{
425
if(INT123_make_conv16to8_table(fr) != 0)
426
{
427
if(NOQUIET) error("Failed to set up conv16to8 table!");
428
/* it's a bit more work to get proper error propagation up */
429
return -1;
430
}
431
}
432
#endif
433
434
#ifdef OPT_MMXORSSE
435
/* Special treatment for MMX, SSE and 3DNowExt stuff.
436
The real-decoding SSE for x86-64 uses normal tables! */
437
if(fr->cpu_opts.class == mmxsse
438
# ifndef NO_REAL
439
&& basic_format != f_real
440
# endif
441
# ifndef NO_32BIT
442
&& basic_format != f_32
443
# endif
444
# ifdef ACCURATE_ROUNDING
445
&& fr->cpu_opts.type != sse
446
&& fr->cpu_opts.type != sse_vintage
447
&& fr->cpu_opts.type != x86_64
448
&& fr->cpu_opts.type != neon
449
&& fr->cpu_opts.type != neon64
450
&& fr->cpu_opts.type != avx
451
# endif
452
)
453
{
454
#ifndef NO_LAYER3
455
INT123_init_layer3_stuff(fr, INT123_init_layer3_gainpow2_mmx);
456
#endif
457
#ifndef NO_LAYER12
458
INT123_init_layer12_stuff(fr, INT123_init_layer12_table_mmx);
459
#endif
460
fr->INT123_make_decode_tables = INT123_make_decode_tables_mmx;
461
}
462
else
463
#endif
464
{
465
#ifndef NO_LAYER3
466
INT123_init_layer3_stuff(fr, INT123_init_layer3_gainpow2);
467
#endif
468
#ifndef NO_LAYER12
469
INT123_init_layer12_stuff(fr, INT123_init_layer12_table);
470
#endif
471
fr->INT123_make_decode_tables = INT123_make_decode_tables;
472
}
473
474
/* We allocated the table buffers just now, so (re)create the tables. */
475
fr->INT123_make_decode_tables(fr);
476
477
return 0;
478
}
479
480
/* Choose the decoder optimization for a handle and fill fr->synths.
   cpu names the wanted decoder; INT123_dectype() maps NULL or an empty
   string to automatic choice. The table starts out as a copy of synth_base
   and the first acceptable decoder, in the order of the blocks below,
   overwrites the entries it has optimized routines for.
   Returns 1 when a decoder got selected, 0 when none could be set or the
   dither noise setup failed. */
int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
	const char *label = ""; /* name of the selected decoder, for the verbose message */
	enum optdec wanted = nodec;
	int picked = 0;   /* set once a decoder has been selected */
	int take_any = 0; /* automatic choice: accept the first usable decoder */
#ifdef OPT_DITHER
	int needs_dither = FALSE; /* If some dithered decoder is chosen. */
#endif

/* A decoder is up for selection as long as nothing has been picked yet and
   it is either the one asked for or the choice is automatic. */
#define DEC_CANDIDATE(dec) (!picked && (take_any || wanted == (dec)))

	wanted = INT123_dectype(cpu);
	take_any = wanted == autodec;
	/* Fill whole array of synth functions with generic code first. */
	fr->synths = synth_base;

#ifndef OPT_MULTI
	if(!take_any && wanted != defopt)
	{
		if(NOQUIET) error2("you wanted decoder type %i, I only have %i", wanted, defopt);
	}
	take_any = TRUE; /* There will be only one choice anyway. */
#endif

	fr->cpu_opts.type = nodec;

#ifdef OPT_X86
	/* covers any i386+ cpu; they actually differ only in the INT123_synth_1to1 function, mostly... */
	if(cpu_i586(fr->cpu_flags))
	{
# ifdef OPT_MULTI
		debug2("standard flags: 0x%08x\textended flags: 0x%08x", fr->cpu_flags.std, fr->cpu_flags.ext);
# endif
# ifdef OPT_SSE
		if( DEC_CANDIDATE(sse)
		    && cpu_sse(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )
		{
			label = dn_sse;
			fr->cpu_opts.type = sse;
#  ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_sse;
#   ifdef ACCURATE_ROUNDING
			fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_sse;
#   endif
#  endif
#  ifndef NO_REAL
			fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_sse;
			fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_sse;
#  endif
#  ifndef NO_32BIT
			fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_sse;
			fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_sse;
#  endif
			picked = 1;
		}
# endif
# ifdef OPT_SSE_VINTAGE
		if( DEC_CANDIDATE(sse_vintage)
		    && cpu_sse(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )
		{
			label = dn_sse_vintage;
			fr->cpu_opts.type = sse_vintage;
#  ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_sse;
#   ifdef ACCURATE_ROUNDING
			fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_sse;
#   endif
#  endif
#  ifndef NO_REAL
			fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_sse;
			fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_sse;
#  endif
#  ifndef NO_32BIT
			fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_sse;
			fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_sse;
#  endif
			picked = 1;
		}
# endif
# ifdef OPT_3DNOWEXT
		if( DEC_CANDIDATE(dreidnowext)
		    && cpu_3dnow(fr->cpu_flags)
		    && cpu_3dnowext(fr->cpu_flags)
		    && cpu_mmx(fr->cpu_flags) )
		{
			label = dn_dreidnowext;
			fr->cpu_opts.type = dreidnowext;
#  ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnowext;
#  endif
			picked = 1;
		}
# endif
# ifdef OPT_3DNOWEXT_VINTAGE
		if( DEC_CANDIDATE(dreidnowext_vintage)
		    && cpu_3dnow(fr->cpu_flags)
		    && cpu_3dnowext(fr->cpu_flags)
		    && cpu_mmx(fr->cpu_flags) )
		{
			label = dn_dreidnowext_vintage;
			fr->cpu_opts.type = dreidnowext_vintage;
#  ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnowext;
#  endif
			picked = 1;
		}
# endif
# ifdef OPT_3DNOW
		if( DEC_CANDIDATE(dreidnow)
		    && cpu_3dnow(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )
		{
			label = dn_dreidnow;
			fr->cpu_opts.type = dreidnow;
#  ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnow;
#  endif
			picked = 1;
		}
# endif
# ifdef OPT_3DNOW_VINTAGE
		if( DEC_CANDIDATE(dreidnow_vintage)
		    && cpu_3dnow(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )
		{
			label = dn_dreidnow_vintage;
			fr->cpu_opts.type = dreidnow_vintage;
#  ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnow;
#  endif
			picked = 1;
		}
# endif
# ifdef OPT_MMX
		if( DEC_CANDIDATE(mmx)
		    && cpu_mmx(fr->cpu_flags) )
		{
			label = dn_mmx;
			fr->cpu_opts.type = mmx;
#  ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_mmx;
#  endif
			picked = 1;
		}
# endif
# ifdef OPT_I586
		if(DEC_CANDIDATE(ifuenf))
		{
			label = "i586/pentium";
			fr->cpu_opts.type = ifuenf;
#  ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_i586;
#  endif
			picked = 1;
		}
# endif
# ifdef OPT_I586_DITHER
		if(DEC_CANDIDATE(ifuenf_dither))
		{
			label = "dithered i586/pentium";
			fr->cpu_opts.type = ifuenf_dither;
			needs_dither = TRUE;
#  ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_i586_dither;
#   ifndef NO_DOWNSAMPLE
			fr->synths.plain[r_2to1][f_16] = INT123_synth_2to1_dither;
			fr->synths.plain[r_4to1][f_16] = INT123_synth_4to1_dither;
#   endif
#  endif
			picked = 1;
		}
# endif
	}
# ifdef OPT_I486
	/* That won't cooperate in multi opt mode - forcing i486 in layer3.c
	   But still... here it is... maybe for real use in future. */
	if(DEC_CANDIDATE(ivier))
	{
		label = dn_ivier;
		fr->cpu_opts.type = ivier;
		picked = 1;
	}
# endif
# ifdef OPT_I386
	if(DEC_CANDIDATE(idrei))
	{
		label = dn_idrei;
		fr->cpu_opts.type = idrei;
		picked = 1;
	}
# endif

	if(picked)
	{
		/*
			Some x86 decoder got selected ... fill up with i386 routines.
			There is an open question about using dithered INT123_synth_1to1 for 8bit wrappers.
			For quality it won't make sense, but wrapped i586_dither may still be faster...
		*/
		enum synth_resample row;
		enum synth_format col;
# ifndef NO_8BIT
#  ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
		if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16])
		{
			fr->synths.plain[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap;
			fr->synths.mono[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_mono;
			fr->synths.mono2stereo[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_m2s;
		}
#  endif
# endif
		/* Every plain entry still pointing to generic code gets the i386 one. */
		for(row=0; row<r_limit; ++row)
		{
			for(col=0; col<f_limit; ++col)
			{
				if(fr->synths.plain[row][col] == synth_base.plain[row][col])
					fr->synths.plain[row][col] = plain_i386[row][col];
			}
		}
	}

#endif /* OPT_X86 */

#ifdef OPT_AVX
	if(DEC_CANDIDATE(avx) && cpu_avx(fr->cpu_flags))
	{
		label = "x86-64 (AVX)";
		fr->cpu_opts.type = avx;
# ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_avx;
		fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_avx;
# endif
# ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_avx;
		fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_fltst_avx;
# endif
# ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_avx;
		fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_avx;
# endif
		picked = 1;
	}
#endif

#ifdef OPT_X86_64
	if(DEC_CANDIDATE(x86_64))
	{
		label = "x86-64 (SSE)";
		fr->cpu_opts.type = x86_64;
# ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_x86_64;
		fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_x86_64;
# endif
# ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_x86_64;
		fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_x86_64;
# endif
# ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_x86_64;
		fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_x86_64;
# endif
		picked = 1;
	}
#endif

#ifdef OPT_ALTIVEC
	if(DEC_CANDIDATE(altivec))
	{
		label = dn_altivec;
		fr->cpu_opts.type = altivec;
# ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_altivec;
		fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_altivec;
# endif
# ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_altivec;
		fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_fltst_altivec;
# endif
# ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_altivec;
		fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_altivec;
# endif
		picked = 1;
	}
#endif

#ifdef OPT_NEON
	if(DEC_CANDIDATE(neon) && cpu_neon(fr->cpu_flags))
	{
		label = dn_neon;
		fr->cpu_opts.type = neon;
# ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_neon;
		fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_neon;
# endif
# ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_neon;
		fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_neon;
# endif
# ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_neon;
		fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_neon;
# endif
		picked = 1;
	}
#endif

#ifdef OPT_ARM
	if(DEC_CANDIDATE(arm))
	{
		label = dn_arm;
		fr->cpu_opts.type = arm;
# ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_arm;
# endif
		picked = 1;
	}
#endif

#ifdef OPT_NEON64
	if(DEC_CANDIDATE(neon64) && cpu_neon(fr->cpu_flags))
	{
		label = dn_neon64;
		fr->cpu_opts.type = neon64;
# ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_neon64;
		fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_neon64;
# endif
# ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_neon64;
		fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_fltst_neon64;
# endif
# ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_neon64;
		fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32st_neon64;
# endif
		picked = 1;
	}
#endif

#ifdef OPT_GENERIC
	/* The baseline table is already in place, only the type is noted. */
	if(DEC_CANDIDATE(generic))
	{
		label = dn_generic;
		fr->cpu_opts.type = generic;
		picked = 1;
	}
#endif

#ifdef OPT_GENERIC_DITHER
	if(DEC_CANDIDATE(generic_dither))
	{
		label = "dithered generic";
		fr->cpu_opts.type = generic_dither;
		needs_dither = TRUE;
# ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_dither;
#  ifndef NO_DOWNSAMPLE
		fr->synths.plain[r_2to1][f_16] = INT123_synth_2to1_dither;
		fr->synths.plain[r_4to1][f_16] = INT123_synth_4to1_dither;
#  endif
# endif
		picked = 1;
	}
#endif

#undef DEC_CANDIDATE

	fr->cpu_opts.class = INT123_decclass(fr->cpu_opts.type);

#ifndef NO_8BIT
# ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
	/* Last chance to use some optimized routine via generic wrappers (for 8bit).
	   The dithered decoders are left out of this. */
	if(   fr->cpu_opts.type != ifuenf_dither
	   && fr->cpu_opts.type != generic_dither
	   && fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] )
	{
		fr->synths.plain[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap;
		fr->synths.mono[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_mono;
		fr->synths.mono2stereo[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_m2s;
	}
# endif
#endif

#ifdef OPT_THE_DCT36
	INT123_dct36_choose(fr);
#endif

#ifdef OPT_DITHER
	/* run-time dither noise table generation */
	if(picked && needs_dither && !INT123_frame_dither_init(fr))
	{
		if(NOQUIET) error("Dither noise setup failed!");
		return 0;
	}
#endif

	if(!picked)
	{
		if(NOQUIET) error("Could not set optimization!");
		return 0;
	}

	if(VERBOSE) fprintf(stderr, "Decoder: %s\n", label);
	return 1;
}
885
886
enum optdec INT123_dectype(const char* decoder)
887
{
888
enum optdec dt;
889
if( (decoder == NULL)
890
|| (decoder[0] == 0) )
891
return autodec;
892
893
for(dt=autodec; dt<nodec; ++dt)
894
if(!strcasecmp(decoder, decname[dt])) return dt;
895
896
return nodec; /* If we found nothing... */
897
}
898
899
#ifdef OPT_MULTI

/* Storage for the list of decoders usable on this machine.
   It provides one slot per decoder option compiled in plus the terminating
   NULL. All slots start out as NULL; check_decoders() writes the names in
   from the front. */
static const char *mpg123_supported_decoder_list[] =
{
# ifdef OPT_SSE
	NULL,
# endif
# ifdef OPT_SSE_VINTAGE
	NULL,
# endif
# ifdef OPT_3DNOWEXT
	NULL,
# endif
# ifdef OPT_3DNOWEXT_VINTAGE
	NULL,
# endif
# ifdef OPT_3DNOW
	NULL,
# endif
# ifdef OPT_3DNOW_VINTAGE
	NULL,
# endif
# ifdef OPT_MMX
	NULL,
# endif
# ifdef OPT_I586
	NULL,
# endif
# ifdef OPT_I586_DITHER
	NULL,
# endif
# ifdef OPT_I486
	NULL,
# endif
# ifdef OPT_I386
	NULL,
# endif
# ifdef OPT_ALTIVEC
	NULL,
# endif
# ifdef OPT_AVX
	NULL,
# endif
# ifdef OPT_X86_64
	NULL,
# endif
# ifdef OPT_ARM
	NULL,
# endif
# ifdef OPT_NEON
	NULL,
# endif
# ifdef OPT_NEON64
	NULL,
# endif
# ifdef OPT_GENERIC_FLOAT
	NULL,
# endif
# ifdef OPT_GENERIC
	NULL,
# endif
# ifdef OPT_GENERIC_DITHER
	NULL,
# endif
	NULL /* terminator */
};
#endif
967
968
/* NULL-terminated list with the names of all decoders compiled into this
   build, regardless of what the CPU at hand supports. */
static const char *mpg123_decoder_list[] =
{
#ifdef OPT_SSE
	dn_sse,
#endif
#ifdef OPT_SSE_VINTAGE
	dn_sse_vintage,
#endif
#ifdef OPT_3DNOWEXT
	dn_dreidnowext,
#endif
#ifdef OPT_3DNOWEXT_VINTAGE
	dn_dreidnowext_vintage,
#endif
#ifdef OPT_3DNOW
	dn_dreidnow,
#endif
#ifdef OPT_3DNOW_VINTAGE
	dn_dreidnow_vintage,
#endif
#ifdef OPT_MMX
	dn_mmx,
#endif
#ifdef OPT_I586
	dn_ifuenf,
#endif
#ifdef OPT_I586_DITHER
	dn_ifuenf_dither,
#endif
#ifdef OPT_I486
	dn_ivier,
#endif
#ifdef OPT_I386
	dn_idrei,
#endif
#ifdef OPT_ALTIVEC
	dn_altivec,
#endif
#ifdef OPT_AVX
	dn_avx,
#endif
#ifdef OPT_X86_64
	dn_x86_64,
#endif
#ifdef OPT_ARM
	dn_arm,
#endif
#ifdef OPT_NEON
	dn_neon,
#endif
#ifdef OPT_NEON64
	dn_neon64,
#endif
#ifdef OPT_GENERIC
	dn_generic,
#endif
#ifdef OPT_GENERIC_DITHER
	dn_generic_dither,
#endif
	NULL /* terminator */
};
1029
1030
/* Fill mpg123_supported_decoder_list with the names of the compiled-in
   decoders that pass the CPU flag checks below, front to back.
   Does nothing in a build without OPT_MULTI. */
static void check_decoders(void)
{
#ifndef OPT_MULTI
	/* In non-multi mode, only the full list (one entry) is used. */
	return;
#else
	const char **slot = mpg123_supported_decoder_list;
# ifdef OPT_CPU_FLAGS
	struct cpuflags cpu_flags;
	wrap_getcpuflags(&cpu_flags);
# endif
# ifdef OPT_X86
	if(cpu_i586(cpu_flags))
	{
		/* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2");
		if(cpu_sse3(cpu_flags)) printf(" SSE3"); */
#  ifdef OPT_SSE
		if(cpu_sse(cpu_flags))
			*slot++ = dn_sse;
#  endif
#  ifdef OPT_SSE_VINTAGE
		if(cpu_sse(cpu_flags))
			*slot++ = dn_sse_vintage;
#  endif
#  ifdef OPT_3DNOWEXT
		if(cpu_3dnowext(cpu_flags))
			*slot++ = dn_dreidnowext;
#  endif
#  ifdef OPT_3DNOWEXT_VINTAGE
		if(cpu_3dnowext(cpu_flags))
			*slot++ = dn_dreidnowext_vintage;
#  endif
#  ifdef OPT_3DNOW
		if(cpu_3dnow(cpu_flags))
			*slot++ = dn_dreidnow;
#  endif
#  ifdef OPT_3DNOW_VINTAGE
		if(cpu_3dnow(cpu_flags))
			*slot++ = dn_dreidnow_vintage;
#  endif
#  ifdef OPT_MMX
		if(cpu_mmx(cpu_flags))
			*slot++ = dn_mmx;
#  endif
#  ifdef OPT_I586
		*slot++ = dn_ifuenf;
#  endif
#  ifdef OPT_I586_DITHER
		*slot++ = dn_ifuenf_dither;
#  endif
	}
# endif
	/* just assume that the i486 built is run on a i486 cpu... */
# ifdef OPT_I486
	*slot++ = dn_ivier;
# endif
# ifdef OPT_ALTIVEC
	*slot++ = dn_altivec;
# endif
	/* every supported x86 can do i386, any cpu can do generic */
# ifdef OPT_I386
	*slot++ = dn_idrei;
# endif
# ifdef OPT_AVX
	if(cpu_avx(cpu_flags))
		*slot++ = dn_avx;
# endif
# ifdef OPT_X86_64
	*slot++ = dn_x86_64;
# endif
# ifdef OPT_ARM
	*slot++ = dn_arm;
# endif
# ifdef OPT_NEON
	if(cpu_neon(cpu_flags))
		*slot++ = dn_neon;
# endif
# ifdef OPT_NEON64
	if(cpu_neon(cpu_flags))
		*slot++ = dn_neon64;
# endif
# ifdef OPT_GENERIC
	*slot++ = dn_generic;
# endif
# ifdef OPT_GENERIC_DITHER
	*slot++ = dn_generic_dither;
# endif
#endif /* OPT_MULTI */
}
1109
1110
/* Name of the decoder type currently stored in the handle (looked up in
   decname by mh->cpu_opts.type), or NULL when no handle is given. */
const char* attribute_align_arg mpg123_current_decoder(mpg123_handle *mh)
{
	return mh != NULL ? decname[mh->cpu_opts.type] : NULL;
}
1116
1117
/* Give access to the NULL-terminated list of all decoders in this build. */
const char attribute_align_arg **mpg123_decoders(void)
{
	return mpg123_decoder_list;
}
1118
/* Give access to the list of decoders usable on this machine.
   check_decoders() is run on every call. With OPT_MULTI the list it fills
   is returned, otherwise the full list of the build. */
const char attribute_align_arg **mpg123_supported_decoders(void)
{
	const char **list;

	check_decoders();
#ifdef OPT_MULTI
	list = mpg123_supported_decoder_list;
#else
	list = mpg123_decoder_list;
#endif
	return list;
}
1127
1128