Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/dep/xxhash/src/xxh_x86dispatch.c
4251 views
1
/*
2
* xxHash - Extremely Fast Hash algorithm
3
* Copyright (C) 2020 Yann Collet
4
*
5
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions are
9
* met:
10
*
11
* * Redistributions of source code must retain the above copyright
12
* notice, this list of conditions and the following disclaimer.
13
* * Redistributions in binary form must reproduce the above
14
* copyright notice, this list of conditions and the following disclaimer
15
* in the documentation and/or other materials provided with the
16
* distribution.
17
*
18
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
*
30
* You can contact the author at:
31
* - xxHash homepage: https://www.xxhash.com
32
* - xxHash source repository: https://github.com/Cyan4973/xxHash
33
*/
34
35
36
/*!
37
* @file xxh_x86dispatch.c
38
*
39
* Automatic dispatcher code for the @ref xxh3_family on x86-based targets.
40
*
41
* Optional add-on.
42
*
43
* **Compile this file with the default flags for your target.** Do not compile
44
* with flags like `-mavx*`, `-march=native`, or `/arch:AVX*`, there will be
45
* an error. See @ref XXH_X86DISPATCH_ALLOW_AVX for details.
46
*
47
* @defgroup dispatch x86 Dispatcher
48
* @{
49
*/
50
51
#if defined (__cplusplus)
52
extern "C" {
53
#endif
54
55
#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
56
57
/*!
58
* @def XXH_X86DISPATCH_ALLOW_AVX
59
* @brief Disables the AVX sanity check.
60
*
61
* Don't compile xxh_x86dispatch.c with options like `-mavx*`, `-march=native`,
62
* or `/arch:AVX*`. It is intended to be compiled for the minimum target, and
63
* it selectively enables SSE2, AVX2, and AVX512 when it is needed.
64
*
65
* Using this option _globally_ allows this feature, and therefore makes it
66
* undefined behavior to execute on any CPU without said feature.
67
*
68
* Even if the source code isn't directly using AVX intrinsics in a function,
69
* the compiler can still generate AVX code from autovectorization and by
70
* "upgrading" SSE2 intrinsics to use the VEX prefixes (a.k.a. AVX128).
71
*
72
* Use the same flags that you use to compile the rest of the program; this
73
* file will safely generate SSE2, AVX2, and AVX512 without these flags.
74
*
75
* Define XXH_X86DISPATCH_ALLOW_AVX to ignore this check, and feel free to open
76
* an issue if there is a target in the future where AVX is a default feature.
77
*/
78
#ifdef XXH_DOXYGEN
79
# define XXH_X86DISPATCH_ALLOW_AVX
80
#endif
81
82
#if defined(__AVX__) && !defined(XXH_X86DISPATCH_ALLOW_AVX)
83
# error "Do not compile xxh_x86dispatch.c with AVX enabled! See the comment above."
84
#endif
85
86
#ifdef __has_include
87
# define XXH_HAS_INCLUDE(header) __has_include(header)
88
#else
89
# define XXH_HAS_INCLUDE(header) 0
90
#endif
91
92
/*!
 * @def XXH_DISPATCH_SCALAR
 * @brief Enables/disables dispatching the scalar code path.
 *
 * If this is defined to 0, SSE2 support is assumed. This reduces code size
 * when the scalar path is not needed.
 *
 * This is automatically defined to 0 when...
 *   - SSE2 support is enabled in the compiler
 *   - Targeting x86_64
 *   - Targeting Android x86
 *   - Targeting macOS
 *
 * Define it before this point to override the automatic choice.
 */
#ifndef XXH_DISPATCH_SCALAR
#  if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) /* SSE2 on by default */ \
     || defined(__x86_64__) || defined(_M_X64) /* x86_64 */ \
     || defined(__ANDROID__) || defined(__APPLE__) /* Android or macOS */
#    define XXH_DISPATCH_SCALAR 0 /* disable */
#  else
#    define XXH_DISPATCH_SCALAR 1
#  endif
#endif
114
/*!
115
* @def XXH_DISPATCH_AVX2
116
* @brief Enables/disables dispatching for AVX2.
117
*
118
* This is automatically detected if it is not defined.
119
* - GCC 4.7 and later are known to support AVX2, but >4.9 is required
* to get the AVX2 intrinsics and typedefs without -mavx -mavx2.
121
* - Visual Studio 2013 Update 2 and later are known to support AVX2.
122
* - The GCC/Clang internal header `<avx2intrin.h>` is detected. While this is
123
* not allowed to be included directly, it still appears in the builtin
124
* include path and is detectable with `__has_include`.
125
*
126
* @see XXH_AVX2
127
*/
128
#ifndef XXH_DISPATCH_AVX2
129
# if (defined(__GNUC__) && (__GNUC__ > 4)) /* GCC 5.0+ */ \
130
|| (defined(_MSC_VER) && _MSC_VER >= 1900 && !defined(__clang__)) /* VS 2015+ */ \
131
|| (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 180030501 && !defined(__clang__)) /* VS 2013 Update 2 */ \
132
|| (XXH_HAS_INCLUDE(<avx2intrin.h>) && !defined(_MSC_VER)) /* GCC/Clang internal header */
133
# define XXH_DISPATCH_AVX2 1 /* enable dispatch towards AVX2 */
134
# else
135
# define XXH_DISPATCH_AVX2 0
136
# endif
137
#endif /* XXH_DISPATCH_AVX2 */
138
139
/*!
140
* @def XXH_DISPATCH_AVX512
141
* @brief Enables/disables dispatching for AVX512.
142
*
143
* Automatically detected if one of the following conditions is met:
144
* - GCC 4.9 and later are known to support AVX512.
145
* - Visual Studio 2017 and later are known to support AVX512.
146
* - The GCC/Clang internal header `<avx512fintrin.h>` is detected. While this
147
* is not allowed to be included directly, it still appears in the builtin
148
* include path and is detectable with `__has_include`.
149
*
150
* @see XXH_AVX512
151
*/
152
#ifndef XXH_DISPATCH_AVX512
153
# if (defined(__GNUC__) \
154
&& (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))) /* GCC 4.9+ */ \
155
|| (defined(_MSC_VER) && _MSC_VER >= 1910 && !defined(__clang__)) /* VS 2017+ */ \
156
|| (XXH_HAS_INCLUDE(<avx512fintrin.h>) && !defined(_MSC_VER)) /* GCC/Clang internal header */
157
# define XXH_DISPATCH_AVX512 1 /* enable dispatch towards AVX512 */
158
# else
159
# define XXH_DISPATCH_AVX512 0
160
# endif
161
#endif /* XXH_DISPATCH_AVX512 */
162
163
/*!
164
* @def XXH_TARGET_SSE2
165
* @brief Allows a function to be compiled with SSE2 intrinsics.
166
*
167
* Uses `__attribute__((__target__("sse2")))` on GCC to allow SSE2 to be used
168
* even with `-mno-sse2`.
169
*
170
* @def XXH_TARGET_AVX2
171
* @brief Like @ref XXH_TARGET_SSE2, but for AVX2.
172
*
173
* @def XXH_TARGET_AVX512
174
* @brief Like @ref XXH_TARGET_SSE2, but for AVX512.
175
*/
176
#if defined(__GNUC__)
177
# include <emmintrin.h> /* SSE2 */
178
# if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
179
# include <immintrin.h> /* AVX2, AVX512F */
180
# endif
181
# define XXH_TARGET_SSE2 __attribute__((__target__("sse2")))
182
# define XXH_TARGET_AVX2 __attribute__((__target__("avx2")))
183
# define XXH_TARGET_AVX512 __attribute__((__target__("avx512f")))
184
#elif defined(_MSC_VER)
185
# include <intrin.h>
186
# define XXH_TARGET_SSE2
187
# define XXH_TARGET_AVX2
188
# define XXH_TARGET_AVX512
189
#else
190
# error "Dispatching is currently not supported for your compiler."
191
#endif
192
193
#ifdef XXH_DISPATCH_DEBUG
/*
 * Debug logging: every dispatch decision is written to stderr.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and remains valid inside an unbraced if/else.
 */
#  include <stdio.h>
#  define XXH_debugPrint(str) do { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", str); fflush(NULL); } while (0)
#else
/*
 * Logging compiles to nothing, and NDEBUG is forced on so that the
 * <assert.h> include that follows turns assert() into a no-op.
 */
#  define XXH_debugPrint(str) ((void)0)
#  undef NDEBUG /* avoid redefinition */
#  define NDEBUG
#endif
202
#include <assert.h>
203
204
#define XXH_INLINE_ALL
205
#define XXH_X86DISPATCH
206
#include "xxhash.h"
207
208
/*
209
* Support both AT&T and Intel dialects
210
*
211
* GCC doesn't convert AT&T syntax to Intel syntax, and will error out if
212
* compiled with -masm=intel. Instead, it supports dialect switching with
213
* curly braces: { AT&T syntax | Intel syntax }
214
*
215
* Clang's integrated assembler automatically converts AT&T syntax to Intel if
216
* needed, making the dialect switching useless (it isn't even supported).
217
*
218
* Note: Comments are written in the inline assembly itself.
219
*/
220
#ifdef __clang__
221
# define XXH_I_ATT(intel, att) att "\n\t"
222
#else
223
# define XXH_I_ATT(intel, att) "{" att "|" intel "}\n\t"
224
#endif
225
226
/*!
227
* @internal
228
* @brief Runs CPUID.
229
*
230
* @param eax, ecx The parameters to pass to CPUID, %eax and %ecx respectively.
231
* @param abcd The array to store the result in, `{ eax, ebx, ecx, edx }`
232
*/
233
static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd)
234
{
235
#if defined(_MSC_VER)
236
__cpuidex(abcd, eax, ecx);
237
#else
238
xxh_u32 ebx, edx;
239
# if defined(__i386__) && defined(__PIC__)
240
__asm__(
241
"# Call CPUID\n\t"
242
"#\n\t"
243
"# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t"
244
"# EBX, so we use EDI instead.\n\t"
245
XXH_I_ATT("mov edi, ebx", "movl %%ebx, %%edi")
246
XXH_I_ATT("cpuid", "cpuid" )
247
XXH_I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi")
248
: "=D" (ebx),
249
# else
250
__asm__(
251
"# Call CPUID\n\t"
252
XXH_I_ATT("cpuid", "cpuid")
253
: "=b" (ebx),
254
# endif
255
"+a" (eax), "+c" (ecx), "=d" (edx));
256
abcd[0] = eax;
257
abcd[1] = ebx;
258
abcd[2] = ecx;
259
abcd[3] = edx;
260
#endif
261
}
262
263
/*
264
* Modified version of Intel's guide
265
* https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
266
*/
267
268
#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
269
/*!
270
* @internal
271
* @brief Runs `XGETBV`.
272
*
273
* While the CPU may support AVX2, the operating system might not properly save
274
* the full YMM/ZMM registers.
275
*
276
* xgetbv is used for detecting this: Any compliant operating system will define
277
* a set of flags in the xcr0 register indicating how it saves the AVX registers.
278
*
279
* You can manually disable this flag on Windows by running, as admin:
280
*
281
* bcdedit.exe /set xsavedisable 1
282
*
283
* and rebooting. Run the same command with 0 to re-enable it.
284
*/
285
static xxh_u64 XXH_xgetbv(void)
286
{
287
#if defined(_MSC_VER)
288
return _xgetbv(0); /* min VS2010 SP1 compiler is required */
289
#else
290
xxh_u32 xcr0_lo, xcr0_hi;
291
__asm__(
292
"# Call XGETBV\n\t"
293
"#\n\t"
294
"# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t"
295
"# the XGETBV opcode, so we encode it by hand instead.\n\t"
296
"# See <https://github.com/asmjit/asmjit/issues/78> for details.\n\t"
297
".byte 0x0f, 0x01, 0xd0\n\t"
298
: "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
299
return xcr0_lo | ((xxh_u64)xcr0_hi << 32);
300
#endif
301
}
302
#endif
303
304
/* Tested against EDX (abcd[3]) of CPUID(1, 0). */
#define XXH_SSE2_CPUID_MASK     (1 << 26)
/* Tested against ECX (abcd[2]) of CPUID(1, 0); both bits must be set. */
#define XXH_OSXSAVE_CPUID_MASK  ((1 << 27) | (1 << 26))
/* Tested against EBX (abcd[1]) of CPUID(7, 0). */
#define XXH_AVX2_CPUID_MASK     (1 << 5)
/* Tested against the XGETBV result: the OS must save the YMM registers. */
#define XXH_AVX2_XGETBV_MASK    ((1 << 1) | (1 << 2))
/* Tested against EBX (abcd[1]) of CPUID(7, 0). */
#define XXH_AVX512F_CPUID_MASK  (1 << 16)
/* Tested against the XGETBV result: the OS must save the ZMM registers. */
#define XXH_AVX512F_XGETBV_MASK ((1 << 1) | (1 << 2) | (7 << 5))
310
311
/*!
312
* @internal
313
* @brief Returns the best XXH3 implementation.
314
*
315
* Runs various CPUID/XGETBV tests to try and determine the best implementation.
316
*
317
* @ret The best @ref XXH_VECTOR implementation.
318
* @see XXH_VECTOR_TYPES
319
*/
320
static int XXH_featureTest(void)
321
{
322
xxh_u32 abcd[4];
323
xxh_u32 max_leaves;
324
int best = XXH_SCALAR;
325
#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
326
xxh_u64 xgetbv_val;
327
#endif
328
#if defined(__GNUC__) && defined(__i386__)
329
xxh_u32 cpuid_supported;
330
__asm__(
331
"# For the sake of ruthless backwards compatibility, check if CPUID\n\t"
332
"# is supported in the EFLAGS on i386.\n\t"
333
"# This is not necessary on x86_64 - CPUID is mandatory.\n\t"
334
"# The ID flag (bit 21) in the EFLAGS register indicates support\n\t"
335
"# for the CPUID instruction. If a software procedure can set and\n\t"
336
"# clear this flag, the processor executing the procedure supports\n\t"
337
"# the CPUID instruction.\n\t"
338
"# <https://c9x.me/x86/html/file_module_x86_id_45.html>\n\t"
339
"#\n\t"
340
"# Routine is from <https://wiki.osdev.org/CPUID>.\n\t"
341
342
"# Save EFLAGS\n\t"
343
XXH_I_ATT("pushfd", "pushfl" )
344
"# Store EFLAGS\n\t"
345
XXH_I_ATT("pushfd", "pushfl" )
346
"# Invert the ID bit in stored EFLAGS\n\t"
347
XXH_I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)")
348
"# Load stored EFLAGS (with ID bit inverted)\n\t"
349
XXH_I_ATT("popfd", "popfl" )
350
"# Store EFLAGS again (ID bit may or not be inverted)\n\t"
351
XXH_I_ATT("pushfd", "pushfl" )
352
"# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t"
353
XXH_I_ATT("pop eax", "popl %%eax" )
354
"# eax = whichever bits were changed\n\t"
355
XXH_I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax" )
356
"# Restore original EFLAGS\n\t"
357
XXH_I_ATT("popfd", "popfl" )
358
"# eax = zero if ID bit can't be changed, else non-zero\n\t"
359
XXH_I_ATT("and eax, 0x200000", "andl $0x200000, %%eax" )
360
: "=a" (cpuid_supported) :: "cc");
361
362
if (XXH_unlikely(!cpuid_supported)) {
363
XXH_debugPrint("CPUID support is not detected!");
364
return best;
365
}
366
367
#endif
368
/* Check how many CPUID pages we have */
369
XXH_cpuid(0, 0, abcd);
370
max_leaves = abcd[0];
371
372
/* Shouldn't happen on hardware, but happens on some QEMU configs. */
373
if (XXH_unlikely(max_leaves == 0)) {
374
XXH_debugPrint("Max CPUID leaves == 0!");
375
return best;
376
}
377
378
/* Check for SSE2, OSXSAVE and xgetbv */
379
XXH_cpuid(1, 0, abcd);
380
381
/*
382
* Test for SSE2. The check is redundant on x86_64, but it doesn't hurt.
383
*/
384
if (XXH_unlikely((abcd[3] & XXH_SSE2_CPUID_MASK) != XXH_SSE2_CPUID_MASK))
385
return best;
386
387
XXH_debugPrint("SSE2 support detected.");
388
389
best = XXH_SSE2;
390
#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
391
/* Make sure we have enough leaves */
392
if (XXH_unlikely(max_leaves < 7))
393
return best;
394
395
/* Test for OSXSAVE and XGETBV */
396
if ((abcd[2] & XXH_OSXSAVE_CPUID_MASK) != XXH_OSXSAVE_CPUID_MASK)
397
return best;
398
399
/* CPUID check for AVX features */
400
XXH_cpuid(7, 0, abcd);
401
402
xgetbv_val = XXH_xgetbv();
403
#if XXH_DISPATCH_AVX2
404
/* Validate that AVX2 is supported by the CPU */
405
if ((abcd[1] & XXH_AVX2_CPUID_MASK) != XXH_AVX2_CPUID_MASK)
406
return best;
407
408
/* Validate that the OS supports YMM registers */
409
if ((xgetbv_val & XXH_AVX2_XGETBV_MASK) != XXH_AVX2_XGETBV_MASK) {
410
XXH_debugPrint("AVX2 supported by the CPU, but not the OS.");
411
return best;
412
}
413
414
/* AVX2 supported */
415
XXH_debugPrint("AVX2 support detected.");
416
best = XXH_AVX2;
417
#endif
418
#if XXH_DISPATCH_AVX512
419
/* Check if AVX512F is supported by the CPU */
420
if ((abcd[1] & XXH_AVX512F_CPUID_MASK) != XXH_AVX512F_CPUID_MASK) {
421
XXH_debugPrint("AVX512F not supported by CPU");
422
return best;
423
}
424
425
/* Validate that the OS supports ZMM registers */
426
if ((xgetbv_val & XXH_AVX512F_XGETBV_MASK) != XXH_AVX512F_XGETBV_MASK) {
427
XXH_debugPrint("AVX512F supported by the CPU, but not the OS.");
428
return best;
429
}
430
431
/* AVX512F supported */
432
XXH_debugPrint("AVX512F support detected.");
433
best = XXH_AVX512;
434
#endif
435
#endif
436
return best;
437
}
438
439
440
/* === Vector implementations === */
441
442
/*!
 * @internal
 * @brief Defines the various dispatch functions.
 *
 * Each expansion emits, for one instruction set, the 64-bit and 128-bit
 * hashLong entry points (default secret, seeded, caller-supplied secret) and
 * the streaming update entry point. Every one of them forwards to the shared
 * XXH3 internals with that instruction set's `XXH3_accumulate_512_*`,
 * `XXH3_scrambleAcc_*` and, for seeded variants, `XXH3_initCustomSecret_*`.
 *
 * TODO: Consolidate?
 *
 * @param suffix The suffix for the functions, e.g. sse2 or scalar
 * @param target XXH_TARGET_* or empty.
 */
#define XXH_DEFINE_DISPATCH_FUNCS(suffix, target)                             \
                                                                              \
/* === 64-bit: default secret === */                                          \
                                                                              \
XXH_NO_INLINE target XXH64_hash_t                                             \
XXHL64_default_##suffix(const void* XXH_RESTRICT data, size_t size)           \
{                                                                             \
    return XXH3_hashLong_64b_internal(                                        \
        data, size, XXH3_kSecret, sizeof(XXH3_kSecret),                       \
        XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix);             \
}                                                                             \
                                                                              \
/* === 64-bit: seeded === */                                                  \
                                                                              \
XXH_NO_INLINE target XXH64_hash_t                                             \
XXHL64_seed_##suffix(const void* XXH_RESTRICT data, size_t size,              \
                     XXH64_hash_t seed64)                                     \
{                                                                             \
    return XXH3_hashLong_64b_withSeed_internal(                               \
        data, size, seed64,                                                   \
        XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix,              \
        XXH3_initCustomSecret_##suffix);                                      \
}                                                                             \
                                                                              \
/* === 64-bit: caller-supplied secret === */                                  \
                                                                              \
XXH_NO_INLINE target XXH64_hash_t                                             \
XXHL64_secret_##suffix(const void* XXH_RESTRICT data, size_t size,            \
                       const void* key, size_t keyLen)                        \
{                                                                             \
    return XXH3_hashLong_64b_internal(                                        \
        data, size, key, keyLen,                                              \
        XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix);             \
}                                                                             \
                                                                              \
/* === 128-bit: default secret === */                                         \
                                                                              \
XXH_NO_INLINE target XXH128_hash_t                                            \
XXHL128_default_##suffix(const void* XXH_RESTRICT data, size_t size)          \
{                                                                             \
    return XXH3_hashLong_128b_internal(                                       \
        data, size, XXH3_kSecret, sizeof(XXH3_kSecret),                       \
        XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix);             \
}                                                                             \
                                                                              \
/* === 128-bit: seeded === */                                                 \
                                                                              \
XXH_NO_INLINE target XXH128_hash_t                                            \
XXHL128_seed_##suffix(const void* XXH_RESTRICT data, size_t size,             \
                      XXH64_hash_t seed64)                                    \
{                                                                             \
    return XXH3_hashLong_128b_withSeed_internal(                              \
        data, size, seed64,                                                   \
        XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix,              \
        XXH3_initCustomSecret_##suffix);                                      \
}                                                                             \
                                                                              \
/* === 128-bit: caller-supplied secret === */                                 \
                                                                              \
XXH_NO_INLINE target XXH128_hash_t                                            \
XXHL128_secret_##suffix(const void* XXH_RESTRICT data, size_t size,           \
                        const void* XXH_RESTRICT key, size_t keyLen)          \
{                                                                             \
    return XXH3_hashLong_128b_internal(                                       \
        data, size, (const xxh_u8*)key, keyLen,                               \
        XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix);             \
}                                                                             \
                                                                              \
/* === Streaming update (shared by the 64-bit and 128-bit tables) === */      \
                                                                              \
XXH_NO_INLINE target XXH_errorcode                                            \
XXH3_update_##suffix(XXH3_state_t* state, const void* data, size_t size)      \
{                                                                             \
    return XXH3_update(state, (const xxh_u8*)data, size,                      \
        XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix);             \
}
529
530
/* End XXH_DEFINE_DISPATCH_FUNCS */
531
532
#if XXH_DISPATCH_SCALAR
533
XXH_DEFINE_DISPATCH_FUNCS(scalar, /* nothing */)
534
#endif
535
XXH_DEFINE_DISPATCH_FUNCS(sse2, XXH_TARGET_SSE2)
536
#if XXH_DISPATCH_AVX2
537
XXH_DEFINE_DISPATCH_FUNCS(avx2, XXH_TARGET_AVX2)
538
#endif
539
#if XXH_DISPATCH_AVX512
540
XXH_DEFINE_DISPATCH_FUNCS(avx512, XXH_TARGET_AVX512)
541
#endif
542
#undef XXH_DEFINE_DISPATCH_FUNCS
543
544
/* ==== Dispatchers ==== */
545
546
/* 64-bit hash with the default secret: (input, length). */
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(const void* XXH_RESTRICT, size_t);

/* 64-bit hash with a seed: (input, length, seed). */
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);

/* 64-bit hash with a caller-supplied secret: (input, length, secret, secret length). */
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);

/* Streaming update: (state, input, length). Also used by the 128-bit table. */
typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH3_state_t*, const void*, size_t);

/* One row of the 64-bit dispatch table: the entry points of a single implementation. */
typedef struct {
    XXH3_dispatchx86_hashLong64_default    hashLong64_default;
    XXH3_dispatchx86_hashLong64_withSeed   hashLong64_seed;
    XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret;
    XXH3_dispatchx86_update                update;
} XXH_dispatchFunctions_s;
560
561
#define XXH_NB_DISPATCHES 4
562
563
/*!
 * @internal
 * @brief Table of dispatchers for @ref XXH3_64bits().
 *
 * One row per implementation. Each row lists, in order, the default-secret,
 * seeded, custom-secret and streaming-update entry points. The row of an
 * implementation that is compiled out is filled with NULL.
 *
 * @pre The indices must match @ref XXH_VECTOR_TYPE.
 */
static const XXH_dispatchFunctions_s XXH_kDispatch[XXH_NB_DISPATCHES] = {
#if XXH_DISPATCH_SCALAR
    /* Scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_update_scalar },
#else
    /* Scalar */ { NULL, NULL, NULL, NULL },
#endif
    /* SSE2 */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2, XXH3_update_sse2 },
#if XXH_DISPATCH_AVX2
    /* AVX2 */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2, XXH3_update_avx2 },
#else
    /* AVX2 */ { NULL, NULL, NULL, NULL },
#endif
#if XXH_DISPATCH_AVX512
    /* AVX512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_update_avx512 }
#else
    /* AVX512 */ { NULL, NULL, NULL, NULL }
#endif
};
587
/*!
588
* @internal
589
* @brief The selected dispatch table for @ref XXH3_64bits().
590
*/
591
static XXH_dispatchFunctions_s XXH_g_dispatch = { NULL, NULL, NULL, NULL };
592
593
594
/* 128-bit hash with the default secret: (input, length). */
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(const void* XXH_RESTRICT, size_t);

/* 128-bit hash with a seed: (input, length, seed). */
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);

/* 128-bit hash with a caller-supplied secret: (input, length, secret, secret length). */
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);

/* One row of the 128-bit dispatch table: the entry points of a single implementation. */
typedef struct {
    XXH3_dispatchx86_hashLong128_default    hashLong128_default;
    XXH3_dispatchx86_hashLong128_withSeed   hashLong128_seed;
    XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret;
    XXH3_dispatchx86_update                 update;  /* same update signature as the 64-bit table */
} XXH_dispatch128Functions_s;
606
607
608
/*!
 * @internal
 * @brief Table of dispatchers for @ref XXH3_128bits().
 *
 * One row per implementation. Each row lists, in order, the default-secret,
 * seeded, custom-secret and streaming-update entry points. The row of an
 * implementation that is compiled out is filled with NULL.
 *
 * @pre The indices must match @ref XXH_VECTOR_TYPE.
 */
static const XXH_dispatch128Functions_s XXH_kDispatch128[XXH_NB_DISPATCHES] = {
#if XXH_DISPATCH_SCALAR
    /* Scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_update_scalar },
#else
    /* Scalar */ { NULL, NULL, NULL, NULL },
#endif
    /* SSE2 */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2, XXH3_update_sse2 },
#if XXH_DISPATCH_AVX2
    /* AVX2 */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2, XXH3_update_avx2 },
#else
    /* AVX2 */ { NULL, NULL, NULL, NULL },
#endif
#if XXH_DISPATCH_AVX512
    /* AVX512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_update_avx512 }
#else
    /* AVX512 */ { NULL, NULL, NULL, NULL }
#endif
};
632
633
/*!
634
* @internal
635
* @brief The selected dispatch table for @ref XXH3_128bits().
636
*/
637
static XXH_dispatch128Functions_s XXH_g_dispatch128 = { NULL, NULL, NULL, NULL };
638
639
/*!
640
* @internal
641
* @brief Runs a CPUID check and sets the correct dispatch tables.
642
*/
643
static void XXH_setDispatch(void)
644
{
645
int vecID = XXH_featureTest();
646
XXH_STATIC_ASSERT(XXH_AVX512 == XXH_NB_DISPATCHES-1);
647
assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512);
648
#if !XXH_DISPATCH_SCALAR
649
assert(vecID != XXH_SCALAR);
650
#endif
651
#if !XXH_DISPATCH_AVX512
652
assert(vecID != XXH_AVX512);
653
#endif
654
#if !XXH_DISPATCH_AVX2
655
assert(vecID != XXH_AVX2);
656
#endif
657
XXH_g_dispatch = XXH_kDispatch[vecID];
658
XXH_g_dispatch128 = XXH_kDispatch128[vecID];
659
}
660
661
662
/* ==== XXH3 public functions ==== */
663
664
static XXH64_hash_t
665
XXH3_hashLong_64b_defaultSecret_selection(const void* input, size_t len,
666
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
667
{
668
(void)seed64; (void)secret; (void)secretLen;
669
if (XXH_g_dispatch.hashLong64_default == NULL) XXH_setDispatch();
670
return XXH_g_dispatch.hashLong64_default(input, len);
671
}
672
673
XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len)
674
{
675
return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection);
676
}
677
678
static XXH64_hash_t
679
XXH3_hashLong_64b_withSeed_selection(const void* input, size_t len,
680
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
681
{
682
(void)secret; (void)secretLen;
683
if (XXH_g_dispatch.hashLong64_seed == NULL) XXH_setDispatch();
684
return XXH_g_dispatch.hashLong64_seed(input, len, seed64);
685
}
686
687
/* 64-bit hash of `input` with the caller's seed and the built-in XXH3_kSecret. */
XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
{
    return XXH3_64bits_internal(input, len,
                                seed,
                                XXH3_kSecret, sizeof(XXH3_kSecret),
                                XXH3_hashLong_64b_withSeed_selection);
}
691
692
static XXH64_hash_t
693
XXH3_hashLong_64b_withSecret_selection(const void* input, size_t len,
694
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
695
{
696
(void)seed64;
697
if (XXH_g_dispatch.hashLong64_secret == NULL) XXH_setDispatch();
698
return XXH_g_dispatch.hashLong64_secret(input, len, secret, secretLen);
699
}
700
701
XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen)
702
{
703
return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection);
704
}
705
706
XXH_errorcode
707
XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len)
708
{
709
if (XXH_g_dispatch.update == NULL) XXH_setDispatch();
710
return XXH_g_dispatch.update(state, (const xxh_u8*)input, len);
711
}
712
713
714
/* ==== XXH128 public functions ==== */
715
716
static XXH128_hash_t
717
XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len,
718
XXH64_hash_t seed64, const void* secret, size_t secretLen)
719
{
720
(void)seed64; (void)secret; (void)secretLen;
721
if (XXH_g_dispatch128.hashLong128_default == NULL) XXH_setDispatch();
722
return XXH_g_dispatch128.hashLong128_default(input, len);
723
}
724
725
XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len)
726
{
727
return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection);
728
}
729
730
static XXH128_hash_t
731
XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len,
732
XXH64_hash_t seed64, const void* secret, size_t secretLen)
733
{
734
(void)secret; (void)secretLen;
735
if (XXH_g_dispatch128.hashLong128_seed == NULL) XXH_setDispatch();
736
return XXH_g_dispatch128.hashLong128_seed(input, len, seed64);
737
}
738
739
/* 128-bit hash of `input` with the caller's seed and the built-in XXH3_kSecret. */
XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
{
    return XXH3_128bits_internal(input, len,
                                 seed,
                                 XXH3_kSecret, sizeof(XXH3_kSecret),
                                 XXH3_hashLong_128b_withSeed_selection);
}
743
744
static XXH128_hash_t
745
XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len,
746
XXH64_hash_t seed64, const void* secret, size_t secretLen)
747
{
748
(void)seed64;
749
if (XXH_g_dispatch128.hashLong128_secret == NULL) XXH_setDispatch();
750
return XXH_g_dispatch128.hashLong128_secret(input, len, secret, secretLen);
751
}
752
753
XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen)
754
{
755
return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection);
756
}
757
758
XXH_errorcode
759
XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len)
760
{
761
if (XXH_g_dispatch128.update == NULL) XXH_setDispatch();
762
return XXH_g_dispatch128.update(state, (const xxh_u8*)input, len);
763
}
764
765
#endif // defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
766
767
#if defined (__cplusplus)
768
}
769
#endif
770
/*! @} */
771
772