Path: blob/a-new-beginning/SharedDependencies/Sources/xxhash/include/xxhash.h
2 views
/*
 * xxHash - Extremely Fast Hash algorithm
 * Header File
 * Copyright (C) 2012-2023 Yann Collet
 *
 * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following disclaimer
 *      in the documentation and/or other materials provided with the
 *      distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at:
 *   - xxHash homepage: https://www.xxhash.com
 *   - xxHash source repository: https://github.com/Cyan4973/xxHash
 */

/*!
 * @mainpage xxHash
 *
 * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
 * limits.
 *
 * It is proposed in four flavors, in three families:
 * 1. @ref XXH32_family
 *   - Classic 32-bit hash function. Simple, compact, and runs on almost all
 *     32-bit and 64-bit systems.
 * 2. @ref XXH64_family
 *   - Classic 64-bit adaptation of XXH32. 
Just as simple, and runs well on most
 *     64-bit systems (but _not_ 32-bit systems).
 * 3. @ref XXH3_family
 *   - Modern 64-bit and 128-bit hash function family which features improved
 *     strength and performance across the board, especially on smaller data.
 *     It benefits greatly from SIMD and 64-bit without requiring it.
 *
 * Benchmarks
 * ---
 * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
 * The open source benchmark program is compiled with clang v10.0 using -O3 flag.
 *
 * | Hash Name            | ISA ext | Width | Large Data Speed | Small Data Velocity |
 * | -------------------- | ------- | ----: | ---------------: | ------------------: |
 * | XXH3_64bits()        | @b AVX2 |    64 |        59.4 GB/s |               133.1 |
 * | MeowHash             | AES-NI  |   128 |        58.2 GB/s |                52.5 |
 * | XXH3_128bits()       | @b AVX2 |   128 |        57.9 GB/s |               118.1 |
 * | CLHash               | PCLMUL  |    64 |        37.1 GB/s |                58.1 |
 * | XXH3_64bits()        | @b SSE2 |    64 |        31.5 GB/s |               133.1 |
 * | XXH3_128bits()       | @b SSE2 |   128 |        29.6 GB/s |               118.1 |
 * | RAM sequential read  |         |   N/A |        28.0 GB/s |                 N/A |
 * | ahash                | AES-NI  |    64 |        22.5 GB/s |               107.2 |
 * | City64               |         |    64 |        22.0 GB/s |                76.6 |
 * | T1ha2                |         |    64 |        22.0 GB/s |                99.0 |
 * | City128              |         |   128 |        21.7 GB/s |                57.7 |
 * | FarmHash             | AES-NI  |    64 |        21.3 GB/s |                71.9 |
 * | XXH64()              |         |    64 |        19.4 GB/s |                71.0 |
 * | SpookyHash           |         |    64 |        19.3 GB/s |                53.2 |
 * | Mum                  |         |    64 |        18.0 GB/s |                67.0 |
 * | CRC32C               | SSE4.2  |    32 |        13.0 GB/s |                57.9 |
 * | XXH32()              |         |    32 |         9.7 GB/s |                71.9 |
 * | City32               |         |    32 |         9.1 GB/s |                66.0 |
 * | Blake3*              | @b AVX2 |   256 |         4.4 GB/s |                 8.1 |
 * | Murmur3              |         |    32 |         3.9 GB/s |                56.1 |
 * | SipHash*             |         |    64 |         3.0 GB/s |                43.2 |
 * | Blake3*              | @b SSE2 |   256 |         2.4 GB/s |                 8.1 |
 * | HighwayHash          |         |    64 |         1.4 GB/s |                 6.0 |
 * | FNV64                |         |    64 |         1.2 GB/s |                62.7 |
 * | Blake2*              |         |   256 |         1.1 GB/s |                 5.1 |
 * | SHA1*                |         |   160 |         0.8 GB/s |                 5.6 |
 * | MD5*                 |         |   128 |         0.6 GB/s |                 7.8 |
 * @note
 *   - Hashes which require a specific ISA extension are noted. 
SSE2 is also noted,
 *     even though it is mandatory on x64.
 *   - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
 *     by modern standards.
 *   - Small data velocity is a rough average of algorithm's efficiency for small
 *     data. For more accurate information, see the wiki.
 *   - More benchmarks and strength tests are found on the wiki:
 *         https://github.com/Cyan4973/xxHash/wiki
 *
 * Usage
 * ------
 * All xxHash variants use a similar API. Changing the algorithm is a trivial
 * substitution.
 *
 * @pre
 *    For functions which take an input and length parameter, the following
 *    requirements are assumed:
 *    - The range from [`input`, `input + length`) is valid, readable memory.
 *      - The only exception is if the `length` is `0`, `input` may be `NULL`.
 *    - For C++, the objects must have the *TriviallyCopyable* property, as the
 *      functions access bytes directly as if it was an array of `unsigned char`.
 *
 * @anchor single_shot_example
 * **Single Shot**
 *
 * These functions are stateless functions which hash a contiguous block of memory,
 * immediately returning the result. They are the easiest and usually the fastest
 * option.
 *
 * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
 *
 * @code{.c}
 *   #include <string.h>
 *   #include "xxhash.h"
 *
 *   // Example for a function which hashes a null terminated string with XXH32().
 *   XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
 *   {
 *       // NULL pointers are only valid if the length is zero
 *       size_t length = (string == NULL) ? 
0 : strlen(string);
 *       return XXH32(string, length, seed);
 *   }
 * @endcode
 *
 *
 * @anchor streaming_example
 * **Streaming**
 *
 * These groups of functions allow incremental hashing of unknown size, even
 * more than what would fit in a size_t.
 *
 * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
 *
 * @code{.c}
 *   #include <stdio.h>
 *   #include <assert.h>
 *   #include "xxhash.h"
 *   // Example for a function which hashes a FILE incrementally with XXH3_64bits().
 *   XXH64_hash_t hashFile(FILE* f)
 *   {
 *       // Allocate a state struct. Do not just use malloc() or new.
 *       XXH3_state_t* state = XXH3_createState();
 *       assert(state != NULL && "Out of memory!");
 *       // Reset the state to start a new hashing session.
 *       XXH3_64bits_reset(state);
 *       char buffer[4096];
 *       size_t count;
 *       // Read the file in chunks
 *       while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
 *           // Run update() as many times as necessary to process the data
 *           XXH3_64bits_update(state, buffer, count);
 *       }
 *       // Retrieve the finalized hash. This will not change the state.
 *       XXH64_hash_t result = XXH3_64bits_digest(state);
 *       // Free the state. 
Do not use free().
 *       XXH3_freeState(state);
 *       return result;
 *   }
 * @endcode
 *
 * Streaming functions generate the xxHash value from an incremental input.
 * This method is slower than single-call functions, due to state management.
 * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
 *
 * An XXH state must first be allocated using `XXH*_createState()`.
 *
 * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
 *
 * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
 *
 * The function returns an error code, with 0 meaning OK, and any other value
 * meaning there is an error.
 *
 * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
 * This function returns the nn-bits hash as an int or long long.
 *
 * It's still possible to continue inserting input into the hash state after a
 * digest, and generate new hash values later on by invoking `XXH*_digest()`.
 *
 * When done, release the state using `XXH*_freeState()`.
 *
 *
 * @anchor canonical_representation_example
 * **Canonical Representation**
 *
 * The default return values from XXH functions are unsigned 32, 64 and 128 bit
 * integers.
 * This is the simplest and fastest format for further post-processing.
 *
 * However, this leaves open the question of what is the order on the byte level,
 * since little and big endian conventions will store the same number differently.
 *
 * The canonical representation settles this issue by mandating big-endian
 * convention, the same convention as human-readable numbers (large digits first).
 *
 * When writing hash values to storage, sending them over a network, or printing
 * them, it's highly recommended to use the canonical representation to ensure
 * portability across a wider range of systems, present and future.
 *
 * The following functions allow transformation of hash values to and from
 * canonical format.
 *
 * 
XXH32_canonicalFromHash(), XXH32_hashFromCanonical(),211* XXH64_canonicalFromHash(), XXH64_hashFromCanonical(),212* XXH128_canonicalFromHash(), XXH128_hashFromCanonical(),213*214* @code{.c}215* #include <stdio.h>216* #include "xxhash.h"217*218* // Example for a function which prints XXH32_hash_t in human readable format219* void printXxh32(XXH32_hash_t hash)220* {221* XXH32_canonical_t cano;222* XXH32_canonicalFromHash(&cano, hash);223* size_t i;224* for(i = 0; i < sizeof(cano.digest); ++i) {225* printf("%02x", cano.digest[i]);226* }227* printf("\n");228* }229*230* // Example for a function which converts XXH32_canonical_t to XXH32_hash_t231* XXH32_hash_t convertCanonicalToXxh32(XXH32_canonical_t cano)232* {233* XXH32_hash_t hash = XXH32_hashFromCanonical(&cano);234* return hash;235* }236* @endcode237*238*239* @file xxhash.h240* xxHash prototypes and implementation241*/242243#if defined(__cplusplus) && !defined(XXH_NO_EXTERNC_GUARD)244extern "C" {245#endif246247/* ****************************248* INLINE mode249******************************/250/*!251* @defgroup public Public API252* Contains details on the public xxHash functions.253* @{254*/255#ifdef XXH_DOXYGEN256/*!257* @brief Gives access to internal state declaration, required for static allocation.258*259* Incompatible with dynamic linking, due to risks of ABI changes.260*261* Usage:262* @code{.c}263* #define XXH_STATIC_LINKING_ONLY264* #include "xxhash.h"265* @endcode266*/267# define XXH_STATIC_LINKING_ONLY268/* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */269270/*!271* @brief Gives access to internal definitions.272*273* Usage:274* @code{.c}275* #define XXH_STATIC_LINKING_ONLY276* #define XXH_IMPLEMENTATION277* #include "xxhash.h"278* @endcode279*/280# define XXH_IMPLEMENTATION281/* Do not undef XXH_IMPLEMENTATION for Doxygen */282283/*!284* @brief Exposes the implementation and marks all functions as `inline`.285*286* Use these build macros to inline xxhash into the target unit.287* Inlining improves 
performance on small inputs, especially when the length is288* expressed as a compile-time constant:289*290* https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html291*292* It also keeps xxHash symbols private to the unit, so they are not exported.293*294* Usage:295* @code{.c}296* #define XXH_INLINE_ALL297* #include "xxhash.h"298* @endcode299* Do not compile and link xxhash.o as a separate object, as it is not useful.300*/301# define XXH_INLINE_ALL302# undef XXH_INLINE_ALL303/*!304* @brief Exposes the implementation without marking functions as inline.305*/306# define XXH_PRIVATE_API307# undef XXH_PRIVATE_API308/*!309* @brief Emulate a namespace by transparently prefixing all symbols.310*311* If you want to include _and expose_ xxHash functions from within your own312* library, but also want to avoid symbol collisions with other libraries which313* may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix314* any public symbol from xxhash library with the value of @ref XXH_NAMESPACE315* (therefore, avoid empty or numeric values).316*317* Note that no change is required within the calling program as long as it318* includes `xxhash.h`: Regular symbol names will be automatically translated319* by this header.320*/321# define XXH_NAMESPACE /* YOUR NAME HERE */322# undef XXH_NAMESPACE323#endif324325#define XXH_CAT(A,B) A##B326#define XXH_NAME2(A,B) XXH_CAT(A,B)327#define XXH_IPREF(Id) XXH_NAME2(XXH_NAMESPACE, Id)328329#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \330&& !defined(XXH_INLINE_ALL_31684351384)331/* this section should be traversed only once */332# define XXH_INLINE_ALL_31684351384333/* give access to the advanced API, required to compile implementations */334# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */335# define XXH_STATIC_LINKING_ONLY336/* make all functions private */337# undef XXH_PUBLIC_API338# if defined(__GNUC__)339# define XXH_PUBLIC_API static __inline 
__attribute__((__unused__))340# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)341# define XXH_PUBLIC_API static inline342# elif defined(_MSC_VER)343# define XXH_PUBLIC_API static __inline344# else345/* note: this version may generate warnings for unused static functions */346# define XXH_PUBLIC_API static347# endif348349/*350* This part deals with the special case where a unit wants to inline xxHash,351* but "xxhash.h" has previously been included without XXH_INLINE_ALL,352* such as part of some previously included *.h header file.353* Without further action, the new include would just be ignored,354* and functions would effectively _not_ be inlined (silent failure).355* The following macros solve this situation by prefixing all inlined names,356* avoiding naming collision with previous inclusions.357*/358/* Before that, we unconditionally #undef all symbols,359* in case they were already defined with XXH_NAMESPACE.360* They will then be redefined for XXH_INLINE_ALL361*/362# undef XXH_versionNumber363/* XXH32 */364# undef XXH32365# undef XXH32_createState366# undef XXH32_freeState367# undef XXH32_reset368# undef XXH32_update369# undef XXH32_digest370# undef XXH32_copyState371# undef XXH32_canonicalFromHash372# undef XXH32_hashFromCanonical373/* XXH64 */374# undef XXH64375# undef XXH64_createState376# undef XXH64_freeState377# undef XXH64_reset378# undef XXH64_update379# undef XXH64_digest380# undef XXH64_copyState381# undef XXH64_canonicalFromHash382# undef XXH64_hashFromCanonical383/* XXH3_64bits */384# undef XXH3_64bits385# undef XXH3_64bits_withSecret386# undef XXH3_64bits_withSeed387# undef XXH3_64bits_withSecretandSeed388# undef XXH3_createState389# undef XXH3_freeState390# undef XXH3_copyState391# undef XXH3_64bits_reset392# undef XXH3_64bits_reset_withSeed393# undef XXH3_64bits_reset_withSecret394# undef XXH3_64bits_update395# undef XXH3_64bits_digest396# undef XXH3_generateSecret397/* XXH3_128bits 
*/398# undef XXH128399# undef XXH3_128bits400# undef XXH3_128bits_withSeed401# undef XXH3_128bits_withSecret402# undef XXH3_128bits_reset403# undef XXH3_128bits_reset_withSeed404# undef XXH3_128bits_reset_withSecret405# undef XXH3_128bits_reset_withSecretandSeed406# undef XXH3_128bits_update407# undef XXH3_128bits_digest408# undef XXH128_isEqual409# undef XXH128_cmp410# undef XXH128_canonicalFromHash411# undef XXH128_hashFromCanonical412/* Finally, free the namespace itself */413# undef XXH_NAMESPACE414415/* employ the namespace for XXH_INLINE_ALL */416# define XXH_NAMESPACE XXH_INLINE_417/*418* Some identifiers (enums, type names) are not symbols,419* but they must nonetheless be renamed to avoid redeclaration.420* Alternative solution: do not redeclare them.421* However, this requires some #ifdefs, and has a more dispersed impact.422* Meanwhile, renaming can be achieved in a single place.423*/424# define XXH_OK XXH_IPREF(XXH_OK)425# define XXH_ERROR XXH_IPREF(XXH_ERROR)426# define XXH_errorcode XXH_IPREF(XXH_errorcode)427# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t)428# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t)429# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)430# define XXH32_state_s XXH_IPREF(XXH32_state_s)431# define XXH32_state_t XXH_IPREF(XXH32_state_t)432# define XXH64_state_s XXH_IPREF(XXH64_state_s)433# define XXH64_state_t XXH_IPREF(XXH64_state_t)434# define XXH3_state_s XXH_IPREF(XXH3_state_s)435# define XXH3_state_t XXH_IPREF(XXH3_state_t)436# define XXH128_hash_t XXH_IPREF(XXH128_hash_t)437/* Ensure the header is parsed again, even if it was previously included */438# undef XXHASH_H_5627135585666179439# undef XXHASH_H_STATIC_13879238742440#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */441442/* ****************************************************************443* Stable API444*****************************************************************/445#ifndef XXHASH_H_5627135585666179446#define XXHASH_H_5627135585666179 1447448/*! 
@brief Marks a global symbol. */449#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)450# if defined(_WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))451# ifdef XXH_EXPORT452# define XXH_PUBLIC_API __declspec(dllexport)453# elif XXH_IMPORT454# define XXH_PUBLIC_API __declspec(dllimport)455# endif456# else457# define XXH_PUBLIC_API /* do nothing */458# endif459#endif460461#ifdef XXH_NAMESPACE462# define XXH_versionNumber XXH_IPREF(XXH_versionNumber)463/* XXH32 */464# define XXH32 XXH_IPREF(XXH32)465# define XXH32_createState XXH_IPREF(XXH32_createState)466# define XXH32_freeState XXH_IPREF(XXH32_freeState)467# define XXH32_reset XXH_IPREF(XXH32_reset)468# define XXH32_update XXH_IPREF(XXH32_update)469# define XXH32_digest XXH_IPREF(XXH32_digest)470# define XXH32_copyState XXH_IPREF(XXH32_copyState)471# define XXH32_canonicalFromHash XXH_IPREF(XXH32_canonicalFromHash)472# define XXH32_hashFromCanonical XXH_IPREF(XXH32_hashFromCanonical)473/* XXH64 */474# define XXH64 XXH_IPREF(XXH64)475# define XXH64_createState XXH_IPREF(XXH64_createState)476# define XXH64_freeState XXH_IPREF(XXH64_freeState)477# define XXH64_reset XXH_IPREF(XXH64_reset)478# define XXH64_update XXH_IPREF(XXH64_update)479# define XXH64_digest XXH_IPREF(XXH64_digest)480# define XXH64_copyState XXH_IPREF(XXH64_copyState)481# define XXH64_canonicalFromHash XXH_IPREF(XXH64_canonicalFromHash)482# define XXH64_hashFromCanonical XXH_IPREF(XXH64_hashFromCanonical)483/* XXH3_64bits */484# define XXH3_64bits XXH_IPREF(XXH3_64bits)485# define XXH3_64bits_withSecret XXH_IPREF(XXH3_64bits_withSecret)486# define XXH3_64bits_withSeed XXH_IPREF(XXH3_64bits_withSeed)487# define XXH3_64bits_withSecretandSeed XXH_IPREF(XXH3_64bits_withSecretandSeed)488# define XXH3_createState XXH_IPREF(XXH3_createState)489# define XXH3_freeState XXH_IPREF(XXH3_freeState)490# define XXH3_copyState XXH_IPREF(XXH3_copyState)491# define XXH3_64bits_reset XXH_IPREF(XXH3_64bits_reset)492# define 
XXH3_64bits_reset_withSeed XXH_IPREF(XXH3_64bits_reset_withSeed)493# define XXH3_64bits_reset_withSecret XXH_IPREF(XXH3_64bits_reset_withSecret)494# define XXH3_64bits_reset_withSecretandSeed XXH_IPREF(XXH3_64bits_reset_withSecretandSeed)495# define XXH3_64bits_update XXH_IPREF(XXH3_64bits_update)496# define XXH3_64bits_digest XXH_IPREF(XXH3_64bits_digest)497# define XXH3_generateSecret XXH_IPREF(XXH3_generateSecret)498# define XXH3_generateSecret_fromSeed XXH_IPREF(XXH3_generateSecret_fromSeed)499/* XXH3_128bits */500# define XXH128 XXH_IPREF(XXH128)501# define XXH3_128bits XXH_IPREF(XXH3_128bits)502# define XXH3_128bits_withSeed XXH_IPREF(XXH3_128bits_withSeed)503# define XXH3_128bits_withSecret XXH_IPREF(XXH3_128bits_withSecret)504# define XXH3_128bits_withSecretandSeed XXH_IPREF(XXH3_128bits_withSecretandSeed)505# define XXH3_128bits_reset XXH_IPREF(XXH3_128bits_reset)506# define XXH3_128bits_reset_withSeed XXH_IPREF(XXH3_128bits_reset_withSeed)507# define XXH3_128bits_reset_withSecret XXH_IPREF(XXH3_128bits_reset_withSecret)508# define XXH3_128bits_reset_withSecretandSeed XXH_IPREF(XXH3_128bits_reset_withSecretandSeed)509# define XXH3_128bits_update XXH_IPREF(XXH3_128bits_update)510# define XXH3_128bits_digest XXH_IPREF(XXH3_128bits_digest)511# define XXH128_isEqual XXH_IPREF(XXH128_isEqual)512# define XXH128_cmp XXH_IPREF(XXH128_cmp)513# define XXH128_canonicalFromHash XXH_IPREF(XXH128_canonicalFromHash)514# define XXH128_hashFromCanonical XXH_IPREF(XXH128_hashFromCanonical)515#endif516517518/* *************************************519* Compiler specifics520***************************************/521522/* specific declaration modes for Windows */523#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)524# if defined(_WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))525# ifdef XXH_EXPORT526# define XXH_PUBLIC_API __declspec(dllexport)527# elif XXH_IMPORT528# define XXH_PUBLIC_API __declspec(dllimport)529# endif530# else531# define 
XXH_PUBLIC_API /* do nothing */532# endif533#endif534535#if defined (__GNUC__)536# define XXH_CONSTF __attribute__((__const__))537# define XXH_PUREF __attribute__((__pure__))538# define XXH_MALLOCF __attribute__((__malloc__))539#else540# define XXH_CONSTF /* disable */541# define XXH_PUREF542# define XXH_MALLOCF543#endif544545/* *************************************546* Version547***************************************/548#define XXH_VERSION_MAJOR 0549#define XXH_VERSION_MINOR 8550#define XXH_VERSION_RELEASE 3551/*! @brief Version number, encoded as two digits each */552#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)553554/*!555* @brief Obtains the xxHash version.556*557* This is mostly useful when xxHash is compiled as a shared library,558* since the returned value comes from the library, as opposed to header file.559*560* @return @ref XXH_VERSION_NUMBER of the invoked library.561*/562XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);563564565/* ****************************566* Common basic types567******************************/568#include <stddef.h> /* size_t */569/*!570* @brief Exit code for the streaming API.571*/572typedef enum {573XXH_OK = 0, /*!< OK */574XXH_ERROR /*!< Error */575} XXH_errorcode;576577578/*-**********************************************************************579* 32-bit hash580************************************************************************/581#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */582/*!583* @brief An unsigned 32-bit integer.584*585* Not necessarily defined to `uint32_t` but functionally equivalent.586*/587typedef uint32_t XXH32_hash_t;588589#elif !defined (__VMS) \590&& (defined (__cplusplus) \591|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )592# ifdef _AIX593# include <inttypes.h>594# else595# include <stdint.h>596# endif597typedef uint32_t XXH32_hash_t;598599#else600# include <limits.h>601# if UINT_MAX == 
0xFFFFFFFFUL602typedef unsigned int XXH32_hash_t;603# elif ULONG_MAX == 0xFFFFFFFFUL604typedef unsigned long XXH32_hash_t;605# else606# error "unsupported platform: need a 32-bit type"607# endif608#endif609610/*!611* @}612*613* @defgroup XXH32_family XXH32 family614* @ingroup public615* Contains functions used in the classic 32-bit xxHash algorithm.616*617* @note618* XXH32 is useful for older platforms, with no or poor 64-bit performance.619* Note that the @ref XXH3_family provides competitive speed for both 32-bit620* and 64-bit systems, and offers true 64/128 bit hash results.621*622* @see @ref XXH64_family, @ref XXH3_family : Other xxHash families623* @see @ref XXH32_impl for implementation details624* @{625*/626627/*!628* @brief Calculates the 32-bit hash of @p input using xxHash32.629*630* @param input The block of data to be hashed, at least @p length bytes in size.631* @param length The length of @p input, in bytes.632* @param seed The 32-bit seed to alter the hash's output predictably.633*634* @pre635* The memory between @p input and @p input + @p length must be valid,636* readable, contiguous memory. However, if @p length is `0`, @p input may be637* `NULL`. 
In C++, this also must be *TriviallyCopyable*.638*639* @return The calculated 32-bit xxHash32 value.640*641* @see @ref single_shot_example "Single Shot Example" for an example.642*/643XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);644645#ifndef XXH_NO_STREAM646/*!647* @typedef struct XXH32_state_s XXH32_state_t648* @brief The opaque state struct for the XXH32 streaming API.649*650* @see XXH32_state_s for details.651* @see @ref streaming_example "Streaming Example"652*/653typedef struct XXH32_state_s XXH32_state_t;654655/*!656* @brief Allocates an @ref XXH32_state_t.657*658* @return An allocated pointer of @ref XXH32_state_t on success.659* @return `NULL` on failure.660*661* @note Must be freed with XXH32_freeState().662*663* @see @ref streaming_example "Streaming Example"664*/665XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);666/*!667* @brief Frees an @ref XXH32_state_t.668*669* @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().670*671* @return @ref XXH_OK.672*673* @note @p statePtr must be allocated with XXH32_createState().674*675* @see @ref streaming_example "Streaming Example"676*677*/678XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);679/*!680* @brief Copies one @ref XXH32_state_t to another.681*682* @param dst_state The state to copy to.683* @param src_state The state to copy from.684* @pre685* @p dst_state and @p src_state must not be `NULL` and must not overlap.686*/687XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);688689/*!690* @brief Resets an @ref XXH32_state_t to begin a new hash.691*692* @param statePtr The state struct to reset.693* @param seed The 32-bit seed to alter the hash result predictably.694*695* @pre696* @p statePtr must not be `NULL`.697*698* @return @ref XXH_OK on success.699* @return @ref XXH_ERROR on failure.700*701* @note This function resets and seeds a 
state. Call it before @ref XXH32_update().702*703* @see @ref streaming_example "Streaming Example"704*/705XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);706707/*!708* @brief Consumes a block of @p input to an @ref XXH32_state_t.709*710* @param statePtr The state struct to update.711* @param input The block of data to be hashed, at least @p length bytes in size.712* @param length The length of @p input, in bytes.713*714* @pre715* @p statePtr must not be `NULL`.716* @pre717* The memory between @p input and @p input + @p length must be valid,718* readable, contiguous memory. However, if @p length is `0`, @p input may be719* `NULL`. In C++, this also must be *TriviallyCopyable*.720*721* @return @ref XXH_OK on success.722* @return @ref XXH_ERROR on failure.723*724* @note Call this to incrementally consume blocks of data.725*726* @see @ref streaming_example "Streaming Example"727*/728XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);729730/*!731* @brief Returns the calculated hash value from an @ref XXH32_state_t.732*733* @param statePtr The state struct to calculate the hash from.734*735* @pre736* @p statePtr must not be `NULL`.737*738* @return The calculated 32-bit xxHash32 value from that state.739*740* @note741* Calling XXH32_digest() will not affect @p statePtr, so you can update,742* digest, and update again.743*744* @see @ref streaming_example "Streaming Example"745*/746XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);747#endif /* !XXH_NO_STREAM */748749/******* Canonical representation *******/750751/*!752* @brief Canonical (big endian) representation of @ref XXH32_hash_t.753*/754typedef struct {755unsigned char digest[4]; /*!< Hash bytes, big endian */756} XXH32_canonical_t;757758/*!759* @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.760*761* @param dst The @ref XXH32_canonical_t pointer to be stored to.762* 
@param hash The @ref XXH32_hash_t to be converted.763*764* @pre765* @p dst must not be `NULL`.766*767* @see @ref canonical_representation_example "Canonical Representation Example"768*/769XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);770771/*!772* @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.773*774* @param src The @ref XXH32_canonical_t to convert.775*776* @pre777* @p src must not be `NULL`.778*779* @return The converted hash.780*781* @see @ref canonical_representation_example "Canonical Representation Example"782*/783XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);784785786/*! @cond Doxygen ignores this part */787#ifdef __has_attribute788# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)789#else790# define XXH_HAS_ATTRIBUTE(x) 0791#endif792/*! @endcond */793794/*! @cond Doxygen ignores this part */795/* C-language Attributes are added in C23. */796#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 202311L) && defined(__has_c_attribute)797# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)798#else799# define XXH_HAS_C_ATTRIBUTE(x) 0800#endif801/*! @endcond */802803/*! @cond Doxygen ignores this part */804#if defined(__cplusplus) && defined(__has_cpp_attribute)805# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)806#else807# define XXH_HAS_CPP_ATTRIBUTE(x) 0808#endif809/*! @endcond */810811/*! 
@cond Doxygen ignores this part */812/*813* Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute814* introduced in CPP17 and C23.815* CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough816* C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough817*/818#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)819# define XXH_FALLTHROUGH [[fallthrough]]820#elif XXH_HAS_ATTRIBUTE(__fallthrough__)821# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))822#else823# define XXH_FALLTHROUGH /* fallthrough */824#endif825/*! @endcond */826827/*! @cond Doxygen ignores this part */828/*829* Define XXH_NOESCAPE for annotated pointers in public API.830* https://clang.llvm.org/docs/AttributeReference.html#noescape831* As of writing this, only supported by clang.832*/833#if XXH_HAS_ATTRIBUTE(noescape)834# define XXH_NOESCAPE __attribute__((__noescape__))835#else836# define XXH_NOESCAPE837#endif838/*! @endcond */839840841/*!842* @}843* @ingroup public844* @{845*/846847#ifndef XXH_NO_LONG_LONG848/*-**********************************************************************849* 64-bit hash850************************************************************************/851#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */852/*!853* @brief An unsigned 64-bit integer.854*855* Not necessarily defined to `uint64_t` but functionally equivalent.856*/857typedef uint64_t XXH64_hash_t;858#elif !defined (__VMS) \859&& (defined (__cplusplus) \860|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )861# ifdef _AIX862# include <inttypes.h>863# else864# include <stdint.h>865# endif866typedef uint64_t XXH64_hash_t;867#else868# include <limits.h>869# if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL870/* LP64 ABI says uint64_t is unsigned long */871typedef unsigned long XXH64_hash_t;872# else873/* the following type must have a width of 64-bit */874typedef unsigned long long 
XXH64_hash_t;875# endif876#endif877878/*!879* @}880*881* @defgroup XXH64_family XXH64 family882* @ingroup public883* @{884* Contains functions used in the classic 64-bit xxHash algorithm.885*886* @note887* XXH3 provides competitive speed for both 32-bit and 64-bit systems,888* and offers true 64/128 bit hash results.889* It provides better speed for systems with vector processing capabilities.890*/891892/*!893* @brief Calculates the 64-bit hash of @p input using xxHash64.894*895* @param input The block of data to be hashed, at least @p length bytes in size.896* @param length The length of @p input, in bytes.897* @param seed The 64-bit seed to alter the hash's output predictably.898*899* @pre900* The memory between @p input and @p input + @p length must be valid,901* readable, contiguous memory. However, if @p length is `0`, @p input may be902* `NULL`. In C++, this also must be *TriviallyCopyable*.903*904* @return The calculated 64-bit xxHash64 value.905*906* @see @ref single_shot_example "Single Shot Example" for an example.907*/908XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);909910/******* Streaming *******/911#ifndef XXH_NO_STREAM912/*!913* @brief The opaque state struct for the XXH64 streaming API.914*915* @see XXH64_state_s for details.916* @see @ref streaming_example "Streaming Example"917*/918typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */919920/*!921* @brief Allocates an @ref XXH64_state_t.922*923* @return An allocated pointer of @ref XXH64_state_t on success.924* @return `NULL` on failure.925*926* @note Must be freed with XXH64_freeState().927*928* @see @ref streaming_example "Streaming Example"929*/930XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);931932/*!933* @brief Frees an @ref XXH64_state_t.934*935* @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState().936*937* @return @ref XXH_OK.938*939* @note @p statePtr must be 
allocated with XXH64_createState().940*941* @see @ref streaming_example "Streaming Example"942*/943XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);944945/*!946* @brief Copies one @ref XXH64_state_t to another.947*948* @param dst_state The state to copy to.949* @param src_state The state to copy from.950* @pre951* @p dst_state and @p src_state must not be `NULL` and must not overlap.952*/953XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);954955/*!956* @brief Resets an @ref XXH64_state_t to begin a new hash.957*958* @param statePtr The state struct to reset.959* @param seed The 64-bit seed to alter the hash result predictably.960*961* @pre962* @p statePtr must not be `NULL`.963*964* @return @ref XXH_OK on success.965* @return @ref XXH_ERROR on failure.966*967* @note This function resets and seeds a state. Call it before @ref XXH64_update().968*969* @see @ref streaming_example "Streaming Example"970*/971XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);972973/*!974* @brief Consumes a block of @p input to an @ref XXH64_state_t.975*976* @param statePtr The state struct to update.977* @param input The block of data to be hashed, at least @p length bytes in size.978* @param length The length of @p input, in bytes.979*980* @pre981* @p statePtr must not be `NULL`.982* @pre983* The memory between @p input and @p input + @p length must be valid,984* readable, contiguous memory. However, if @p length is `0`, @p input may be985* `NULL`. 
In C++, this also must be *TriviallyCopyable*.986*987* @return @ref XXH_OK on success.988* @return @ref XXH_ERROR on failure.989*990* @note Call this to incrementally consume blocks of data.991*992* @see @ref streaming_example "Streaming Example"993*/994XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);995996/*!997* @brief Returns the calculated hash value from an @ref XXH64_state_t.998*999* @param statePtr The state struct to calculate the hash from.1000*1001* @pre1002* @p statePtr must not be `NULL`.1003*1004* @return The calculated 64-bit xxHash64 value from that state.1005*1006* @note1007* Calling XXH64_digest() will not affect @p statePtr, so you can update,1008* digest, and update again.1009*1010* @see @ref streaming_example "Streaming Example"1011*/1012XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);1013#endif /* !XXH_NO_STREAM */1014/******* Canonical representation *******/10151016/*!1017* @brief Canonical (big endian) representation of @ref XXH64_hash_t.1018*/1019typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;10201021/*!1022* @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t.1023*1024* @param dst The @ref XXH64_canonical_t pointer to be stored to.1025* @param hash The @ref XXH64_hash_t to be converted.1026*1027* @pre1028* @p dst must not be `NULL`.1029*1030* @see @ref canonical_representation_example "Canonical Representation Example"1031*/1032XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);10331034/*!1035* @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t.1036*1037* @param src The @ref XXH64_canonical_t to convert.1038*1039* @pre1040* @p src must not be `NULL`.1041*1042* @return The converted hash.1043*1044* @see @ref canonical_representation_example "Canonical Representation 
Example"1045*/1046XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);10471048#ifndef XXH_NO_XXH310491050/*!1051* @}1052* ************************************************************************1053* @defgroup XXH3_family XXH3 family1054* @ingroup public1055* @{1056*1057* XXH3 is a more recent hash algorithm featuring:1058* - Improved speed for both small and large inputs1059* - True 64-bit and 128-bit outputs1060* - SIMD acceleration1061* - Improved 32-bit viability1062*1063* Speed analysis methodology is explained here:1064*1065* https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html1066*1067* Compared to XXH64, expect XXH3 to run approximately1068* ~2x faster on large inputs and >3x faster on small ones,1069* exact differences vary depending on platform.1070*1071* XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,1072* but does not require it.1073* Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH31074* at competitive speeds, even without vector support. Further details are1075* explained in the implementation.1076*1077* XXH3 has a fast scalar implementation, but it also includes accelerated SIMD1078* implementations for many common platforms:1079* - AVX5121080* - AVX21081* - SSE21082* - ARM NEON1083* - WebAssembly SIMD1281084* - POWER8 VSX1085* - s390x ZVector1086* This can be controlled via the @ref XXH_VECTOR macro, but it automatically1087* selects the best version according to predefined macros. 
For the x86 family, an1088* automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c.1089*1090* XXH3 implementation is portable:1091* it has a generic C90 formulation that can be compiled on any platform,1092* all implementations generate exactly the same hash value on all platforms.1093* Starting from v0.8.0, it's also labelled "stable", meaning that1094* any future version will also generate the same hash value.1095*1096* XXH3 offers 2 variants, _64bits and _128bits.1097*1098* When only 64 bits are needed, prefer invoking the _64bits variant, as it1099* reduces the amount of mixing, resulting in faster speed on small inputs.1100* It's also generally simpler to manipulate a scalar return type than a struct.1101*1102* The API supports one-shot hashing, streaming mode, and custom secrets.1103*/11041105/*!1106* @ingroup tuning1107* @brief Possible values for @ref XXH_VECTOR.1108*1109* Unless set explicitly, determined automatically.1110*/1111# define XXH_SCALAR 0 /*!< Portable scalar version */1112# define XXH_SSE2 1 /*!< SSE2 for Pentium 4, Opteron, all x86_64. 
*/1113# define XXH_AVX2 2 /*!< AVX2 for Haswell and Bulldozer */1114# define XXH_AVX512 3 /*!< AVX512 for Skylake and Icelake */1115# define XXH_NEON 4 /*!< NEON for most ARMv7-A, all AArch64, and WASM SIMD128 */1116# define XXH_VSX 5 /*!< VSX and ZVector for POWER8/z13 (64-bit) */1117# define XXH_SVE 6 /*!< SVE for some ARMv8-A and ARMv9-A */1118# define XXH_LSX 7 /*!< LSX (128-bit SIMD) for LoongArch64 */1119# define XXH_LASX 8 /*!< LASX (256-bit SIMD) for LoongArch64 */1120# define XXH_RVV 9 /*!< RVV (RISC-V Vector) for RISC-V */11211122/*-**********************************************************************1123* XXH3 64-bit variant1124************************************************************************/11251126/*!1127* @brief Calculates 64-bit unseeded variant of XXH3 hash of @p input.1128*1129* @param input The block of data to be hashed, at least @p length bytes in size.1130* @param length The length of @p input, in bytes.1131*1132* @pre1133* The memory between @p input and @p input + @p length must be valid,1134* readable, contiguous memory. However, if @p length is `0`, @p input may be1135* `NULL`. 
In C++, this also must be *TriviallyCopyable*.1136*1137* @return The calculated 64-bit XXH3 hash value.1138*1139* @note1140* This is equivalent to @ref XXH3_64bits_withSeed() with a seed of `0`, however1141* it may have slightly better performance due to constant propagation of the1142* defaults.1143*1144* @see1145* XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants1146* @see @ref single_shot_example "Single Shot Example" for an example.1147*/1148XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);11491150/*!1151* @brief Calculates 64-bit seeded variant of XXH3 hash of @p input.1152*1153* @param input The block of data to be hashed, at least @p length bytes in size.1154* @param length The length of @p input, in bytes.1155* @param seed The 64-bit seed to alter the hash result predictably.1156*1157* @pre1158* The memory between @p input and @p input + @p length must be valid,1159* readable, contiguous memory. However, if @p length is `0`, @p input may be1160* `NULL`. 
In C++, this also must be *TriviallyCopyable*.1161*1162* @return The calculated 64-bit XXH3 hash value.1163*1164* @note1165* seed == 0 produces the same results as @ref XXH3_64bits().1166*1167* This variant generates a custom secret on the fly based on default secret1168* altered using the @p seed value.1169*1170* While this operation is decently fast, note that it's not completely free.1171*1172* @see @ref single_shot_example "Single Shot Example" for an example.1173*/1174XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);11751176/*!1177* The bare minimum size for a custom secret.1178*1179* @see1180* XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),1181* XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().1182*/1183#define XXH3_SECRET_SIZE_MIN 13611841185/*!1186* @brief Calculates 64-bit variant of XXH3 with a custom "secret".1187*1188* @param data The block of data to be hashed, at least @p len bytes in size.1189* @param len The length of @p data, in bytes.1190* @param secret The secret data.1191* @param secretSize The length of @p secret, in bytes.1192*1193* @return The calculated 64-bit XXH3 hash value.1194*1195* @pre1196* The memory between @p data and @p data + @p len must be valid,1197* readable, contiguous memory. However, if @p len is `0`, @p data may be1198* `NULL`. 
In C++, this also must be *TriviallyCopyable*.1199*1200* It's possible to provide any blob of bytes as a "secret" to generate the hash.1201* This makes it more difficult for an external actor to prepare an intentional collision.1202* The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).1203* However, the quality of the secret impacts the dispersion of the hash algorithm.1204* Therefore, the secret _must_ look like a bunch of random bytes.1205* Avoid "trivial" or structured data such as repeated sequences or a text document.1206* Whenever in doubt about the "randomness" of the blob of bytes,1207* consider employing @ref XXH3_generateSecret() instead (see below).1208* It will generate a proper high entropy secret derived from the blob of bytes.1209* Another advantage of using XXH3_generateSecret() is that1210* it guarantees that all bits within the initial blob of bytes1211* will impact every bit of the output.1212* This is not necessarily the case when using the blob of bytes directly1213* because, when hashing _small_ inputs, only a portion of the secret is employed.1214*1215* @see @ref single_shot_example "Single Shot Example" for an example.1216*/1217XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);121812191220/******* Streaming *******/1221#ifndef XXH_NO_STREAM1222/*1223* Streaming requires state maintenance.1224* This operation costs memory and CPU.1225* As a consequence, streaming is slower than one-shot hashing.1226* For better performance, prefer one-shot functions whenever applicable.1227*/12281229/*!1230* @brief The opaque state struct for the XXH3 streaming API.1231*1232* @see XXH3_state_s for details.1233* @see @ref streaming_example "Streaming Example"1234*/1235typedef struct XXH3_state_s XXH3_state_t;1236XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);1237XXH_PUBLIC_API XXH_errorcode 
XXH3_freeState(XXH3_state_t* statePtr);12381239/*!1240* @brief Copies one @ref XXH3_state_t to another.1241*1242* @param dst_state The state to copy to.1243* @param src_state The state to copy from.1244* @pre1245* @p dst_state and @p src_state must not be `NULL` and must not overlap.1246*/1247XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);12481249/*!1250* @brief Resets an @ref XXH3_state_t to begin a new hash.1251*1252* @param statePtr The state struct to reset.1253*1254* @pre1255* @p statePtr must not be `NULL`.1256*1257* @return @ref XXH_OK on success.1258* @return @ref XXH_ERROR on failure.1259*1260* @note1261* - This function resets `statePtr` and generates a secret with default parameters.1262* - Call this function before @ref XXH3_64bits_update().1263* - Digest will be equivalent to `XXH3_64bits()`.1264*1265* @see @ref streaming_example "Streaming Example"1266*1267*/1268XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);12691270/*!1271* @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.1272*1273* @param statePtr The state struct to reset.1274* @param seed The 64-bit seed to alter the hash result predictably.1275*1276* @pre1277* @p statePtr must not be `NULL`.1278*1279* @return @ref XXH_OK on success.1280* @return @ref XXH_ERROR on failure.1281*1282* @note1283* - This function resets `statePtr` and generates a secret from `seed`.1284* - Call this function before @ref XXH3_64bits_update().1285* - Digest will be equivalent to `XXH3_64bits_withSeed()`.1286*1287* @see @ref streaming_example "Streaming Example"1288*1289*/1290XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);12911292/*!1293* @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.1294*1295* @param statePtr The state struct to reset.1296* @param secret The secret data.1297* @param secretSize The length 
of @p secret, in bytes.1298*1299* @pre1300* @p statePtr must not be `NULL`.1301*1302* @return @ref XXH_OK on success.1303* @return @ref XXH_ERROR on failure.1304*1305* @note1306* `secret` is referenced, it _must outlive_ the hash streaming session.1307*1308* Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,1309* and the quality of produced hash values depends on secret's entropy1310* (secret's content should look like a bunch of random bytes).1311* When in doubt about the randomness of a candidate `secret`,1312* consider employing `XXH3_generateSecret()` instead (see below).1313*1314* @see @ref streaming_example "Streaming Example"1315*/1316XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);13171318/*!1319* @brief Consumes a block of @p input to an @ref XXH3_state_t.1320*1321* @param statePtr The state struct to update.1322* @param input The block of data to be hashed, at least @p length bytes in size.1323* @param length The length of @p input, in bytes.1324*1325* @pre1326* @p statePtr must not be `NULL`.1327* @pre1328* The memory between @p input and @p input + @p length must be valid,1329* readable, contiguous memory. However, if @p length is `0`, @p input may be1330* `NULL`. 
In C++, this also must be *TriviallyCopyable*.1331*1332* @return @ref XXH_OK on success.1333* @return @ref XXH_ERROR on failure.1334*1335* @note Call this to incrementally consume blocks of data.1336*1337* @see @ref streaming_example "Streaming Example"1338*/1339XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);13401341/*!1342* @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t.1343*1344* @param statePtr The state struct to calculate the hash from.1345*1346* @pre1347* @p statePtr must not be `NULL`.1348*1349* @return The calculated XXH3 64-bit hash value from that state.1350*1351* @note1352* Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,1353* digest, and update again.1354*1355* @see @ref streaming_example "Streaming Example"1356*/1357XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);1358#endif /* !XXH_NO_STREAM */13591360/* note : canonical representation of XXH3 is the same as XXH641361* since they both produce XXH64_hash_t values */136213631364/*-**********************************************************************1365* XXH3 128-bit variant1366************************************************************************/13671368/*!1369* @brief The return value from 128-bit hashes.1370*1371* Stored in little endian order, although the fields themselves are in native1372* endianness.1373*/1374typedef struct {1375XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */1376XXH64_hash_t high64; /*!< `value >> 64` */1377} XXH128_hash_t;13781379/*!1380* @brief Calculates 128-bit unseeded variant of XXH3 of @p data.1381*1382* @param data The block of data to be hashed, at least @p len bytes in size.1383* @param len The length of @p data, in bytes.1384*1385* @return The calculated 128-bit variant of XXH3 value.1386*1387* The 128-bit variant of XXH3 has more strength, but it has a bit 
of overhead1388* for shorter inputs.1389*1390* This is equivalent to @ref XXH3_128bits_withSeed() with a seed of `0`, however1391* it may have slightly better performance due to constant propagation of the1392* defaults.1393*1394* @see XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants1395* @see @ref single_shot_example "Single Shot Example" for an example.1396*/1397XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);1398/*! @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.1399*1400* @param data The block of data to be hashed, at least @p len bytes in size.1401* @param len The length of @p data, in bytes.1402* @param seed The 64-bit seed to alter the hash result predictably.1403*1404* @return The calculated 128-bit variant of XXH3 value.1405*1406* @note1407* seed == 0 produces the same results as @ref XXH3_128bits().1408*1409* This variant generates a custom secret on the fly based on default secret1410* altered using the @p seed value.1411*1412* While this operation is decently fast, note that it's not completely free.1413*1414* @see XXH3_128bits(), XXH3_128bits_withSecret(): other seeding variants1415* @see @ref single_shot_example "Single Shot Example" for an example.1416*/1417XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);1418/*!1419* @brief Calculates 128-bit variant of XXH3 with a custom "secret".1420*1421* @param data The block of data to be hashed, at least @p len bytes in size.1422* @param len The length of @p data, in bytes.1423* @param secret The secret data.1424* @param secretSize The length of @p secret, in bytes.1425*1426* @return The calculated 128-bit variant of XXH3 value.1427*1428* It's possible to provide any blob of bytes as a "secret" to generate the hash.1429* This makes it more difficult for an external actor to prepare an intentional collision.1430* The main condition is that @p 
secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).1431* However, the quality of the secret impacts the dispersion of the hash algorithm.1432* Therefore, the secret _must_ look like a bunch of random bytes.1433* Avoid "trivial" or structured data such as repeated sequences or a text document.1434* Whenever in doubt about the "randomness" of the blob of bytes,1435* consider employing @ref XXH3_generateSecret() instead (see below).1436* It will generate a proper high entropy secret derived from the blob of bytes.1437* Another advantage of using XXH3_generateSecret() is that1438* it guarantees that all bits within the initial blob of bytes1439* will impact every bit of the output.1440* This is not necessarily the case when using the blob of bytes directly1441* because, when hashing _small_ inputs, only a portion of the secret is employed.1442*1443* @see @ref single_shot_example "Single Shot Example" for an example.1444*/1445XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);14461447/******* Streaming *******/1448#ifndef XXH_NO_STREAM1449/*1450* Streaming requires state maintenance.1451* This operation costs memory and CPU.1452* As a consequence, streaming is slower than one-shot hashing.1453* For better performance, prefer one-shot functions whenever applicable.1454*1455* XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().1456* Use already declared XXH3_createState() and XXH3_freeState().1457*1458* All reset and streaming functions have the same meaning as their 64-bit counterparts.1459*/14601461/*!1462* @brief Resets an @ref XXH3_state_t to begin a new hash.1463*1464* @param statePtr The state struct to reset.1465*1466* @pre1467* @p statePtr must not be `NULL`.1468*1469* @return @ref XXH_OK on success.1470* @return @ref XXH_ERROR on failure.1471*1472* @note1473* - This function resets `statePtr` and generates a secret with default parameters.1474* 
- Call it before @ref XXH3_128bits_update().1475* - Digest will be equivalent to `XXH3_128bits()`.1476*1477* @see @ref streaming_example "Streaming Example"1478*/1479XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);14801481/*!1482* @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.1483*1484* @param statePtr The state struct to reset.1485* @param seed The 64-bit seed to alter the hash result predictably.1486*1487* @pre1488* @p statePtr must not be `NULL`.1489*1490* @return @ref XXH_OK on success.1491* @return @ref XXH_ERROR on failure.1492*1493* @note1494* - This function resets `statePtr` and generates a secret from `seed`.1495* - Call it before @ref XXH3_128bits_update().1496* - Digest will be equivalent to `XXH3_128bits_withSeed()`.1497*1498* @see @ref streaming_example "Streaming Example"1499*/1500XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);1501/*!1502* @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.1503*1504* @param statePtr The state struct to reset.1505* @param secret The secret data.1506* @param secretSize The length of @p secret, in bytes.1507*1508* @pre1509* @p statePtr must not be `NULL`.1510*1511* @return @ref XXH_OK on success.1512* @return @ref XXH_ERROR on failure.1513*1514* `secret` is referenced, it _must outlive_ the hash streaming session.1515* Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,1516* and the quality of produced hash values depends on secret's entropy1517* (secret's content should look like a bunch of random bytes).1518* When in doubt about the randomness of a candidate `secret`,1519* consider employing `XXH3_generateSecret()` instead (see below).1520*1521* @see @ref streaming_example "Streaming Example"1522*/1523XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t 
secretSize);15241525/*!1526* @brief Consumes a block of @p input to an @ref XXH3_state_t.1527*1528* Call this to incrementally consume blocks of data.1529*1530* @param statePtr The state struct to update.1531* @param input The block of data to be hashed, at least @p length bytes in size.1532* @param length The length of @p input, in bytes.1533*1534* @pre1535* @p statePtr must not be `NULL`.1536*1537* @return @ref XXH_OK on success.1538* @return @ref XXH_ERROR on failure.1539*1540* @note1541* The memory between @p input and @p input + @p length must be valid,1542* readable, contiguous memory. However, if @p length is `0`, @p input may be1543* `NULL`. In C++, this also must be *TriviallyCopyable*.1544*1545*/1546XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);15471548/*!1549* @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.1550*1551* @param statePtr The state struct to calculate the hash from.1552*1553* @pre1554* @p statePtr must not be `NULL`.1555*1556* @return The calculated XXH3 128-bit hash value from that state.1557*1558* @note1559* Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,1560* digest, and update again.1561*1562*/1563XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);1564#endif /* !XXH_NO_STREAM */15651566/* Following helper functions make it possible to compare XXH128_hash_t values.1567* Since XXH128_hash_t is a structure, this capability is not offered by the language.1568* Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */15691570/*!1571* @brief Check equality of two XXH128_hash_t values1572*1573* @param h1 The 128-bit hash value.1574* @param h2 Another 128-bit hash value.1575*1576* @return `1` if `h1` and `h2` are equal.1577* @return `0` if they are not.1578*/1579XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t 
h1, XXH128_hash_t h2);15801581/*!1582* @brief Compares two @ref XXH128_hash_t1583*1584* This comparator is compatible with stdlib's `qsort()`/`bsearch()`.1585*1586* @param h128_1 Left-hand side value1587* @param h128_2 Right-hand side value1588*1589* @return >0 if @p h128_1 > @p h128_21590* @return =0 if @p h128_1 == @p h128_21591* @return <0 if @p h128_1 < @p h128_21592*/1593XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);159415951596/******* Canonical representation *******/1597typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;159815991600/*!1601* @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.1602*1603* @param dst The @ref XXH128_canonical_t pointer to be stored to.1604* @param hash The @ref XXH128_hash_t to be converted.1605*1606* @pre1607* @p dst must not be `NULL`.1608* @see @ref canonical_representation_example "Canonical Representation Example"1609*/1610XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);16111612/*!1613* @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t.1614*1615* @param src The @ref XXH128_canonical_t to convert.1616*1617* @pre1618* @p src must not be `NULL`.1619*1620* @return The converted hash.1621* @see @ref canonical_representation_example "Canonical Representation Example"1622*/1623XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);162416251626#endif /* !XXH_NO_XXH3 */1627#endif /* XXH_NO_LONG_LONG */16281629/*!1630* @}1631*/1632#endif /* XXHASH_H_5627135585666179 */1633163416351636#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)1637#define XXHASH_H_STATIC_138792387421638/* ****************************************************************************1639* This section contains declarations which are not guaranteed to remain stable.1640* They may change in 
future versions, becoming incompatible with a different1641* version of the library.1642* These declarations should only be used with static linking.1643* Never use them in association with dynamic linking!1644***************************************************************************** */16451646/*1647* These definitions are only present to allow static allocation1648* of XXH states, on stack or in a struct, for example.1649* Never **ever** access their members directly.1650*/16511652/*!1653* @internal1654* @brief Structure for XXH32 streaming API.1655*1656* @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,1657* @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is1658* an opaque type. This allows fields to safely be changed.1659*1660* Typedef'd to @ref XXH32_state_t.1661* Do not access the members of this struct directly.1662* @see XXH64_state_s, XXH3_state_s1663*/1664struct XXH32_state_s {1665XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */1666XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */1667XXH32_hash_t acc[4]; /*!< Accumulator lanes */1668unsigned char buffer[16]; /*!< Internal buffer for partial reads. */1669XXH32_hash_t bufferedSize; /*!< Amount of data in @ref buffer */1670XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */1671}; /* typedef'd to XXH32_state_t */167216731674#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */16751676/*!1677* @internal1678* @brief Structure for XXH64 streaming API.1679*1680* @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,1681* @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is1682* an opaque type. 
This allows fields to safely be changed.1683*1684* Typedef'd to @ref XXH64_state_t.1685* Do not access the members of this struct directly.1686* @see XXH32_state_s, XXH3_state_s1687*/1688struct XXH64_state_s {1689XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */1690XXH64_hash_t acc[4]; /*!< Accumulator lanes */1691unsigned char buffer[32]; /*!< Internal buffer for partial reads.. */1692XXH32_hash_t bufferedSize; /*!< Amount of data in @ref buffer */1693XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/1694XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */1695}; /* typedef'd to XXH64_state_t */16961697#ifndef XXH_NO_XXH316981699#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */1700# define XXH_ALIGN(n) _Alignas(n)1701#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */1702/* In C++ alignas() is a keyword */1703# define XXH_ALIGN(n) alignas(n)1704#elif defined(__GNUC__)1705# define XXH_ALIGN(n) __attribute__ ((aligned(n)))1706#elif defined(_MSC_VER)1707# define XXH_ALIGN(n) __declspec(align(n))1708#else1709# define XXH_ALIGN(n) /* disabled */1710#endif17111712/* Old GCC versions only accept the attribute after the type in structures. */1713#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \1714&& ! 
(defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \1715&& defined(__GNUC__)1716# define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)1717#else1718# define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type1719#endif17201721/*!1722* @internal1723* @brief The size of the internal XXH3 buffer.1724*1725* This is the optimal update size for incremental hashing.1726*1727* @see XXH3_64b_update(), XXH3_128b_update().1728*/1729#define XXH3_INTERNALBUFFER_SIZE 25617301731/*!1732* @def XXH3_SECRET_DEFAULT_SIZE1733* @brief Default Secret's size1734*1735* This is the size of internal XXH3_kSecret1736* and is needed by XXH3_generateSecret_fromSeed().1737*1738* Not to be confused with @ref XXH3_SECRET_SIZE_MIN.1739*/1740#define XXH3_SECRET_DEFAULT_SIZE 19217411742/*!1743* @internal1744* @brief Structure for XXH3 streaming API.1745*1746* @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,1747* @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.1748* Otherwise it is an opaque type.1749* Never use this definition in combination with dynamic library.1750* This allows fields to safely be changed in the future.1751*1752* @note ** This structure has a strict alignment requirement of 64 bytes!! **1753* Do not allocate this with `malloc()` or `new`,1754* it will not be sufficiently aligned.1755* Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.1756*1757* Typedef'd to @ref XXH3_state_t.1758* Do never access the members of this struct directly.1759*1760* @see XXH3_INITSTATE() for stack initialization.1761* @see XXH3_createState(), XXH3_freeState().1762* @see XXH32_state_s, XXH64_state_s1763*/1764struct XXH3_state_s {1765XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);1766/*!< The 8 accumulators. See @ref XXH32_state_s::acc and @ref XXH64_state_s::acc */1767XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);1768/*!< Used to store a custom secret generated from a seed. 
*/1769XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);1770/*!< The internal buffer. @see XXH32_state_s::mem32 */1771XXH32_hash_t bufferedSize;1772/*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */1773XXH32_hash_t useSeed;1774/*!< Reserved field. Needed for padding on 64-bit. */1775size_t nbStripesSoFar;1776/*!< Number or stripes processed. */1777XXH64_hash_t totalLen;1778/*!< Total length hashed. 64-bit even on 32-bit targets. */1779size_t nbStripesPerBlock;1780/*!< Number of stripes per block. */1781size_t secretLimit;1782/*!< Size of @ref customSecret or @ref extSecret */1783XXH64_hash_t seed;1784/*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */1785XXH64_hash_t reserved64;1786/*!< Reserved field. */1787const unsigned char* extSecret;1788/*!< Reference to an external secret for the _withSecret variants, NULL1789* for other variants. */1790/* note: there may be some padding at the end due to alignment on 64 bytes */1791}; /* typedef'd to XXH3_state_t */17921793#undef XXH_ALIGN_MEMBER17941795/*!1796* @brief Initializes a stack-allocated `XXH3_state_s`.1797*1798* When the @ref XXH3_state_t structure is merely emplaced on stack,1799* it should be initialized with XXH3_INITSTATE() or a memset()1800* in case its first reset uses XXH3_NNbits_reset_withSeed().1801* This init can be omitted if the first reset uses default or _withSecret mode.1802* This operation isn't necessary when the state is created with XXH3_createState().1803* Note that this doesn't prepare the state for a streaming operation,1804* it's still necessary to use XXH3_NNbits_reset*() afterwards.1805*/1806#define XXH3_INITSTATE(XXH3_state_ptr) \1807do { \1808XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \1809tmp_xxh3_state_ptr->seed = 0; \1810tmp_xxh3_state_ptr->extSecret = NULL; \1811} while(0)181218131814/*!1815* @brief Calculates the 128-bit hash of @p data using XXH3.1816*1817* @param data The block of data to be 
hashed, at least @p len bytes in size.1818* @param len The length of @p data, in bytes.1819* @param seed The 64-bit seed to alter the hash's output predictably.1820*1821* @pre1822* The memory between @p data and @p data + @p len must be valid,1823* readable, contiguous memory. However, if @p len is `0`, @p data may be1824* `NULL`. In C++, this also must be *TriviallyCopyable*.1825*1826* @return The calculated 128-bit XXH3 value.1827*1828* @see @ref single_shot_example "Single Shot Example" for an example.1829*/1830XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);183118321833/* === Experimental API === */1834/* Symbols defined below must be considered tied to a specific library version. */18351836/*!1837* @brief Derive a high-entropy secret from any user-defined content, named customSeed.1838*1839* @param secretBuffer A writable buffer for derived high-entropy secret data.1840* @param secretSize Size of secretBuffer, in bytes. Must be >= XXH3_SECRET_SIZE_MIN.1841* @param customSeed A user-defined content.1842* @param customSeedSize Size of customSeed, in bytes.1843*1844* @return @ref XXH_OK on success.1845* @return @ref XXH_ERROR on failure.1846*1847* The generated secret can be used in combination with `*_withSecret()` functions.1848* The `_withSecret()` variants are useful to provide a higher level of protection1849* than 64-bit seed, as it becomes much more difficult for an external actor to1850* guess how to impact the calculation logic.1851*1852* The function accepts as input a custom seed of any length and any content,1853* and derives from it a high-entropy secret of length @p secretSize into an1854* already allocated buffer @p secretBuffer.1855*1856* The generated secret can then be used with any `*_withSecret()` variant.1857* The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),1858* @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()1859* are part of 
this list. They all accept a `secret` parameter1860* which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)1861* _and_ feature very high entropy (consist of random-looking bytes).1862* These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can1863* be employed to ensure proper quality.1864*1865* @p customSeed can be anything. It can have any size, even small ones,1866* and its content can be anything, even "poor entropy" sources such as a bunch1867* of zeroes. The resulting `secret` will nonetheless provide all required qualities.1868*1869* @pre1870* - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN1871* - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.1872*1873* Example code:1874* @code{.c}1875* #include <stdio.h>1876* #include <stdlib.h>1877* #include <string.h>1878* #define XXH_STATIC_LINKING_ONLY // expose unstable API1879* #include "xxhash.h"1880* // Hashes argv[2] using the entropy from argv[1].1881* int main(int argc, char* argv[])1882* {1883* char secret[XXH3_SECRET_SIZE_MIN];1884* if (argv != 3) { return 1; }1885* XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));1886* XXH64_hash_t h = XXH3_64bits_withSecret(1887* argv[2], strlen(argv[2]),1888* secret, sizeof(secret)1889* );1890* printf("%016llx\n", (unsigned long long) h);1891* }1892* @endcode1893*/1894XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);18951896/*!1897* @brief Generate the same secret as the _withSeed() variants.1898*1899* @param secretBuffer A writable buffer of @ref XXH3_SECRET_DEFAULT_SIZE bytes1900* @param seed The 64-bit seed to alter the hash result predictably.1901*1902* The generated secret can be used in combination with1903*`*_withSecret()` and `_withSecretandSeed()` variants.1904*1905* Example C++ `std::string` hash class:1906* @code{.cpp}1907* #include <string>1908* 
#define XXH_STATIC_LINKING_ONLY // expose unstable API1909* #include "xxhash.h"1910* // Slow, seeds each time1911* class HashSlow {1912* XXH64_hash_t seed;1913* public:1914* HashSlow(XXH64_hash_t s) : seed{s} {}1915* size_t operator()(const std::string& x) const {1916* return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};1917* }1918* };1919* // Fast, caches the seeded secret for future uses.1920* class HashFast {1921* unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];1922* public:1923* HashFast(XXH64_hash_t s) {1924* XXH3_generateSecret_fromSeed(secret, seed);1925* }1926* size_t operator()(const std::string& x) const {1927* return size_t{1928* XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))1929* };1930* }1931* };1932* @endcode1933*/1934XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);19351936/*!1937* @brief Maximum size of "short" key in bytes.1938*/1939#define XXH3_MIDSIZE_MAX 24019401941/*!1942* @brief Calculates 64/128-bit seeded variant of XXH3 hash of @p data.1943*1944* @param data The block of data to be hashed, at least @p len bytes in size.1945* @param len The length of @p data, in bytes.1946* @param secret The secret data.1947* @param secretSize The length of @p secret, in bytes.1948* @param seed The 64-bit seed to alter the hash result predictably.1949*1950* These variants generate hash values using either:1951* - @p seed for "short" keys (< @ref XXH3_MIDSIZE_MAX = 240 bytes)1952* - @p secret for "large" keys (>= @ref XXH3_MIDSIZE_MAX).1953*1954* This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.1955* `_withSeed()` has to generate the secret on the fly for "large" keys.1956* It's fast, but can be perceptible for "not so large" keys (< 1 KB).1957* `_withSecret()` has to generate the masks on the fly for "small" keys,1958* which requires more instructions than _withSeed() variants.1959* Therefore, _withSecretandSeed variant combines the best of both 
worlds.1960*1961* When @p secret has been generated by XXH3_generateSecret_fromSeed(),1962* this variant produces *exactly* the same results as `_withSeed()` variant,1963* hence offering only a pure speed benefit on "large" input,1964* by skipping the need to regenerate the secret for every large input.1965*1966* Another usage scenario is to hash the secret to a 64-bit hash value,1967* for example with XXH3_64bits(), which then becomes the seed,1968* and then employ both the seed and the secret in _withSecretandSeed().1969* On top of speed, an added benefit is that each bit in the secret1970* has a 50% chance to swap each bit in the output, via its impact to the seed.1971*1972* This is not guaranteed when using the secret directly in "small data" scenarios,1973* because only portions of the secret are employed for small data.1974*/1975XXH_PUBLIC_API XXH_PUREF XXH64_hash_t1976XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,1977XXH_NOESCAPE const void* secret, size_t secretSize,1978XXH64_hash_t seed);19791980/*!1981* @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.1982*1983* @param input The memory segment to be hashed, at least @p len bytes in size.1984* @param length The length of @p data, in bytes.1985* @param secret The secret used to alter hash result predictably.1986* @param secretSize The length of @p secret, in bytes (must be >= XXH3_SECRET_SIZE_MIN)1987* @param seed64 The 64-bit seed to alter the hash result predictably.1988*1989* @return @ref XXH_OK on success.1990* @return @ref XXH_ERROR on failure.1991*1992* @see XXH3_64bits_withSecretandSeed(): contract is the same.1993*/1994XXH_PUBLIC_API XXH_PUREF XXH128_hash_t1995XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,1996XXH_NOESCAPE const void* secret, size_t secretSize,1997XXH64_hash_t seed64);19981999#ifndef XXH_NO_STREAM2000/*!2001* @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.2002*2003* @param statePtr A 
pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().2004* @param secret The secret data.2005* @param secretSize The length of @p secret, in bytes.2006* @param seed64 The 64-bit seed to alter the hash result predictably.2007*2008* @return @ref XXH_OK on success.2009* @return @ref XXH_ERROR on failure.2010*2011* @see XXH3_64bits_withSecretandSeed(). Contract is identical.2012*/2013XXH_PUBLIC_API XXH_errorcode2014XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,2015XXH_NOESCAPE const void* secret, size_t secretSize,2016XXH64_hash_t seed64);20172018/*!2019* @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.2020*2021* @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().2022* @param secret The secret data.2023* @param secretSize The length of @p secret, in bytes.2024* @param seed64 The 64-bit seed to alter the hash result predictably.2025*2026* @return @ref XXH_OK on success.2027* @return @ref XXH_ERROR on failure.2028*2029* @see XXH3_64bits_withSecretandSeed(). 
Contract is identical.2030*2031* Note: there was a bug in an earlier version of this function (<= v0.8.2)2032* that would make it generate an incorrect hash value2033* when @p seed == 0 and @p length < XXH3_MIDSIZE_MAX2034* and @p secret is different from XXH3_generateSecret_fromSeed().2035* As stated in the contract, the correct hash result must be2036* the same as XXH3_128bits_withSeed() when @p length <= XXH3_MIDSIZE_MAX.2037* Results generated by this older version are wrong, hence not comparable.2038*/2039XXH_PUBLIC_API XXH_errorcode2040XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,2041XXH_NOESCAPE const void* secret, size_t secretSize,2042XXH64_hash_t seed64);20432044#endif /* !XXH_NO_STREAM */20452046#endif /* !XXH_NO_XXH3 */2047#endif /* XXH_NO_LONG_LONG */2048#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)2049# define XXH_IMPLEMENTATION2050#endif20512052#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */205320542055/* ======================================================================== */2056/* ======================================================================== */2057/* ======================================================================== */205820592060/*-**********************************************************************2061* xxHash implementation2062*-**********************************************************************2063* xxHash's implementation used to be hosted inside xxhash.c.2064*2065* However, inlining requires implementation to be visible to the compiler,2066* hence be included alongside the header.2067* Previously, implementation was hosted inside xxhash.c,2068* which was then #included when inlining was activated.2069* This construction created issues with a few build and install systems,2070* as it required xxhash.c to be stored in /include directory.2071*2072* xxHash implementation is now directly integrated within xxhash.h.2073* As a consequence, xxhash.c 
is no longer needed in /include.2074*2075* xxhash.c is still available and is still useful.2076* In a "normal" setup, when xxhash is not inlined,2077* xxhash.h only exposes the prototypes and public symbols,2078* while xxhash.c can be built into an object file xxhash.o2079* which can then be linked into the final binary.2080************************************************************************/20812082#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \2083|| defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)2084# define XXH_IMPLEM_13a873738720852086/* *************************************2087* Tuning parameters2088***************************************/20892090/*!2091* @defgroup tuning Tuning parameters2092* @{2093*2094* Various macros to control xxHash's behavior.2095*/2096#ifdef XXH_DOXYGEN2097/*!2098* @brief Define this to disable 64-bit code.2099*2100* Useful if only using the @ref XXH32_family and you have a strict C90 compiler.2101*/2102# define XXH_NO_LONG_LONG2103# undef XXH_NO_LONG_LONG /* don't actually */2104/*!2105* @brief Controls how unaligned memory is accessed.2106*2107* By default, access to unaligned memory is controlled by `memcpy()`, which is2108* safe and portable.2109*2110* Unfortunately, on some target/compiler combinations, the generated assembly2111* is sub-optimal.2112*2113* The below switch allow selection of a different access method2114* in the search for improved performance.2115*2116* @par Possible options:2117*2118* - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`2119* @par2120* Use `memcpy()`. Safe and portable. 
Note that most modern compilers will2121* eliminate the function call and treat it as an unaligned access.2122*2123* - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`2124* @par2125* Depends on compiler extensions and is therefore not portable.2126* This method is safe _if_ your compiler supports it,2127* and *generally* as fast or faster than `memcpy`.2128*2129* - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast2130* @par2131* Casts directly and dereferences. This method doesn't depend on the2132* compiler, but it violates the C standard as it directly dereferences an2133* unaligned pointer. It can generate buggy code on targets which do not2134* support unaligned memory accesses, but in some circumstances, it's the2135* only known way to get the most performance.2136*2137* - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift2138* @par2139* Also portable. This can generate the best code on old compilers which don't2140* inline small `memcpy()` calls, and it might also be faster on big-endian2141* systems which lack a native byteswap instruction. However, some compilers2142* will emit literal byteshifts even if the target supports unaligned access.2143*2144*2145* @warning2146* Methods 1 and 2 rely on implementation-defined behavior. Use these with2147* care, as what works on one compiler/platform/optimization level may cause2148* another to read garbage data or even crash.2149*2150* See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.2151*2152* Prefer these methods in priority order (0 > 3 > 1 > 2)2153*/2154# define XXH_FORCE_MEMORY_ACCESS 021552156/*!2157* @def XXH_SIZE_OPT2158* @brief Controls how much xxHash optimizes for size.2159*2160* xxHash, when compiled, tends to result in a rather large binary size. This2161* is mostly due to heavy usage to forced inlining and constant folding of the2162* @ref XXH3_family to increase performance.2163*2164* However, some developers prefer size over speed. 
This option can2165* significantly reduce the size of the generated code. When using the `-Os`2166* or `-Oz` options on GCC or Clang, this is defined to 1 by default,2167* otherwise it is defined to 0.2168*2169* Most of these size optimizations can be controlled manually.2170*2171* This is a number from 0-2.2172* - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed2173* comes first.2174* - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more2175* conservative and disables hacks that increase code size. It implies the2176* options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,2177* and @ref XXH3_NEON_LANES == 8 if they are not already defined.2178* - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.2179* Performance may cry. For example, the single shot functions just use the2180* streaming API.2181*/2182# define XXH_SIZE_OPT 021832184/*!2185* @def XXH_FORCE_ALIGN_CHECK2186* @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()2187* and XXH64() only).2188*2189* This is an important performance trick for architectures without decent2190* unaligned memory access performance.2191*2192* It checks for input alignment, and when conditions are met, uses a "fast2193* path" employing direct 32-bit/64-bit reads, resulting in _dramatically2194* faster_ read speed.2195*2196* The check costs one initial branch per hash, which is generally negligible,2197* but not zero.2198*2199* Moreover, it's not useful to generate an additional code path if memory2200* access uses the same instruction for both aligned and unaligned2201* addresses (e.g. 
x86 and aarch64).2202*2203* In these cases, the alignment check can be removed by setting this macro to 0.2204* Then the code will always use unaligned memory access.2205* Align check is automatically disabled on x86, x64, ARM64, and some ARM chips2206* which are platforms known to offer good unaligned memory accesses performance.2207*2208* It is also disabled by default when @ref XXH_SIZE_OPT >= 1.2209*2210* This option does not affect XXH3 (only XXH32 and XXH64).2211*/2212# define XXH_FORCE_ALIGN_CHECK 022132214/*!2215* @def XXH_NO_INLINE_HINTS2216* @brief When non-zero, sets all functions to `static`.2217*2218* By default, xxHash tries to force the compiler to inline almost all internal2219* functions.2220*2221* This can usually improve performance due to reduced jumping and improved2222* constant folding, but significantly increases the size of the binary which2223* might not be favorable.2224*2225* Additionally, sometimes the forced inlining can be detrimental to performance,2226* depending on the architecture.2227*2228* XXH_NO_INLINE_HINTS marks all internal functions as static, giving the2229* compiler full control on whether to inline or not.2230*2231* When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if2232* @ref XXH_SIZE_OPT >= 1, this will automatically be defined.2233*/2234# define XXH_NO_INLINE_HINTS 022352236/*!2237* @def XXH3_INLINE_SECRET2238* @brief Determines whether to inline the XXH3 withSecret code.2239*2240* When the secret size is known, the compiler can improve the performance2241* of XXH3_64bits_withSecret() and XXH3_128bits_withSecret().2242*2243* However, if the secret size is not known, it doesn't have any benefit. This2244* happens when xxHash is compiled into a global symbol. 
Therefore, if2245* @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0.2246*2247* Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers2248* that are *sometimes* force inline on -Og, and it is impossible to automatically2249* detect this optimization level.2250*/2251# define XXH3_INLINE_SECRET 022522253/*!2254* @def XXH32_ENDJMP2255* @brief Whether to use a jump for `XXH32_finalize`.2256*2257* For performance, `XXH32_finalize` uses multiple branches in the finalizer.2258* This is generally preferable for performance,2259* but depending on exact architecture, a jmp may be preferable.2260*2261* This setting is only possibly making a difference for very small inputs.2262*/2263# define XXH32_ENDJMP 022642265/*!2266* @internal2267* @brief Redefines old internal names.2268*2269* For compatibility with code that uses xxHash's internals before the names2270* were changed to improve namespacing. There is no other reason to use this.2271*/2272# define XXH_OLD_NAMES2273# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */22742275/*!2276* @def XXH_NO_STREAM2277* @brief Disables the streaming API.2278*2279* When xxHash is not inlined and the streaming functions are not used, disabling2280* the streaming functions can improve code size significantly, especially with2281* the @ref XXH3_family which tends to make constant folded copies of itself.2282*/2283# define XXH_NO_STREAM2284# undef XXH_NO_STREAM /* don't actually */2285#endif /* XXH_DOXYGEN */2286/*!2287* @}2288*/22892290#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */2291/* prefer __packed__ structures (method 1) for GCC2292* < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy2293* which for some reason does unaligned loads. 
*/2294# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))2295# define XXH_FORCE_MEMORY_ACCESS 12296# endif2297#endif22982299#ifndef XXH_SIZE_OPT2300/* default to 1 for -Os or -Oz */2301# if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)2302# define XXH_SIZE_OPT 12303# else2304# define XXH_SIZE_OPT 02305# endif2306#endif23072308#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */2309/* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */2310# if XXH_SIZE_OPT >= 1 || \2311defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \2312|| defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */2313# define XXH_FORCE_ALIGN_CHECK 02314# else2315# define XXH_FORCE_ALIGN_CHECK 12316# endif2317#endif23182319#ifndef XXH_NO_INLINE_HINTS2320# if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */2321# define XXH_NO_INLINE_HINTS 12322# else2323# define XXH_NO_INLINE_HINTS 02324# endif2325#endif23262327#ifndef XXH3_INLINE_SECRET2328# if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \2329|| !defined(XXH_INLINE_ALL)2330# define XXH3_INLINE_SECRET 02331# else2332# define XXH3_INLINE_SECRET 12333# endif2334#endif23352336#ifndef XXH32_ENDJMP2337/* generally preferable for performance */2338# define XXH32_ENDJMP 02339#endif23402341/*!2342* @defgroup impl Implementation2343* @{2344*/234523462347/* *************************************2348* Includes & Memory related functions2349***************************************/2350#if defined(XXH_NO_STREAM)2351/* nothing */2352#elif defined(XXH_NO_STDLIB)23532354/* When requesting to disable any mention of stdlib,2355* the library loses the ability to invoked malloc / free.2356* In practice, it means that functions like `XXH*_createState()`2357* will always fail, and return NULL.2358* This flag is useful in situations where2359* 
xxhash.h is integrated into some kernel, embedded or limited environment2360* without access to dynamic allocation.2361*/23622363static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }2364static void XXH_free(void* p) { (void)p; }23652366#else23672368/*2369* Modify the local functions below should you wish to use2370* different memory routines for malloc() and free()2371*/2372#include <stdlib.h>23732374/*!2375* @internal2376* @brief Modify this function to use a different routine than malloc().2377*/2378static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }23792380/*!2381* @internal2382* @brief Modify this function to use a different routine than free().2383*/2384static void XXH_free(void* p) { free(p); }23852386#endif /* XXH_NO_STDLIB */23872388#ifndef XXH_memcpy2389/*!2390* @internal2391* @brief XXH_memcpy() macro can be redirected at compile time2392*/2393# include <string.h>2394# define XXH_memcpy memcpy2395#endif23962397#ifndef XXH_memset2398/*!2399* @internal2400* @brief XXH_memset() macro can be redirected at compile time2401*/2402# include <string.h>2403# define XXH_memset memset2404#endif24052406#ifndef XXH_memcmp2407/*!2408* @internal2409* @brief XXH_memcmp() macro can be redirected at compile time2410* Note: only needed by XXH128.2411*/2412# include <string.h>2413# define XXH_memcmp memcmp2414#endif2415241624172418#include <limits.h> /* ULLONG_MAX */241924202421/* *************************************2422* Compiler Specific Options2423***************************************/2424#ifdef _MSC_VER /* Visual Studio warning fix */2425# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */2426#endif24272428#if XXH_NO_INLINE_HINTS /* disable inlining hints */2429# if defined(__GNUC__) || defined(__clang__)2430# define XXH_FORCE_INLINE static __attribute__((__unused__))2431# else2432# define XXH_FORCE_INLINE static2433# endif2434# define XXH_NO_INLINE static2435/* enable inlining hints */2436#elif 
defined(__GNUC__) || defined(__clang__)2437# define XXH_FORCE_INLINE static __inline__ __attribute__((__always_inline__, __unused__))2438# define XXH_NO_INLINE static __attribute__((__noinline__))2439#elif defined(_MSC_VER) /* Visual Studio */2440# define XXH_FORCE_INLINE static __forceinline2441# define XXH_NO_INLINE static __declspec(noinline)2442#elif defined (__cplusplus) \2443|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */2444# define XXH_FORCE_INLINE static inline2445# define XXH_NO_INLINE static2446#else2447# define XXH_FORCE_INLINE static2448# define XXH_NO_INLINE static2449#endif24502451#if defined(XXH_INLINE_ALL)2452# define XXH_STATIC XXH_FORCE_INLINE2453#else2454# define XXH_STATIC static2455#endif24562457#if XXH3_INLINE_SECRET2458# define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE2459#else2460# define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE2461#endif24622463#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */2464# define XXH_RESTRICT /* disable */2465#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */2466# define XXH_RESTRICT restrict2467#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \2468|| (defined (__clang__)) \2469|| (defined (_MSC_VER) && (_MSC_VER >= 1400)) \2470|| (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))2471/*2472* There are a LOT more compilers that recognize __restrict but this2473* covers the major ones.2474*/2475# define XXH_RESTRICT __restrict2476#else2477# define XXH_RESTRICT /* disable */2478#endif24792480/* *************************************2481* Debug2482***************************************/2483/*!2484* @ingroup tuning2485* @def XXH_DEBUGLEVEL2486* @brief Sets the debugging level.2487*2488* XXH_DEBUGLEVEL is expected to be defined externally, typically via the2489* compiler's command line options. 
The value must be a number.2490*/2491#ifndef XXH_DEBUGLEVEL2492# ifdef DEBUGLEVEL /* backwards compat */2493# define XXH_DEBUGLEVEL DEBUGLEVEL2494# else2495# define XXH_DEBUGLEVEL 02496# endif2497#endif24982499#if (XXH_DEBUGLEVEL>=1)2500# include <assert.h> /* note: can still be disabled with NDEBUG */2501# define XXH_ASSERT(c) assert(c)2502#else2503# if defined(__INTEL_COMPILER)2504# define XXH_ASSERT(c) XXH_ASSUME((unsigned char) (c))2505# else2506# define XXH_ASSERT(c) XXH_ASSUME(c)2507# endif2508#endif25092510/* note: use after variable declarations */2511#ifndef XXH_STATIC_ASSERT2512# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */2513# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)2514# elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */2515# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)2516# else2517# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)2518# endif2519# define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)2520#endif25212522/*!2523* @internal2524* @def XXH_COMPILER_GUARD(var)2525* @brief Used to prevent unwanted optimizations for @p var.2526*2527* It uses an empty GCC inline assembly statement with a register constraint2528* which forces @p var into a general purpose register (eg eax, ebx, ecx2529* on x86) and marks it as modified.2530*2531* This is used in a few places to avoid unwanted autovectorization (e.g.2532* XXH32_round()). 
/*!
 * @internal
 * @def XXH_COMPILER_GUARD(var)
 * @brief Used to prevent unwanted optimizations for @p var.
 *
 * On GCC and Clang this is an empty inline assembly statement whose only
 * operand is @p var with the "+r" constraint: the operand is both read and
 * written and must live in a general purpose register, so the compiler has
 * to treat @p var as modified at this point.
 *
 * On every other compiler the macro expands to `((void)0)` and has no effect.
 *
 * @p var must be an lvalue, since it is used as an assembly output operand.
 */
#if defined(__GNUC__) || defined(__clang__)
#  define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
#else
#  define XXH_COMPILER_GUARD(var) ((void)0)
#endif
If the program depends on it, consider moving away from it by employing newer type names directly"2571# define BYTE xxh_u82572# define U8 xxh_u82573# define U32 xxh_u322574#endif25752576/* *** Memory access *** */25772578/*!2579* @internal2580* @fn xxh_u32 XXH_read32(const void* ptr)2581* @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.2582*2583* Affected by @ref XXH_FORCE_MEMORY_ACCESS.2584*2585* @param ptr The pointer to read from.2586* @return The 32-bit native endian integer from the bytes at @p ptr.2587*/25882589/*!2590* @internal2591* @fn xxh_u32 XXH_readLE32(const void* ptr)2592* @brief Reads an unaligned 32-bit little endian integer from @p ptr.2593*2594* Affected by @ref XXH_FORCE_MEMORY_ACCESS.2595*2596* @param ptr The pointer to read from.2597* @return The 32-bit little endian integer from the bytes at @p ptr.2598*/25992600/*!2601* @internal2602* @fn xxh_u32 XXH_readBE32(const void* ptr)2603* @brief Reads an unaligned 32-bit big endian integer from @p ptr.2604*2605* Affected by @ref XXH_FORCE_MEMORY_ACCESS.2606*2607* @param ptr The pointer to read from.2608* @return The 32-bit big endian integer from the bytes at @p ptr.2609*/26102611/*!2612* @internal2613* @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)2614* @brief Like @ref XXH_readLE32(), but has an option for aligned reads.2615*2616* Affected by @ref XXH_FORCE_MEMORY_ACCESS.2617* Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is2618* always @ref XXH_alignment::XXH_unaligned.2619*2620* @param ptr The pointer to read from.2621* @param align Whether @p ptr is aligned.2622* @pre2623* If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte2624* aligned.2625* @return The 32-bit little endian integer from the bytes at @p ptr.2626*/26272628#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))2629/*2630* Manual byteshift. 
Best for old compilers which don't inline memcpy.2631* We actually directly use XXH_readLE32 and XXH_readBE32.2632*/2633#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))26342635/*2636* Force direct memory access. Only works on CPU which support unaligned memory2637* access in hardware.2638*/2639static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }26402641#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))26422643/*2644* __attribute__((aligned(1))) is supported by gcc and clang. Originally the2645* documentation claimed that it only increased the alignment, but actually it2646* can decrease it on gcc, clang, and icc:2647* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,2648* https://gcc.godbolt.org/z/xYez1j67Y.2649*/2650#ifdef XXH_OLD_NAMES2651typedef union { xxh_u32 u32; } __attribute__((__packed__)) unalign;2652#endif2653static xxh_u32 XXH_read32(const void* ptr)2654{2655typedef __attribute__((__aligned__(1))) __attribute__((__may_alias__)) xxh_u32 xxh_unalign32;2656return *((const xxh_unalign32*)ptr);2657}26582659#else26602661/*2662* Portable and safe solution. 
Generally efficient.2663* see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html2664*/2665static xxh_u32 XXH_read32(const void* memPtr)2666{2667xxh_u32 val;2668XXH_memcpy(&val, memPtr, sizeof(val));2669return val;2670}26712672#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */267326742675/* *** Endianness *** */26762677/*!2678* @ingroup tuning2679* @def XXH_CPU_LITTLE_ENDIAN2680* @brief Whether the target is little endian.2681*2682* Defined to 1 if the target is little endian, or 0 if it is big endian.2683* It can be defined externally, for example on the compiler command line.2684*2685* If it is not defined,2686* a runtime check (which is usually constant folded) is used instead.2687*2688* @note2689* This is not necessarily defined to an integer constant.2690*2691* @see XXH_isLittleEndian() for the runtime check.2692*/2693#ifndef XXH_CPU_LITTLE_ENDIAN2694/*2695* Try to detect endianness automatically, to avoid the nonstandard behavior2696* in `XXH_isLittleEndian()`2697*/2698# if defined(_WIN32) /* Windows is always little endian */ \2699|| defined(__LITTLE_ENDIAN__) \2700|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)2701# define XXH_CPU_LITTLE_ENDIAN 12702# elif defined(__BIG_ENDIAN__) \2703|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)2704# define XXH_CPU_LITTLE_ENDIAN 02705# else2706/*!2707* @internal2708* @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.2709*2710* Most compilers will constant fold this.2711*/2712static int XXH_isLittleEndian(void)2713{2714/*2715* Portable and well-defined behavior.2716* Don't use static: it is detrimental to performance.2717*/2718const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };2719return one.c[0];2720}2721# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()2722# endif2723#endif27242725272627272728/* ****************************************2729* Compiler-specific Functions and Macros2730******************************************/2731#define 
/*
 * XXH_HAS_BUILTIN(x): evaluates to __has_builtin(x) when the compiler
 * provides that operator, and to 0 otherwise, so it can always be used
 * inside `#if`.
 */
#ifdef __has_builtin
#  define XXH_HAS_BUILTIN(x) __has_builtin(x)
#else
#  define XXH_HAS_BUILTIN(x) 0
#endif

/*
 * C23 and future versions have standard "unreachable()".
 * Once it has been implemented reliably we can add it as an
 * additional case:
 *
 * ```
 * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 202311L)
 * #  include <stddef.h>
 * #  ifdef unreachable
 * #    define XXH_UNREACHABLE() unreachable()
 * #  endif
 * #endif
 * ```
 *
 * Note C++23 also has std::unreachable() which can be detected
 * as follows:
 * ```
 * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
 * #  include <utility>
 * #  define XXH_UNREACHABLE() std::unreachable()
 * #endif
 * ```
 * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
 * We don't use that as including `<utility>` in `extern "C"` blocks
 * doesn't work on GCC12
 */

/*
 * XXH_UNREACHABLE(): marks a point that control flow never reaches.
 * Expands to nothing when the compiler offers no such primitive.
 */
#if XXH_HAS_BUILTIN(__builtin_unreachable)
#  define XXH_UNREACHABLE() __builtin_unreachable()

#elif defined(_MSC_VER)
#  define XXH_UNREACHABLE() __assume(0)

#else
#  define XXH_UNREACHABLE()
#endif

/*
 * XXH_ASSUME(c): tells the optimizer that condition `c` holds.
 *
 * The fallback form is wrapped in `do { ... } while (0)` so that the macro
 * behaves as a single statement: an unwrapped `if (!(c)) { ... }` followed by
 * the caller's `;` makes `if (x) XXH_ASSUME(c); else ...` fail to parse.
 */
#if XXH_HAS_BUILTIN(__builtin_assume)
#  define XXH_ASSUME(c) __builtin_assume(c)
#else
#  define XXH_ASSUME(c) do { if (!(c)) { XXH_UNREACHABLE(); } } while (0)
#endif
define XXH_rotl32 __builtin_stdc_rotate_left2803# define XXH_rotl64 __builtin_stdc_rotate_left2804/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */2805#elif defined(_MSC_VER)2806# define XXH_rotl32(x,r) _rotl(x,r)2807# define XXH_rotl64(x,r) _rotl64(x,r)2808#else2809# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))2810# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))2811#endif28122813/*!2814* @internal2815* @fn xxh_u32 XXH_swap32(xxh_u32 x)2816* @brief A 32-bit byteswap.2817*2818* @param x The 32-bit integer to byteswap.2819* @return @p x, byteswapped.2820*/2821#if defined(_MSC_VER) /* Visual Studio */2822# define XXH_swap32 _byteswap_ulong2823#elif XXH_GCC_VERSION >= 4032824# define XXH_swap32 __builtin_bswap322825#else2826static xxh_u32 XXH_swap32 (xxh_u32 x)2827{2828return ((x << 24) & 0xff000000 ) |2829((x << 8) & 0x00ff0000 ) |2830((x >> 8) & 0x0000ff00 ) |2831((x >> 24) & 0x000000ff );2832}2833#endif283428352836/* ***************************2837* Memory reads2838*****************************/28392840/*!2841* @internal2842* @brief Enum to indicate whether a pointer is aligned.2843*/2844typedef enum {2845XXH_aligned, /*!< Aligned */2846XXH_unaligned /*!< Possibly unaligned */2847} XXH_alignment;28482849/*2850* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.2851*2852* This is ideal for older compilers which don't inline memcpy.2853*/2854#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))28552856XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)2857{2858const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;2859return bytePtr[0]2860| ((xxh_u32)bytePtr[1] << 8)2861| ((xxh_u32)bytePtr[2] << 16)2862| ((xxh_u32)bytePtr[3] << 24);2863}28642865XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)2866{2867const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;2868return bytePtr[3]2869| ((xxh_u32)bytePtr[2] << 8)2870| ((xxh_u32)bytePtr[1] << 16)2871| ((xxh_u32)bytePtr[0] << 
24);2872}28732874#else2875XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)2876{2877return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));2878}28792880static xxh_u32 XXH_readBE32(const void* ptr)2881{2882return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);2883}2884#endif28852886XXH_FORCE_INLINE xxh_u322887XXH_readLE32_align(const void* ptr, XXH_alignment align)2888{2889if (align==XXH_unaligned) {2890return XXH_readLE32(ptr);2891} else {2892return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);2893}2894}289528962897/* *************************************2898* Misc2899***************************************/2900/*! @ingroup public */2901XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }290229032904/* *******************************************************************2905* 32-bit hash functions2906*********************************************************************/2907/*!2908* @}2909* @defgroup XXH32_impl XXH32 implementation2910* @ingroup impl2911*2912* Details on the XXH32 implementation.2913* @{2914*/2915/* #define instead of static const, to be used as initializers */2916#define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */2917#define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */2918#define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */2919#define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */2920#define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */29212922#ifdef XXH_OLD_NAMES2923# define PRIME32_1 XXH_PRIME32_12924# define PRIME32_2 XXH_PRIME32_22925# define PRIME32_3 XXH_PRIME32_32926# define PRIME32_4 XXH_PRIME32_42927# define PRIME32_5 XXH_PRIME32_52928#endif29292930/*!2931* @internal2932* @brief Normal stripe processing routine.2933*2934* This shuffles the bits so that any bit from @p input impacts several bits in2935* @p 
/*!
 * @internal
 * @brief Normal stripe processing routine.
 *
 * This shuffles the bits so that any bit from @p input impacts several bits in
 * @p acc: the input is multiplied by XXH_PRIME32_2 and added to the lane, the
 * lane is rotated left by 13 bits, then multiplied by XXH_PRIME32_1.
 *
 * @param acc The accumulator lane.
 * @param input The stripe of input to mix.
 * @return The mixed accumulator lane.
 */
static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
{
    acc += input * XXH_PRIME32_2;
    acc  = XXH_rotl32(acc, 13);
    acc *= XXH_PRIME32_1;
#if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
    /*
     * UGLY HACK:
     * A compiler fence is used to prevent GCC and Clang from
     * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
     * reason) without globally disabling SSE4.1.
     *
     * The reason we want to avoid vectorization is because despite working on
     * 4 integers at a time, there are multiple factors slowing XXH32 down on
     * SSE4:
     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
     *   newer chips!) making it slightly slower to multiply four integers at
     *   once compared to four integers independently. Even when pmulld was
     *   fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE
     *   just to multiply unless doing a long operation.
     *
     * - Four instructions are required to rotate,
     *      movdqa tmp,  v // not required with VEX encoding
     *      pslld  tmp, 13 // tmp <<= 13
     *      psrld  v,   19 // x >>= 19
     *      por    v,  tmp // x |= tmp
     *   compared to one for scalar:
     *      roll   v, 13    // reliably fast across the board
     *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
     *
     * - Instruction level parallelism is actually more beneficial here because
     *   the SIMD actually serializes this operation: While v1 is rotating, v2
     *   can load data, while v3 can multiply. SSE forces them to operate
     *   together.
     *
     * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
     * the loop. NEON is only faster on the A53, and with the newer cores, it is less
     * than half the speed.
     *
     * Additionally, this is used on WASM SIMD128 because it JITs to the same
     * SIMD instructions and has the same issue.
     *
     * Defining XXH_ENABLE_AUTOVECTORIZE removes the fence.
     */
    XXH_COMPILER_GUARD(acc);
#endif
    return acc;
}
/*!
 * @internal
 * @brief Processes the last 0-15 bytes of @p ptr.
 *
 * There may be up to 15 bytes remaining to consume from the input.
 * This final stage will digest them to ensure that all input bytes are present
 * in the final mix: whole 4-byte groups are consumed first, then the
 * remaining single bytes, and the result goes through XXH32_avalanche().
 *
 * Two equivalent implementations are selected by XXH32_ENDJMP: a pair of
 * loops when it is zero, an unrolled fall-through switch otherwise.
 *
 * @param hash  The hash to finalize.
 * @param ptr   The pointer to the remaining input.
 * @param len   The remaining length, modulo 16 (only the low 4 bits are used).
 * @param align Whether @p ptr is aligned.
 * @return The finalized hash.
 * @see XXH64_finalize().
 */
static XXH_PUREF xxh_u32
XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
/* Mixes a single input byte into `hash` and advances `ptr` by 1. */
#define XXH_PROCESS1 do {                             \
    hash += (*ptr++) * XXH_PRIME32_5;                 \
    hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1;      \
} while (0)

/* Mixes one 32-bit little endian word into `hash` and advances `ptr` by 4. */
#define XXH_PROCESS4 do {                             \
    hash += XXH_get32bits(ptr) * XXH_PRIME32_3;       \
    ptr += 4;                                         \
    hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4;      \
} while (0)

    /* A NULL pointer is only acceptable together with an empty input. */
    if (ptr==NULL) XXH_ASSERT(len == 0);

    /* Compact rerolled version; generally faster */
    if (!XXH32_ENDJMP) {
        len &= 15;
        while (len >= 4) {
            XXH_PROCESS4;
            len -= 4;
        }
        while (len > 0) {
            XXH_PROCESS1;
            --len;
        }
        return XXH32_avalanche(hash);
    } else {
         /*
          * Unrolled version: each group of cases shares the same number of
          * trailing single bytes (len % 4) and falls through one XXH_PROCESS4
          * per remaining 4-byte word.
          */
         switch(len&15) /* or switch(bEnd - p) */ {
           case 12:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 8:       XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 4:       XXH_PROCESS4;
                         return XXH32_avalanche(hash);

           case 13:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 9:       XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 5:       XXH_PROCESS4;
                         XXH_PROCESS1;
                         return XXH32_avalanche(hash);

           case 14:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 10:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 6:       XXH_PROCESS4;
                         XXH_PROCESS1;
                         XXH_PROCESS1;
                         return XXH32_avalanche(hash);

           case 15:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 11:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 7:       XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 3:       XXH_PROCESS1;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 2:       XXH_PROCESS1;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 1:       XXH_PROCESS1;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 0:       return XXH32_avalanche(hash);
        }
        /* Every value of (len & 15) is handled above. */
        XXH_ASSERT(0);
        return hash;   /* reaching this point is deemed impossible */
    }
}
@ingroup XXH32_family */3193XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)3194{3195#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 23196/* Simple version, good for code maintenance, but unfortunately slow for small inputs */3197XXH32_state_t state;3198XXH32_reset(&state, seed);3199XXH32_update(&state, (const xxh_u8*)input, len);3200return XXH32_digest(&state);3201#else3202if (XXH_FORCE_ALIGN_CHECK) {3203if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */3204return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);3205} }32063207return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);3208#endif3209}3210321132123213/******* Hash streaming *******/3214#ifndef XXH_NO_STREAM3215/*! @ingroup XXH32_family */3216XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)3217{3218return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));3219}3220/*! @ingroup XXH32_family */3221XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)3222{3223XXH_free(statePtr);3224return XXH_OK;3225}32263227/*! @ingroup XXH32_family */3228XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)3229{3230XXH_memcpy(dstState, srcState, sizeof(*dstState));3231}32323233/*! @ingroup XXH32_family */3234XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)3235{3236XXH_ASSERT(statePtr != NULL);3237XXH_memset(statePtr, 0, sizeof(*statePtr));3238XXH32_initAccs(statePtr->acc, seed);3239return XXH_OK;3240}324132423243/*! 
@ingroup XXH32_family */3244XXH_PUBLIC_API XXH_errorcode3245XXH32_update(XXH32_state_t* state, const void* input, size_t len)3246{3247if (input==NULL) {3248XXH_ASSERT(len == 0);3249return XXH_OK;3250}32513252state->total_len_32 += (XXH32_hash_t)len;3253state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));32543255XXH_ASSERT(state->bufferedSize < sizeof(state->buffer));3256if (len < sizeof(state->buffer) - state->bufferedSize) { /* fill in tmp buffer */3257XXH_memcpy(state->buffer + state->bufferedSize, input, len);3258state->bufferedSize += (XXH32_hash_t)len;3259return XXH_OK;3260}32613262{ const xxh_u8* xinput = (const xxh_u8*)input;3263const xxh_u8* const bEnd = xinput + len;32643265if (state->bufferedSize) { /* non-empty buffer: complete first */3266XXH_memcpy(state->buffer + state->bufferedSize, xinput, sizeof(state->buffer) - state->bufferedSize);3267xinput += sizeof(state->buffer) - state->bufferedSize;3268/* then process one round */3269(void)XXH32_consumeLong(state->acc, state->buffer, sizeof(state->buffer), XXH_aligned);3270state->bufferedSize = 0;3271}32723273XXH_ASSERT(xinput <= bEnd);3274if ((size_t)(bEnd - xinput) >= sizeof(state->buffer)) {3275/* Process the remaining data */3276xinput = XXH32_consumeLong(state->acc, xinput, (size_t)(bEnd - xinput), XXH_unaligned);3277}32783279if (xinput < bEnd) {3280/* Copy the leftover to the tmp buffer */3281XXH_memcpy(state->buffer, xinput, (size_t)(bEnd-xinput));3282state->bufferedSize = (unsigned)(bEnd-xinput);3283}3284}32853286return XXH_OK;3287}328832893290/*! 
@ingroup XXH32_family */3291XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)3292{3293xxh_u32 h32;32943295if (state->large_len) {3296h32 = XXH32_mergeAccs(state->acc);3297} else {3298h32 = state->acc[2] /* == seed */ + XXH_PRIME32_5;3299}33003301h32 += state->total_len_32;33023303return XXH32_finalize(h32, state->buffer, state->bufferedSize, XXH_aligned);3304}3305#endif /* !XXH_NO_STREAM */33063307/******* Canonical representation *******/33083309/*! @ingroup XXH32_family */3310XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)3311{3312XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));3313if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);3314XXH_memcpy(dst, &hash, sizeof(*dst));3315}3316/*! @ingroup XXH32_family */3317XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)3318{3319return XXH_readBE32(src);3320}332133223323#ifndef XXH_NO_LONG_LONG33243325/* *******************************************************************3326* 64-bit hash functions3327*********************************************************************/3328/*!3329* @}3330* @ingroup impl3331* @{3332*/3333/******* Memory access *******/33343335typedef XXH64_hash_t xxh_u64;33363337#ifdef XXH_OLD_NAMES3338# define U64 xxh_u643339#endif33403341#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))3342/*3343* Manual byteshift. Best for old compilers which don't inline memcpy.3344* We actually directly use XXH_readLE64 and XXH_readBE64.3345*/3346#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))33473348/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */3349static xxh_u64 XXH_read64(const void* memPtr)3350{3351return *(const xxh_u64*) memPtr;3352}33533354#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))33553356/*3357* __attribute__((aligned(1))) is supported by gcc and clang. 
Originally the3358* documentation claimed that it only increased the alignment, but actually it3359* can decrease it on gcc, clang, and icc:3360* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,3361* https://gcc.godbolt.org/z/xYez1j67Y.3362*/3363#ifdef XXH_OLD_NAMES3364typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((__packed__)) unalign64;3365#endif3366static xxh_u64 XXH_read64(const void* ptr)3367{3368typedef __attribute__((__aligned__(1))) __attribute__((__may_alias__)) xxh_u64 xxh_unalign64;3369return *((const xxh_unalign64*)ptr);3370}33713372#else33733374/*3375* Portable and safe solution. Generally efficient.3376* see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html3377*/3378static xxh_u64 XXH_read64(const void* memPtr)3379{3380xxh_u64 val;3381XXH_memcpy(&val, memPtr, sizeof(val));3382return val;3383}33843385#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */33863387#if defined(_MSC_VER) /* Visual Studio */3388# define XXH_swap64 _byteswap_uint643389#elif XXH_GCC_VERSION >= 4033390# define XXH_swap64 __builtin_bswap643391#else3392static xxh_u64 XXH_swap64(xxh_u64 x)3393{3394return ((x << 56) & 0xff00000000000000ULL) |3395((x << 40) & 0x00ff000000000000ULL) |3396((x << 24) & 0x0000ff0000000000ULL) |3397((x << 8) & 0x000000ff00000000ULL) |3398((x >> 8) & 0x00000000ff000000ULL) |3399((x >> 24) & 0x0000000000ff0000ULL) |3400((x >> 40) & 0x000000000000ff00ULL) |3401((x >> 56) & 0x00000000000000ffULL);3402}3403#endif340434053406/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. 
*/3407#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))34083409XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)3410{3411const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;3412return bytePtr[0]3413| ((xxh_u64)bytePtr[1] << 8)3414| ((xxh_u64)bytePtr[2] << 16)3415| ((xxh_u64)bytePtr[3] << 24)3416| ((xxh_u64)bytePtr[4] << 32)3417| ((xxh_u64)bytePtr[5] << 40)3418| ((xxh_u64)bytePtr[6] << 48)3419| ((xxh_u64)bytePtr[7] << 56);3420}34213422XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)3423{3424const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;3425return bytePtr[7]3426| ((xxh_u64)bytePtr[6] << 8)3427| ((xxh_u64)bytePtr[5] << 16)3428| ((xxh_u64)bytePtr[4] << 24)3429| ((xxh_u64)bytePtr[3] << 32)3430| ((xxh_u64)bytePtr[2] << 40)3431| ((xxh_u64)bytePtr[1] << 48)3432| ((xxh_u64)bytePtr[0] << 56);3433}34343435#else3436XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)3437{3438return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));3439}34403441static xxh_u64 XXH_readBE64(const void* ptr)3442{3443return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);3444}3445#endif34463447XXH_FORCE_INLINE xxh_u643448XXH_readLE64_align(const void* ptr, XXH_alignment align)3449{3450if (align==XXH_unaligned)3451return XXH_readLE64(ptr);3452else3453return XXH_CPU_LITTLE_ENDIAN ? 
/*
 * Multiplicative constants of the 64-bit hash.
 * #define rather than static const, to be used as initializers.
 * The trailing comment on each line spells the same value out in binary.
 */
#define XXH_PRIME64_1  0x9E3779B185EBCA87ULL  /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
#define XXH_PRIME64_2  0xC2B2AE3D27D4EB4FULL  /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
#define XXH_PRIME64_3  0x165667B19E3779F9ULL  /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
#define XXH_PRIME64_4  0x85EBCA77C2B2AE63ULL  /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
#define XXH_PRIME64_5  0x27D4EB2F165667C5ULL  /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
@copydoc XXH32_round */3482static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)3483{3484acc += input * XXH_PRIME64_2;3485acc = XXH_rotl64(acc, 31);3486acc *= XXH_PRIME64_1;3487#if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)3488/*3489* DISABLE AUTOVECTORIZATION:3490* A compiler fence is used to prevent GCC and Clang from3491* autovectorizing the XXH64 loop (pragmas and attributes don't work for some3492* reason) without globally disabling AVX512.3493*3494* Autovectorization of XXH64 tends to be detrimental,3495* though the exact outcome may change depending on exact cpu and compiler version.3496* For information, it has been reported as detrimental for Skylake-X,3497* but possibly beneficial for Zen4.3498*3499* The default is to disable auto-vectorization,3500* but you can select to enable it instead using `XXH_ENABLE_AUTOVECTORIZE` build variable.3501*/3502XXH_COMPILER_GUARD(acc);3503#endif3504return acc;3505}35063507static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)3508{3509val = XXH64_round(0, val);3510acc ^= val;3511acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4;3512return acc;3513}35143515/*! 
@copydoc XXH32_avalanche */3516static xxh_u64 XXH64_avalanche(xxh_u64 hash)3517{3518hash ^= hash >> 33;3519hash *= XXH_PRIME64_2;3520hash ^= hash >> 29;3521hash *= XXH_PRIME64_3;3522hash ^= hash >> 32;3523return hash;3524}352535263527#define XXH_get64bits(p) XXH_readLE64_align(p, align)35283529/*!3530* @internal3531* @brief Sets up the initial accumulator state for XXH64().3532*/3533XXH_FORCE_INLINE void3534XXH64_initAccs(xxh_u64 *acc, xxh_u64 seed)3535{3536XXH_ASSERT(acc != NULL);3537acc[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;3538acc[1] = seed + XXH_PRIME64_2;3539acc[2] = seed + 0;3540acc[3] = seed - XXH_PRIME64_1;3541}35423543/*!3544* @internal3545* @brief Consumes a block of data for XXH64().3546*3547* @return the end input pointer.3548*/3549XXH_FORCE_INLINE const xxh_u8 *3550XXH64_consumeLong(3551xxh_u64 *XXH_RESTRICT acc,3552xxh_u8 const *XXH_RESTRICT input,3553size_t len,3554XXH_alignment align3555)3556{3557const xxh_u8* const bEnd = input + len;3558const xxh_u8* const limit = bEnd - 31;3559XXH_ASSERT(acc != NULL);3560XXH_ASSERT(input != NULL);3561XXH_ASSERT(len >= 32);3562do {3563/* reroll on 32-bit */3564if (sizeof(void *) < sizeof(xxh_u64)) {3565size_t i;3566for (i = 0; i < 4; i++) {3567acc[i] = XXH64_round(acc[i], XXH_get64bits(input));3568input += 8;3569}3570} else {3571acc[0] = XXH64_round(acc[0], XXH_get64bits(input)); input += 8;3572acc[1] = XXH64_round(acc[1], XXH_get64bits(input)); input += 8;3573acc[2] = XXH64_round(acc[2], XXH_get64bits(input)); input += 8;3574acc[3] = XXH64_round(acc[3], XXH_get64bits(input)); input += 8;3575}3576} while (input < limit);35773578return input;3579}35803581/*!3582* @internal3583* @brief Merges the accumulator lanes together for XXH64()3584*/3585XXH_FORCE_INLINE XXH_PUREF xxh_u643586XXH64_mergeAccs(const xxh_u64 *acc)3587{3588XXH_ASSERT(acc != NULL);3589{3590xxh_u64 h64 = XXH_rotl64(acc[0], 1) + XXH_rotl64(acc[1], 7)3591+ XXH_rotl64(acc[2], 12) + XXH_rotl64(acc[3], 18);3592/* reroll on 32-bit */3593if (sizeof(void 
*) < sizeof(xxh_u64)) {3594size_t i;3595for (i = 0; i < 4; i++) {3596h64 = XXH64_mergeRound(h64, acc[i]);3597}3598} else {3599h64 = XXH64_mergeRound(h64, acc[0]);3600h64 = XXH64_mergeRound(h64, acc[1]);3601h64 = XXH64_mergeRound(h64, acc[2]);3602h64 = XXH64_mergeRound(h64, acc[3]);3603}3604return h64;3605}3606}36073608/*!3609* @internal3610* @brief Processes the last 0-31 bytes of @p ptr.3611*3612* There may be up to 31 bytes remaining to consume from the input.3613* This final stage will digest them to ensure that all input bytes are present3614* in the final mix.3615*3616* @param hash The hash to finalize.3617* @param ptr The pointer to the remaining input.3618* @param len The remaining length, modulo 32.3619* @param align Whether @p ptr is aligned.3620* @return The finalized hash3621* @see XXH32_finalize().3622*/3623XXH_STATIC XXH_PUREF xxh_u643624XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)3625{3626if (ptr==NULL) XXH_ASSERT(len == 0);3627len &= 31;3628while (len >= 8) {3629xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));3630ptr += 8;3631hash ^= k1;3632hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;3633len -= 8;3634}3635if (len >= 4) {3636hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;3637ptr += 4;3638hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;3639len -= 4;3640}3641while (len > 0) {3642hash ^= (*ptr++) * XXH_PRIME64_5;3643hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;3644--len;3645}3646return XXH64_avalanche(hash);3647}36483649#ifdef XXH_OLD_NAMES3650# define PROCESS1_64 XXH_PROCESS1_643651# define PROCESS4_64 XXH_PROCESS4_643652# define PROCESS8_64 XXH_PROCESS8_643653#else3654# undef XXH_PROCESS1_643655# undef XXH_PROCESS4_643656# undef XXH_PROCESS8_643657#endif36583659/*!3660* @internal3661* @brief The implementation for @ref XXH64().3662*3663* @param input , len , seed Directly passed from @ref XXH64().3664* @param align Whether @p input is aligned.3665* @return The calculated 
hash.3666*/3667XXH_FORCE_INLINE XXH_PUREF xxh_u643668XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)3669{3670xxh_u64 h64;3671if (input==NULL) XXH_ASSERT(len == 0);36723673if (len>=32) { /* Process a large block of data */3674xxh_u64 acc[4];3675XXH64_initAccs(acc, seed);36763677input = XXH64_consumeLong(acc, input, len, align);36783679h64 = XXH64_mergeAccs(acc);3680} else {3681h64 = seed + XXH_PRIME64_5;3682}36833684h64 += (xxh_u64) len;36853686return XXH64_finalize(h64, input, len, align);3687}368836893690/*! @ingroup XXH64_family */3691XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)3692{3693#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 23694/* Simple version, good for code maintenance, but unfortunately slow for small inputs */3695XXH64_state_t state;3696XXH64_reset(&state, seed);3697XXH64_update(&state, (const xxh_u8*)input, len);3698return XXH64_digest(&state);3699#else3700if (XXH_FORCE_ALIGN_CHECK) {3701if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */3702return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);3703} }37043705return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);37063707#endif3708}37093710/******* Hash Streaming *******/3711#ifndef XXH_NO_STREAM3712/*! @ingroup XXH64_family*/3713XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)3714{3715return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));3716}3717/*! @ingroup XXH64_family */3718XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)3719{3720XXH_free(statePtr);3721return XXH_OK;3722}37233724/*! @ingroup XXH64_family */3725XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)3726{3727XXH_memcpy(dstState, srcState, sizeof(*dstState));3728}37293730/*! 
@ingroup XXH64_family */3731XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)3732{3733XXH_ASSERT(statePtr != NULL);3734XXH_memset(statePtr, 0, sizeof(*statePtr));3735XXH64_initAccs(statePtr->acc, seed);3736return XXH_OK;3737}37383739/*! @ingroup XXH64_family */3740XXH_PUBLIC_API XXH_errorcode3741XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)3742{3743if (input==NULL) {3744XXH_ASSERT(len == 0);3745return XXH_OK;3746}37473748state->total_len += len;37493750XXH_ASSERT(state->bufferedSize <= sizeof(state->buffer));3751if (len < sizeof(state->buffer) - state->bufferedSize) { /* fill in tmp buffer */3752XXH_memcpy(state->buffer + state->bufferedSize, input, len);3753state->bufferedSize += (XXH32_hash_t)len;3754return XXH_OK;3755}37563757{ const xxh_u8* xinput = (const xxh_u8*)input;3758const xxh_u8* const bEnd = xinput + len;37593760if (state->bufferedSize) { /* non-empty buffer => complete first */3761XXH_memcpy(state->buffer + state->bufferedSize, xinput, sizeof(state->buffer) - state->bufferedSize);3762xinput += sizeof(state->buffer) - state->bufferedSize;3763/* and process one round */3764(void)XXH64_consumeLong(state->acc, state->buffer, sizeof(state->buffer), XXH_aligned);3765state->bufferedSize = 0;3766}37673768XXH_ASSERT(xinput <= bEnd);3769if ((size_t)(bEnd - xinput) >= sizeof(state->buffer)) {3770/* Process the remaining data */3771xinput = XXH64_consumeLong(state->acc, xinput, (size_t)(bEnd - xinput), XXH_unaligned);3772}37733774if (xinput < bEnd) {3775/* Copy the leftover to the tmp buffer */3776XXH_memcpy(state->buffer, xinput, (size_t)(bEnd-xinput));3777state->bufferedSize = (unsigned)(bEnd-xinput);3778}3779}37803781return XXH_OK;3782}378337843785/*! 
@ingroup XXH64_family */3786XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)3787{3788xxh_u64 h64;37893790if (state->total_len >= 32) {3791h64 = XXH64_mergeAccs(state->acc);3792} else {3793h64 = state->acc[2] /*seed*/ + XXH_PRIME64_5;3794}37953796h64 += (xxh_u64) state->total_len;37973798return XXH64_finalize(h64, state->buffer, (size_t)state->total_len, XXH_aligned);3799}3800#endif /* !XXH_NO_STREAM */38013802/******* Canonical representation *******/38033804/*! @ingroup XXH64_family */3805XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)3806{3807XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));3808if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);3809XXH_memcpy(dst, &hash, sizeof(*dst));3810}38113812/*! @ingroup XXH64_family */3813XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)3814{3815return XXH_readBE64(src);3816}38173818#ifndef XXH_NO_XXH338193820/* *********************************************************************3821* XXH33822* New generation hash designed for speed on small keys and vectorization3823************************************************************************ */3824/*!3825* @}3826* @defgroup XXH3_impl XXH3 implementation3827* @ingroup impl3828* @{3829*/38303831/* === Compiler specifics === */383238333834#if (defined(__GNUC__) && (__GNUC__ >= 3)) \3835|| (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \3836|| defined(__clang__)3837# define XXH_likely(x) __builtin_expect(x, 1)3838# define XXH_unlikely(x) __builtin_expect(x, 0)3839#else3840# define XXH_likely(x) (x)3841# define XXH_unlikely(x) (x)3842#endif38433844#ifndef XXH_HAS_INCLUDE3845# ifdef __has_include3846/*3847* Not defined as XXH_HAS_INCLUDE(x) (function-like) because3848* this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion)3849*/3850# define XXH_HAS_INCLUDE __has_include3851# else3852# define XXH_HAS_INCLUDE(x) 
03853# endif3854#endif38553856#if defined(__GNUC__) || defined(__clang__)3857# if defined(__ARM_FEATURE_SVE)3858# include <arm_sve.h>3859# endif3860# if defined(__ARM_NEON__) || defined(__ARM_NEON) \3861|| (defined(_M_ARM) && _M_ARM >= 7) \3862|| defined(_M_ARM64) || defined(_M_ARM64EC) \3863|| (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* WASM SIMD128 via SIMDe */3864# define inline __inline__ /* circumvent a clang bug */3865# include <arm_neon.h>3866# undef inline3867# elif defined(__AVX2__)3868# include <immintrin.h>3869# elif defined(__SSE2__)3870# include <emmintrin.h>3871# elif defined(__loongarch_asx)3872# include <lasxintrin.h>3873# include <lsxintrin.h>3874# elif defined(__loongarch_sx)3875# include <lsxintrin.h>3876# elif defined(__riscv_vector)3877# include <riscv_vector.h>3878# endif3879#endif38803881#if defined(_MSC_VER)3882# include <intrin.h>3883#endif38843885/*3886* One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while3887* remaining a true 64-bit/128-bit hash function.3888*3889* This is done by prioritizing a subset of 64-bit operations that can be3890* emulated without too many steps on the average 32-bit machine.3891*3892* For example, these two lines seem similar, and run equally fast on 64-bit:3893*3894* xxh_u64 x;3895* x ^= (x >> 47); // good3896* x ^= (x >> 13); // bad3897*3898* However, to a 32-bit machine, there is a major difference.3899*3900* x ^= (x >> 47) looks like this:3901*3902* x.lo ^= (x.hi >> (47 - 32));3903*3904* while x ^= (x >> 13) looks like this:3905*3906* // note: funnel shifts are not usually cheap.3907* x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));3908* x.hi ^= (x.hi >> 13);3909*3910* The first one is significantly faster than the second, simply because the3911* shift is larger than 32. 
This means:3912* - All the bits we need are in the upper 32 bits, so we can ignore the lower3913* 32 bits in the shift.3914* - The shift result will always fit in the lower 32 bits, and therefore,3915* we can ignore the upper 32 bits in the xor.3916*3917* Thanks to this optimization, XXH3 only requires these features to be efficient:3918*3919* - Usable unaligned access3920* - A 32-bit or 64-bit ALU3921* - If 32-bit, a decent ADC instruction3922* - A 32 or 64-bit multiply with a 64-bit result3923* - For the 128-bit variant, a decent byteswap helps short inputs.3924*3925* The first two are already required by XXH32, and almost all 32-bit and 64-bit3926* platforms which can run XXH32 can run XXH3 efficiently.3927*3928* Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one3929* notable exception.3930*3931* First of all, Thumb-1 lacks support for the UMULL instruction which3932* performs the important long multiply. This means numerous __aeabi_lmul3933* calls.3934*3935* Second of all, the 8 functional registers are just not enough.3936* Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need3937* Lo registers, and this shuffling results in thousands more MOVs than A32.3938*3939* A32 and T32 don't have this limitation. 
They can access all 14 registers,3940* do a 32->64 multiply with UMULL, and the flexible operand allowing free3941* shifts is helpful, too.3942*3943* Therefore, we do a quick sanity check.3944*3945* If compiling Thumb-1 for a target which supports ARM instructions, we will3946* emit a warning, as it is not a "sane" platform to compile for.3947*3948* Usually, if this happens, it is because of an accident and you probably need3949* to specify -march, as you likely meant to compile for a newer architecture.3950*3951* Credit: large sections of the vectorial and asm source code paths3952* have been contributed by @easyaspi3143953*/3954#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)3955# warning "XXH3 is highly inefficient without ARM or Thumb-2."3956#endif39573958/* ==========================================3959* Vectorization detection3960* ========================================== */39613962#ifdef XXH_DOXYGEN3963/*!3964* @ingroup tuning3965* @brief Overrides the vectorization implementation chosen for XXH3.3966*3967* Can be defined to 0 to disable SIMD,3968* or any other authorized value of @ref XXH_VECTOR.3969*3970* If this is not defined, it uses predefined macros to determine the best3971* implementation.3972*/3973# define XXH_VECTOR XXH_SCALAR3974/*!3975* @ingroup tuning3976* @brief Selects the minimum alignment for XXH3's accumulators.3977*3978* When using SIMD, this should match the alignment required for said vector3979* type, so, for example, 32 for AVX2.3980*3981* Default: Auto detected.3982*/3983# define XXH_ACC_ALIGN 83984#endif39853986/* Actual definition */3987#ifndef XXH_DOXYGEN3988#endif39893990#ifndef XXH_VECTOR /* can be defined on command line */3991# if ( \3992defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \3993|| defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \3994|| (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* wasm simd128 via SIMDe */ \3995) && ( 
\3996defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \3997|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \3998)3999# define XXH_VECTOR XXH_NEON4000# elif defined(__ARM_FEATURE_SVE)4001# define XXH_VECTOR XXH_SVE4002# elif defined(__AVX512F__)4003# define XXH_VECTOR XXH_AVX5124004# elif defined(__AVX2__)4005# define XXH_VECTOR XXH_AVX24006# elif defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))4007# define XXH_VECTOR XXH_SSE24008# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \4009|| (defined(__s390x__) && defined(__VEC__)) \4010&& defined(__GNUC__) /* TODO: IBM XL */4011# define XXH_VECTOR XXH_VSX4012# elif defined(__loongarch_asx)4013# define XXH_VECTOR XXH_LASX4014# elif defined(__loongarch_sx)4015# define XXH_VECTOR XXH_LSX4016# elif defined(__riscv_vector)4017# define XXH_VECTOR XXH_RVV4018# else4019# define XXH_VECTOR XXH_SCALAR4020# endif4021#endif40224023/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */4024#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)4025# ifdef _MSC_VER4026# pragma warning(once : 4606)4027# else4028# warning "__ARM_FEATURE_SVE isn't supported. 
Use SCALAR instead."4029# endif4030# undef XXH_VECTOR4031# define XXH_VECTOR XXH_SCALAR4032#endif40334034/*4035* Controls the alignment of the accumulator,4036* for compatibility with aligned vector loads, which are usually faster.4037*/4038#ifndef XXH_ACC_ALIGN4039# if defined(XXH_X86DISPATCH)4040# define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */4041# elif XXH_VECTOR == XXH_SCALAR /* scalar */4042# define XXH_ACC_ALIGN 84043# elif XXH_VECTOR == XXH_SSE2 /* sse2 */4044# define XXH_ACC_ALIGN 164045# elif XXH_VECTOR == XXH_AVX2 /* avx2 */4046# define XXH_ACC_ALIGN 324047# elif XXH_VECTOR == XXH_NEON /* neon */4048# define XXH_ACC_ALIGN 164049# elif XXH_VECTOR == XXH_VSX /* vsx */4050# define XXH_ACC_ALIGN 164051# elif XXH_VECTOR == XXH_AVX512 /* avx512 */4052# define XXH_ACC_ALIGN 644053# elif XXH_VECTOR == XXH_SVE /* sve */4054# define XXH_ACC_ALIGN 644055# elif XXH_VECTOR == XXH_LASX /* lasx */4056# define XXH_ACC_ALIGN 644057# elif XXH_VECTOR == XXH_LSX /* lsx */4058# define XXH_ACC_ALIGN 644059# elif XXH_VECTOR == XXH_RVV /* rvv */4060# define XXH_ACC_ALIGN 64 /* could be 8, but 64 may be faster */4061# endif4062#endif40634064#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \4065|| XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX5124066# define XXH_SEC_ALIGN XXH_ACC_ALIGN4067#elif XXH_VECTOR == XXH_SVE4068# define XXH_SEC_ALIGN XXH_ACC_ALIGN4069#elif XXH_VECTOR == XXH_RVV4070# define XXH_SEC_ALIGN XXH_ACC_ALIGN4071#else4072# define XXH_SEC_ALIGN 84073#endif40744075#if defined(__GNUC__) || defined(__clang__)4076# define XXH_ALIASING __attribute__((__may_alias__))4077#else4078# define XXH_ALIASING /* nothing */4079#endif40804081/*4082* UGLY HACK:4083* GCC usually generates the best code with -O3 for xxHash.4084*4085* However, when targeting AVX2, it is overzealous in its unrolling resulting4086* in code roughly 3/4 the speed of Clang.4087*4088* There are other issues, such as GCC splitting _mm256_loadu_si256 into4089* _mm_loadu_si128 + 
_mm256_inserti128_si256. This is an optimization which4090* only applies to Sandy and Ivy Bridge... which don't even support AVX2.4091*4092* That is why when compiling the AVX2 version, it is recommended to use either4093* -O2 -mavx2 -march=haswell4094* or4095* -O2 -mavx2 -mno-avx256-split-unaligned-load4096* for decent performance, or to use Clang instead.4097*4098* Fortunately, we can control the first one with a pragma that forces GCC into4099* -O2, but the other one we can't control without "failed to inline always4100* inline function due to target mismatch" warnings.4101*/4102#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \4103&& defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \4104&& defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */4105# pragma GCC push_options4106# pragma GCC optimize("-O2")4107#endif41084109#if XXH_VECTOR == XXH_NEON41104111/*4112* UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O34113* optimizes out the entire hashLong loop because of the aliasing violation.4114*4115* However, GCC is also inefficient at load-store optimization with vld1q/vst1q,4116* so the only option is to mark it as aliasing.4117*/4118typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;41194120/*!4121* @internal4122* @brief `vld1q_u64` but faster and alignment-safe.4123*4124* On AArch64, unaligned access is always safe, but on ARMv7-a, it is only4125* *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).4126*4127* GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it4128* prohibits load-store optimizations. 
Therefore, a direct dereference is used.4129*4130* Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe4131* unaligned load.4132*/4133#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)4134XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */4135{4136return *(xxh_aliasing_uint64x2_t const *)ptr;4137}4138#else4139XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)4140{4141return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));4142}4143#endif41444145/*!4146* @internal4147* @brief `vmlal_u32` on low and high halves of a vector.4148*4149* This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with4150* inline assembly and were therefore incapable of merging the `vget_{low, high}_u32`4151* with `vmlal_u32`.4152*/4153#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 114154XXH_FORCE_INLINE uint64x2_t4155XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)4156{4157/* Inline assembly is the only way */4158__asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));4159return acc;4160}4161XXH_FORCE_INLINE uint64x2_t4162XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)4163{4164/* This intrinsic works as expected */4165return vmlal_high_u32(acc, lhs, rhs);4166}4167#else4168/* Portable intrinsic versions */4169XXH_FORCE_INLINE uint64x2_t4170XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)4171{4172return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));4173}4174/*! 
@copydoc XXH_vmlal_low_u324175* Assume the compiler converts this to vmlal_high_u32 on aarch64 */4176XXH_FORCE_INLINE uint64x2_t4177XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)4178{4179return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));4180}4181#endif41824183/*!4184* @ingroup tuning4185* @brief Controls the NEON to scalar ratio for XXH34186*4187* This can be set to 2, 4, 6, or 8.4188*4189* ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.4190*4191* For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those4192* can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU4193* bandwidth.4194*4195* This is even more noticeable on the more advanced cores like the Cortex-A76 which4196* can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.4197*4198* Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes4199* and 2 scalar lanes, which is chosen by default.4200*4201* This does not apply to Apple processors or 32-bit processors, which run better with4202* full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.4203*4204* This change benefits CPUs with large micro-op buffers without negatively affecting4205* most other CPUs:4206*4207* | Chipset | Dispatch type | NEON only | 6:2 hybrid | Diff. 
|4208* |:----------------------|:--------------------|----------:|-----------:|------:|4209* | Snapdragon 730 (A76) | 2 NEON/8 micro-ops | 8.8 GB/s | 10.1 GB/s | ~16% |4210* | Snapdragon 835 (A73) | 2 NEON/3 micro-ops | 5.1 GB/s | 5.3 GB/s | ~5% |4211* | Marvell PXA1928 (A53) | In-order dual-issue | 1.9 GB/s | 1.9 GB/s | 0% |4212* | Apple M1 | 4 NEON/8 micro-ops | 37.3 GB/s | 36.1 GB/s | ~-3% |4213*4214* It also seems to fix some bad codegen on GCC, making it almost as fast as clang.4215*4216* When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes meaning4217* it effectively becomes worse 4.4218*4219* @see XXH3_accumulate_512_neon()4220*/4221# ifndef XXH3_NEON_LANES4222# if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \4223&& !defined(__APPLE__) && XXH_SIZE_OPT <= 04224# define XXH3_NEON_LANES 64225# else4226# define XXH3_NEON_LANES XXH_ACC_NB4227# endif4228# endif4229#endif /* XXH_VECTOR == XXH_NEON */42304231/*4232* VSX and Z Vector helpers.4233*4234* This is very messy, and any pull requests to clean this up are welcome.4235*4236* There are a lot of problems with supporting VSX and s390x, due to4237* inconsistent intrinsics, spotty coverage, and multiple endiannesses.4238*/4239#if XXH_VECTOR == XXH_VSX4240/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,4241* and `pixel`. This is a problem for obvious reasons.4242*4243* These keywords are unnecessary; the spec literally says they are4244* equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd4245* after including the header.4246*4247* We use pragma push_macro/pop_macro to keep the namespace clean. 
*/4248# pragma push_macro("bool")4249# pragma push_macro("vector")4250# pragma push_macro("pixel")4251/* silence potential macro redefined warnings */4252# undef bool4253# undef vector4254# undef pixel42554256# if defined(__s390x__)4257# include <s390intrin.h>4258# else4259# include <altivec.h>4260# endif42614262/* Restore the original macro values, if applicable. */4263# pragma pop_macro("pixel")4264# pragma pop_macro("vector")4265# pragma pop_macro("bool")42664267typedef __vector unsigned long long xxh_u64x2;4268typedef __vector unsigned char xxh_u8x16;4269typedef __vector unsigned xxh_u32x4;42704271/*4272* UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.4273*/4274typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;42754276# ifndef XXH_VSX_BE4277# if defined(__BIG_ENDIAN__) \4278|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)4279# define XXH_VSX_BE 14280# elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__4281# warning "-maltivec=be is not recommended. 
Please use native endianness."4282# define XXH_VSX_BE 14283# else4284# define XXH_VSX_BE 04285# endif4286# endif /* !defined(XXH_VSX_BE) */42874288# if XXH_VSX_BE4289# if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))4290# define XXH_vec_revb vec_revb4291# else4292/*!4293* A polyfill for POWER9's vec_revb().4294*/4295XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)4296{4297xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,42980x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };4299return vec_perm(val, val, vByteSwap);4300}4301# endif4302# endif /* XXH_VSX_BE */43034304/*!4305* Performs an unaligned vector load and byte swaps it on big endian.4306*/4307XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)4308{4309xxh_u64x2 ret;4310XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));4311# if XXH_VSX_BE4312ret = XXH_vec_revb(ret);4313# endif4314return ret;4315}43164317/*4318* vec_mulo and vec_mule are very problematic intrinsics on PowerPC4319*4320* These intrinsics weren't added until GCC 8, despite existing for a while,4321* and they are endian dependent. Also, their meaning swap depending on version.4322* */4323# if defined(__s390x__)4324/* s390x is always big endian, no issue on this platform */4325# define XXH_vec_mulo vec_mulo4326# define XXH_vec_mule vec_mule4327# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)4328/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */4329/* The IBM XL Compiler (which defined __clang__) only implements the vec_* operations */4330# define XXH_vec_mulo __builtin_altivec_vmulouw4331# define XXH_vec_mule __builtin_altivec_vmuleuw4332# else4333/* gcc needs inline assembly */4334/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. 
*/4335XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)4336{4337xxh_u64x2 result;4338__asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));4339return result;4340}4341XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)4342{4343xxh_u64x2 result;4344__asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));4345return result;4346}4347# endif /* XXH_vec_mulo, XXH_vec_mule */4348#endif /* XXH_VECTOR == XXH_VSX */43494350#if XXH_VECTOR == XXH_SVE4351#define ACCRND(acc, offset) \4352do { \4353svuint64_t input_vec = svld1_u64(mask, xinput + offset); \4354svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \4355svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \4356svuint64_t swapped = svtbl_u64(input_vec, kSwap); \4357svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \4358svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \4359svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \4360acc = svadd_u64_x(mask, acc, mul); \4361} while (0)4362#endif /* XXH_VECTOR == XXH_SVE */43634364/* prefetch4365* can be disabled, by declaring XXH_NO_PREFETCH build macro */4366#if defined(XXH_NO_PREFETCH)4367# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */4368#else4369# if XXH_SIZE_OPT >= 14370# define XXH_PREFETCH(ptr) (void)(ptr)4371# elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */4372# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */4373# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)4374# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )4375# define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)4376# else4377# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */4378# endif4379#endif /* XXH_NO_PREFETCH */438043814382/* ==========================================4383* XXH3 default settings4384* 
========================================== */43854386#define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */43874388#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)4389# error "default keyset is not large enough"4390#endif43914392/*!4393* @internal4394* @def XXH3_kSecret4395* @brief Pseudorandom secret taken directly from FARSH. */4396XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {43970xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,43980xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,43990xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,44000xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,44010x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,44020x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,44030xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,44040x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,44050xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,44060x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,44070x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,44080x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,4409};44104411static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL; /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */4412static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */44134414#ifdef XXH_OLD_NAMES4415# define kSecret XXH3_kSecret4416#endif44174418#ifdef 
XXH_DOXYGEN4419/*!4420* @brief Calculates a 32-bit to 64-bit long multiply.4421*4422* Implemented as a macro.4423*4424* Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't4425* need to (but it shouldn't need to anyways, it is about 7 instructions to do4426* a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we4427* use that instead of the normal method.4428*4429* If you are compiling for platforms like Thumb-1 and don't have a better option,4430* you may also want to write your own long multiply routine here.4431*4432* @param x, y Numbers to be multiplied4433* @return 64-bit product of the low 32 bits of @p x and @p y.4434*/4435XXH_FORCE_INLINE xxh_u644436XXH_mult32to64(xxh_u64 x, xxh_u64 y)4437{4438return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);4439}4440#elif defined(_MSC_VER) && defined(_M_IX86)4441# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))4442#else4443/*4444* Downcast + upcast is usually better than masking on older compilers like4445* GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.4446*4447* The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands4448* and perform a full 64x64 multiply -- entirely redundant on 32-bit.4449*/4450# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))4451#endif44524453/*!4454* @brief Calculates a 64->128-bit long multiply.4455*4456* Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar4457* version.4458*4459* @param lhs , rhs The 64-bit integers to be multiplied4460* @return The 128-bit result represented in an @ref XXH128_hash_t.4461*/4462static XXH128_hash_t4463XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)4464{4465/*4466* GCC/Clang __uint128_t method.4467*4468* On most 64-bit targets, GCC and Clang define a __uint128_t type.4469* This is usually the best way as it usually uses a native long 64-bit4470* multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.4471*4472* 
Usually.4473*4474* Despite being a 32-bit platform, Clang (and emscripten) define this type4475* despite not having the arithmetic for it. This results in a laggy4476* compiler builtin call which calculates a full 128-bit multiply.4477* In that case it is best to use the portable one.4478* https://github.com/Cyan4973/xxHash/issues/211#issuecomment-5155756774479*/4480#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \4481&& defined(__SIZEOF_INT128__) \4482|| (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)44834484__uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;4485XXH128_hash_t r128;4486r128.low64 = (xxh_u64)(product);4487r128.high64 = (xxh_u64)(product >> 64);4488return r128;44894490/*4491* MSVC for x64's _umul128 method.4492*4493* xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);4494*4495* This compiles to single operand MUL on x64.4496*/4497#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)44984499#ifndef _MSC_VER4500# pragma intrinsic(_umul128)4501#endif4502xxh_u64 product_high;4503xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);4504XXH128_hash_t r128;4505r128.low64 = product_low;4506r128.high64 = product_high;4507return r128;45084509/*4510* MSVC for ARM64's __umulh method.4511*4512* This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.4513*/4514#elif defined(_M_ARM64) || defined(_M_ARM64EC)45154516#ifndef _MSC_VER4517# pragma intrinsic(__umulh)4518#endif4519XXH128_hash_t r128;4520r128.low64 = lhs * rhs;4521r128.high64 = __umulh(lhs, rhs);4522return r128;45234524#else4525/*4526* Portable scalar method. 
Optimized for 32-bit and 64-bit ALUs.4527*4528* This is a fast and simple grade school multiply, which is shown below4529* with base 10 arithmetic instead of base 0x100000000.4530*4531* 9 3 // D2 lhs = 934532* x 7 5 // D2 rhs = 754533* ----------4534* 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 154535* 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 454536* 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 214537* + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 634538* ---------4539* 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 274540* + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 674541* ---------4542* 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 69754543*4544* The reasons for adding the products like this are:4545* 1. It avoids manual carry tracking. Just like how4546* (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.4547* This avoids a lot of complexity.4548*4549* 2. It hints for, and on Clang, compiles to, the powerful UMAAL4550* instruction available in ARM's Digital Signal Processing extension4551* in 32-bit ARMv6 and later, which is shown below:4552*4553* void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)4554* {4555* xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;4556* *RdLo = (xxh_u32)(product & 0xFFFFFFFF);4557* *RdHi = (xxh_u32)(product >> 32);4558* }4559*4560* This instruction was designed for efficient long multiplication, and4561* allows this to be calculated in only 4 instructions at speeds4562* comparable to some 64-bit ALUs.4563*4564* 3. It isn't terrible on other platforms. Usually this will be a couple4565* of 32-bit ADD/ADCs.4566*/45674568/* First calculate all of the cross products. 
*/4569xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);4570xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF);4571xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);4572xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32);45734574/* Now add the products together. These will never overflow. */4575xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;4576xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;4577xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);45784579XXH128_hash_t r128;4580r128.low64 = lower;4581r128.high64 = upper;4582return r128;4583#endif4584}45854586/*!4587* @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it.4588*4589* The reason for the separate function is to prevent passing too many structs4590* around by value. This will hopefully inline the multiply, but we don't force it.4591*4592* @param lhs , rhs The 64-bit integers to multiply4593* @return The low 64 bits of the product XOR'd by the high 64 bits.4594* @see XXH_mult64to128()4595*/4596static xxh_u644597XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)4598{4599XXH128_hash_t product = XXH_mult64to128(lhs, rhs);4600return product.low64 ^ product.high64;4601}46024603/*! Seems to produce slightly better code on GCC for some reason. 
*/4604XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)4605{4606XXH_ASSERT(0 <= shift && shift < 64);4607return v64 ^ (v64 >> shift);4608}46094610/*4611* This is a fast avalanche stage,4612* suitable when input bits are already partially mixed4613*/4614static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)4615{4616h64 = XXH_xorshift64(h64, 37);4617h64 *= PRIME_MX1;4618h64 = XXH_xorshift64(h64, 32);4619return h64;4620}46214622/*4623* This is a stronger avalanche,4624* inspired by Pelle Evensen's rrmxmx4625* preferable when input has not been previously mixed4626*/4627static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)4628{4629/* this mix is inspired by Pelle Evensen's rrmxmx */4630h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);4631h64 *= PRIME_MX2;4632h64 ^= (h64 >> 35) + len ;4633h64 *= PRIME_MX2;4634return XXH_xorshift64(h64, 28);4635}463646374638/* ==========================================4639* Short keys4640* ==========================================4641* One of the shortcomings of XXH32 and XXH64 was that their performance was4642* sub-optimal on short lengths. It used an iterative algorithm which strongly4643* favored lengths that were a multiple of 4 or 8.4644*4645* Instead of iterating over individual inputs, we use a set of single shot4646* functions which piece together a range of lengths and operate in constant time.4647*4648* Additionally, the number of multiplies has been significantly reduced. 
This4649* reduces latency, especially when emulating 64-bit multiplies on 32-bit.4650*4651* Depending on the platform, this may or may not be faster than XXH32, but it4652* is almost guaranteed to be faster than XXH64.4653*/46544655/*4656* At very short lengths, there isn't enough input to fully hide secrets, or use4657* the entire secret.4658*4659* There is also only a limited amount of mixing we can do before significantly4660* impacting performance.4661*4662* Therefore, we use different sections of the secret and always mix two secret4663* samples with an XOR. This should have no effect on performance on the4664* seedless or withSeed variants because everything _should_ be constant folded4665* by modern compilers.4666*4667* The XOR mixing hides individual parts of the secret and increases entropy.4668*4669* This adds an extra layer of strength for custom secrets.4670*/4671XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t4672XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)4673{4674XXH_ASSERT(input != NULL);4675XXH_ASSERT(1 <= len && len <= 3);4676XXH_ASSERT(secret != NULL);4677/*4678* len = 1: combined = { input[0], 0x01, input[0], input[0] }4679* len = 2: combined = { input[1], 0x02, input[0], input[1] }4680* len = 3: combined = { input[2], 0x03, input[0], input[1] }4681*/4682{ xxh_u8 const c1 = input[0];4683xxh_u8 const c2 = input[len >> 1];4684xxh_u8 const c3 = input[len - 1];4685xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24)4686| ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);4687xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;4688xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;4689return XXH64_avalanche(keyed);4690}4691}46924693XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t4694XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)4695{4696XXH_ASSERT(input != NULL);4697XXH_ASSERT(secret != NULL);4698XXH_ASSERT(4 <= len && len <= 8);4699seed ^= 
(xxh_u64)XXH_swap32((xxh_u32)seed) << 32;4700{ xxh_u32 const input1 = XXH_readLE32(input);4701xxh_u32 const input2 = XXH_readLE32(input + len - 4);4702xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;4703xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);4704xxh_u64 const keyed = input64 ^ bitflip;4705return XXH3_rrmxmx(keyed, len);4706}4707}47084709XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t4710XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)4711{4712XXH_ASSERT(input != NULL);4713XXH_ASSERT(secret != NULL);4714XXH_ASSERT(9 <= len && len <= 16);4715{ xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;4716xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;4717xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1;4718xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;4719xxh_u64 const acc = len4720+ XXH_swap64(input_lo) + input_hi4721+ XXH3_mul128_fold64(input_lo, input_hi);4722return XXH3_avalanche(acc);4723}4724}47254726XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t4727XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)4728{4729XXH_ASSERT(len <= 16);4730{ if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed);4731if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);4732if (len) return XXH3_len_1to3_64b(input, len, secret, seed);4733return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));4734}4735}47364737/*4738* DISCLAIMER: There are known *seed-dependent* multicollisions here due to4739* multiplication by zero, affecting hashes of lengths 17 to 240.4740*4741* However, they are very unlikely.4742*4743* Keep this in mind when using the unseeded XXH3_64bits() variant: As with all4744* unseeded non-cryptographic hashes, it does not attempt to defend itself4745* against specially crafted inputs, only 
random inputs.4746*4747* Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes4748* cancelling out the secret is taken an arbitrary number of times (addressed4749* in XXH3_accumulate_512), this collision is very unlikely with random inputs4750* and/or proper seeding:4751*4752* This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a4753* function that is only called up to 16 times per hash with up to 240 bytes of4754* input.4755*4756* This is not too bad for a non-cryptographic hash function, especially with4757* only 64 bit outputs.4758*4759* The 128-bit variant (which trades some speed for strength) is NOT affected4760* by this, although it is always a good idea to use a proper seed if you care4761* about strength.4762*/4763XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,4764const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)4765{4766#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \4767&& defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \4768&& !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */4769/*4770* UGLY HACK:4771* GCC for x86 tends to autovectorize the 128-bit multiply, resulting in4772* slower code.4773*4774* By forcing seed64 into a register, we disrupt the cost model and4775* cause it to scalarize. 
See `XXH32_round()`4776*4777* FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,4778* XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on4779* GCC 9.2, despite both emitting scalar code.4780*4781* GCC generates much better scalar code than Clang for the rest of XXH3,4782* which is why finding a more optimal codepath is an interest.4783*/4784XXH_COMPILER_GUARD(seed64);4785#endif4786{ xxh_u64 const input_lo = XXH_readLE64(input);4787xxh_u64 const input_hi = XXH_readLE64(input+8);4788return XXH3_mul128_fold64(4789input_lo ^ (XXH_readLE64(secret) + seed64),4790input_hi ^ (XXH_readLE64(secret+8) - seed64)4791);4792}4793}47944795/* For mid range keys, XXH3 uses a Mum-hash variant. */4796XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t4797XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,4798const xxh_u8* XXH_RESTRICT secret, size_t secretSize,4799XXH64_hash_t seed)4800{4801XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;4802XXH_ASSERT(16 < len && len <= 128);48034804{ xxh_u64 acc = len * XXH_PRIME64_1;4805#if XXH_SIZE_OPT >= 14806/* Smaller and cleaner, but slightly slower. 
*/4807unsigned int i = (unsigned int)(len - 1) / 32;4808do {4809acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);4810acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);4811} while (i-- != 0);4812#else4813if (len > 32) {4814if (len > 64) {4815if (len > 96) {4816acc += XXH3_mix16B(input+48, secret+96, seed);4817acc += XXH3_mix16B(input+len-64, secret+112, seed);4818}4819acc += XXH3_mix16B(input+32, secret+64, seed);4820acc += XXH3_mix16B(input+len-48, secret+80, seed);4821}4822acc += XXH3_mix16B(input+16, secret+32, seed);4823acc += XXH3_mix16B(input+len-32, secret+48, seed);4824}4825acc += XXH3_mix16B(input+0, secret+0, seed);4826acc += XXH3_mix16B(input+len-16, secret+16, seed);4827#endif4828return XXH3_avalanche(acc);4829}4830}48314832XXH_NO_INLINE XXH_PUREF XXH64_hash_t4833XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,4834const xxh_u8* XXH_RESTRICT secret, size_t secretSize,4835XXH64_hash_t seed)4836{4837XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;4838XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);48394840#define XXH3_MIDSIZE_STARTOFFSET 34841#define XXH3_MIDSIZE_LASTOFFSET 1748424843{ xxh_u64 acc = len * XXH_PRIME64_1;4844xxh_u64 acc_end;4845unsigned int const nbRounds = (unsigned int)len / 16;4846unsigned int i;4847XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);4848for (i=0; i<8; i++) {4849acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);4850}4851/* last bytes */4852acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);4853XXH_ASSERT(nbRounds >= 8);4854acc = XXH3_avalanche(acc);4855#if defined(__clang__) /* Clang */ \4856&& (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \4857&& !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */4858/*4859* UGLY HACK:4860* Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.4861* In everywhere else, it uses scalar code.4862*4863* For 64->128-bit multiplies, even if the NEON was 100% 
optimal, it4864* would still be slower than UMAAL (see XXH_mult64to128).4865*4866* Unfortunately, Clang doesn't handle the long multiplies properly and4867* converts them to the nonexistent "vmulq_u64" intrinsic, which is then4868* scalarized into an ugly mess of VMOV.32 instructions.4869*4870* This mess is difficult to avoid without turning autovectorization4871* off completely, but they are usually relatively minor and/or not4872* worth it to fix.4873*4874* This loop is the easiest to fix, as unlike XXH32, this pragma4875* _actually works_ because it is a loop vectorization instead of an4876* SLP vectorization.4877*/4878#pragma clang loop vectorize(disable)4879#endif4880for (i=8 ; i < nbRounds; i++) {4881/*4882* Prevents clang for unrolling the acc loop and interleaving with this one.4883*/4884XXH_COMPILER_GUARD(acc);4885acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);4886}4887return XXH3_avalanche(acc + acc_end);4888}4889}489048914892/* ======= Long Keys ======= */48934894#define XXH_STRIPE_LEN 644895#define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */4896#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))48974898#ifdef XXH_OLD_NAMES4899# define STRIPE_LEN XXH_STRIPE_LEN4900# define ACC_NB XXH_ACC_NB4901#endif49024903#ifndef XXH_PREFETCH_DIST4904# ifdef __clang__4905# define XXH_PREFETCH_DIST 3204906# else4907# if (XXH_VECTOR == XXH_AVX512)4908# define XXH_PREFETCH_DIST 5124909# else4910# define XXH_PREFETCH_DIST 3844911# endif4912# endif /* __clang__ */4913#endif /* XXH_PREFETCH_DIST */49144915/*4916* These macros are to generate an XXH3_accumulate() function.4917* The two arguments select the name suffix and target attribute.4918*4919* The name of this symbol is XXH3_accumulate_<name>() and it calls4920* XXH3_accumulate_512_<name>().4921*4922* It may be useful to hand implement this function if the compiler fails to4923* optimize the inline function.4924*/4925#define 
XXH3_ACCUMULATE_TEMPLATE(name) \4926void \4927XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \4928const xxh_u8* XXH_RESTRICT input, \4929const xxh_u8* XXH_RESTRICT secret, \4930size_t nbStripes) \4931{ \4932size_t n; \4933for (n = 0; n < nbStripes; n++ ) { \4934const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \4935XXH_PREFETCH(in + XXH_PREFETCH_DIST); \4936XXH3_accumulate_512_##name( \4937acc, \4938in, \4939secret + n*XXH_SECRET_CONSUME_RATE); \4940} \4941}494249434944XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)4945{4946if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);4947XXH_memcpy(dst, &v64, sizeof(v64));4948}49494950/* Several intrinsic functions below are supposed to accept __int64 as argument,4951* as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .4952* However, several environments do not define __int64 type,4953* requiring a workaround.4954*/4955#if !defined (__VMS) \4956&& (defined (__cplusplus) \4957|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )4958typedef int64_t xxh_i64;4959#else4960/* the following type must have a width of 64-bit */4961typedef long long xxh_i64;4962#endif496349644965/*4966* XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.4967*4968* It is a hardened version of UMAC, based off of FARSH's implementation.4969*4970* This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD4971* implementations, and it is ridiculously fast.4972*4973* We harden it by mixing the original input to the accumulators as well as the product.4974*4975* This means that in the (relatively likely) case of a multiply by zero, the4976* original input is preserved.4977*4978* On 128-bit inputs, we swap 64-bit pairs when we add the input to improve4979* cross-pollination, as otherwise the upper and lower halves would be4980* essentially independent.4981*4982* This doesn't matter on 64-bit hashes since they all get merged together in4983* the 
end, so we skip the extra step.4984*4985* Both XXH3_64bits and XXH3_128bits use this subroutine.4986*/49874988#if (XXH_VECTOR == XXH_AVX512) \4989|| (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)49904991#ifndef XXH_TARGET_AVX5124992# define XXH_TARGET_AVX512 /* disable attribute target */4993#endif49944995XXH_FORCE_INLINE XXH_TARGET_AVX512 void4996XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,4997const void* XXH_RESTRICT input,4998const void* XXH_RESTRICT secret)4999{5000__m512i* const xacc = (__m512i *) acc;5001XXH_ASSERT((((size_t)acc) & 63) == 0);5002XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));50035004{5005/* data_vec = input[0]; */5006__m512i const data_vec = _mm512_loadu_si512 (input);5007/* key_vec = secret[0]; */5008__m512i const key_vec = _mm512_loadu_si512 (secret);5009/* data_key = data_vec ^ key_vec; */5010__m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);5011/* data_key_lo = data_key >> 32; */5012__m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);5013/* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */5014__m512i const product = _mm512_mul_epu32 (data_key, data_key_lo);5015/* xacc[0] += swap(data_vec); */5016__m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));5017__m512i const sum = _mm512_add_epi64(*xacc, data_swap);5018/* xacc[0] += product; */5019*xacc = _mm512_add_epi64(product, sum);5020}5021}5022XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)50235024/*5025* XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.5026*5027* Multiplication isn't perfect, as explained by Google in HighwayHash:5028*5029* // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to5030* // varying degrees. 
In descending order of goodness, bytes5031* // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.5032* // As expected, the upper and lower bytes are much worse.5033*5034* Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L2915035*5036* Since our algorithm uses a pseudorandom secret to add some variance into the5037* mix, we don't need to (or want to) mix as often or as much as HighwayHash does.5038*5039* This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid5040* extraction.5041*5042* Both XXH3_64bits and XXH3_128bits use this subroutine.5043*/50445045XXH_FORCE_INLINE XXH_TARGET_AVX512 void5046XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)5047{5048XXH_ASSERT((((size_t)acc) & 63) == 0);5049XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));5050{ __m512i* const xacc = (__m512i*) acc;5051const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);50525053/* xacc[0] ^= (xacc[0] >> 47) */5054__m512i const acc_vec = *xacc;5055__m512i const shifted = _mm512_srli_epi64 (acc_vec, 47);5056/* xacc[0] ^= secret; */5057__m512i const key_vec = _mm512_loadu_si512 (secret);5058__m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);50595060/* xacc[0] *= XXH_PRIME32_1; */5061__m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);5062__m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32);5063__m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32);5064*xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));5065}5066}50675068XXH_FORCE_INLINE XXH_TARGET_AVX512 void5069XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)5070{5071XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);5072XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);5073XXH_ASSERT(((size_t)customSecret & 63) == 0);5074(void)(&XXH_writeLE64);5075{ int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 
sizeof(__m512i);5076__m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);5077__m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);50785079const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret);5080__m512i* const dest = ( __m512i*) customSecret;5081int i;5082XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */5083XXH_ASSERT(((size_t)dest & 63) == 0);5084for (i=0; i < nbRounds; ++i) {5085dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);5086} }5087}50885089#endif50905091#if (XXH_VECTOR == XXH_AVX2) \5092|| (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)50935094#ifndef XXH_TARGET_AVX25095# define XXH_TARGET_AVX2 /* disable attribute target */5096#endif50975098XXH_FORCE_INLINE XXH_TARGET_AVX2 void5099XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,5100const void* XXH_RESTRICT input,5101const void* XXH_RESTRICT secret)5102{5103XXH_ASSERT((((size_t)acc) & 31) == 0);5104{ __m256i* const xacc = (__m256i *) acc;5105/* Unaligned. This is mainly for pointer arithmetic, and because5106* _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */5107const __m256i* const xinput = (const __m256i *) input;5108/* Unaligned. This is mainly for pointer arithmetic, and because5109* _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/5110const __m256i* const xsecret = (const __m256i *) secret;51115112size_t i;5113for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {5114/* data_vec = xinput[i]; */5115__m256i const data_vec = _mm256_loadu_si256 (xinput+i);5116/* key_vec = xsecret[i]; */5117__m256i const key_vec = _mm256_loadu_si256 (xsecret+i);5118/* data_key = data_vec ^ key_vec; */5119__m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);5120/* data_key_lo = data_key >> 32; */5121__m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);5122/* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */5123__m256i const product = _mm256_mul_epu32 (data_key, data_key_lo);5124/* xacc[i] += swap(data_vec); */5125__m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));5126__m256i const sum = _mm256_add_epi64(xacc[i], data_swap);5127/* xacc[i] += product; */5128xacc[i] = _mm256_add_epi64(product, sum);5129} }5130}5131XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)51325133XXH_FORCE_INLINE XXH_TARGET_AVX2 void5134XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)5135{5136XXH_ASSERT((((size_t)acc) & 31) == 0);5137{ __m256i* const xacc = (__m256i*) acc;5138/* Unaligned. This is mainly for pointer arithmetic, and because5139* _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/5140const __m256i* const xsecret = (const __m256i *) secret;5141const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);51425143size_t i;5144for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {5145/* xacc[i] ^= (xacc[i] >> 47) */5146__m256i const acc_vec = xacc[i];5147__m256i const shifted = _mm256_srli_epi64 (acc_vec, 47);5148__m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted);5149/* xacc[i] ^= xsecret; */5150__m256i const key_vec = _mm256_loadu_si256 (xsecret+i);5151__m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);51525153/* xacc[i] *= XXH_PRIME32_1; */5154__m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);5155__m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32);5156__m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32);5157xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));5158}5159}5160}51615162XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)5163{5164XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);5165XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);5166XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);5167(void)(&XXH_writeLE64);5168XXH_PREFETCH(customSecret);5169{ __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);51705171const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret);5172__m256i* dest = ( __m256i*) customSecret;51735174# if defined(__GNUC__) || defined(__clang__)5175/*5176* On GCC & Clang, marking 'dest' as modified will cause the compiler:5177* - do not extract the secret from sse registers in the internal loop5178* - use less common registers, and avoid pushing these reg into stack5179*/5180XXH_COMPILER_GUARD(dest);5181# endif5182XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */5183XXH_ASSERT(((size_t)dest & 31) == 0);51845185/* GCC -O2 need unroll loop manually */5186dest[0] = 
_mm256_add_epi64(_mm256_load_si256(src+0), seed);5187dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);5188dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);5189dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);5190dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);5191dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);5192}5193}51945195#endif51965197/* x86dispatch always generates SSE2 */5198#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)51995200#ifndef XXH_TARGET_SSE25201# define XXH_TARGET_SSE2 /* disable attribute target */5202#endif52035204XXH_FORCE_INLINE XXH_TARGET_SSE2 void5205XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,5206const void* XXH_RESTRICT input,5207const void* XXH_RESTRICT secret)5208{5209/* SSE2 is just a half-scale version of the AVX2 version. */5210XXH_ASSERT((((size_t)acc) & 15) == 0);5211{ __m128i* const xacc = (__m128i *) acc;5212/* Unaligned. This is mainly for pointer arithmetic, and because5213* _mm_loadu_si128 requires a const __m128i * pointer for some reason. */5214const __m128i* const xinput = (const __m128i *) input;5215/* Unaligned. This is mainly for pointer arithmetic, and because5216* _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
*/5217const __m128i* const xsecret = (const __m128i *) secret;52185219size_t i;5220for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {5221/* data_vec = xinput[i]; */5222__m128i const data_vec = _mm_loadu_si128 (xinput+i);5223/* key_vec = xsecret[i]; */5224__m128i const key_vec = _mm_loadu_si128 (xsecret+i);5225/* data_key = data_vec ^ key_vec; */5226__m128i const data_key = _mm_xor_si128 (data_vec, key_vec);5227/* data_key_lo = data_key >> 32; */5228__m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));5229/* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */5230__m128i const product = _mm_mul_epu32 (data_key, data_key_lo);5231/* xacc[i] += swap(data_vec); */5232__m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));5233__m128i const sum = _mm_add_epi64(xacc[i], data_swap);5234/* xacc[i] += product; */5235xacc[i] = _mm_add_epi64(product, sum);5236} }5237}5238XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)52395240XXH_FORCE_INLINE XXH_TARGET_SSE2 void5241XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)5242{5243XXH_ASSERT((((size_t)acc) & 15) == 0);5244{ __m128i* const xacc = (__m128i*) acc;5245/* Unaligned. This is mainly for pointer arithmetic, and because5246* _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
*/5247const __m128i* const xsecret = (const __m128i *) secret;5248const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);52495250size_t i;5251for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {5252/* xacc[i] ^= (xacc[i] >> 47) */5253__m128i const acc_vec = xacc[i];5254__m128i const shifted = _mm_srli_epi64 (acc_vec, 47);5255__m128i const data_vec = _mm_xor_si128 (acc_vec, shifted);5256/* xacc[i] ^= xsecret[i]; */5257__m128i const key_vec = _mm_loadu_si128 (xsecret+i);5258__m128i const data_key = _mm_xor_si128 (data_vec, key_vec);52595260/* xacc[i] *= XXH_PRIME32_1; */5261__m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));5262__m128i const prod_lo = _mm_mul_epu32 (data_key, prime32);5263__m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32);5264xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));5265}5266}5267}52685269XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)5270{5271XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);5272(void)(&XXH_writeLE64);5273{ int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);52745275# if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER <= 19005276/* MSVC 32bit mode does not support _mm_set_epi64x before 20155277* and some specific variants of 2015 may also lack it */5278/* Cast to unsigned 64-bit first to avoid signed arithmetic issues */5279xxh_u64 const seed64_unsigned = (xxh_u64)seed64;5280xxh_u64 const neg_seed64 = (xxh_u64)(0ULL - seed64_unsigned);5281__m128i const seed = _mm_set_epi32(5282(int)(neg_seed64 >> 32), /* high 32 bits of negated seed */5283(int)(neg_seed64), /* low 32 bits of negated seed */5284(int)(seed64_unsigned >> 32), /* high 32 bits of original seed */5285(int)(seed64_unsigned) /* low 32 bits of original seed */5286);5287# else5288__m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);5289# endif5290int i;52915292const void* const src16 = 
XXH3_kSecret;5293__m128i* dst16 = (__m128i*) customSecret;5294# if defined(__GNUC__) || defined(__clang__)5295/*5296* On GCC & Clang, marking 'dest' as modified will cause the compiler:5297* - do not extract the secret from sse registers in the internal loop5298* - use less common registers, and avoid pushing these reg into stack5299*/5300XXH_COMPILER_GUARD(dst16);5301# endif5302XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */5303XXH_ASSERT(((size_t)dst16 & 15) == 0);53045305for (i=0; i < nbRounds; ++i) {5306dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);5307} }5308}53095310#endif53115312#if (XXH_VECTOR == XXH_NEON)53135314/* forward declarations for the scalar routines */5315XXH_FORCE_INLINE void5316XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,5317void const* XXH_RESTRICT secret, size_t lane);53185319XXH_FORCE_INLINE void5320XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,5321void const* XXH_RESTRICT secret, size_t lane);53225323/*!5324* @internal5325* @brief The bulk processing loop for NEON and WASM SIMD128.5326*5327* The NEON code path is actually partially scalar when running on AArch64. 
This5328* is to optimize the pipelining and can have up to 15% speedup depending on the5329* CPU, and it also mitigates some GCC codegen issues.5330*5331* @see XXH3_NEON_LANES for configuring this and details about this optimization.5332*5333* NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit5334* integers instead of the other platforms which mask full 64-bit vectors,5335* so the setup is more complicated than just shifting right.5336*5337* Additionally, there is an optimization for 4 lanes at once noted below.5338*5339* Since, as stated, the most optimal amount of lanes for Cortexes is 6,5340* there needs to be *three* versions of the accumulate operation used5341* for the remaining 2 lanes.5342*5343* WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap5344* nearly perfectly.5345*/53465347XXH_FORCE_INLINE void5348XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,5349const void* XXH_RESTRICT input,5350const void* XXH_RESTRICT secret)5351{5352XXH_ASSERT((((size_t)acc) & 15) == 0);5353XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);5354{ /* GCC for darwin arm64 does not like aliasing here */5355xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;5356/* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. 
*/5357uint8_t const* xinput = (const uint8_t *) input;5358uint8_t const* xsecret = (const uint8_t *) secret;53595360size_t i;5361#ifdef __wasm_simd128__5362/*5363* On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret5364* is constant propagated, which results in it converting it to this5365* inside the loop:5366*5367* a = v128.load(XXH3_kSecret + 0 + $secret_offset, offset = 0)5368* b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0)5369* ...5370*5371* This requires a full 32-bit address immediate (and therefore a 6 byte5372* instruction) as well as an add for each offset.5373*5374* Putting an asm guard prevents it from folding (at the cost of losing5375* the alignment hint), and uses the free offset in `v128.load` instead5376* of adding secret_offset each time which overall reduces code size by5377* about a kilobyte and improves performance.5378*/5379XXH_COMPILER_GUARD(xsecret);5380#endif5381/* Scalar lanes use the normal scalarRound routine */5382for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {5383XXH3_scalarRound(acc, input, secret, i);5384}5385i = 0;5386/* 4 NEON lanes at a time. 
*/5387for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {5388/* data_vec = xinput[i]; */5389uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16));5390uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16));5391/* key_vec = xsecret[i]; */5392uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16));5393uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16));5394/* data_swap = swap(data_vec) */5395uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);5396uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);5397/* data_key = data_vec ^ key_vec; */5398uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);5399uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);54005401/*5402* If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a5403* de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to5404* get one vector with the low 32 bits of each lane, and one vector5405* with the high 32 bits of each lane.5406*5407* The intrinsic returns a double vector because the original ARMv7-a5408* instruction modified both arguments in place. AArch64 and SIMD128 emit5409* two instructions from this intrinsic.5410*5411* [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]5412* [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]5413*/5414uint32x4x2_t unzipped = vuzpq_u32(5415vreinterpretq_u32_u64(data_key_1),5416vreinterpretq_u32_u64(data_key_2)5417);5418/* data_key_lo = data_key & 0xFFFFFFFF */5419uint32x4_t data_key_lo = unzipped.val[0];5420/* data_key_hi = data_key >> 32 */5421uint32x4_t data_key_hi = unzipped.val[1];5422/*5423* Then, we can split the vectors horizontally and multiply which, as for most5424* widening intrinsics, have a variant that works on both high half vectors5425* for free on AArch64. 
A similar instruction is available on SIMD128.5426*5427* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi5428*/5429uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);5430uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);5431/*5432* Clang reorders5433* a += b * c; // umlal swap.2d, dkl.2s, dkh.2s5434* c += a; // add acc.2d, acc.2d, swap.2d5435* to5436* c += a; // add acc.2d, acc.2d, swap.2d5437* c += b * c; // umlal acc.2d, dkl.2s, dkh.2s5438*5439* While it would make sense in theory since the addition is faster,5440* for reasons likely related to umlal being limited to certain NEON5441* pipelines, this is worse. A compiler guard fixes this.5442*/5443XXH_COMPILER_GUARD_CLANG_NEON(sum_1);5444XXH_COMPILER_GUARD_CLANG_NEON(sum_2);5445/* xacc[i] = acc_vec + sum; */5446xacc[i] = vaddq_u64(xacc[i], sum_1);5447xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);5448}5449/* Operate on the remaining NEON lanes 2 at a time. */5450for (; i < XXH3_NEON_LANES / 2; i++) {5451/* data_vec = xinput[i]; */5452uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));5453/* key_vec = xsecret[i]; */5454uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));5455/* acc_vec_2 = swap(data_vec) */5456uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);5457/* data_key = data_vec ^ key_vec; */5458uint64x2_t data_key = veorq_u64(data_vec, key_vec);5459/* For two lanes, just use VMOVN and VSHRN. 
*/5460/* data_key_lo = data_key & 0xFFFFFFFF; */5461uint32x2_t data_key_lo = vmovn_u64(data_key);5462/* data_key_hi = data_key >> 32; */5463uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);5464/* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */5465uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);5466/* Same Clang workaround as before */5467XXH_COMPILER_GUARD_CLANG_NEON(sum);5468/* xacc[i] = acc_vec + sum; */5469xacc[i] = vaddq_u64 (xacc[i], sum);5470}5471}5472}5473XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)54745475XXH_FORCE_INLINE void5476XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)5477{5478XXH_ASSERT((((size_t)acc) & 15) == 0);54795480{ xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc;5481uint8_t const* xsecret = (uint8_t const*) secret;54825483size_t i;5484/* WASM uses operator overloads and doesn't need these. */5485#ifndef __wasm_simd128__5486/* { prime32_1, prime32_1 } */5487uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1);5488/* { 0, prime32_1, 0, prime32_1 } */5489uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32));5490#endif54915492/* AArch64 uses both scalar and neon at the same time */5493for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {5494XXH3_scalarScrambleRound(acc, secret, i);5495}5496for (i=0; i < XXH3_NEON_LANES / 2; i++) {5497/* xacc[i] ^= (xacc[i] >> 47); */5498uint64x2_t acc_vec = xacc[i];5499uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);5500uint64x2_t data_vec = veorq_u64(acc_vec, shifted);55015502/* xacc[i] ^= xsecret[i]; */5503uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));5504uint64x2_t data_key = veorq_u64(data_vec, key_vec);5505/* xacc[i] *= XXH_PRIME32_1 */5506#ifdef __wasm_simd128__5507/* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */5508xacc[i] = data_key * XXH_PRIME32_1;5509#else5510/*5511* Expanded version with portable NEON intrinsics5512*5513* lo(x) * lo(y) + (hi(x) * 
lo(y) << 32)5514*5515* prod_hi = hi(data_key) * lo(prime) << 325516*5517* Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector5518* as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits5519* and avoid the shift.5520*/5521uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi);5522/* Extract low bits for vmlal_u32 */5523uint32x2_t data_key_lo = vmovn_u64(data_key);5524/* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */5525xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);5526#endif5527}5528}5529}5530#endif55315532#if (XXH_VECTOR == XXH_VSX)55335534XXH_FORCE_INLINE void5535XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,5536const void* XXH_RESTRICT input,5537const void* XXH_RESTRICT secret)5538{5539/* presumed aligned */5540xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;5541xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */5542xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */5543xxh_u64x2 const v32 = { 32, 32 };5544size_t i;5545for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {5546/* data_vec = xinput[i]; */5547xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);5548/* key_vec = xsecret[i]; */5549xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);5550xxh_u64x2 const data_key = data_vec ^ key_vec;5551/* shuffled = (data_key << 32) | (data_key >> 32); */5552xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);5553/* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */5554xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);5555/* acc_vec = xacc[i]; */5556xxh_u64x2 acc_vec = xacc[i];5557acc_vec += product;55585559/* swap high and low halves */5560#ifdef __s390x__5561acc_vec += vec_permi(data_vec, data_vec, 2);5562#else5563acc_vec += vec_xxpermdi(data_vec, data_vec, 2);5564#endif5565xacc[i] = 
acc_vec;5566}5567}5568XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)55695570XXH_FORCE_INLINE void5571XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)5572{5573XXH_ASSERT((((size_t)acc) & 15) == 0);55745575{ xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;5576const xxh_u8* const xsecret = (const xxh_u8*) secret;5577/* constants */5578xxh_u64x2 const v32 = { 32, 32 };5579xxh_u64x2 const v47 = { 47, 47 };5580xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };5581size_t i;5582for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {5583/* xacc[i] ^= (xacc[i] >> 47); */5584xxh_u64x2 const acc_vec = xacc[i];5585xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);55865587/* xacc[i] ^= xsecret[i]; */5588xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);5589xxh_u64x2 const data_key = data_vec ^ key_vec;55905591/* xacc[i] *= XXH_PRIME32_1 */5592/* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */5593xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);5594/* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */5595xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime);5596xacc[i] = prod_odd + (prod_even << v32);5597} }5598}55995600#endif56015602#if (XXH_VECTOR == XXH_SVE)56035604XXH_FORCE_INLINE void5605XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,5606const void* XXH_RESTRICT input,5607const void* XXH_RESTRICT secret)5608{5609uint64_t *xacc = (uint64_t *)acc;5610const uint64_t *xinput = (const uint64_t *)(const void *)input;5611const uint64_t *xsecret = (const uint64_t *)(const void *)secret;5612svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);5613uint64_t element_count = svcntd();5614if (element_count >= 8) {5615svbool_t mask = svptrue_pat_b64(SV_VL8);5616svuint64_t vacc = svld1_u64(mask, xacc);5617ACCRND(vacc, 0);5618svst1_u64(mask, xacc, vacc);5619} else if (element_count == 2) { /* sve128 
*/5620svbool_t mask = svptrue_pat_b64(SV_VL2);5621svuint64_t acc0 = svld1_u64(mask, xacc + 0);5622svuint64_t acc1 = svld1_u64(mask, xacc + 2);5623svuint64_t acc2 = svld1_u64(mask, xacc + 4);5624svuint64_t acc3 = svld1_u64(mask, xacc + 6);5625ACCRND(acc0, 0);5626ACCRND(acc1, 2);5627ACCRND(acc2, 4);5628ACCRND(acc3, 6);5629svst1_u64(mask, xacc + 0, acc0);5630svst1_u64(mask, xacc + 2, acc1);5631svst1_u64(mask, xacc + 4, acc2);5632svst1_u64(mask, xacc + 6, acc3);5633} else {5634svbool_t mask = svptrue_pat_b64(SV_VL4);5635svuint64_t acc0 = svld1_u64(mask, xacc + 0);5636svuint64_t acc1 = svld1_u64(mask, xacc + 4);5637ACCRND(acc0, 0);5638ACCRND(acc1, 4);5639svst1_u64(mask, xacc + 0, acc0);5640svst1_u64(mask, xacc + 4, acc1);5641}5642}56435644XXH_FORCE_INLINE void5645XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,5646const xxh_u8* XXH_RESTRICT input,5647const xxh_u8* XXH_RESTRICT secret,5648size_t nbStripes)5649{5650if (nbStripes != 0) {5651uint64_t *xacc = (uint64_t *)acc;5652const uint64_t *xinput = (const uint64_t *)(const void *)input;5653const uint64_t *xsecret = (const uint64_t *)(const void *)secret;5654svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);5655uint64_t element_count = svcntd();5656if (element_count >= 8) {5657svbool_t mask = svptrue_pat_b64(SV_VL8);5658svuint64_t vacc = svld1_u64(mask, xacc + 0);5659do {5660/* svprfd(svbool_t, void *, enum svfprop); */5661svprfd(mask, xinput + 128, SV_PLDL1STRM);5662ACCRND(vacc, 0);5663xinput += 8;5664xsecret += 1;5665nbStripes--;5666} while (nbStripes != 0);56675668svst1_u64(mask, xacc + 0, vacc);5669} else if (element_count == 2) { /* sve128 */5670svbool_t mask = svptrue_pat_b64(SV_VL2);5671svuint64_t acc0 = svld1_u64(mask, xacc + 0);5672svuint64_t acc1 = svld1_u64(mask, xacc + 2);5673svuint64_t acc2 = svld1_u64(mask, xacc + 4);5674svuint64_t acc3 = svld1_u64(mask, xacc + 6);5675do {5676svprfd(mask, xinput + 128, SV_PLDL1STRM);5677ACCRND(acc0, 0);5678ACCRND(acc1, 2);5679ACCRND(acc2, 
4);5680ACCRND(acc3, 6);5681xinput += 8;5682xsecret += 1;5683nbStripes--;5684} while (nbStripes != 0);56855686svst1_u64(mask, xacc + 0, acc0);5687svst1_u64(mask, xacc + 2, acc1);5688svst1_u64(mask, xacc + 4, acc2);5689svst1_u64(mask, xacc + 6, acc3);5690} else {5691svbool_t mask = svptrue_pat_b64(SV_VL4);5692svuint64_t acc0 = svld1_u64(mask, xacc + 0);5693svuint64_t acc1 = svld1_u64(mask, xacc + 4);5694do {5695svprfd(mask, xinput + 128, SV_PLDL1STRM);5696ACCRND(acc0, 0);5697ACCRND(acc1, 4);5698xinput += 8;5699xsecret += 1;5700nbStripes--;5701} while (nbStripes != 0);57025703svst1_u64(mask, xacc + 0, acc0);5704svst1_u64(mask, xacc + 4, acc1);5705}5706}5707}57085709#endif57105711#if (XXH_VECTOR == XXH_LSX)5712#define _LSX_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))57135714XXH_FORCE_INLINE void5715XXH3_accumulate_512_lsx( void* XXH_RESTRICT acc,5716const void* XXH_RESTRICT input,5717const void* XXH_RESTRICT secret)5718{5719XXH_ASSERT((((size_t)acc) & 15) == 0);5720{5721__m128i* const xacc = (__m128i *) acc;5722const __m128i* const xinput = (const __m128i *) input;5723const __m128i* const xsecret = (const __m128i *) secret;5724size_t i;57255726for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) {5727/* data_vec = xinput[i]; */5728__m128i const data_vec = __lsx_vld(xinput + i, 0);5729/* key_vec = xsecret[i]; */5730__m128i const key_vec = __lsx_vld(xsecret + i, 0);5731/* data_key = data_vec ^ key_vec; */5732__m128i const data_key = __lsx_vxor_v(data_vec, key_vec);5733/* data_key_lo = data_key >> 32; */5734__m128i const data_key_lo = __lsx_vsrli_d(data_key, 32);5735// __m128i const data_key_lo = __lsx_vsrli_d(data_key, 32);5736/* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */5737__m128i const product = __lsx_vmulwev_d_wu(data_key, data_key_lo);5738/* xacc[i] += swap(data_vec); */5739__m128i const data_swap = __lsx_vshuf4i_w(data_vec, _LSX_SHUFFLE(1, 0, 3, 2));5740__m128i const sum = __lsx_vadd_d(xacc[i], data_swap);5741/* xacc[i] 
+= product; */5742xacc[i] = __lsx_vadd_d(product, sum);5743}5744}5745}5746XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(lsx)57475748XXH_FORCE_INLINE void5749XXH3_scrambleAcc_lsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)5750{5751XXH_ASSERT((((size_t)acc) & 15) == 0);5752{5753__m128i* const xacc = (__m128i*) acc;5754const __m128i* const xsecret = (const __m128i *) secret;5755const __m128i prime32 = __lsx_vreplgr2vr_d(XXH_PRIME32_1);5756size_t i;57575758for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) {5759/* xacc[i] ^= (xacc[i] >> 47) */5760__m128i const acc_vec = xacc[i];5761__m128i const shifted = __lsx_vsrli_d(acc_vec, 47);5762__m128i const data_vec = __lsx_vxor_v(acc_vec, shifted);5763/* xacc[i] ^= xsecret[i]; */5764__m128i const key_vec = __lsx_vld(xsecret + i, 0);5765__m128i const data_key = __lsx_vxor_v(data_vec, key_vec);57665767/* xacc[i] *= XXH_PRIME32_1; */5768xacc[i] = __lsx_vmul_d(data_key, prime32);5769}5770}5771}57725773#endif57745775#if (XXH_VECTOR == XXH_LASX)5776#define _LASX_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))57775778XXH_FORCE_INLINE void5779XXH3_accumulate_512_lasx( void* XXH_RESTRICT acc,5780const void* XXH_RESTRICT input,5781const void* XXH_RESTRICT secret)5782{5783XXH_ASSERT((((size_t)acc) & 31) == 0);5784{5785size_t i;5786__m256i* const xacc = (__m256i *) acc;5787const __m256i* const xinput = (const __m256i *) input;5788const __m256i* const xsecret = (const __m256i *) secret;57895790for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m256i); i++) {5791/* data_vec = xinput[i]; */5792__m256i const data_vec = __lasx_xvld(xinput + i, 0);5793/* key_vec = xsecret[i]; */5794__m256i const key_vec = __lasx_xvld(xsecret + i, 0);5795/* data_key = data_vec ^ key_vec; */5796__m256i const data_key = __lasx_xvxor_v(data_vec, key_vec);5797/* data_key_lo = data_key >> 32; */5798__m256i const data_key_lo = __lasx_xvsrli_d(data_key, 32);5799// __m256i const data_key_lo = __lasx_xvsrli_d(data_key, 32);5800/* product = (data_key 
& 0xffffffff) * (data_key_lo & 0xffffffff); */5801__m256i const product = __lasx_xvmulwev_d_wu(data_key, data_key_lo);5802/* xacc[i] += swap(data_vec); */5803__m256i const data_swap = __lasx_xvshuf4i_w(data_vec, _LASX_SHUFFLE(1, 0, 3, 2));5804__m256i const sum = __lasx_xvadd_d(xacc[i], data_swap);5805/* xacc[i] += product; */5806xacc[i] = __lasx_xvadd_d(product, sum);5807}5808}5809}5810XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(lasx)58115812XXH_FORCE_INLINE void5813XXH3_scrambleAcc_lasx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)5814{5815XXH_ASSERT((((size_t)acc) & 31) == 0);5816{5817__m256i* const xacc = (__m256i*) acc;5818const __m256i* const xsecret = (const __m256i *) secret;5819const __m256i prime32 = __lasx_xvreplgr2vr_d(XXH_PRIME32_1);5820size_t i;58215822for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m256i); i++) {5823/* xacc[i] ^= (xacc[i] >> 47) */5824__m256i const acc_vec = xacc[i];5825__m256i const shifted = __lasx_xvsrli_d(acc_vec, 47);5826__m256i const data_vec = __lasx_xvxor_v(acc_vec, shifted);5827/* xacc[i] ^= xsecret[i]; */5828__m256i const key_vec = __lasx_xvld(xsecret + i, 0);5829__m256i const data_key = __lasx_xvxor_v(data_vec, key_vec);58305831/* xacc[i] *= XXH_PRIME32_1; */5832xacc[i] = __lasx_xvmul_d(data_key, prime32);5833}5834}5835}58365837#endif58385839#if (XXH_VECTOR == XXH_RVV)5840#define XXH_CONCAT2(X, Y) X ## Y5841#define XXH_CONCAT(X, Y) XXH_CONCAT2(X, Y)5842#if ((defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 13) || \5843(defined(__clang__) && __clang_major__ < 16))5844#define XXH_RVOP(op) op5845#define XXH_RVCAST(op) XXH_CONCAT(vreinterpret_v_, op)5846#else5847#define XXH_RVOP(op) XXH_CONCAT(__riscv_, op)5848#define XXH_RVCAST(op) XXH_CONCAT(__riscv_vreinterpret_v_, op)5849#endif5850XXH_FORCE_INLINE void5851XXH3_accumulate_512_rvv( void* XXH_RESTRICT acc,5852const void* XXH_RESTRICT input,5853const void* XXH_RESTRICT secret)5854{5855XXH_ASSERT((((size_t)acc) & 63) == 0);5856{5857// Try to set vector lenght to 512 
bits.5858// If this length is unavailable, then maximum available will be used5859size_t vl = XXH_RVOP(vsetvl_e64m2)(8);58605861uint64_t* xacc = (uint64_t*) acc;5862const uint64_t* xinput = (const uint64_t*) input;5863const uint64_t* xsecret = (const uint64_t*) secret;5864static const uint64_t swap_mask[16] = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};5865vuint64m2_t xswap_mask = XXH_RVOP(vle64_v_u64m2)(swap_mask, vl);58665867size_t i;5868for (i = 0; i < XXH_STRIPE_LEN/8; i += vl) {5869/* data_vec = xinput[i]; */5870vuint64m2_t data_vec = XXH_RVCAST(u8m2_u64m2)(XXH_RVOP(vle8_v_u8m2)((const uint8_t*)(xinput + i), vl * 8));5871/* key_vec = xsecret[i]; */5872vuint64m2_t key_vec = XXH_RVCAST(u8m2_u64m2)(XXH_RVOP(vle8_v_u8m2)((const uint8_t*)(xsecret + i), vl * 8));5873/* acc_vec = xacc[i]; */5874vuint64m2_t acc_vec = XXH_RVOP(vle64_v_u64m2)(xacc + i, vl);5875/* data_key = data_vec ^ key_vec; */5876vuint64m2_t data_key = XXH_RVOP(vxor_vv_u64m2)(data_vec, key_vec, vl);5877/* data_key_hi = data_key >> 32; */5878vuint64m2_t data_key_hi = XXH_RVOP(vsrl_vx_u64m2)(data_key, 32, vl);5879/* data_key_lo = data_key & 0xffffffff; */5880vuint64m2_t data_key_lo = XXH_RVOP(vand_vx_u64m2)(data_key, 0xffffffff, vl);5881/* swap high and low halves */5882vuint64m2_t data_swap = XXH_RVOP(vrgather_vv_u64m2)(data_vec, xswap_mask, vl);5883/* acc_vec += data_key_lo * data_key_hi; */5884acc_vec = XXH_RVOP(vmacc_vv_u64m2)(acc_vec, data_key_lo, data_key_hi, vl);5885/* acc_vec += data_swap; */5886acc_vec = XXH_RVOP(vadd_vv_u64m2)(acc_vec, data_swap, vl);5887/* xacc[i] = acc_vec; */5888XXH_RVOP(vse64_v_u64m2)(xacc + i, acc_vec, vl);5889}5890}5891}58925893XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(rvv)58945895XXH_FORCE_INLINE void5896XXH3_scrambleAcc_rvv(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)5897{5898XXH_ASSERT((((size_t)acc) & 15) == 0);5899{5900size_t count = XXH_STRIPE_LEN/8;5901uint64_t* xacc = (uint64_t*)acc;5902const uint8_t* xsecret = (const uint8_t 
*)secret;5903size_t vl;5904for (; count > 0; count -= vl, xacc += vl, xsecret += vl*8) {5905vl = XXH_RVOP(vsetvl_e64m2)(count);5906{5907/* key_vec = xsecret[i]; */5908vuint64m2_t key_vec = XXH_RVCAST(u8m2_u64m2)(XXH_RVOP(vle8_v_u8m2)(xsecret, vl*8));5909/* acc_vec = xacc[i]; */5910vuint64m2_t acc_vec = XXH_RVOP(vle64_v_u64m2)(xacc, vl);5911/* acc_vec ^= acc_vec >> 47; */5912vuint64m2_t vsrl = XXH_RVOP(vsrl_vx_u64m2)(acc_vec, 47, vl);5913acc_vec = XXH_RVOP(vxor_vv_u64m2)(acc_vec, vsrl, vl);5914/* acc_vec ^= key_vec; */5915acc_vec = XXH_RVOP(vxor_vv_u64m2)(acc_vec, key_vec, vl);5916/* acc_vec *= XXH_PRIME32_1; */5917acc_vec = XXH_RVOP(vmul_vx_u64m2)(acc_vec, XXH_PRIME32_1, vl);5918/* xacc[i] *= acc_vec; */5919XXH_RVOP(vse64_v_u64m2)(xacc, acc_vec, vl);5920}5921}5922}5923}59245925XXH_FORCE_INLINE void5926XXH3_initCustomSecret_rvv(void* XXH_RESTRICT customSecret, xxh_u64 seed64)5927{5928XXH_STATIC_ASSERT(XXH_SEC_ALIGN >= 8);5929XXH_ASSERT(((size_t)customSecret & 7) == 0);5930(void)(&XXH_writeLE64);5931{5932size_t count = XXH_SECRET_DEFAULT_SIZE/8;5933size_t vl;5934size_t VLMAX = XXH_RVOP(vsetvlmax_e64m2)();5935int64_t* cSecret = (int64_t*)customSecret;5936const int64_t* kSecret = (const int64_t*)(const void*)XXH3_kSecret;59375938#if __riscv_v_intrinsic >= 10000005939// ratified v1.0 intrinics version5940vbool32_t mneg = XXH_RVCAST(u8m1_b32)(5941XXH_RVOP(vmv_v_x_u8m1)(0xaa, XXH_RVOP(vsetvlmax_e8m1)()));5942#else5943// support pre-ratification intrinics, which lack mask to vector casts5944size_t vlmax = XXH_RVOP(vsetvlmax_e8m1)();5945vbool32_t mneg = XXH_RVOP(vmseq_vx_u8mf4_b32)(5946XXH_RVOP(vand_vx_u8mf4)(5947XXH_RVOP(vid_v_u8mf4)(vlmax), 1, vlmax), 1, vlmax);5948#endif5949vint64m2_t seed = XXH_RVOP(vmv_v_x_i64m2)((int64_t)seed64, VLMAX);5950seed = XXH_RVOP(vneg_v_i64m2_mu)(mneg, seed, seed, VLMAX);59515952for (; count > 0; count -= vl, cSecret += vl, kSecret += vl) {5953/* make sure vl=VLMAX until last iteration */5954vl = XXH_RVOP(vsetvl_e64m2)(count < VLMAX ? 
count : VLMAX);5955{5956vint64m2_t src = XXH_RVOP(vle64_v_i64m2)(kSecret, vl);5957vint64m2_t res = XXH_RVOP(vadd_vv_i64m2)(src, seed, vl);5958XXH_RVOP(vse64_v_i64m2)(cSecret, res, vl);5959}5960}5961}5962}5963#endif596459655966/* scalar variants - universal */59675968#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))5969/*5970* In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they5971* emit an excess mask and a full 64-bit multiply-add (MADD X-form).5972*5973* While this might not seem like much, as AArch64 is a 64-bit architecture, only5974* big Cortex designs have a full 64-bit multiplier.5975*5976* On the little cores, the smaller 32-bit multiplier is used, and full 64-bit5977* multiplies expand to 2-3 multiplies in microcode. This has a major penalty5978* of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.5979*5980* Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does5981* not have this penalty and does the mask automatically.5982*/5983XXH_FORCE_INLINE xxh_u645984XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)5985{5986xxh_u64 ret;5987/* note: %x = 64-bit register, %w = 32-bit register */5988__asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));5989return ret;5990}5991#else5992XXH_FORCE_INLINE xxh_u645993XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)5994{5995return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;5996}5997#endif59985999/*!6000* @internal6001* @brief Scalar round for @ref XXH3_accumulate_512_scalar().6002*6003* This is extracted to its own function because the NEON path uses a combination6004* of NEON and scalar.6005*/6006XXH_FORCE_INLINE void6007XXH3_scalarRound(void* XXH_RESTRICT acc,6008void const* XXH_RESTRICT input,6009void const* XXH_RESTRICT secret,6010size_t lane)6011{6012xxh_u64* xacc = (xxh_u64*) acc;6013xxh_u8 const* xinput = (xxh_u8 const*) input;6014xxh_u8 const* xsecret = (xxh_u8 const*) 
secret;6015XXH_ASSERT(lane < XXH_ACC_NB);6016XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);6017{6018xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);6019xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);6020xacc[lane ^ 1] += data_val; /* swap adjacent lanes */6021xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);6022}6023}60246025/*!6026* @internal6027* @brief Processes a 64 byte block of data using the scalar path.6028*/6029XXH_FORCE_INLINE void6030XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,6031const void* XXH_RESTRICT input,6032const void* XXH_RESTRICT secret)6033{6034size_t i;6035/* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */6036#if defined(__GNUC__) && !defined(__clang__) \6037&& (defined(__arm__) || defined(__thumb2__)) \6038&& defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \6039&& XXH_SIZE_OPT <= 06040# pragma GCC unroll 86041#endif6042for (i=0; i < XXH_ACC_NB; i++) {6043XXH3_scalarRound(acc, input, secret, i);6044}6045}6046XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)60476048/*!6049* @internal6050* @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().6051*6052* This is extracted to its own function because the NEON path uses a combination6053* of NEON and scalar.6054*/6055XXH_FORCE_INLINE void6056XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,6057void const* XXH_RESTRICT secret,6058size_t lane)6059{6060xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */6061const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */6062XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);6063XXH_ASSERT(lane < XXH_ACC_NB);6064{6065xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);6066xxh_u64 acc64 = xacc[lane];6067acc64 = XXH_xorshift64(acc64, 47);6068acc64 ^= key64;6069acc64 *= XXH_PRIME32_1;6070xacc[lane] = acc64;6071}6072}60736074/*!6075* @internal6076* @brief Scrambles the 
accumulators after a large chunk has been read6077*/6078XXH_FORCE_INLINE void6079XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)6080{6081size_t i;6082for (i=0; i < XXH_ACC_NB; i++) {6083XXH3_scalarScrambleRound(acc, secret, i);6084}6085}60866087XXH_FORCE_INLINE void6088XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)6089{6090/*6091* We need a separate pointer for the hack below,6092* which requires a non-const pointer.6093* Any decent compiler will optimize this out otherwise.6094*/6095const xxh_u8* kSecretPtr = XXH3_kSecret;6096XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);60976098#if defined(__GNUC__) && defined(__aarch64__)6099/*6100* UGLY HACK:6101* GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are6102* placed sequentially, in order, at the top of the unrolled loop.6103*6104* While MOVK is great for generating constants (2 cycles for a 64-bit6105* constant compared to 4 cycles for LDR), it fights for bandwidth with6106* the arithmetic instructions.6107*6108* I L S6109* MOVK6110* MOVK6111* MOVK6112* MOVK6113* ADD6114* SUB STR6115* STR6116* By forcing loads from memory (as the asm line causes the compiler to assume6117* that XXH3_kSecretPtr has been changed), the pipelines are used more6118* efficiently:6119* I L S6120* LDR6121* ADD LDR6122* SUB STR6123* STR6124*6125* See XXH3_NEON_LANES for details on the pipeline.6126*6127* XXH3_64bits_withSeed, len == 256, Snapdragon 8356128* without hack: 2654.4 MB/s6129* with hack: 3202.9 MB/s6130*/6131XXH_COMPILER_GUARD(kSecretPtr);6132#endif6133{ int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;6134int i;6135for (i=0; i < nbRounds; i++) {6136/*6137* The asm hack causes the compiler to assume that kSecretPtr aliases with6138* customSecret, and on aarch64, this prevented LDP from merging two6139* loads together for free. 
Putting the loads together before the stores6140* properly generates LDP.6141*/6142xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64;6143xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;6144XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo);6145XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);6146} }6147}614861496150typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);6151typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);6152typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);615361546155#if (XXH_VECTOR == XXH_AVX512)61566157#define XXH3_accumulate_512 XXH3_accumulate_512_avx5126158#define XXH3_accumulate XXH3_accumulate_avx5126159#define XXH3_scrambleAcc XXH3_scrambleAcc_avx5126160#define XXH3_initCustomSecret XXH3_initCustomSecret_avx51261616162#elif (XXH_VECTOR == XXH_AVX2)61636164#define XXH3_accumulate_512 XXH3_accumulate_512_avx26165#define XXH3_accumulate XXH3_accumulate_avx26166#define XXH3_scrambleAcc XXH3_scrambleAcc_avx26167#define XXH3_initCustomSecret XXH3_initCustomSecret_avx261686169#elif (XXH_VECTOR == XXH_SSE2)61706171#define XXH3_accumulate_512 XXH3_accumulate_512_sse26172#define XXH3_accumulate XXH3_accumulate_sse26173#define XXH3_scrambleAcc XXH3_scrambleAcc_sse26174#define XXH3_initCustomSecret XXH3_initCustomSecret_sse261756176#elif (XXH_VECTOR == XXH_NEON)61776178#define XXH3_accumulate_512 XXH3_accumulate_512_neon6179#define XXH3_accumulate XXH3_accumulate_neon6180#define XXH3_scrambleAcc XXH3_scrambleAcc_neon6181#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar61826183#elif (XXH_VECTOR == XXH_VSX)61846185#define XXH3_accumulate_512 XXH3_accumulate_512_vsx6186#define XXH3_accumulate XXH3_accumulate_vsx6187#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx6188#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar61896190#elif (XXH_VECTOR == XXH_SVE)6191#define XXH3_accumulate_512 XXH3_accumulate_512_sve6192#define 
XXH3_accumulate XXH3_accumulate_sve6193#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar6194#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar61956196#elif (XXH_VECTOR == XXH_LASX)6197#define XXH3_accumulate_512 XXH3_accumulate_512_lasx6198#define XXH3_accumulate XXH3_accumulate_lasx6199#define XXH3_scrambleAcc XXH3_scrambleAcc_lasx6200#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar62016202#elif (XXH_VECTOR == XXH_LSX)6203#define XXH3_accumulate_512 XXH3_accumulate_512_lsx6204#define XXH3_accumulate XXH3_accumulate_lsx6205#define XXH3_scrambleAcc XXH3_scrambleAcc_lsx6206#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar62076208#elif (XXH_VECTOR == XXH_RVV)6209#define XXH3_accumulate_512 XXH3_accumulate_512_rvv6210#define XXH3_accumulate XXH3_accumulate_rvv6211#define XXH3_scrambleAcc XXH3_scrambleAcc_rvv6212#define XXH3_initCustomSecret XXH3_initCustomSecret_rvv62136214#else /* scalar */62156216#define XXH3_accumulate_512 XXH3_accumulate_512_scalar6217#define XXH3_accumulate XXH3_accumulate_scalar6218#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar6219#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar62206221#endif62226223#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */6224# undef XXH3_initCustomSecret6225# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar6226#endif62276228XXH_FORCE_INLINE void6229XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,6230const xxh_u8* XXH_RESTRICT input, size_t len,6231const xxh_u8* XXH_RESTRICT secret, size_t secretSize,6232XXH3_f_accumulate f_acc,6233XXH3_f_scrambleAcc f_scramble)6234{6235size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;6236size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;6237size_t const nb_blocks = (len - 1) / block_len;62386239size_t n;62406241XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);62426243for (n = 0; n < nb_blocks; n++) {6244f_acc(acc, input + n*block_len, secret, 
nbStripesPerBlock);6245f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);6246}62476248/* last partial block */6249XXH_ASSERT(len > XXH_STRIPE_LEN);6250{ size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;6251XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));6252f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);62536254/* last stripe */6255{ const xxh_u8* const p = input + len - XXH_STRIPE_LEN;6256#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */6257XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);6258} }6259}62606261XXH_FORCE_INLINE xxh_u646262XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)6263{6264return XXH3_mul128_fold64(6265acc[0] ^ XXH_readLE64(secret),6266acc[1] ^ XXH_readLE64(secret+8) );6267}62686269static XXH_PUREF XXH64_hash_t6270XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)6271{6272xxh_u64 result64 = start;6273size_t i = 0;62746275for (i = 0; i < 4; i++) {6276result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);6277#if defined(__clang__) /* Clang */ \6278&& (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \6279&& (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \6280&& !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */6281/*6282* UGLY HACK:6283* Prevent autovectorization on Clang ARMv7-a. Exact same problem as6284* the one in XXH3_len_129to240_64b. 
Speeds up shorter keys > 240b.6285* XXH3_64bits, len == 256, Snapdragon 835:6286* without hack: 2063.7 MB/s6287* with hack: 2560.7 MB/s6288*/6289XXH_COMPILER_GUARD(result64);6290#endif6291}62926293return XXH3_avalanche(result64);6294}62956296/* do not align on 8, so that the secret is different from the accumulator */6297#define XXH_SECRET_MERGEACCS_START 1162986299static XXH_PUREF XXH64_hash_t6300XXH3_finalizeLong_64b(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 len)6301{6302return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START, len * XXH_PRIME64_1);6303}63046305#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \6306XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }63076308XXH_FORCE_INLINE XXH64_hash_t6309XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,6310const void* XXH_RESTRICT secret, size_t secretSize,6311XXH3_f_accumulate f_acc,6312XXH3_f_scrambleAcc f_scramble)6313{6314XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;63156316XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);63176318/* converge into final hash */6319XXH_STATIC_ASSERT(sizeof(acc) == 64);6320XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);6321return XXH3_finalizeLong_64b(acc, (const xxh_u8*)secret, (xxh_u64)len);6322}63236324/*6325* It's important for performance to transmit secret's size (when it's static)6326* so that the compiler can properly optimize the vectorized loop.6327* This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.6328* When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE6329* breaks -Og, this is XXH_NO_INLINE.6330*/6331XXH3_WITH_SECRET_INLINE XXH64_hash_t6332XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,6333XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t 
secretLen)6334{6335(void)seed64;6336return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);6337}63386339/*6340* It's preferable for performance that XXH3_hashLong is not inlined,6341* as it results in a smaller function for small data, easier to the instruction cache.6342* Note that inside this no_inline function, we do inline the internal loop,6343* and provide a statically defined secret size to allow optimization of vector loop.6344*/6345XXH_NO_INLINE XXH_PUREF XXH64_hash_t6346XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,6347XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)6348{6349(void)seed64; (void)secret; (void)secretLen;6350return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);6351}63526353/*6354* XXH3_hashLong_64b_withSeed():6355* Generate a custom key based on alteration of default XXH3_kSecret with the seed,6356* and then use this key for long mode hashing.6357*6358* This operation is decently fast but nonetheless costs a little bit of time.6359* Try to avoid it whenever possible (typically when seed==0).6360*6361* It's important for performance that XXH3_hashLong is not inlined. 
Not sure6362* why (uop cache maybe?), but the difference is large and easily measurable.6363*/6364XXH_FORCE_INLINE XXH64_hash_t6365XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,6366XXH64_hash_t seed,6367XXH3_f_accumulate f_acc,6368XXH3_f_scrambleAcc f_scramble,6369XXH3_f_initCustomSecret f_initSec)6370{6371#if XXH_SIZE_OPT <= 06372if (seed == 0)6373return XXH3_hashLong_64b_internal(input, len,6374XXH3_kSecret, sizeof(XXH3_kSecret),6375f_acc, f_scramble);6376#endif6377{ XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];6378f_initSec(secret, seed);6379return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),6380f_acc, f_scramble);6381}6382}63836384/*6385* It's important for performance that XXH3_hashLong is not inlined.6386*/6387XXH_NO_INLINE XXH64_hash_t6388XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,6389XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)6390{6391(void)secret; (void)secretLen;6392return XXH3_hashLong_64b_withSeed_internal(input, len, seed,6393XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);6394}639563966397typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,6398XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);63996400XXH_FORCE_INLINE XXH64_hash_t6401XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,6402XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,6403XXH3_hashLong64_f f_hashLong)6404{6405XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);6406/*6407* If an action is to be taken if `secretLen` condition is not respected,6408* it should be done here.6409* For now, it's a contract pre-condition.6410* Adding a check and a branch here would cost performance at every hash.6411* Also, note that function signature doesn't offer room to return an error.6412*/6413if (len <= 16)6414return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);6415if (len <= 128)6416return 
XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);6417if (len <= XXH3_MIDSIZE_MAX)6418return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);6419return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);6420}642164226423/* === Public entry point === */64246425/*! @ingroup XXH3_family */6426XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)6427{6428return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);6429}64306431/*! @ingroup XXH3_family */6432XXH_PUBLIC_API XXH64_hash_t6433XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)6434{6435return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);6436}64376438/*! @ingroup XXH3_family */6439XXH_PUBLIC_API XXH64_hash_t6440XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)6441{6442return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);6443}64446445XXH_PUBLIC_API XXH64_hash_t6446XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)6447{6448if (length <= XXH3_MIDSIZE_MAX)6449return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);6450return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);6451}645264536454/* === XXH3 streaming === */6455#ifndef XXH_NO_STREAM6456/*6457* Malloc's a pointer that is always aligned to @align.6458*6459* This must be freed with `XXH_alignedFree()`.6460*6461* malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte6462* alignment on 32-bit. 
This isn't enough for the 32 byte aligned loads in AVX26463* or on 32-bit, the 16 byte aligned loads in SSE2 and NEON.6464*6465* This underalignment previously caused a rather obvious crash which went6466* completely unnoticed due to XXH3_createState() not actually being tested.6467* Credit to RedSpah for noticing this bug.6468*6469* The alignment is done manually: Functions like posix_memalign or _mm_malloc6470* are avoided: To maintain portability, we would have to write a fallback6471* like this anyways, and besides, testing for the existence of library6472* functions without relying on external build tools is impossible.6473*6474* The method is simple: Overallocate, manually align, and store the offset6475* to the original behind the returned pointer.6476*6477* Align must be a power of 2 and 8 <= align <= 128.6478*/6479static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)6480{6481XXH_ASSERT(align <= 128 && align >= 8); /* range check */6482XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */6483XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */6484{ /* Overallocate to make room for manual realignment and an offset byte */6485xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);6486if (base != NULL) {6487/*6488* Get the offset needed to align this pointer.6489*6490* Even if the returned pointer is aligned, there will always be6491* at least one byte to store the offset to the original pointer.6492*/6493size_t offset = align - ((size_t)base & (align - 1)); /* base % align */6494/* Add the offset for the now-aligned pointer */6495xxh_u8* ptr = base + offset;64966497XXH_ASSERT((size_t)ptr % align == 0);64986499/* Store the offset immediately before the returned pointer. */6500ptr[-1] = (xxh_u8)offset;6501return ptr;6502}6503return NULL;6504}6505}6506/*6507* Frees an aligned pointer allocated by XXH_alignedMalloc(). 
Don't pass6508* normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.6509*/6510static void XXH_alignedFree(void* p)6511{6512if (p != NULL) {6513xxh_u8* ptr = (xxh_u8*)p;6514/* Get the offset byte we added in XXH_malloc. */6515xxh_u8 offset = ptr[-1];6516/* Free the original malloc'd pointer */6517xxh_u8* base = ptr - offset;6518XXH_free(base);6519}6520}6521/*! @ingroup XXH3_family */6522/*!6523* @brief Allocate an @ref XXH3_state_t.6524*6525* @return An allocated pointer of @ref XXH3_state_t on success.6526* @return `NULL` on failure.6527*6528* @note Must be freed with XXH3_freeState().6529*6530* @see @ref streaming_example "Streaming Example"6531*/6532XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)6533{6534XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);6535if (state==NULL) return NULL;6536XXH3_INITSTATE(state);6537return state;6538}65396540/*! @ingroup XXH3_family */6541/*!6542* @brief Frees an @ref XXH3_state_t.6543*6544* @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().6545*6546* @return @ref XXH_OK.6547*6548* @note Must be allocated with XXH3_createState().6549*6550* @see @ref streaming_example "Streaming Example"6551*/6552XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)6553{6554XXH_alignedFree(statePtr);6555return XXH_OK;6556}65576558/*! 
@ingroup XXH3_family */6559XXH_PUBLIC_API void6560XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)6561{6562XXH_memcpy(dst_state, src_state, sizeof(*dst_state));6563}65646565static void6566XXH3_reset_internal(XXH3_state_t* statePtr,6567XXH64_hash_t seed,6568const void* secret, size_t secretSize)6569{6570size_t const initStart = offsetof(XXH3_state_t, bufferedSize);6571size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;6572XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);6573XXH_ASSERT(statePtr != NULL);6574/* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */6575XXH_memset((char*)statePtr + initStart, 0, initLength);6576statePtr->acc[0] = XXH_PRIME32_3;6577statePtr->acc[1] = XXH_PRIME64_1;6578statePtr->acc[2] = XXH_PRIME64_2;6579statePtr->acc[3] = XXH_PRIME64_3;6580statePtr->acc[4] = XXH_PRIME64_4;6581statePtr->acc[5] = XXH_PRIME32_2;6582statePtr->acc[6] = XXH_PRIME64_5;6583statePtr->acc[7] = XXH_PRIME32_1;6584statePtr->seed = seed;6585statePtr->useSeed = (seed != 0);6586statePtr->extSecret = (const unsigned char*)secret;6587XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);6588statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;6589statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;6590}65916592/*! @ingroup XXH3_family */6593XXH_PUBLIC_API XXH_errorcode6594XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)6595{6596if (statePtr == NULL) return XXH_ERROR;6597XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);6598return XXH_OK;6599}66006601/*! 
@ingroup XXH3_family */6602XXH_PUBLIC_API XXH_errorcode6603XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)6604{6605if (statePtr == NULL) return XXH_ERROR;6606XXH3_reset_internal(statePtr, 0, secret, secretSize);6607if (secret == NULL) return XXH_ERROR;6608if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;6609return XXH_OK;6610}66116612/*! @ingroup XXH3_family */6613XXH_PUBLIC_API XXH_errorcode6614XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)6615{6616if (statePtr == NULL) return XXH_ERROR;6617if (seed==0) return XXH3_64bits_reset(statePtr);6618if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))6619XXH3_initCustomSecret(statePtr->customSecret, seed);6620XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);6621return XXH_OK;6622}66236624/*! @ingroup XXH3_family */6625XXH_PUBLIC_API XXH_errorcode6626XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)6627{6628if (statePtr == NULL) return XXH_ERROR;6629if (secret == NULL) return XXH_ERROR;6630if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;6631XXH3_reset_internal(statePtr, seed64, secret, secretSize);6632statePtr->useSeed = 1; /* always, even if seed64==0 */6633return XXH_OK;6634}66356636/*!6637* @internal6638* @brief Processes a large input for XXH3_update() and XXH3_digest_long().6639*6640* Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.6641*6642* @param acc Pointer to the 8 accumulator lanes6643* @param nbStripesSoFarPtr In/out pointer to the number of leftover stripes in the block*6644* @param nbStripesPerBlock Number of stripes in a block6645* @param input Input pointer6646* @param nbStripes Number of stripes to process6647* @param secret Secret pointer6648* @param secretLimit Offset of the last block in @p secret6649* @param f_acc Pointer to an 
XXH3_accumulate implementation6650* @param f_scramble Pointer to an XXH3_scrambleAcc implementation6651* @return Pointer past the end of @p input after processing6652*/6653XXH_FORCE_INLINE const xxh_u8 *6654XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,6655size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,6656const xxh_u8* XXH_RESTRICT input, size_t nbStripes,6657const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,6658XXH3_f_accumulate f_acc,6659XXH3_f_scrambleAcc f_scramble)6660{6661const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;6662/* Process full blocks */6663if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {6664/* Process the initial partial block... */6665size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;66666667do {6668/* Accumulate and scramble */6669f_acc(acc, input, initialSecret, nbStripesThisIter);6670f_scramble(acc, secret + secretLimit);6671input += nbStripesThisIter * XXH_STRIPE_LEN;6672nbStripes -= nbStripesThisIter;6673/* Then continue the loop with the full block size */6674nbStripesThisIter = nbStripesPerBlock;6675initialSecret = secret;6676} while (nbStripes >= nbStripesPerBlock);6677*nbStripesSoFarPtr = 0;6678}6679/* Process a partial block */6680if (nbStripes > 0) {6681f_acc(acc, input, initialSecret, nbStripes);6682input += nbStripes * XXH_STRIPE_LEN;6683*nbStripesSoFarPtr += nbStripes;6684}6685/* Return end pointer */6686return input;6687}66886689#ifndef XXH3_STREAM_USE_STACK6690# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */6691# define XXH3_STREAM_USE_STACK 16692# endif6693#endif6694/* This function accepts f_acc and f_scramble as function pointers,6695* making it possible to implement multiple variants with different acc & scramble stages.6696* This is notably useful to implement multiple vector variants with different intrinsics.6697*/6698XXH_FORCE_INLINE XXH_errorcode6699XXH3_update(XXH3_state_t* XXH_RESTRICT 
const state,6700const xxh_u8* XXH_RESTRICT input, size_t len,6701XXH3_f_accumulate f_acc,6702XXH3_f_scrambleAcc f_scramble)6703{6704if (input==NULL) {6705XXH_ASSERT(len == 0);6706return XXH_OK;6707}67086709XXH_ASSERT(state != NULL);6710state->totalLen += len;67116712/* small input : just fill in tmp buffer */6713XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);6714if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {6715XXH_memcpy(state->buffer + state->bufferedSize, input, len);6716state->bufferedSize += (XXH32_hash_t)len;6717return XXH_OK;6718}67196720{ const xxh_u8* const bEnd = input + len;6721const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;6722#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 16723/* For some reason, gcc and MSVC seem to suffer greatly6724* when operating accumulators directly into state.6725* Operating into stack space seems to enable proper optimization.6726* clang, on the other hand, doesn't seem to need this trick */6727XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];6728XXH_memcpy(acc, state->acc, sizeof(acc));6729#else6730xxh_u64* XXH_RESTRICT const acc = state->acc;6731#endif67326733/* total input is now > XXH3_INTERNALBUFFER_SIZE */6734#define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)6735XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */67366737/*6738* Internal buffer is partially filled (always, except at beginning)6739* Complete it, then consume it.6740*/6741if (state->bufferedSize) {6742size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;6743XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);6744input += loadSize;6745XXH3_consumeStripes(acc,6746&state->nbStripesSoFar, state->nbStripesPerBlock,6747state->buffer, XXH3_INTERNALBUFFER_STRIPES,6748secret, state->secretLimit,6749f_acc, f_scramble);6750state->bufferedSize = 0;6751}6752XXH_ASSERT(input < 
bEnd);6753if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {6754size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;6755input = XXH3_consumeStripes(acc,6756&state->nbStripesSoFar, state->nbStripesPerBlock,6757input, nbStripes,6758secret, state->secretLimit,6759f_acc, f_scramble);6760XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);67616762}6763/* Some remaining input (always) : buffer it */6764XXH_ASSERT(input < bEnd);6765XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);6766XXH_ASSERT(state->bufferedSize == 0);6767XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));6768state->bufferedSize = (XXH32_hash_t)(bEnd-input);6769#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 16770/* save stack accumulators into state */6771XXH_memcpy(state->acc, acc, sizeof(acc));6772#endif6773}67746775return XXH_OK;6776}67776778/*6779* Both XXH3_64bits_update and XXH3_128bits_update use this routine.6780*/6781XXH_NO_INLINE XXH_errorcode6782XXH3_update_regular(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)6783{6784return XXH3_update(state, (const xxh_u8*)input, len,6785XXH3_accumulate, XXH3_scrambleAcc);6786}67876788/*! @ingroup XXH3_family */6789XXH_PUBLIC_API XXH_errorcode6790XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)6791{6792return XXH3_update_regular(state, input, len);6793}679467956796XXH_FORCE_INLINE void6797XXH3_digest_long (XXH64_hash_t* acc,6798const XXH3_state_t* state,6799const unsigned char* secret)6800{6801xxh_u8 lastStripe[XXH_STRIPE_LEN];6802const xxh_u8* lastStripePtr;68036804/*6805* Digest on a local copy. 
This way, the state remains unaltered, and it can6806* continue ingesting more input afterwards.6807*/6808XXH_memcpy(acc, state->acc, sizeof(state->acc));6809if (state->bufferedSize >= XXH_STRIPE_LEN) {6810/* Consume remaining stripes then point to remaining data in buffer */6811size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;6812size_t nbStripesSoFar = state->nbStripesSoFar;6813XXH3_consumeStripes(acc,6814&nbStripesSoFar, state->nbStripesPerBlock,6815state->buffer, nbStripes,6816secret, state->secretLimit,6817XXH3_accumulate, XXH3_scrambleAcc);6818lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;6819} else { /* bufferedSize < XXH_STRIPE_LEN */6820/* Copy to temp buffer */6821size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;6822XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */6823XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);6824XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);6825lastStripePtr = lastStripe;6826}6827/* Last stripe */6828XXH3_accumulate_512(acc,6829lastStripePtr,6830secret + state->secretLimit - XXH_SECRET_LASTACC_START);6831}68326833/*! @ingroup XXH3_family */6834XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)6835{6836const unsigned char* const secret = (state->extSecret == NULL) ? 
state->customSecret : state->extSecret;6837if (state->totalLen > XXH3_MIDSIZE_MAX) {6838XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];6839XXH3_digest_long(acc, state, secret);6840return XXH3_finalizeLong_64b(acc, secret, (xxh_u64)state->totalLen);6841}6842/* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */6843if (state->useSeed)6844return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);6845return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),6846secret, state->secretLimit + XXH_STRIPE_LEN);6847}6848#endif /* !XXH_NO_STREAM */684968506851/* ==========================================6852* XXH3 128 bits (a.k.a XXH128)6853* ==========================================6854* XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,6855* even without counting the significantly larger output size.6856*6857* For example, extra steps are taken to avoid the seed-dependent collisions6858* in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).6859*6860* This strength naturally comes at the cost of some speed, especially on short6861* lengths. Note that longer hashes are about as fast as the 64-bit version6862* due to it using only a slight modification of the 64-bit loop.6863*6864* XXH128 is also more oriented towards 64-bit machines. It is still extremely6865* fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).6866*/68676868XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t6869XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)6870{6871/* A doubled version of 1to3_64b with different constants. 
*/6872XXH_ASSERT(input != NULL);6873XXH_ASSERT(1 <= len && len <= 3);6874XXH_ASSERT(secret != NULL);6875/*6876* len = 1: combinedl = { input[0], 0x01, input[0], input[0] }6877* len = 2: combinedl = { input[1], 0x02, input[0], input[1] }6878* len = 3: combinedl = { input[2], 0x03, input[0], input[1] }6879*/6880{ xxh_u8 const c1 = input[0];6881xxh_u8 const c2 = input[len >> 1];6882xxh_u8 const c3 = input[len - 1];6883xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)6884| ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);6885xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);6886xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;6887xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;6888xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;6889xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;6890XXH128_hash_t h128;6891h128.low64 = XXH64_avalanche(keyed_lo);6892h128.high64 = XXH64_avalanche(keyed_hi);6893return h128;6894}6895}68966897XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t6898XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)6899{6900XXH_ASSERT(input != NULL);6901XXH_ASSERT(secret != NULL);6902XXH_ASSERT(4 <= len && len <= 8);6903seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;6904{ xxh_u32 const input_lo = XXH_readLE32(input);6905xxh_u32 const input_hi = XXH_readLE32(input + len - 4);6906xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);6907xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;6908xxh_u64 const keyed = input_64 ^ bitflip;69096910/* Shift len to the left to ensure it is even, this avoids even multiplies. 
*/6911XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));69126913m128.high64 += (m128.low64 << 1);6914m128.low64 ^= (m128.high64 >> 3);69156916m128.low64 = XXH_xorshift64(m128.low64, 35);6917m128.low64 *= PRIME_MX2;6918m128.low64 = XXH_xorshift64(m128.low64, 28);6919m128.high64 = XXH3_avalanche(m128.high64);6920return m128;6921}6922}69236924XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t6925XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)6926{6927XXH_ASSERT(input != NULL);6928XXH_ASSERT(secret != NULL);6929XXH_ASSERT(9 <= len && len <= 16);6930{ xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;6931xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;6932xxh_u64 const input_lo = XXH_readLE64(input);6933xxh_u64 input_hi = XXH_readLE64(input + len - 8);6934XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);6935/*6936* Put len in the middle of m128 to ensure that the length gets mixed to6937* both the low and high bits in the 128x64 multiply below.6938*/6939m128.low64 += (xxh_u64)(len - 1) << 54;6940input_hi ^= bitfliph;6941/*6942* Add the high 32 bits of input_hi to the high 32 bits of m128, then6943* add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to6944* the high 64 bits of m128.6945*6946* The best approach to this operation is different on 32-bit and 64-bit.6947*/6948if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */6949/*6950* 32-bit optimized version, which is more readable.6951*6952* On 32-bit, it removes an ADC and delays a dependency between the two6953* halves of m128.high64, but it generates an extra mask on 64-bit.6954*/6955m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);6956} else {6957/*6958* 64-bit optimized (albeit more confusing) version.6959*6960* Uses some properties of addition and multiplication to remove the 
mask:6961*6962* Let:6963* a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)6964* b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)6965* c = XXH_PRIME32_26966*6967* a + (b * c)6968* Inverse Property: x + y - x == y6969* a + (b * (1 + c - 1))6970* Distributive Property: x * (y + z) == (x * y) + (x * z)6971* a + (b * 1) + (b * (c - 1))6972* Identity Property: x * 1 == x6973* a + b + (b * (c - 1))6974*6975* Substitute a, b, and c:6976* input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))6977*6978* Since input_hi.hi + input_hi.lo == input_hi, we get this:6979* input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))6980*/6981m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);6982}6983/* m128 ^= XXH_swap64(m128 >> 64); */6984m128.low64 ^= XXH_swap64(m128.high64);69856986{ /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */6987XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);6988h128.high64 += m128.high64 * XXH_PRIME64_2;69896990h128.low64 = XXH3_avalanche(h128.low64);6991h128.high64 = XXH3_avalanche(h128.high64);6992return h128;6993} }6994}69956996/*6997* Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN6998*/6999XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t7000XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)7001{7002XXH_ASSERT(len <= 16);7003{ if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);7004if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);7005if (len) return XXH3_len_1to3_128b(input, len, secret, seed);7006{ XXH128_hash_t h128;7007xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);7008xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);7009h128.low64 = XXH64_avalanche(seed ^ bitflipl);7010h128.high64 = XXH64_avalanche( seed ^ bitfliph);7011return h128;7012} }7013}70147015/*7016* A bit slower than XXH3_mix16B, but handles multiply by zero better.7017*/7018XXH_FORCE_INLINE 
XXH128_hash_t7019XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,7020const xxh_u8* secret, XXH64_hash_t seed)7021{7022acc.low64 += XXH3_mix16B (input_1, secret+0, seed);7023acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);7024acc.high64 += XXH3_mix16B (input_2, secret+16, seed);7025acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);7026return acc;7027}702870297030XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t7031XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,7032const xxh_u8* XXH_RESTRICT secret, size_t secretSize,7033XXH64_hash_t seed)7034{7035XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;7036XXH_ASSERT(16 < len && len <= 128);70377038{ XXH128_hash_t acc;7039acc.low64 = len * XXH_PRIME64_1;7040acc.high64 = 0;70417042#if XXH_SIZE_OPT >= 17043{7044/* Smaller, but slightly slower. */7045unsigned int i = (unsigned int)(len - 1) / 32;7046do {7047acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);7048} while (i-- != 0);7049}7050#else7051if (len > 32) {7052if (len > 64) {7053if (len > 96) {7054acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);7055}7056acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);7057}7058acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);7059}7060acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);7061#endif7062{ XXH128_hash_t h128;7063h128.low64 = acc.low64 + acc.high64;7064h128.high64 = (acc.low64 * XXH_PRIME64_1)7065+ (acc.high64 * XXH_PRIME64_4)7066+ ((len - seed) * XXH_PRIME64_2);7067h128.low64 = XXH3_avalanche(h128.low64);7068h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);7069return h128;7070}7071}7072}70737074XXH_NO_INLINE XXH_PUREF XXH128_hash_t7075XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,7076const xxh_u8* XXH_RESTRICT secret, size_t secretSize,7077XXH64_hash_t seed)7078{7079XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); 
(void)secretSize;7080XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);70817082{ XXH128_hash_t acc;7083unsigned i;7084acc.low64 = len * XXH_PRIME64_1;7085acc.high64 = 0;7086/*7087* We set as `i` as offset + 32. We do this so that unchanged7088* `len` can be used as upper bound. This reaches a sweet spot7089* where both x86 and aarch64 get simple agen and good codegen7090* for the loop.7091*/7092for (i = 32; i < 160; i += 32) {7093acc = XXH128_mix32B(acc,7094input + i - 32,7095input + i - 16,7096secret + i - 32,7097seed);7098}7099acc.low64 = XXH3_avalanche(acc.low64);7100acc.high64 = XXH3_avalanche(acc.high64);7101/*7102* NB: `i <= len` will duplicate the last 32-bytes if7103* len % 32 was zero. This is an unfortunate necessity to keep7104* the hash result stable.7105*/7106for (i=160; i <= len; i += 32) {7107acc = XXH128_mix32B(acc,7108input + i - 32,7109input + i - 16,7110secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,7111seed);7112}7113/* last bytes */7114acc = XXH128_mix32B(acc,7115input + len - 16,7116input + len - 32,7117secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,7118(XXH64_hash_t)0 - seed);71197120{ XXH128_hash_t h128;7121h128.low64 = acc.low64 + acc.high64;7122h128.high64 = (acc.low64 * XXH_PRIME64_1)7123+ (acc.high64 * XXH_PRIME64_4)7124+ ((len - seed) * XXH_PRIME64_2);7125h128.low64 = XXH3_avalanche(h128.low64);7126h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);7127return h128;7128}7129}7130}71317132static XXH_PUREF XXH128_hash_t7133XXH3_finalizeLong_128b(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, size_t secretSize, xxh_u64 len)7134{7135XXH128_hash_t h128;7136h128.low64 = XXH3_finalizeLong_64b(acc, secret, len);7137h128.high64 = XXH3_mergeAccs(acc, secret + secretSize7138- XXH_STRIPE_LEN - XXH_SECRET_MERGEACCS_START,7139~(len * XXH_PRIME64_2));7140return h128;7141}71427143XXH_FORCE_INLINE XXH128_hash_t7144XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,7145const xxh_u8* XXH_RESTRICT 
secret, size_t secretSize,7146XXH3_f_accumulate f_acc,7147XXH3_f_scrambleAcc f_scramble)7148{7149XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;71507151XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);71527153/* converge into final hash */7154XXH_STATIC_ASSERT(sizeof(acc) == 64);7155XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);7156return XXH3_finalizeLong_128b(acc, secret, secretSize, (xxh_u64)len);7157}71587159/*7160* It's important for performance that XXH3_hashLong() is not inlined.7161*/7162XXH_NO_INLINE XXH_PUREF XXH128_hash_t7163XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,7164XXH64_hash_t seed64,7165const void* XXH_RESTRICT secret, size_t secretLen)7166{7167(void)seed64; (void)secret; (void)secretLen;7168return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),7169XXH3_accumulate, XXH3_scrambleAcc);7170}71717172/*7173* It's important for performance to pass @p secretLen (when it's static)7174* to the compiler, so that it can properly optimize the vectorized loop.7175*7176* When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE7177* breaks -Og, this is XXH_NO_INLINE.7178*/7179XXH3_WITH_SECRET_INLINE XXH128_hash_t7180XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,7181XXH64_hash_t seed64,7182const void* XXH_RESTRICT secret, size_t secretLen)7183{7184(void)seed64;7185return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,7186XXH3_accumulate, XXH3_scrambleAcc);7187}71887189XXH_FORCE_INLINE XXH128_hash_t7190XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,7191XXH64_hash_t seed64,7192XXH3_f_accumulate f_acc,7193XXH3_f_scrambleAcc f_scramble,7194XXH3_f_initCustomSecret f_initSec)7195{7196if (seed64 == 0)7197return XXH3_hashLong_128b_internal(input, len,7198XXH3_kSecret, sizeof(XXH3_kSecret),7199f_acc, 
f_scramble);7200{ XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];7201f_initSec(secret, seed64);7202return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),7203f_acc, f_scramble);7204}7205}72067207/*7208* It's important for performance that XXH3_hashLong is not inlined.7209*/7210XXH_NO_INLINE XXH128_hash_t7211XXH3_hashLong_128b_withSeed(const void* input, size_t len,7212XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)7213{7214(void)secret; (void)secretLen;7215return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,7216XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);7217}72187219typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,7220XXH64_hash_t, const void* XXH_RESTRICT, size_t);72217222XXH_FORCE_INLINE XXH128_hash_t7223XXH3_128bits_internal(const void* input, size_t len,7224XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,7225XXH3_hashLong128_f f_hl128)7226{7227XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);7228/*7229* If an action is to be taken if `secret` conditions are not respected,7230* it should be done here.7231* For now, it's a contract pre-condition.7232* Adding a check and a branch here would cost performance at every hash.7233*/7234if (len <= 16)7235return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);7236if (len <= 128)7237return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);7238if (len <= XXH3_MIDSIZE_MAX)7239return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);7240return f_hl128(input, len, seed64, secret, secretLen);7241}724272437244/* === Public XXH128 API === */72457246/*! 
@ingroup XXH3_family */7247XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)7248{7249return XXH3_128bits_internal(input, len, 0,7250XXH3_kSecret, sizeof(XXH3_kSecret),7251XXH3_hashLong_128b_default);7252}72537254/*! @ingroup XXH3_family */7255XXH_PUBLIC_API XXH128_hash_t7256XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)7257{7258return XXH3_128bits_internal(input, len, 0,7259(const xxh_u8*)secret, secretSize,7260XXH3_hashLong_128b_withSecret);7261}72627263/*! @ingroup XXH3_family */7264XXH_PUBLIC_API XXH128_hash_t7265XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)7266{7267return XXH3_128bits_internal(input, len, seed,7268XXH3_kSecret, sizeof(XXH3_kSecret),7269XXH3_hashLong_128b_withSeed);7270}72717272/*! @ingroup XXH3_family */7273XXH_PUBLIC_API XXH128_hash_t7274XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)7275{7276if (len <= XXH3_MIDSIZE_MAX)7277return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);7278return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);7279}72807281/*! @ingroup XXH3_family */7282XXH_PUBLIC_API XXH128_hash_t7283XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)7284{7285return XXH3_128bits_withSeed(input, len, seed);7286}728772887289/* === XXH3 128-bit streaming === */7290#ifndef XXH_NO_STREAM7291/*7292* All initialization and update functions are identical to 64-bit streaming variant.7293* The only difference is the finalization routine.7294*/72957296/*! @ingroup XXH3_family */7297XXH_PUBLIC_API XXH_errorcode7298XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)7299{7300return XXH3_64bits_reset(statePtr);7301}73027303/*! 
@ingroup XXH3_family */7304XXH_PUBLIC_API XXH_errorcode7305XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)7306{7307return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);7308}73097310/*! @ingroup XXH3_family */7311XXH_PUBLIC_API XXH_errorcode7312XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)7313{7314return XXH3_64bits_reset_withSeed(statePtr, seed);7315}73167317/*! @ingroup XXH3_family */7318XXH_PUBLIC_API XXH_errorcode7319XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)7320{7321return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);7322}73237324/*! @ingroup XXH3_family */7325XXH_PUBLIC_API XXH_errorcode7326XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)7327{7328return XXH3_update_regular(state, input, len);7329}73307331/*! @ingroup XXH3_family */7332XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)7333{7334const unsigned char* const secret = (state->extSecret == NULL) ? 
state->customSecret : state->extSecret;7335if (state->totalLen > XXH3_MIDSIZE_MAX) {7336XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];7337XXH3_digest_long(acc, state, secret);7338XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);7339return XXH3_finalizeLong_128b(acc, secret, state->secretLimit + XXH_STRIPE_LEN, (xxh_u64)state->totalLen);7340}7341/* len <= XXH3_MIDSIZE_MAX : short code */7342if (state->useSeed)7343return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);7344return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),7345secret, state->secretLimit + XXH_STRIPE_LEN);7346}7347#endif /* !XXH_NO_STREAM */7348/* 128-bit utility functions */73497350/* return : 1 is equal, 0 if different */7351/*! @ingroup XXH3_family */7352XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)7353{7354/* note : XXH128_hash_t is compact, it has no padding byte */7355return !(XXH_memcmp(&h1, &h2, sizeof(h1)));7356}73577358/* This prototype is compatible with stdlib's qsort().7359* @return : >0 if *h128_1 > *h128_27360* <0 if *h128_1 < *h128_27361* =0 if *h128_1 == *h128_2 */7362/*! @ingroup XXH3_family */7363XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)7364{7365XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;7366XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;7367int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);7368/* note : bets that, in most cases, hash values are different */7369if (hcmp) return hcmp;7370return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);7371}737273737374/*====== Canonical representation ======*/7375/*! 
@ingroup XXH3_family */7376XXH_PUBLIC_API void7377XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)7378{7379XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));7380if (XXH_CPU_LITTLE_ENDIAN) {7381hash.high64 = XXH_swap64(hash.high64);7382hash.low64 = XXH_swap64(hash.low64);7383}7384XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));7385XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));7386}73877388/*! @ingroup XXH3_family */7389XXH_PUBLIC_API XXH128_hash_t7390XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)7391{7392XXH128_hash_t h;7393h.high64 = XXH_readBE64(src);7394h.low64 = XXH_readBE64(src->digest + 8);7395return h;7396}7397739873997400/* ==========================================7401* Secret generators7402* ==========================================7403*/7404#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))74057406XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)7407{7408XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );7409XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );7410}74117412/*! 
@ingroup XXH3_family */7413XXH_PUBLIC_API XXH_errorcode7414XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)7415{7416#if (XXH_DEBUGLEVEL >= 1)7417XXH_ASSERT(secretBuffer != NULL);7418XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);7419#else7420/* production mode, assert() are disabled */7421if (secretBuffer == NULL) return XXH_ERROR;7422if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;7423#endif74247425if (customSeedSize == 0) {7426customSeed = XXH3_kSecret;7427customSeedSize = XXH_SECRET_DEFAULT_SIZE;7428}7429#if (XXH_DEBUGLEVEL >= 1)7430XXH_ASSERT(customSeed != NULL);7431#else7432if (customSeed == NULL) return XXH_ERROR;7433#endif74347435/* Fill secretBuffer with a copy of customSeed - repeat as needed */7436{ size_t pos = 0;7437while (pos < secretSize) {7438size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);7439XXH_memcpy((char*)secretBuffer + pos, customSeed, toCopy);7440pos += toCopy;7441} }74427443{ size_t const nbSeg16 = secretSize / 16;7444size_t n;7445XXH128_canonical_t scrambler;7446XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));7447for (n=0; n<nbSeg16; n++) {7448XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);7449XXH3_combine16((char*)secretBuffer + n*16, h128);7450}7451/* last segment */7452XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));7453}7454return XXH_OK;7455}74567457/*! 
@ingroup XXH3_family */7458XXH_PUBLIC_API void7459XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)7460{7461XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];7462XXH3_initCustomSecret(secret, seed);7463XXH_ASSERT(secretBuffer != NULL);7464XXH_memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);7465}7466746774687469/* Pop our optimization override from above */7470#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \7471&& defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \7472&& defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */7473# pragma GCC pop_options7474#endif74757476#endif /* XXH_NO_LONG_LONG */74777478#endif /* XXH_NO_XXH3 */74797480/*!7481* @}7482*/7483#endif /* XXH_IMPLEMENTATION */748474857486#if defined (__cplusplus) && !defined(XXH_NO_EXTERNC_GUARD)7487} /* extern "C" */7488#endif748974907491