Path: blob/main/contrib/llvm-project/libc/src/__support/hash.h
213766 views
//===-- Portable string hash function ---------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#ifndef LLVM_LIBC_SRC___SUPPORT_HASH_H9#define LLVM_LIBC_SRC___SUPPORT_HASH_H1011#include "src/__support/CPP/bit.h" // rotl12#include "src/__support/CPP/limits.h" // numeric_limits13#include "src/__support/macros/attributes.h" // LIBC_INLINE14#include "src/__support/macros/config.h"15#include "src/__support/uint128.h" // UInt12816#include <stdint.h> // For uint64_t1718namespace LIBC_NAMESPACE_DECL {19namespace internal {2021// Folded multiplication.22// This function multiplies two 64-bit integers and xor the high and23// low 64-bit parts of the result.24LIBC_INLINE uint64_t folded_multiply(uint64_t x, uint64_t y) {25UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);26uint64_t low = static_cast<uint64_t>(p);27uint64_t high = static_cast<uint64_t>(p >> 64);28return low ^ high;29}3031// Read as little endian.32// Shift-and-or implementation does not give a satisfactory code on aarch64.33// Therefore, we use a union to read the value.34template <typename T> LIBC_INLINE T read_little_endian(const void *ptr) {35const uint8_t *bytes = static_cast<const uint8_t *>(ptr);36uint8_t buffer[sizeof(T)];37#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__38// Compiler should able to optimize this as a load followed by a byte39// swap. On aarch64 (-mbig-endian), this compiles to the following for40// int:41// ldr w0, [x0]42// rev w0, w043// ret44for (size_t i = 0; i < sizeof(T); ++i) {45buffer[i] = bytes[sizeof(T) - i - 1];46}47#else48for (size_t i = 0; i < sizeof(T); ++i) {49buffer[i] = bytes[i];50}51#endif52return cpp::bit_cast<T>(buffer);53}5455// Specialized read functions for small values. size must be <= 8.56LIBC_INLINE void read_small_values(const void *ptr, size_t size, uint64_t &low,57uint64_t &high) {58const uint8_t *bytes = static_cast<const uint8_t *>(ptr);59if (size >= 2) {60if (size >= 4) {61low = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[0]));62high =63static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[size - 4]));64} else {65low = static_cast<uint64_t>(read_little_endian<uint16_t>(&bytes[0]));66high = static_cast<uint64_t>(bytes[size - 1]);67}68} else {69if (size > 0) {70low = static_cast<uint64_t>(bytes[0]);71high = static_cast<uint64_t>(bytes[0]);72} else {73low = 0;74high = 0;75}76}77}7879// This constant comes from Kunth's prng (it empirically works well).80LIBC_INLINE_VAR constexpr uint64_t MULTIPLE = 6364136223846793005;81// Rotation amount for mixing.82LIBC_INLINE_VAR constexpr uint64_t ROTATE = 23;8384// Randomly generated values. For now, we use the same values as in aHash as85// they are widely tested.86// https://github.com/tkaitchuck/aHash/blob/9f6a2ad8b721fd28da8dc1d0b7996677b374357c/src/random_state.rs#L3887LIBC_INLINE_VAR constexpr uint64_t RANDOMNESS[2][4] = {88{0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0,890x082efa98ec4e6c89},90{0x452821e638d01377, 0xbe5466cf34e90c6c, 0xc0ac29b7c97c50dd,910x3f84d5b5b5470917},92};9394// This is a portable string hasher. It is not cryptographically secure.95// The quality of the hash is good enough to pass all tests in SMHasher.96// The implementation is derived from the generic routine of aHash.97class HashState {98uint64_t buffer;99uint64_t pad;100uint64_t extra_keys[2];101LIBC_INLINE void update(uint64_t low, uint64_t high) {102uint64_t combined =103folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);104buffer = (buffer + pad) ^ combined;105buffer = cpp::rotl(buffer, ROTATE);106}107LIBC_INLINE static uint64_t mix(uint64_t seed) {108HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],109RANDOMNESS[0][3]};110mixer.update(seed, 0);111return mixer.finish();112}113114public:115LIBC_INLINE constexpr HashState(uint64_t a, uint64_t b, uint64_t c,116uint64_t d)117: buffer(a), pad(b), extra_keys{c, d} {}118LIBC_INLINE HashState(uint64_t seed) {119// Mix one more round of the seed to make it stronger.120uint64_t mixed = mix(seed);121buffer = RANDOMNESS[1][0] ^ mixed;122pad = RANDOMNESS[1][1] ^ mixed;123extra_keys[0] = RANDOMNESS[1][2] ^ mixed;124extra_keys[1] = RANDOMNESS[1][3] ^ mixed;125}126LIBC_INLINE void update(const void *ptr, size_t size) {127uint8_t const *bytes = static_cast<const uint8_t *>(ptr);128buffer = (buffer + size) * MULTIPLE;129uint64_t low, high;130if (size > 8) {131if (size > 16) {132// update tail133low = read_little_endian<uint64_t>(&bytes[size - 16]);134high = read_little_endian<uint64_t>(&bytes[size - 8]);135update(low, high);136while (size > 16) {137low = read_little_endian<uint64_t>(&bytes[0]);138high = read_little_endian<uint64_t>(&bytes[8]);139update(low, high);140bytes += 16;141size -= 16;142}143} else {144low = read_little_endian<uint64_t>(&bytes[0]);145high = read_little_endian<uint64_t>(&bytes[size - 8]);146update(low, high);147}148} else {149read_small_values(ptr, size, low, high);150update(low, high);151}152}153LIBC_INLINE uint64_t finish() {154int rot = buffer & 63;155uint64_t folded = folded_multiply(buffer, pad);156return cpp::rotl(folded, rot);157}158};159160} // namespace internal161} // namespace LIBC_NAMESPACE_DECL162163#endif // LLVM_LIBC_SRC___SUPPORT_HASH_H164165166