Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/libc/src/__support/hash.h
213766 views
1
//===-- Portable string hash function ---------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#ifndef LLVM_LIBC_SRC___SUPPORT_HASH_H
10
#define LLVM_LIBC_SRC___SUPPORT_HASH_H
11
12
#include "src/__support/CPP/bit.h" // rotl
13
#include "src/__support/CPP/limits.h" // numeric_limits
14
#include "src/__support/macros/attributes.h" // LIBC_INLINE
15
#include "src/__support/macros/config.h"
16
#include "src/__support/uint128.h" // UInt128
17
#include <stdint.h> // For uint64_t
18
19
namespace LIBC_NAMESPACE_DECL {
20
namespace internal {
21
22
// Folded multiplication.
23
// This function multiplies two 64-bit integers and xor the high and
24
// low 64-bit parts of the result.
25
LIBC_INLINE uint64_t folded_multiply(uint64_t x, uint64_t y) {
26
UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);
27
uint64_t low = static_cast<uint64_t>(p);
28
uint64_t high = static_cast<uint64_t>(p >> 64);
29
return low ^ high;
30
}
31
32
// Read as little endian.
33
// Shift-and-or implementation does not give a satisfactory code on aarch64.
34
// Therefore, we use a union to read the value.
35
template <typename T> LIBC_INLINE T read_little_endian(const void *ptr) {
36
const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
37
uint8_t buffer[sizeof(T)];
38
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
39
// Compiler should able to optimize this as a load followed by a byte
40
// swap. On aarch64 (-mbig-endian), this compiles to the following for
41
// int:
42
// ldr w0, [x0]
43
// rev w0, w0
44
// ret
45
for (size_t i = 0; i < sizeof(T); ++i) {
46
buffer[i] = bytes[sizeof(T) - i - 1];
47
}
48
#else
49
for (size_t i = 0; i < sizeof(T); ++i) {
50
buffer[i] = bytes[i];
51
}
52
#endif
53
return cpp::bit_cast<T>(buffer);
54
}
55
56
// Specialized read functions for small values. size must be <= 8.
57
LIBC_INLINE void read_small_values(const void *ptr, size_t size, uint64_t &low,
58
uint64_t &high) {
59
const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
60
if (size >= 2) {
61
if (size >= 4) {
62
low = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[0]));
63
high =
64
static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[size - 4]));
65
} else {
66
low = static_cast<uint64_t>(read_little_endian<uint16_t>(&bytes[0]));
67
high = static_cast<uint64_t>(bytes[size - 1]);
68
}
69
} else {
70
if (size > 0) {
71
low = static_cast<uint64_t>(bytes[0]);
72
high = static_cast<uint64_t>(bytes[0]);
73
} else {
74
low = 0;
75
high = 0;
76
}
77
}
78
}
79
80
// This constant comes from Kunth's prng (it empirically works well).
81
LIBC_INLINE_VAR constexpr uint64_t MULTIPLE = 6364136223846793005;
82
// Rotation amount for mixing.
83
LIBC_INLINE_VAR constexpr uint64_t ROTATE = 23;
84
85
// Randomly generated values. For now, we use the same values as in aHash as
86
// they are widely tested.
87
// https://github.com/tkaitchuck/aHash/blob/9f6a2ad8b721fd28da8dc1d0b7996677b374357c/src/random_state.rs#L38
88
LIBC_INLINE_VAR constexpr uint64_t RANDOMNESS[2][4] = {
89
{0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0,
90
0x082efa98ec4e6c89},
91
{0x452821e638d01377, 0xbe5466cf34e90c6c, 0xc0ac29b7c97c50dd,
92
0x3f84d5b5b5470917},
93
};
94
95
// This is a portable string hasher. It is not cryptographically secure.
96
// The quality of the hash is good enough to pass all tests in SMHasher.
97
// The implementation is derived from the generic routine of aHash.
98
class HashState {
99
uint64_t buffer;
100
uint64_t pad;
101
uint64_t extra_keys[2];
102
LIBC_INLINE void update(uint64_t low, uint64_t high) {
103
uint64_t combined =
104
folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);
105
buffer = (buffer + pad) ^ combined;
106
buffer = cpp::rotl(buffer, ROTATE);
107
}
108
LIBC_INLINE static uint64_t mix(uint64_t seed) {
109
HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
110
RANDOMNESS[0][3]};
111
mixer.update(seed, 0);
112
return mixer.finish();
113
}
114
115
public:
116
LIBC_INLINE constexpr HashState(uint64_t a, uint64_t b, uint64_t c,
117
uint64_t d)
118
: buffer(a), pad(b), extra_keys{c, d} {}
119
LIBC_INLINE HashState(uint64_t seed) {
120
// Mix one more round of the seed to make it stronger.
121
uint64_t mixed = mix(seed);
122
buffer = RANDOMNESS[1][0] ^ mixed;
123
pad = RANDOMNESS[1][1] ^ mixed;
124
extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
125
extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
126
}
127
LIBC_INLINE void update(const void *ptr, size_t size) {
128
uint8_t const *bytes = static_cast<const uint8_t *>(ptr);
129
buffer = (buffer + size) * MULTIPLE;
130
uint64_t low, high;
131
if (size > 8) {
132
if (size > 16) {
133
// update tail
134
low = read_little_endian<uint64_t>(&bytes[size - 16]);
135
high = read_little_endian<uint64_t>(&bytes[size - 8]);
136
update(low, high);
137
while (size > 16) {
138
low = read_little_endian<uint64_t>(&bytes[0]);
139
high = read_little_endian<uint64_t>(&bytes[8]);
140
update(low, high);
141
bytes += 16;
142
size -= 16;
143
}
144
} else {
145
low = read_little_endian<uint64_t>(&bytes[0]);
146
high = read_little_endian<uint64_t>(&bytes[size - 8]);
147
update(low, high);
148
}
149
} else {
150
read_small_values(ptr, size, low, high);
151
update(low, high);
152
}
153
}
154
LIBC_INLINE uint64_t finish() {
155
int rot = buffer & 63;
156
uint64_t folded = folded_multiply(buffer, pad);
157
return cpp::rotl(folded, rot);
158
}
159
};
160
161
} // namespace internal
162
} // namespace LIBC_NAMESPACE_DECL
163
164
#endif // LLVM_LIBC_SRC___SUPPORT_HASH_H
165
166