GitHub Repository: freebsd/pkg
Path: blob/main/external/include/mum.h
/* Copyright (c) 2016-2025
   Vladimir Makarov <[email protected]>

   Permission is hereby granted, free of charge, to any person
   obtaining a copy of this software and associated documentation
   files (the "Software"), to deal in the Software without
   restriction, including without limitation the rights to use, copy,
   modify, merge, publish, distribute, sublicense, and/or sell copies
   of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/

/* This file implements MUM (MUltiply and Mix) hashing. We randomize input data by a 64x64-bit
   multiplication, mix the high and low parts of the multiplication result by an addition, and then
   mix it into the current state. For the multiplication we use prime numbers randomly generated
   with equal probability of their bit values. When all primes have been used once, the state is
   randomized and the same prime numbers are used again for data randomization.

   MUM hashing passes all SMHasher tests. A pseudo-random number generator based on MUM also passes
   the NIST Statistical Test Suite for Random and Pseudorandom Number Generators for Cryptographic
   Applications (version 2.2.1) with 1000 bitstreams, each containing 1M bits. MUM hashing is also
   faster than Spooky64 and City64 on small strings (at least up to 512 bits) on Haswell and
   Power7. The MUM bulk speed (speed on very long data) is higher than that of Spooky and City on
   Power7. On Haswell the bulk speed is higher than Spooky's and close to City's. */

#ifndef __MUM_HASH__
#define __MUM_HASH__

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#ifdef _MSC_VER
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif

#ifdef __GNUC__
#define _MUM_ATTRIBUTE_UNUSED __attribute__ ((unused))
#define _MUM_INLINE inline __attribute__ ((always_inline))
#else
#define _MUM_ATTRIBUTE_UNUSED
#define _MUM_INLINE inline
#endif

#if defined(MUM_QUALITY) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define MUM_TARGET_INDEPENDENT_HASH
#endif

/* Macro saying to use 128-bit integers implemented by GCC for some targets. */
#ifndef _MUM_USE_INT128
/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64. HOST_WIDE_INT is long if
   HOST_BITS_PER_LONG > HOST_BITS_PER_INT, otherwise int. */
#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
#define _MUM_USE_INT128 1
#else
#define _MUM_USE_INT128 0
#endif
#endif

/* Here are different primes randomly generated with equal probability of their bit values. They
   are used to randomize input values. */
static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;

static uint64_t _mum_primes[] = {
  0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f,
  0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81,
  0X3bc721b2aad05197, 0X71b1a19b907d6e33, 0X525e6c1084a8534b, 0X9e4c2cd340c1299f,
  0Xde3add92e94caa37, 0X7e14eadb1f65311d, 0X3f5aa40f89812853, 0X33b15a3b587d15c9,
};

/* Multiply 64-bit V and P and return sum of high and low parts of the result. */
static _MUM_INLINE uint64_t _mum (uint64_t v, uint64_t p) {
  uint64_t hi, lo;
#if _MUM_USE_INT128
  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
  hi = (uint64_t) (r >> 64);
  lo = (uint64_t) r;
#else
  /* Implementation of 64x64->128-bit multiplication by four 32x32->64-bit multiplications. */
  uint64_t hv = v >> 32, hp = p >> 32;
  uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
  uint64_t rh = hv * hp;
  uint64_t rm_0 = hv * lp;
  uint64_t rm_1 = hp * lv;
  uint64_t rl = lv * lp;
  uint64_t t, carry = 0;

  /* We could ignore the carry bit here if we did not care about getting the same hash on 32-bit
     and 64-bit targets. */
  t = rl + (rm_0 << 32);
#ifdef MUM_TARGET_INDEPENDENT_HASH
  carry = t < rl;
#endif
  lo = t + (rm_1 << 32);
#ifdef MUM_TARGET_INDEPENDENT_HASH
  carry += lo < t;
#endif
  hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
#endif
  /* We could use XOR here too but, for some reason, on Haswell and Power7 using an addition
     improves hashing performance by 10% for small strings. */
  return hi + lo;
}

#if defined(_MSC_VER)
#define _mum_bswap32(x) _byteswap_ulong (x)
#define _mum_bswap64(x) _byteswap_uint64 (x)
#elif defined(__APPLE__)
#include <libkern/OSByteOrder.h>
#define _mum_bswap32(x) OSSwapInt32 (x)
#define _mum_bswap64(x) OSSwapInt64 (x)
#elif defined(__GNUC__)
#define _mum_bswap32(x) __builtin_bswap32 (x)
#define _mum_bswap64(x) __builtin_bswap64 (x)
#else
#include <byteswap.h>
#define _mum_bswap32(x) bswap32 (x)
#define _mum_bswap64(x) bswap64 (x)
#endif

static _MUM_INLINE uint64_t _mum_le (uint64_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap64 (v);
#else
#error "Unknown endianess"
#endif
}

static _MUM_INLINE uint32_t _mum_le32 (uint32_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap32 (v);
#else
#error "Unknown endianess"
#endif
}

static _MUM_INLINE uint64_t _mum_le16 (uint16_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return (v >> 8) | ((v & 0xff) << 8);
#else
#error "Unknown endianess"
#endif
}

/* Macro defining how many times the most nested loop in _mum_hash_aligned will be unrolled by the
   compiler (although it can make its own decision :). Use only a constant here to help the
   compiler unroll the major loop.

   The macro value affects the resulting hash for strings > 128 bits. The unroll factor greatly
   affects the hashing speed. We prefer the speed. */
#ifndef _MUM_UNROLL_FACTOR_POWER
#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 3
#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 4
#elif defined(MUM_V1) || defined(MUM_V2)
#define _MUM_UNROLL_FACTOR_POWER 2
#else
#define _MUM_UNROLL_FACTOR_POWER 3
#endif
#endif

#if _MUM_UNROLL_FACTOR_POWER < 1
#error "too small unroll factor"
#elif _MUM_UNROLL_FACTOR_POWER > 4
#error "We have not enough primes for such unroll factor"
#endif

#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)

/* Rotate V left by SH. */
static _MUM_INLINE uint64_t _mum_rotl (uint64_t v, int sh) { return v << sh | v >> (64 - sh); }

#if defined(MUM_V1) || defined(MUM_V2) || !defined(MUM_QUALITY)
#define _MUM_TAIL_START(v) 0
#else
#define _MUM_TAIL_START(v) v
#endif
static _MUM_INLINE uint64_t
#if defined(__GNUC__) && !defined(__clang__)
__attribute__ ((__optimize__ ("unroll-loops")))
#endif
_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
  uint64_t result = start;
  const unsigned char *str = (const unsigned char *) key;
  uint64_t u64;
  size_t i;
  size_t n;

#ifndef MUM_V2
  result = _mum (result, _mum_block_start_prime);
#endif
  while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
    /* This loop could be vectorized when we have vector insns for 64x64->128-bit multiplication.
       AVX2 currently has vector insns only for four 32x32->64-bit multiplications and for one
       64x64->128-bit multiplication (pclmulqdq). */
#if defined(MUM_V1) || defined(MUM_V2)
    for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
      result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
#else
    for (i = 0; i < _MUM_UNROLL_FACTOR; i += 2)
      result ^= _mum (_mum_le (((uint64_t *) str)[i]) ^ _mum_primes[i],
                      _mum_le (((uint64_t *) str)[i + 1]) ^ _mum_primes[i + 1]);
#endif
    len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
    str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
    /* We will use the same prime numbers on the next iterations -- randomize the state. */
    result = _mum (result, _mum_unroll_prime);
  }
  n = len / sizeof (uint64_t);
#if defined(MUM_V1) || defined(MUM_V2) || !defined(MUM_QUALITY)
  for (i = 0; i < n; i++) result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
#else
  for (i = 0; i < n; i++)
    result ^= _mum (_mum_le (((uint64_t *) str)[i]) + _mum_primes[i], _mum_primes[i]);
#endif
  len -= n * sizeof (uint64_t);
  str += n * sizeof (uint64_t);
  switch (len) {
  case 7:
    u64 = _MUM_TAIL_START (_mum_primes[0]) + _mum_le32 (*(uint32_t *) str);
    u64 += _mum_le16 (*(uint16_t *) (str + 4)) << 32;
    u64 += (uint64_t) str[6] << 48;
    return result ^ _mum (u64, _mum_tail_prime);
  case 6:
    u64 = _MUM_TAIL_START (_mum_primes[1]) + _mum_le32 (*(uint32_t *) str);
    u64 += _mum_le16 (*(uint16_t *) (str + 4)) << 32;
    return result ^ _mum (u64, _mum_tail_prime);
  case 5:
    u64 = _MUM_TAIL_START (_mum_primes[2]) + _mum_le32 (*(uint32_t *) str);
    u64 += (uint64_t) str[4] << 32;
    return result ^ _mum (u64, _mum_tail_prime);
  case 4:
    u64 = _MUM_TAIL_START (_mum_primes[3]) + _mum_le32 (*(uint32_t *) str);
    return result ^ _mum (u64, _mum_tail_prime);
  case 3:
    u64 = _MUM_TAIL_START (_mum_primes[4]) + _mum_le16 (*(uint16_t *) str);
    u64 += (uint64_t) str[2] << 16;
    return result ^ _mum (u64, _mum_tail_prime);
  case 2:
    u64 = _MUM_TAIL_START (_mum_primes[5]) + _mum_le16 (*(uint16_t *) str);
    return result ^ _mum (u64, _mum_tail_prime);
  case 1:
    u64 = _MUM_TAIL_START (_mum_primes[6]) + str[0];
    return result ^ _mum (u64, _mum_tail_prime);
  }
  return result;
}

/* Final randomization of H. */
static _MUM_INLINE uint64_t _mum_final (uint64_t h) {
#if defined(MUM_V1)
  h ^= _mum (h, _mum_finish_prime1);
  h ^= _mum (h, _mum_finish_prime2);
#elif defined(MUM_V2)
  h ^= _mum_rotl (h, 33);
  h ^= _mum (h, _mum_finish_prime1);
#else
  h = _mum (h, h);
#endif
  return h;
}

#ifndef _MUM_UNALIGNED_ACCESS
#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) || defined(__s390__) \
    || defined(__m32c__) || defined(cris) || defined(__CR16__) || defined(__vax__) \
    || defined(__m68k__) || defined(__aarch64__) || defined(_M_AMD64) || defined(_M_IX86)
#define _MUM_UNALIGNED_ACCESS 1
#else
#define _MUM_UNALIGNED_ACCESS 0
#endif
#endif

/* When we need aligned access to the data being hashed, we copy part of the unaligned data into an
   aligned block of the given size, process it, and repeat the processing block by block. */
#ifndef _MUM_BLOCK_LEN
#define _MUM_BLOCK_LEN 1024
#endif

#if _MUM_BLOCK_LEN < 8
#error "too small block length"
#endif

static _MUM_INLINE uint64_t
#if defined(__x86_64__) && defined(__GNUC__) && !defined(__clang__)
__attribute__ ((__target__ ("inline-all-stringops")))
#endif
_mum_hash_default (const void *key, size_t len, uint64_t seed) {
  uint64_t result;
  const unsigned char *str = (const unsigned char *) key;
  size_t block_len;
  uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];

  result = seed + len;
  if (((size_t) str & 0x7) == 0)
    result = _mum_hash_aligned (result, key, len);
  else {
    while (len != 0) {
      block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
      memcpy (buf, str, block_len);
      result = _mum_hash_aligned (result, buf, block_len);
      len -= block_len;
      str += block_len;
    }
  }
  return _mum_final (result);
}

static _MUM_INLINE uint64_t _mum_next_factor (void) {
  uint64_t start = 0;
  int i;

  for (i = 0; i < 8; i++) start = (start << 8) | rand () % 256;
  return start;
}

/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++ */

/* Set random multipliers depending on SEED. */
static _MUM_INLINE void mum_hash_randomize (uint64_t seed) {
  size_t i;

  srand (seed);
  _mum_hash_step_prime = _mum_next_factor ();
  _mum_key_step_prime = _mum_next_factor ();
  _mum_finish_prime1 = _mum_next_factor ();
  _mum_finish_prime2 = _mum_next_factor ();
  _mum_block_start_prime = _mum_next_factor ();
  _mum_unroll_prime = _mum_next_factor ();
  _mum_tail_prime = _mum_next_factor ();
  for (i = 0; i < sizeof (_mum_primes) / sizeof (uint64_t); i++)
    _mum_primes[i] = _mum_next_factor ();
}
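
/* A minimal usage sketch (illustrative note added by the editor, not part of the original header):
   call mum_hash_randomize once at program start, before any hashing, e.g. with a run-time value
   such as time (NULL), so the multiplication constants -- and therefore all subsequently computed
   hashes -- differ between runs:

     mum_hash_randomize ((uint64_t) time (NULL));   (requires <time.h>)
*/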

/* Start hashing data with SEED. Return the state. */
static _MUM_INLINE uint64_t mum_hash_init (uint64_t seed) { return seed; }

/* Process data KEY with the state H and return the updated state. */
static _MUM_INLINE uint64_t mum_hash_step (uint64_t h, uint64_t key) {
  return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
}

/* Return the result of hashing using the current state H. */
static _MUM_INLINE uint64_t mum_hash_finish (uint64_t h) { return _mum_final (h); }

/* Fast hashing of KEY with SEED. The hash is always the same for the same key on any target. */
static _MUM_INLINE size_t mum_hash64 (uint64_t key, uint64_t seed) {
  return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
}

/* Hash data KEY of length LEN with SEED. The hash depends on the target endianness and the unroll
   factor. */
static _MUM_INLINE uint64_t mum_hash (const void *key, size_t len, uint64_t seed) {
#if _MUM_UNALIGNED_ACCESS
  return _mum_final (_mum_hash_aligned (seed + len, key, len));
#else
  return _mum_hash_default (key, len, seed);
#endif
}
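
/* Example use of the interface (illustrative sketch added by the editor, not part of the original
   header): one-shot hashing of a byte buffer, one-shot hashing of a single 64-bit key, and the
   incremental interface. Since mum_hash64 is exactly init + one step + finish, h2 == h3 below.

     #include "mum.h"
     #include <stdio.h>
     #include <string.h>

     int main (void) {
       const char *s = "hello, world";
       uint64_t seed = 42;
       uint64_t h1 = mum_hash (s, strlen (s), seed);            // hash an arbitrary byte buffer
       uint64_t h2 = mum_hash64 (0x0123456789abcdefULL, seed);  // hash a single 64-bit key

       uint64_t state = mum_hash_init (seed);                   // incremental hashing of 64-bit words
       state = mum_hash_step (state, 0x0123456789abcdefULL);
       uint64_t h3 = mum_hash_finish (state);                   // h3 == h2

       printf ("%016llx %016llx %016llx\n", (unsigned long long) h1, (unsigned long long) h2,
               (unsigned long long) h3);
       return 0;
     }
*/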

#endif