/* mum.h — MUM (MUltiply and Mix) hash by Vladimir Makarov,
   as vendored in freebsd/pkg under external/include/mum.h. */
/* Copyright (c) 2016-2025
   Vladimir Makarov <[email protected]>

   Permission is hereby granted, free of charge, to any person
   obtaining a copy of this software and associated documentation
   files (the "Software"), to deal in the Software without
   restriction, including without limitation the rights to use, copy,
   modify, merge, publish, distribute, sublicense, and/or sell copies
   of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/
/* This file implements MUM (MUltiply and Mix) hashing. We randomize input data by 64x64-bit
   multiplication and mixing hi- and low-parts of the multiplication result by using an addition and
   then mix it into the current state. We use prime numbers randomly generated with the equal
   probability of their bit values for the multiplication. When all primes are used once, the state
   is randomized and the same prime numbers are used again for data randomization.

   The MUM hashing passes all SMHasher tests. A Pseudo Random Number Generator based on MUM also
   passes the NIST Statistical Test Suite for Random and Pseudorandom Number Generators for
   Cryptographic Applications (version 2.2.1) with 1000 bitstreams each containing 1M bits. MUM
   hashing is also faster than Spooky64 and City64 on small strings (at least up to 512 bits) on
   Haswell and Power7. The MUM bulk speed (speed on very long data) is bigger than Spooky and City
   on Power7. On Haswell the bulk speed is bigger than the Spooky one and close to City speed. */
#ifndef __MUM_HASH__
#define __MUM_HASH__

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

/* MSVC historically lacked <stdint.h>; provide the fixed-width types we need.
   Everywhere else the standard header is used. */
#ifdef _MSC_VER
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif

/* Attribute helpers: expand to GCC/Clang attributes where supported and to
   nothing (or plain `inline`) elsewhere. */
#ifdef __GNUC__
#define _MUM_ATTRIBUTE_UNUSED __attribute__ ((unused))
#define _MUM_INLINE inline __attribute__ ((always_inline))
#else
#define _MUM_ATTRIBUTE_UNUSED
#define _MUM_INLINE inline
#endif

/* Requesting higher-quality hashing implies producing the same hash on every
   target (endianness- and width-independent). */
#if defined(MUM_QUALITY) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define MUM_TARGET_INDEPENDENT_HASH
#endif

/* Macro saying to use 128-bit integers implemented by GCC for some targets. */
#ifndef _MUM_USE_INT128
/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64. HOST_WIDE_INT is long if
   HOST_BITS_PER_LONG > HOST_BITS_PER_INT, otherwise int. */
#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
#define _MUM_USE_INT128 1
#else
#define _MUM_USE_INT128 0
#endif
#endif
/* Here are different primes randomly generated with the equal probability of their bit values. They
   are used to randomize input values.  They are mutable (not const) because mum_hash_randomize
   below re-seeds all of them. */
static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
/* NOTE(review): _mum_finish_prime2 has the same value as _mum_tail_prime —
   presumably intentional upstream, but worth confirming. */
static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;

/* Per-word multipliers for the unrolled and tail loops of _mum_hash_aligned. */
static uint64_t _mum_primes[] = {
  0x9ebdcae10d981691, 0x32b9b9b97a27ac7d, 0x29b5584d83d35bbd, 0x4b04e0e61401255f,
  0x25e8f7b1f1c9d027, 0x80d4c8c000f3e881, 0xbd1255431904b9dd, 0x8a3bd4485eee6d81,
  0x3bc721b2aad05197, 0x71b1a19b907d6e33, 0x525e6c1084a8534b, 0x9e4c2cd340c1299f,
  0xde3add92e94caa37, 0x7e14eadb1f65311d, 0x3f5aa40f89812853, 0x33b15a3b587d15c9,
};
/* Multiply 64-bit V and P and return sum of high and low parts of the result. */
95
static _MUM_INLINE uint64_t _mum (uint64_t v, uint64_t p) {
96
uint64_t hi, lo;
97
#if _MUM_USE_INT128
98
__uint128_t r = (__uint128_t) v * (__uint128_t) p;
99
hi = (uint64_t) (r >> 64);
100
lo = (uint64_t) r;
101
#else
102
/* Implementation of 64x64->128-bit multiplication by four 32x32->64 bit multiplication. */
103
uint64_t hv = v >> 32, hp = p >> 32;
104
uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
105
uint64_t rh = hv * hp;
106
uint64_t rm_0 = hv * lp;
107
uint64_t rm_1 = hp * lv;
108
uint64_t rl = lv * lp;
109
uint64_t t, carry = 0;
110
111
/* We could ignore a carry bit here if we did not care about the same hash for 32-bit and 64-bit
112
targets. */
113
t = rl + (rm_0 << 32);
114
#ifdef MUM_TARGET_INDEPENDENT_HASH
115
carry = t < rl;
116
#endif
117
lo = t + (rm_1 << 32);
118
#ifdef MUM_TARGET_INDEPENDENT_HASH
119
carry += lo < t;
120
#endif
121
hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
122
#endif
123
/* We could use XOR here too but, for some reasons, on Haswell and Power7 using an addition
124
improves hashing performance by 10% for small strings. */
125
return hi + lo;
126
}
127
128
/* Byte-swap helpers used to normalize big-endian loads when a target-independent hash is
   requested. */
#if defined(_MSC_VER)
/* Fix: the MSVC byte-swap intrinsics are _byteswap_ulong and _byteswap_uint64;
   the previously referenced _byteswap_uint32_t/_byteswap_uint64_t do not exist. */
#define _mum_bswap32(x) _byteswap_ulong (x)
#define _mum_bswap64(x) _byteswap_uint64 (x)
#elif defined(__APPLE__)
#include <libkern/OSByteOrder.h>
#define _mum_bswap32(x) OSSwapInt32 (x)
#define _mum_bswap64(x) OSSwapInt64 (x)
#elif defined(__GNUC__)
#define _mum_bswap32(x) __builtin_bswap32 (x)
#define _mum_bswap64(x) __builtin_bswap64 (x)
#else
#include <byteswap.h>
/* Fix: glibc's <byteswap.h> defines bswap_32/bswap_64 (with underscores). */
#define _mum_bswap32(x) bswap_32 (x)
#define _mum_bswap64(x) bswap_64 (x)
#endif
/* Load-normalization for a 64-bit value: return V as if read little-endian.  When a
   target-independent hash is not requested, the native order is used as-is. */
static _MUM_INLINE uint64_t _mum_le (uint64_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap64 (v);
#else
#error "Unknown endianess"
#endif
}
/* Same as _mum_le but for a 32-bit value. */
static _MUM_INLINE uint32_t _mum_le32 (uint32_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap32 (v);
#else
#error "Unknown endianess"
#endif
}
/* Same as _mum_le but for a 16-bit value; the swap is done by hand since there is no 16-bit
   _mum_bswap macro.  Returns uint64_t so callers can shift the result without extra casts. */
static _MUM_INLINE uint64_t _mum_le16 (uint16_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return (v >> 8) | ((v & 0xff) << 8);
#else
#error "Unknown endianess"
#endif
}
/* Macro defining how many times the most nested loop in _mum_hash_aligned will be unrolled by the
   compiler (although it can make an own decision:). Use only a constant here to help a compiler to
   unroll a major loop.

   The macro value affects the result hash for strings > 128 bit. The unroll factor greatly affects
   the hashing speed. We prefer the speed. */
#ifndef _MUM_UNROLL_FACTOR_POWER
#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 3
#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 4
#elif defined(MUM_V1) || defined(MUM_V2)
#define _MUM_UNROLL_FACTOR_POWER 2
#else
#define _MUM_UNROLL_FACTOR_POWER 3
#endif
#endif

/* _mum_primes has 16 entries, so the unroll factor (2^power) must fit in [2, 16]. */
#if _MUM_UNROLL_FACTOR_POWER < 1
#error "too small unroll factor"
#elif _MUM_UNROLL_FACTOR_POWER > 4
#error "We have not enough primes for such unroll factor"
#endif

#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
/* Rotate V left by SH. */
201
static _MUM_INLINE uint64_t _mum_rotl (uint64_t v, int sh) { return v << sh | v >> (64 - sh); }
202
203
/* XOR of A and B, except that (outside of MUM_V3) an all-zero result is replaced by B so that
   equal inputs cannot zero out a lane fed to _mum. */
static _MUM_INLINE uint64_t _mum_xor (uint64_t a, uint64_t b) {
#ifdef MUM_V3
  return a ^ b;
#else
  return (a ^ b) != 0 ? a ^ b : b;
#endif
}
/* Starting value for the tail (len % 8 bytes) accumulator in _mum_hash_aligned: zero for the
   legacy/fast variants, a per-length prime V for the quality variant. */
#if defined(MUM_V1) || defined(MUM_V2) || !defined(MUM_QUALITY)
#define _MUM_TAIL_START(v) 0
#else
#define _MUM_TAIL_START(v) v
#endif
static _MUM_INLINE uint64_t
217
#if defined(__GNUC__) && !defined(__clang__)
218
__attribute__ ((__optimize__ ("unroll-loops")))
219
#endif
220
_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
221
uint64_t result = start;
222
const unsigned char *str = (const unsigned char *) key;
223
uint64_t u64;
224
size_t i;
225
size_t n;
226
227
#ifndef MUM_V2
228
result = _mum (result, _mum_block_start_prime);
229
#endif
230
while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
231
/* This loop could be vectorized when we have vector insns for 64x64->128-bit multiplication.
232
AVX2 currently only have vector insns for 4 32x32->64-bit multiplication and for 1
233
64x64->128-bit multiplication (pclmulqdq). */
234
#if defined(MUM_V1) || defined(MUM_V2)
235
for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
236
result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
237
#else
238
for (i = 0; i < _MUM_UNROLL_FACTOR; i += 2)
239
result ^= _mum (_mum_xor (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]),
240
_mum_xor (_mum_le (((uint64_t *) str)[i + 1]), _mum_primes[i + 1]));
241
#endif
242
len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
243
str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
244
/* We will use the same prime numbers on the next iterations -- randomize the state. */
245
result = _mum (result, _mum_unroll_prime);
246
}
247
n = len / sizeof (uint64_t);
248
#if defined(MUM_V1) || defined(MUM_V2) || !defined(MUM_QUALITY)
249
for (i = 0; i < n; i++) result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
250
#else
251
for (i = 0; i < n; i++)
252
result ^= _mum (_mum_le (((uint64_t *) str)[i]) + _mum_primes[i], _mum_primes[i]);
253
#endif
254
len -= n * sizeof (uint64_t);
255
str += n * sizeof (uint64_t);
256
switch (len) {
257
case 7:
258
u64 = _MUM_TAIL_START (_mum_primes[0]) + _mum_le32 (*(uint32_t *) str);
259
u64 += _mum_le16 (*(uint16_t *) (str + 4)) << 32;
260
u64 += (uint64_t) str[6] << 48;
261
return result ^ _mum (u64, _mum_tail_prime);
262
case 6:
263
u64 = _MUM_TAIL_START (_mum_primes[1]) + _mum_le32 (*(uint32_t *) str);
264
u64 += _mum_le16 (*(uint16_t *) (str + 4)) << 32;
265
return result ^ _mum (u64, _mum_tail_prime);
266
case 5:
267
u64 = _MUM_TAIL_START (_mum_primes[2]) + _mum_le32 (*(uint32_t *) str);
268
u64 += (uint64_t) str[4] << 32;
269
return result ^ _mum (u64, _mum_tail_prime);
270
case 4:
271
u64 = _MUM_TAIL_START (_mum_primes[3]) + _mum_le32 (*(uint32_t *) str);
272
return result ^ _mum (u64, _mum_tail_prime);
273
case 3:
274
u64 = _MUM_TAIL_START (_mum_primes[4]) + _mum_le16 (*(uint16_t *) str);
275
u64 += (uint64_t) str[2] << 16;
276
return result ^ _mum (u64, _mum_tail_prime);
277
case 2:
278
u64 = _MUM_TAIL_START (_mum_primes[5]) + _mum_le16 (*(uint16_t *) str);
279
return result ^ _mum (u64, _mum_tail_prime);
280
case 1:
281
u64 = _MUM_TAIL_START (_mum_primes[6]) + str[0];
282
return result ^ _mum (u64, _mum_tail_prime);
283
}
284
return result;
285
}
286
287
/* Final randomization of H. */
288
static _MUM_INLINE uint64_t _mum_final (uint64_t h) {
289
#if defined(MUM_V1)
290
h ^= _mum (h, _mum_finish_prime1);
291
h ^= _mum (h, _mum_finish_prime2);
292
#elif defined(MUM_V2)
293
h ^= _mum_rotl (h, 33);
294
h ^= _mum (h, _mum_finish_prime1);
295
#else
296
h = _mum (h, h);
297
#endif
298
return h;
299
}
300
301
/* Targets known to tolerate unaligned 64-bit loads; elsewhere data is copied to an aligned
   buffer first (see _mum_hash_default). */
#ifndef _MUM_UNALIGNED_ACCESS
#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) || defined(__s390__) \
    || defined(__m32c__) || defined(cris) || defined(__CR16__) || defined(__vax__) \
    || defined(__m68k__) || defined(__aarch64__) || defined(_M_AMD64) || defined(_M_IX86)
#define _MUM_UNALIGNED_ACCESS 1
#else
#define _MUM_UNALIGNED_ACCESS 0
#endif
#endif

/* When we need an aligned access to data being hashed we move part of the unaligned data to an
   aligned block of given size and then process it, repeating processing the data by the block. */
#ifndef _MUM_BLOCK_LEN
#define _MUM_BLOCK_LEN 1024
#endif

#if _MUM_BLOCK_LEN < 8
#error "too small block length"
#endif
static _MUM_INLINE uint64_t
322
#if defined(__x86_64__) && defined(__GNUC__) && !defined(__clang__)
323
__attribute__ ((__target__ ("inline-all-stringops")))
324
#endif
325
_mum_hash_default (const void *key, size_t len, uint64_t seed) {
326
uint64_t result;
327
const unsigned char *str = (const unsigned char *) key;
328
size_t block_len;
329
uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
330
331
result = seed + len;
332
if (((size_t) str & 0x7) == 0)
333
result = _mum_hash_aligned (result, key, len);
334
else {
335
while (len != 0) {
336
block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
337
memcpy (buf, str, block_len);
338
result = _mum_hash_aligned (result, buf, block_len);
339
len -= block_len;
340
str += block_len;
341
}
342
}
343
return _mum_final (result);
344
}
345
346
/* Build a pseudo-random 64-bit multiplier from eight successive rand() bytes.  Depends on the
   rand() stream seeded by mum_hash_randomize. */
static _MUM_INLINE uint64_t _mum_next_factor (void) {
  uint64_t start = 0;
  int i;

  for (i = 0; i < 8; i++) start = (start << 8) | rand () % 256;
  return start;
}
/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++ */
355
356
/* Set random multiplicators depending on SEED. */
357
static _MUM_INLINE void mum_hash_randomize (uint64_t seed) {
358
size_t i;
359
360
srand (seed);
361
_mum_hash_step_prime = _mum_next_factor ();
362
_mum_key_step_prime = _mum_next_factor ();
363
_mum_finish_prime1 = _mum_next_factor ();
364
_mum_finish_prime2 = _mum_next_factor ();
365
_mum_block_start_prime = _mum_next_factor ();
366
_mum_unroll_prime = _mum_next_factor ();
367
_mum_tail_prime = _mum_next_factor ();
368
for (i = 0; i < sizeof (_mum_primes) / sizeof (uint64_t); i++)
369
_mum_primes[i] = _mum_next_factor ();
370
}
371
372
/* Start hashing data with SEED. Return the state. */
373
static _MUM_INLINE uint64_t mum_hash_init (uint64_t seed) { return seed; }
374
375
/* Process data KEY with the state H and return the updated state. */
376
static _MUM_INLINE uint64_t mum_hash_step (uint64_t h, uint64_t key) {
377
return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
378
}
379
380
/* Return the result of hashing using the current state H. */
381
static _MUM_INLINE uint64_t mum_hash_finish (uint64_t h) { return _mum_final (h); }
382
383
/* Fast hashing of KEY with SEED. The hash is always the same for the same key on any target. */
384
static _MUM_INLINE size_t mum_hash64 (uint64_t key, uint64_t seed) {
385
return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
386
}
387
388
/* Hash data KEY of length LEN and SEED. The hash depends on the target endianess and the unroll
389
factor. */
390
static _MUM_INLINE uint64_t mum_hash (const void *key, size_t len, uint64_t seed) {
391
#if _MUM_UNALIGNED_ACCESS
392
return _mum_final (_mum_hash_aligned (seed + len, key, len));
393
#else
394
return _mum_hash_default (key, len, seed);
395
#endif
396
}
397
398
#endif /* __MUM_HASH__ */