CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/Data/Collections/Hashmaps.h
Views: 1401
1
#pragma once
2
3
#include <cstdint> /* uint32_t */
4
#include <cstring>
5
#include <vector>
6
7
#include "ext/xxhash.h"
8
#include "Common/CommonFuncs.h"
9
#include "Common/Log.h"
10
11
// TODO: Try hardware CRC. Unfortunately not available on older Intels or ARM32.
12
// Seems to be ubiquitous on ARM64 though.
13
template<class K>
14
inline uint32_t HashKey(const K &k) {
15
return XXH3_64bits(&k, sizeof(k)) & 0xFFFFFFFF;
16
}
17
template<class K>
18
inline bool KeyEquals(const K &a, const K &b) {
19
return !memcmp(&a, &b, sizeof(K));
20
}
21
22
enum class BucketState : uint8_t {
23
FREE,
24
TAKEN,
25
REMOVED, // for linear probing to work (and removal during deletion) we need tombstones
26
};
27
28
// Uses linear probing for cache-friendliness. Not segregating values from keys because
29
// we always use very small values, so it's probably better to have them in the same
30
// cache-line as the corresponding key.
31
// Enforces that value are pointers to make sure that combined storage makes sense.
32
template <class Key, class Value>
33
class DenseHashMap {
34
public:
35
DenseHashMap(int initialCapacity) : capacity_(initialCapacity) {
36
map.resize(initialCapacity);
37
state.resize(initialCapacity);
38
}
39
40
// Returns true if the entry was found, and writes the entry to *value.
41
// Returns false and does not write to value if no entry was found.
42
// Note that nulls can be stored.
43
bool Get(const Key &key, Value *value) const {
44
uint32_t mask = capacity_ - 1;
45
uint32_t pos = HashKey(key) & mask;
46
// No? Let's go into search mode. Linear probing.
47
uint32_t p = pos;
48
while (true) {
49
if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key)) {
50
*value = map[p].value;
51
return true;
52
} else if (state[p] == BucketState::FREE) {
53
return false;
54
}
55
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
56
if (p == pos) {
57
// We looped around the whole map.
58
_assert_msg_(false, "DenseHashMap: Hit full on Get()");
59
}
60
}
61
return false;
62
}
63
64
// Only works if Value can be nullptr
65
Value GetOrNull(const Key &key) const {
66
Value value;
67
if (Get(key, &value)) {
68
return value;
69
} else {
70
return (Value)nullptr;
71
}
72
}
73
74
bool ContainsKey(const Key &key) const {
75
// Slightly wasteful, though compiler might optimize it.
76
Value value;
77
return Get(key, &value);
78
}
79
80
// Asserts if we already had the key!
81
bool Insert(const Key &key, Value value) {
82
// Check load factor, resize if necessary. We never shrink.
83
if (count_ > capacity_ / 2) {
84
Grow(2);
85
}
86
uint32_t mask = capacity_ - 1;
87
uint32_t pos = HashKey(key) & mask;
88
uint32_t p = pos;
89
while (true) {
90
if (state[p] == BucketState::TAKEN) {
91
if (KeyEquals(key, map[p].key)) {
92
// Bad! We already got this one. Let's avoid this case.
93
_assert_msg_(false, "DenseHashMap: Duplicate key of size %d inserted", (int)sizeof(Key));
94
return false;
95
}
96
// continue looking....
97
} else {
98
// Got a place, either removed or FREE.
99
break;
100
}
101
p = (p + 1) & mask;
102
if (p == pos) {
103
// FULL! Error. Should not happen thanks to Grow().
104
_assert_msg_(false, "DenseHashMap: Hit full on Insert()");
105
}
106
}
107
if (state[p] == BucketState::REMOVED) {
108
removedCount_--;
109
}
110
state[p] = BucketState::TAKEN;
111
map[p].key = key;
112
map[p].value = value;
113
count_++;
114
return true;
115
}
116
117
bool Remove(const Key &key) {
118
uint32_t mask = capacity_ - 1;
119
uint32_t pos = HashKey(key) & mask;
120
uint32_t p = pos;
121
while (state[p] != BucketState::FREE) {
122
if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key)) {
123
// Got it! Mark it as removed.
124
state[p] = BucketState::REMOVED;
125
removedCount_++;
126
count_--;
127
return true;
128
}
129
p = (p + 1) & mask;
130
if (p == pos) {
131
// FULL! Error. Should not happen.
132
_assert_msg_(false, "DenseHashMap: Hit full on Remove()");
133
}
134
}
135
return false;
136
}
137
138
// This will never crash if you call it without locking - but, the value might not be right.
139
size_t size() const {
140
return count_;
141
}
142
143
template<class T>
144
inline void Iterate(T func) const {
145
for (size_t i = 0; i < map.size(); i++) {
146
if (state[i] == BucketState::TAKEN) {
147
func(map[i].key, map[i].value);
148
}
149
}
150
}
151
152
template<class T>
153
inline void IterateMut(T func) {
154
for (size_t i = 0; i < map.size(); i++) {
155
if (state[i] == BucketState::TAKEN) {
156
func(map[i].key, map[i].value);
157
}
158
}
159
}
160
161
// Note! Does NOT delete any pointed-to data (in case you stored pointers in the map).
162
void Clear() {
163
memset(state.data(), (int)BucketState::FREE, state.size());
164
count_ = 0;
165
removedCount_ = 0;
166
}
167
168
void Rebuild() {
169
Grow(1);
170
}
171
172
void Maintain() {
173
// Heuristic
174
if (removedCount_ >= capacity_ / 4) {
175
Rebuild();
176
}
177
}
178
179
private:
180
void Grow(int factor) {
181
// We simply move out the existing data, then we re-insert the old.
182
// This is extremely non-atomic and will need synchronization.
183
std::vector<Pair> old = std::move(map);
184
std::vector<BucketState> oldState = std::move(state);
185
// Can't assume move will clear, it just may clear.
186
map.clear();
187
state.clear();
188
189
int oldCount = count_;
190
capacity_ *= factor;
191
map.resize(capacity_);
192
state.resize(capacity_);
193
count_ = 0; // Insert will update it.
194
removedCount_ = 0;
195
for (size_t i = 0; i < old.size(); i++) {
196
if (oldState[i] == BucketState::TAKEN) {
197
Insert(old[i].key, old[i].value);
198
}
199
}
200
_assert_msg_(oldCount == count_, "DenseHashMap: count should not change in Grow()");
201
}
202
struct Pair {
203
Key key;
204
Value value;
205
};
206
std::vector<Pair> map;
207
std::vector<BucketState> state;
208
int capacity_;
209
int count_ = 0;
210
int removedCount_ = 0;
211
};
212
213
// Like the above, uses linear probing for cache-friendliness.
214
// Does not perform hashing at all so expects well-distributed keys.
215
template <class Value>
216
class PrehashMap {
217
public:
218
PrehashMap(int initialCapacity) : capacity_(initialCapacity) {
219
map.resize(initialCapacity);
220
state.resize(initialCapacity);
221
}
222
223
// Returns nullptr if no entry was found.
224
bool Get(uint32_t hash, Value *value) {
225
uint32_t mask = capacity_ - 1;
226
uint32_t pos = hash & mask;
227
// No? Let's go into search mode. Linear probing.
228
uint32_t p = pos;
229
while (true) {
230
if (state[p] == BucketState::TAKEN && hash == map[p].hash) {
231
*value = map[p].value;
232
return true;
233
} else if (state[p] == BucketState::FREE) {
234
return false;
235
}
236
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
237
if (p == pos) {
238
_assert_msg_(false, "PrehashMap: Hit full on Get()");
239
}
240
}
241
return false;
242
}
243
244
// Returns false if we already had the key! Which is a bit different.
245
bool Insert(uint32_t hash, Value value) {
246
// Check load factor, resize if necessary. We never shrink.
247
if (count_ > capacity_ / 2) {
248
Grow(2);
249
}
250
uint32_t mask = capacity_ - 1;
251
uint32_t pos = hash & mask;
252
uint32_t p = pos;
253
while (state[p] != BucketState::FREE) {
254
if (state[p] == BucketState::TAKEN) {
255
if (hash == map[p].hash)
256
return false; // Bad!
257
} else {
258
// Got a place, either removed or FREE.
259
break;
260
}
261
p = (p + 1) & mask;
262
if (p == pos) {
263
// FULL! Error. Should not happen thanks to Grow().
264
_assert_msg_(false, "PrehashMap: Hit full on Insert()");
265
}
266
}
267
if (state[p] == BucketState::REMOVED) {
268
removedCount_--;
269
}
270
state[p] = BucketState::TAKEN;
271
map[p].hash = hash;
272
map[p].value = value;
273
count_++;
274
return true;
275
}
276
277
bool Remove(uint32_t hash) {
278
uint32_t mask = capacity_ - 1;
279
uint32_t pos = hash & mask;
280
uint32_t p = pos;
281
while (state[p] != BucketState::FREE) {
282
if (state[p] == BucketState::TAKEN && hash == map[p].hash) {
283
// Got it!
284
state[p] = BucketState::REMOVED;
285
removedCount_++;
286
count_--;
287
return true;
288
}
289
p = (p + 1) & mask;
290
if (p == pos) {
291
_assert_msg_(false, "PrehashMap: Hit full on Remove()");
292
}
293
}
294
return false;
295
}
296
297
size_t size() {
298
return count_;
299
}
300
301
template<class T>
302
void Iterate(T func) const {
303
for (size_t i = 0; i < map.size(); i++) {
304
if (state[i] == BucketState::TAKEN) {
305
func(map[i].hash, map[i].value);
306
}
307
}
308
}
309
310
void Clear() {
311
memset(state.data(), (int)BucketState::FREE, state.size());
312
count_ = 0;
313
removedCount_ = 0;
314
}
315
316
// Gets rid of REMOVED tombstones, making lookups somewhat more efficient.
317
void Rebuild() {
318
Grow(1);
319
}
320
321
void Maintain() {
322
// Heuristic
323
if (removedCount_ >= capacity_ / 4) {
324
Rebuild();
325
}
326
}
327
328
private:
329
void Grow(int factor) {
330
// We simply move out the existing data, then we re-insert the old.
331
// This is extremely non-atomic and will need synchronization.
332
std::vector<Pair> old = std::move(map);
333
std::vector<BucketState> oldState = std::move(state);
334
// Can't assume move will clear, it just may clear.
335
map.clear();
336
state.clear();
337
338
int oldCount = count_;
339
int oldCapacity = capacity_;
340
capacity_ *= factor;
341
map.resize(capacity_);
342
state.resize(capacity_);
343
count_ = 0; // Insert will update it.
344
removedCount_ = 0;
345
for (size_t i = 0; i < old.size(); i++) {
346
if (oldState[i] == BucketState::TAKEN) {
347
Insert(old[i].hash, old[i].value);
348
}
349
}
350
INFO_LOG(Log::G3D, "Grew hashmap capacity from %d to %d", oldCapacity, capacity_);
351
_assert_msg_(oldCount == count_, "PrehashMap: count should not change in Grow()");
352
}
353
struct Pair {
354
uint32_t hash;
355
Value value;
356
};
357
std::vector<Pair> map;
358
std::vector<BucketState> state;
359
int capacity_;
360
int count_ = 0;
361
int removedCount_ = 0;
362
};
363
364