Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Python/hashtable.c
12 views
1
/* The implementation of the hash table (_Py_hashtable_t) is based on the
2
cfuhash project:
3
http://sourceforge.net/projects/libcfu/
4
5
Copyright of cfuhash:
6
----------------------------------
7
Creation date: 2005-06-24 21:22:40
8
Authors: Don
9
Change log:
10
11
Copyright (c) 2005 Don Owens
12
All rights reserved.
13
14
This code is released under the BSD license:
15
16
Redistribution and use in source and binary forms, with or without
17
modification, are permitted provided that the following conditions
18
are met:
19
20
* Redistributions of source code must retain the above copyright
21
notice, this list of conditions and the following disclaimer.
22
23
* Redistributions in binary form must reproduce the above
24
copyright notice, this list of conditions and the following
25
disclaimer in the documentation and/or other materials provided
26
with the distribution.
27
28
* Neither the name of the author nor the names of its
29
contributors may be used to endorse or promote products derived
30
from this software without specific prior written permission.
31
32
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43
OF THE POSSIBILITY OF SUCH DAMAGE.
44
----------------------------------
45
*/
46
47
#include "Python.h"
48
#include "pycore_hashtable.h"
49
50
#define HASHTABLE_MIN_SIZE 16
51
#define HASHTABLE_HIGH 0.50
52
#define HASHTABLE_LOW 0.10
53
#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
54
55
#define BUCKETS_HEAD(SLIST) \
56
((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
57
#define TABLE_HEAD(HT, BUCKET) \
58
((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
59
#define ENTRY_NEXT(ENTRY) \
60
((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
61
62
/* Forward declaration */
63
static int hashtable_rehash(_Py_hashtable_t *ht);
64
65
static void
66
_Py_slist_init(_Py_slist_t *list)
67
{
68
list->head = NULL;
69
}
70
71
72
static void
73
_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
74
{
75
item->next = list->head;
76
list->head = item;
77
}
78
79
80
static void
81
_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
82
_Py_slist_item_t *item)
83
{
84
if (previous != NULL)
85
previous->next = item->next;
86
else
87
list->head = item->next;
88
}
89
90
91
Py_uhash_t
92
_Py_hashtable_hash_ptr(const void *key)
93
{
94
return (Py_uhash_t)_Py_HashPointerRaw(key);
95
}
96
97
98
int
99
_Py_hashtable_compare_direct(const void *key1, const void *key2)
100
{
101
return (key1 == key2);
102
}
103
104
105
/* makes sure the real size of the buckets array is a power of 2 */
106
static size_t
107
round_size(size_t s)
108
{
109
size_t i;
110
if (s < HASHTABLE_MIN_SIZE)
111
return HASHTABLE_MIN_SIZE;
112
i = 1;
113
while (i < s)
114
i <<= 1;
115
return i;
116
}
117
118
119
size_t
120
_Py_hashtable_size(const _Py_hashtable_t *ht)
121
{
122
size_t size = sizeof(_Py_hashtable_t);
123
/* buckets */
124
size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
125
/* entries */
126
size += ht->nentries * sizeof(_Py_hashtable_entry_t);
127
return size;
128
}
129
130
131
_Py_hashtable_entry_t *
132
_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
133
{
134
Py_uhash_t key_hash = ht->hash_func(key);
135
size_t index = key_hash & (ht->nbuckets - 1);
136
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
137
while (1) {
138
if (entry == NULL) {
139
return NULL;
140
}
141
if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
142
break;
143
}
144
entry = ENTRY_NEXT(entry);
145
}
146
return entry;
147
}
148
149
150
// Specialized for:
151
// hash_func == _Py_hashtable_hash_ptr
152
// compare_func == _Py_hashtable_compare_direct
153
static _Py_hashtable_entry_t *
154
_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
155
{
156
Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
157
size_t index = key_hash & (ht->nbuckets - 1);
158
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
159
while (1) {
160
if (entry == NULL) {
161
return NULL;
162
}
163
// Compare directly keys (ignore entry->key_hash)
164
if (entry->key == key) {
165
break;
166
}
167
entry = ENTRY_NEXT(entry);
168
}
169
return entry;
170
}
171
172
173
void*
174
_Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
175
{
176
Py_uhash_t key_hash = ht->hash_func(key);
177
size_t index = key_hash & (ht->nbuckets - 1);
178
179
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
180
_Py_hashtable_entry_t *previous = NULL;
181
while (1) {
182
if (entry == NULL) {
183
// not found
184
return NULL;
185
}
186
if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
187
break;
188
}
189
previous = entry;
190
entry = ENTRY_NEXT(entry);
191
}
192
193
_Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
194
(_Py_slist_item_t *)entry);
195
ht->nentries--;
196
197
void *value = entry->value;
198
ht->alloc.free(entry);
199
200
if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
201
// Ignore failure: error cannot be reported to the caller
202
hashtable_rehash(ht);
203
}
204
return value;
205
}
206
207
208
int
209
_Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
210
{
211
_Py_hashtable_entry_t *entry;
212
213
#ifndef NDEBUG
214
/* Don't write the assertion on a single line because it is interesting
215
to know the duplicated entry if the assertion failed. The entry can
216
be read using a debugger. */
217
entry = ht->get_entry_func(ht, key);
218
assert(entry == NULL);
219
#endif
220
221
222
entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
223
if (entry == NULL) {
224
/* memory allocation failed */
225
return -1;
226
}
227
228
entry->key_hash = ht->hash_func(key);
229
entry->key = (void *)key;
230
entry->value = value;
231
232
ht->nentries++;
233
if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
234
if (hashtable_rehash(ht) < 0) {
235
ht->nentries--;
236
ht->alloc.free(entry);
237
return -1;
238
}
239
}
240
241
size_t index = entry->key_hash & (ht->nbuckets - 1);
242
_Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
243
return 0;
244
}
245
246
247
void*
248
_Py_hashtable_get(_Py_hashtable_t *ht, const void *key)
249
{
250
_Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key);
251
if (entry != NULL) {
252
return entry->value;
253
}
254
else {
255
return NULL;
256
}
257
}
258
259
260
int
261
_Py_hashtable_foreach(_Py_hashtable_t *ht,
262
_Py_hashtable_foreach_func func,
263
void *user_data)
264
{
265
for (size_t hv = 0; hv < ht->nbuckets; hv++) {
266
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
267
while (entry != NULL) {
268
int res = func(ht, entry->key, entry->value, user_data);
269
if (res) {
270
return res;
271
}
272
entry = ENTRY_NEXT(entry);
273
}
274
}
275
return 0;
276
}
277
278
279
static int
280
hashtable_rehash(_Py_hashtable_t *ht)
281
{
282
size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
283
if (new_size == ht->nbuckets) {
284
return 0;
285
}
286
287
size_t buckets_size = new_size * sizeof(ht->buckets[0]);
288
_Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
289
if (new_buckets == NULL) {
290
/* memory allocation failed */
291
return -1;
292
}
293
memset(new_buckets, 0, buckets_size);
294
295
for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
296
_Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
297
while (entry != NULL) {
298
assert(ht->hash_func(entry->key) == entry->key_hash);
299
_Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
300
size_t entry_index = entry->key_hash & (new_size - 1);
301
302
_Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
303
304
entry = next;
305
}
306
}
307
308
ht->alloc.free(ht->buckets);
309
ht->nbuckets = new_size;
310
ht->buckets = new_buckets;
311
return 0;
312
}
313
314
315
_Py_hashtable_t *
316
_Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
317
_Py_hashtable_compare_func compare_func,
318
_Py_hashtable_destroy_func key_destroy_func,
319
_Py_hashtable_destroy_func value_destroy_func,
320
_Py_hashtable_allocator_t *allocator)
321
{
322
_Py_hashtable_allocator_t alloc;
323
if (allocator == NULL) {
324
alloc.malloc = PyMem_Malloc;
325
alloc.free = PyMem_Free;
326
}
327
else {
328
alloc = *allocator;
329
}
330
331
_Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
332
if (ht == NULL) {
333
return ht;
334
}
335
336
ht->nbuckets = HASHTABLE_MIN_SIZE;
337
ht->nentries = 0;
338
339
size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
340
ht->buckets = alloc.malloc(buckets_size);
341
if (ht->buckets == NULL) {
342
alloc.free(ht);
343
return NULL;
344
}
345
memset(ht->buckets, 0, buckets_size);
346
347
ht->get_entry_func = _Py_hashtable_get_entry_generic;
348
ht->hash_func = hash_func;
349
ht->compare_func = compare_func;
350
ht->key_destroy_func = key_destroy_func;
351
ht->value_destroy_func = value_destroy_func;
352
ht->alloc = alloc;
353
if (ht->hash_func == _Py_hashtable_hash_ptr
354
&& ht->compare_func == _Py_hashtable_compare_direct)
355
{
356
ht->get_entry_func = _Py_hashtable_get_entry_ptr;
357
}
358
return ht;
359
}
360
361
362
_Py_hashtable_t *
363
_Py_hashtable_new(_Py_hashtable_hash_func hash_func,
364
_Py_hashtable_compare_func compare_func)
365
{
366
return _Py_hashtable_new_full(hash_func, compare_func,
367
NULL, NULL, NULL);
368
}
369
370
371
static void
372
_Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
373
{
374
if (ht->key_destroy_func) {
375
ht->key_destroy_func(entry->key);
376
}
377
if (ht->value_destroy_func) {
378
ht->value_destroy_func(entry->value);
379
}
380
ht->alloc.free(entry);
381
}
382
383
384
void
385
_Py_hashtable_clear(_Py_hashtable_t *ht)
386
{
387
for (size_t i=0; i < ht->nbuckets; i++) {
388
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
389
while (entry != NULL) {
390
_Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
391
_Py_hashtable_destroy_entry(ht, entry);
392
entry = next;
393
}
394
_Py_slist_init(&ht->buckets[i]);
395
}
396
ht->nentries = 0;
397
// Ignore failure: clear function is not expected to fail
398
// because of a memory allocation failure.
399
(void)hashtable_rehash(ht);
400
}
401
402
403
void
404
_Py_hashtable_destroy(_Py_hashtable_t *ht)
405
{
406
for (size_t i = 0; i < ht->nbuckets; i++) {
407
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
408
while (entry) {
409
_Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);
410
_Py_hashtable_destroy_entry(ht, entry);
411
entry = entry_next;
412
}
413
}
414
415
ht->alloc.free(ht->buckets);
416
ht->alloc.free(ht);
417
}
418
419