/* $NetBSD: hash.c,v 1.80 2025/04/22 19:28:50 rillig Exp $ */12/*3* Copyright (c) 1988, 1989, 1990 The Regents of the University of California.4* All rights reserved.5*6* This code is derived from software contributed to Berkeley by7* Adam de Boor.8*9* Redistribution and use in source and binary forms, with or without10* modification, are permitted provided that the following conditions11* are met:12* 1. Redistributions of source code must retain the above copyright13* notice, this list of conditions and the following disclaimer.14* 2. Redistributions in binary form must reproduce the above copyright15* notice, this list of conditions and the following disclaimer in the16* documentation and/or other materials provided with the distribution.17* 3. Neither the name of the University nor the names of its contributors18* may be used to endorse or promote products derived from this software19* without specific prior written permission.20*21* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND22* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE23* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE24* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE25* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL26* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS27* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)28* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT29* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY30* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF31* SUCH DAMAGE.32*/3334/*35* Copyright (c) 1988, 1989 by Adam de Boor36* Copyright (c) 1989 by Berkeley Softworks37* All rights reserved.38*39* This code is derived from software contributed to Berkeley by40* Adam de Boor.41*42* Redistribution and use in source and binary forms, with or without43* modification, are permitted provided that the following conditions44* are met:45* 1. Redistributions of source code must retain the above copyright46* notice, this list of conditions and the following disclaimer.47* 2. Redistributions in binary form must reproduce the above copyright48* notice, this list of conditions and the following disclaimer in the49* documentation and/or other materials provided with the distribution.50* 3. All advertising materials mentioning features or use of this software51* must display the following acknowledgement:52* This product includes software developed by the University of53* California, Berkeley and its contributors.54* 4. Neither the name of the University nor the names of its contributors55* may be used to endorse or promote products derived from this software56* without specific prior written permission.57*58* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND59* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE60* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE61* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE62* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL63* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS64* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)65* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT66* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY67* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF68* SUCH DAMAGE.69*/7071/* Hash tables with string keys and pointer values. */7273#include "make.h"7475/* "@(#)hash.c 8.1 (Berkeley) 6/6/93" */76MAKE_RCSID("$NetBSD: hash.c,v 1.80 2025/04/22 19:28:50 rillig Exp $");7778/*79* The ratio of # entries to # buckets at which we rebuild the table to80* make it larger.81*/82#define rebuildLimit 38384/* This hash function matches Gosling's Emacs and java.lang.String. */85static unsigned86Hash_String(const char *key, const char **out_keyEnd)87{88unsigned h;89const char *p;9091h = 0;92for (p = key; *p != '\0'; p++)93h = 31 * h + (unsigned char)*p;9495*out_keyEnd = p;96return h;97}9899/* This hash function matches Gosling's Emacs and java.lang.String. */100unsigned101Hash_Substring(Substring key)102{103unsigned h;104const char *p;105106h = 0;107for (p = key.start; p != key.end; p++)108h = 31 * h + (unsigned char)*p;109return h;110}111112static HashEntry *113HashTable_Find(HashTable *t, Substring key, unsigned h)114{115HashEntry *he;116size_t keyLen = Substring_Length(key);117118#ifdef DEBUG_HASH_LOOKUP119DEBUG4(HASH, "HashTable_Find: %p h=%08x key=%.*s\n",120t, h, (int)keyLen, key.start);121#endif122123for (he = t->buckets[h & t->bucketsMask]; he != NULL; he = he->next) {124if (he->hash == h &&125strncmp(he->key, key.start, keyLen) == 0 &&126he->key[keyLen] == '\0')127break;128}129130return he;131}132133/* Set up the hash table. */134void135HashTable_Init(HashTable *t)136{137unsigned n = 16, i;138HashEntry **buckets = bmake_malloc(sizeof *buckets * n);139for (i = 0; i < n; i++)140buckets[i] = NULL;141142t->buckets = buckets;143t->bucketsSize = n;144t->numEntries = 0;145t->bucketsMask = n - 1;146}147148/*149* Remove everything from the hash table and free up the memory for the keys150* of the hash table, but not for the values associated to these keys.151*/152void153HashTable_Done(HashTable *t)154{155HashEntry **buckets = t->buckets;156size_t i, n = t->bucketsSize;157158for (i = 0; i < n; i++) {159HashEntry *he = buckets[i];160while (he != NULL) {161HashEntry *next = he->next;162free(he);163he = next;164}165}166167free(t->buckets);168#ifdef CLEANUP169t->buckets = NULL;170#endif171}172173/* Find the entry corresponding to the key, or return NULL. */174HashEntry *175HashTable_FindEntry(HashTable *t, const char *key)176{177const char *keyEnd;178unsigned h = Hash_String(key, &keyEnd);179return HashTable_Find(t, Substring_Init(key, keyEnd), h);180}181182/* Find the value corresponding to the key, or return NULL. */183void *184HashTable_FindValue(HashTable *t, const char *key)185{186HashEntry *he = HashTable_FindEntry(t, key);187return he != NULL ? he->value : NULL;188}189190/*191* Find the value corresponding to the key and the precomputed hash,192* or return NULL.193*/194void *195HashTable_FindValueBySubstringHash(HashTable *t, Substring key, unsigned h)196{197HashEntry *he = HashTable_Find(t, key, h);198return he != NULL ? he->value : NULL;199}200201static unsigned202HashTable_MaxChain(const HashTable *t)203{204unsigned b, cl, max_cl = 0;205for (b = 0; b < t->bucketsSize; b++) {206const HashEntry *he = t->buckets[b];207for (cl = 0; he != NULL; he = he->next)208cl++;209if (cl > max_cl)210max_cl = cl;211}212return max_cl;213}214215/*216* Make the hash table larger. Any bucket numbers from the old table become217* invalid; the hash values stay valid though.218*/219static void220HashTable_Enlarge(HashTable *t)221{222unsigned oldSize = t->bucketsSize;223HashEntry **oldBuckets = t->buckets;224unsigned newSize = 2 * oldSize;225unsigned newMask = newSize - 1;226HashEntry **newBuckets = bmake_malloc(sizeof *newBuckets * newSize);227size_t i;228229for (i = 0; i < newSize; i++)230newBuckets[i] = NULL;231232for (i = 0; i < oldSize; i++) {233HashEntry *he = oldBuckets[i];234while (he != NULL) {235HashEntry *next = he->next;236he->next = newBuckets[he->hash & newMask];237newBuckets[he->hash & newMask] = he;238he = next;239}240}241242free(oldBuckets);243244t->bucketsSize = newSize;245t->bucketsMask = newMask;246t->buckets = newBuckets;247DEBUG4(HASH, "HashTable_Enlarge: %p size=%d entries=%d maxchain=%d\n",248(void *)t, t->bucketsSize, t->numEntries, HashTable_MaxChain(t));249}250251/*252* Find or create an entry corresponding to the key.253* Return in out_isNew whether a new entry has been created.254*/255HashEntry *256HashTable_CreateEntry(HashTable *t, const char *key, bool *out_isNew)257{258const char *keyEnd;259unsigned h = Hash_String(key, &keyEnd);260HashEntry *he = HashTable_Find(t, Substring_Init(key, keyEnd), h);261262if (he != NULL) {263if (out_isNew != NULL)264*out_isNew = false;265return he;266}267268if (t->numEntries >= rebuildLimit * t->bucketsSize)269HashTable_Enlarge(t);270271he = bmake_malloc(sizeof *he + (size_t)(keyEnd - key));272he->value = NULL;273he->hash = h;274memcpy(he->key, key, (size_t)(keyEnd - key) + 1);275276he->next = t->buckets[h & t->bucketsMask];277t->buckets[h & t->bucketsMask] = he;278t->numEntries++;279280if (out_isNew != NULL)281*out_isNew = true;282return he;283}284285void286HashTable_Set(HashTable *t, const char *key, void *value)287{288HashEntry *he = HashTable_CreateEntry(t, key, NULL);289HashEntry_Set(he, value);290}291292/* Delete the entry from the table, don't free the value of the entry. */293void294HashTable_DeleteEntry(HashTable *t, HashEntry *he)295{296HashEntry **ref = &t->buckets[he->hash & t->bucketsMask];297298for (; *ref != he; ref = &(*ref)->next)299continue;300*ref = he->next;301free(he);302t->numEntries--;303}304305/*306* Place the next entry from the hash table in hi->entry, or return false if307* the end of the table is reached.308*/309bool310HashIter_Next(HashIter *hi)311{312HashTable *t = hi->table;313HashEntry *he = hi->entry;314HashEntry **buckets = t->buckets;315unsigned bucketsSize = t->bucketsSize;316317if (he != NULL)318he = he->next; /* skip the most recently returned entry */319320while (he == NULL) { /* find the next nonempty chain */321if (hi->nextBucket >= bucketsSize)322return false;323he = buckets[hi->nextBucket++];324}325hi->entry = he;326return true;327}328329void330HashTable_DebugStats(const HashTable *t, const char *name)331{332DEBUG4(HASH, "HashTable \"%s\": size=%u entries=%u maxchain=%u\n",333name, t->bucketsSize, t->numEntries, HashTable_MaxChain(t));334}335336337