Path: blob/main/contrib/llvm-project/llvm/lib/Support/DJB.cpp
35233 views
//===-- Support/DJB.cpp ---DJB Hash -----------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains support for the DJ Bernstein hash function.9//10//===----------------------------------------------------------------------===//1112#include "llvm/Support/DJB.h"13#include "llvm/ADT/ArrayRef.h"14#include "llvm/Support/Compiler.h"15#include "llvm/Support/ConvertUTF.h"16#include "llvm/Support/Unicode.h"1718using namespace llvm;1920static UTF32 chopOneUTF32(StringRef &Buffer) {21UTF32 C;22const UTF8 *const Begin8Const =23reinterpret_cast<const UTF8 *>(Buffer.begin());24const UTF8 *Begin8 = Begin8Const;25UTF32 *Begin32 = &C;2627// In lenient mode we will always end up with a "reasonable" value in C for28// non-empty input.29assert(!Buffer.empty());30ConvertUTF8toUTF32(&Begin8, reinterpret_cast<const UTF8 *>(Buffer.end()),31&Begin32, &C + 1, lenientConversion);32Buffer = Buffer.drop_front(Begin8 - Begin8Const);33return C;34}3536static StringRef toUTF8(UTF32 C, MutableArrayRef<UTF8> Storage) {37const UTF32 *Begin32 = &C;38UTF8 *Begin8 = Storage.begin();3940// The case-folded output should always be a valid unicode character, so use41// strict mode here.42ConversionResult CR = ConvertUTF32toUTF8(&Begin32, &C + 1, &Begin8,43Storage.end(), strictConversion);44assert(CR == conversionOK && "Case folding produced invalid char?");45(void)CR;46return StringRef(reinterpret_cast<char *>(Storage.begin()),47Begin8 - Storage.begin());48}4950static UTF32 foldCharDwarf(UTF32 C) {51// DWARF v5 addition to the unicode folding rules.52// Fold "Latin Small Letter Dotless I" and "Latin Capital Letter I With Dot53// Above" into "i".54if (C == 0x130 || C == 0x131)55return 'i';56return sys::unicode::foldCharSimple(C);57}5859static std::optional<uint32_t> fastCaseFoldingDjbHash(StringRef Buffer,60uint32_t H) {61bool AllASCII = true;62for (unsigned char C : Buffer) {63H = H * 33 + ('A' <= C && C <= 'Z' ? C - 'A' + 'a' : C);64AllASCII &= C <= 0x7f;65}66if (AllASCII)67return H;68return std::nullopt;69}7071uint32_t llvm::caseFoldingDjbHash(StringRef Buffer, uint32_t H) {72if (std::optional<uint32_t> Result = fastCaseFoldingDjbHash(Buffer, H))73return *Result;7475std::array<UTF8, UNI_MAX_UTF8_BYTES_PER_CODE_POINT> Storage;76while (!Buffer.empty()) {77UTF32 C = foldCharDwarf(chopOneUTF32(Buffer));78StringRef Folded = toUTF8(C, Storage);79H = djbHash(Folded, H);80}81return H;82}838485