Path: blob/main/contrib/llvm-project/llvm/lib/Support/BLAKE3/blake3_dispatch.c
35266 views
#include <stdbool.h>1#include <stddef.h>2#include <stdint.h>34#include "blake3_impl.h"56#if defined(IS_X86)7#if defined(_MSC_VER)8#include <intrin.h>9#elif defined(__GNUC__)10#include <immintrin.h>11#else12#error "Unimplemented!"13#endif14#endif1516#define MAYBE_UNUSED(x) (void)((x))1718#if defined(IS_X86)19static uint64_t xgetbv(void) {20#if defined(_MSC_VER)21return _xgetbv(0);22#else23uint32_t eax = 0, edx = 0;24__asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));25return ((uint64_t)edx << 32) | eax;26#endif27}2829static void cpuid(uint32_t out[4], uint32_t id) {30#if defined(_MSC_VER)31__cpuid((int *)out, id);32#elif defined(__i386__) || defined(_M_IX86)33__asm__ __volatile__("movl %%ebx, %1\n"34"cpuid\n"35"xchgl %1, %%ebx\n"36: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])37: "a"(id));38#else39__asm__ __volatile__("cpuid\n"40: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])41: "a"(id));42#endif43}4445static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {46#if defined(_MSC_VER)47__cpuidex((int *)out, id, sid);48#elif defined(__i386__) || defined(_M_IX86)49__asm__ __volatile__("movl %%ebx, %1\n"50"cpuid\n"51"xchgl %1, %%ebx\n"52: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])53: "a"(id), "c"(sid));54#else55__asm__ __volatile__("cpuid\n"56: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])57: "a"(id), "c"(sid));58#endif59}6061#endif6263enum cpu_feature {64SSE2 = 1 << 0,65SSSE3 = 1 << 1,66SSE41 = 1 << 2,67AVX = 1 << 3,68AVX2 = 1 << 4,69AVX512F = 1 << 5,70AVX512VL = 1 << 6,71/* ... */72UNDEFINED = 1 << 3073};7475#if !defined(BLAKE3_TESTING)76static /* Allow the variable to be controlled manually for testing */77#endif78enum cpu_feature g_cpu_features = UNDEFINED;7980LLVM_ATTRIBUTE_USED81#if !defined(BLAKE3_TESTING)82static83#endif84enum cpu_feature85get_cpu_features(void) {8687if (g_cpu_features != UNDEFINED) {88return g_cpu_features;89} else {90#if defined(IS_X86)91uint32_t regs[4] = {0};92uint32_t *eax = ®s[0], *ebx = ®s[1], *ecx = ®s[2], *edx = ®s[3];93(void)edx;94enum cpu_feature features = 0;95cpuid(regs, 0);96const int max_id = *eax;97cpuid(regs, 1);98#if defined(__amd64__) || defined(_M_X64)99features |= SSE2;100#else101if (*edx & (1UL << 26))102features |= SSE2;103#endif104if (*ecx & (1UL << 0))105features |= SSSE3;106if (*ecx & (1UL << 19))107features |= SSE41;108109if (*ecx & (1UL << 27)) { // OSXSAVE110const uint64_t mask = xgetbv();111if ((mask & 6) == 6) { // SSE and AVX states112if (*ecx & (1UL << 28))113features |= AVX;114if (max_id >= 7) {115cpuidex(regs, 7, 0);116if (*ebx & (1UL << 5))117features |= AVX2;118if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm119if (*ebx & (1UL << 31))120features |= AVX512VL;121if (*ebx & (1UL << 16))122features |= AVX512F;123}124}125}126}127g_cpu_features = features;128return features;129#else130/* How to detect NEON? */131return 0;132#endif133}134}135136void blake3_compress_in_place(uint32_t cv[8],137const uint8_t block[BLAKE3_BLOCK_LEN],138uint8_t block_len, uint64_t counter,139uint8_t flags) {140#if defined(IS_X86)141const enum cpu_feature features = get_cpu_features();142MAYBE_UNUSED(features);143#if !defined(BLAKE3_NO_AVX512)144if (features & AVX512VL) {145blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);146return;147}148#endif149#if !defined(BLAKE3_NO_SSE41)150if (features & SSE41) {151blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);152return;153}154#endif155#if !defined(BLAKE3_NO_SSE2)156if (features & SSE2) {157blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);158return;159}160#endif161#endif162blake3_compress_in_place_portable(cv, block, block_len, counter, flags);163}164165void blake3_compress_xof(const uint32_t cv[8],166const uint8_t block[BLAKE3_BLOCK_LEN],167uint8_t block_len, uint64_t counter, uint8_t flags,168uint8_t out[64]) {169#if defined(IS_X86)170const enum cpu_feature features = get_cpu_features();171MAYBE_UNUSED(features);172#if !defined(BLAKE3_NO_AVX512)173if (features & AVX512VL) {174blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);175return;176}177#endif178#if !defined(BLAKE3_NO_SSE41)179if (features & SSE41) {180blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);181return;182}183#endif184#if !defined(BLAKE3_NO_SSE2)185if (features & SSE2) {186blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);187return;188}189#endif190#endif191blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);192}193194void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,195size_t blocks, const uint32_t key[8], uint64_t counter,196bool increment_counter, uint8_t flags,197uint8_t flags_start, uint8_t flags_end, uint8_t *out) {198#if defined(IS_X86)199const enum cpu_feature features = get_cpu_features();200MAYBE_UNUSED(features);201#if !defined(BLAKE3_NO_AVX512)202if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {203blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,204increment_counter, flags, flags_start, flags_end,205out);206return;207}208#endif209#if !defined(BLAKE3_NO_AVX2)210if (features & AVX2) {211blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,212increment_counter, flags, flags_start, flags_end,213out);214return;215}216#endif217#if !defined(BLAKE3_NO_SSE41)218if (features & SSE41) {219blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,220increment_counter, flags, flags_start, flags_end,221out);222return;223}224#endif225#if !defined(BLAKE3_NO_SSE2)226if (features & SSE2) {227blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,228increment_counter, flags, flags_start, flags_end,229out);230return;231}232#endif233#endif234235#if BLAKE3_USE_NEON == 1236blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,237increment_counter, flags, flags_start, flags_end, out);238return;239#endif240241blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,242increment_counter, flags, flags_start, flags_end,243out);244}245246// The dynamically detected SIMD degree of the current platform.247size_t blake3_simd_degree(void) {248#if defined(IS_X86)249const enum cpu_feature features = get_cpu_features();250MAYBE_UNUSED(features);251#if !defined(BLAKE3_NO_AVX512)252if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {253return 16;254}255#endif256#if !defined(BLAKE3_NO_AVX2)257if (features & AVX2) {258return 8;259}260#endif261#if !defined(BLAKE3_NO_SSE41)262if (features & SSE41) {263return 4;264}265#endif266#if !defined(BLAKE3_NO_SSE2)267if (features & SSE2) {268return 4;269}270#endif271#endif272#if BLAKE3_USE_NEON == 1273return 4;274#endif275return 1;276}277278279