Path: blob/main/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
48676 views
// SPDX-License-Identifier: CDDL-1.01/*2* CDDL HEADER START3*4* The contents of this file are subject to the terms of the5* Common Development and Distribution License (the "License").6* You may not use this file except in compliance with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or https://opensource.org/licenses/CDDL-1.0.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*/2122/*23* Copyright (c) 2021-2022 Tino Reichardt <[email protected]>24*/2526#include <sys/simd.h>27#include <sys/zfs_context.h>28#include <sys/zfs_impl.h>29#include <sys/blake3.h>3031#include "blake3_impl.h"3233#if !defined(OMIT_SIMD) && (defined(__aarch64__) || \34(defined(__x86_64) && defined(HAVE_SSE2)) || \35(defined(__PPC64__) && defined(__LITTLE_ENDIAN__)))36#define USE_SIMD37#endif3839#ifdef USE_SIMD40extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],41const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,42uint64_t counter, uint8_t flags);4344extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],45const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,46uint64_t counter, uint8_t flags, uint8_t out[64]);4748extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,49size_t num_inputs, size_t blocks, const uint32_t key[8],50uint64_t counter, boolean_t increment_counter, uint8_t flags,51uint8_t flags_start, uint8_t flags_end, uint8_t *out);5253static void blake3_compress_in_place_sse2(uint32_t cv[8],54const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,55uint64_t counter, uint8_t flags) {56kfpu_begin();57zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,58flags);59kfpu_end();60}6162static void blake3_compress_xof_sse2(const uint32_t cv[8],63const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,64uint64_t counter, uint8_t flags, uint8_t out[64]) {65kfpu_begin();66zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,67out);68kfpu_end();69}7071static void blake3_hash_many_sse2(const uint8_t * const *inputs,72size_t num_inputs, size_t blocks, const uint32_t key[8],73uint64_t counter, boolean_t increment_counter, uint8_t flags,74uint8_t flags_start, uint8_t flags_end, uint8_t *out) {75kfpu_begin();76zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,77increment_counter, flags, flags_start, flags_end, out);78kfpu_end();79}8081static boolean_t blake3_is_sse2_supported(void)82{83#if defined(__x86_64)84return (kfpu_allowed() && zfs_sse2_available());85#elif defined(__PPC64__)86return (kfpu_allowed() && zfs_vsx_available());87#else88return (kfpu_allowed());89#endif90}9192const blake3_ops_t blake3_sse2_impl = {93.compress_in_place = blake3_compress_in_place_sse2,94.compress_xof = blake3_compress_xof_sse2,95.hash_many = blake3_hash_many_sse2,96.is_supported = blake3_is_sse2_supported,97.degree = 4,98.name = "sse2"99};100#endif101102#ifdef USE_SIMD103104extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],105const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,106uint64_t counter, uint8_t flags);107108extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],109const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,110uint64_t counter, uint8_t flags, uint8_t out[64]);111112extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,113size_t num_inputs, size_t blocks, const uint32_t key[8],114uint64_t counter, boolean_t increment_counter, uint8_t flags,115uint8_t flags_start, uint8_t flags_end, uint8_t *out);116117static void blake3_compress_in_place_sse41(uint32_t cv[8],118const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,119uint64_t counter, uint8_t flags) {120kfpu_begin();121zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,122flags);123kfpu_end();124}125126static void blake3_compress_xof_sse41(const uint32_t cv[8],127const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,128uint64_t counter, uint8_t flags, uint8_t out[64]) {129kfpu_begin();130zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,131out);132kfpu_end();133}134135static void blake3_hash_many_sse41(const uint8_t * const *inputs,136size_t num_inputs, size_t blocks, const uint32_t key[8],137uint64_t counter, boolean_t increment_counter, uint8_t flags,138uint8_t flags_start, uint8_t flags_end, uint8_t *out) {139kfpu_begin();140zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,141increment_counter, flags, flags_start, flags_end, out);142kfpu_end();143}144145static boolean_t blake3_is_sse41_supported(void)146{147#if defined(__x86_64)148return (kfpu_allowed() && zfs_sse4_1_available());149#elif defined(__PPC64__)150return (kfpu_allowed() && zfs_vsx_available());151#else152return (kfpu_allowed());153#endif154}155156const blake3_ops_t blake3_sse41_impl = {157.compress_in_place = blake3_compress_in_place_sse41,158.compress_xof = blake3_compress_xof_sse41,159.hash_many = blake3_hash_many_sse41,160.is_supported = blake3_is_sse41_supported,161.degree = 4,162.name = "sse41"163};164#endif165166#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)167extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,168size_t num_inputs, size_t blocks, const uint32_t key[8],169uint64_t counter, boolean_t increment_counter, uint8_t flags,170uint8_t flags_start, uint8_t flags_end, uint8_t *out);171172static void blake3_hash_many_avx2(const uint8_t * const *inputs,173size_t num_inputs, size_t blocks, const uint32_t key[8],174uint64_t counter, boolean_t increment_counter, uint8_t flags,175uint8_t flags_start, uint8_t flags_end, uint8_t *out) {176kfpu_begin();177zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,178increment_counter, flags, flags_start, flags_end, out);179kfpu_end();180}181182static boolean_t blake3_is_avx2_supported(void)183{184return (kfpu_allowed() && zfs_sse4_1_available() &&185zfs_avx2_available());186}187188const blake3_ops_t189blake3_avx2_impl = {190.compress_in_place = blake3_compress_in_place_sse41,191.compress_xof = blake3_compress_xof_sse41,192.hash_many = blake3_hash_many_avx2,193.is_supported = blake3_is_avx2_supported,194.degree = 8,195.name = "avx2"196};197#endif198199#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)200extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],201const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,202uint64_t counter, uint8_t flags);203204extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],205const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,206uint64_t counter, uint8_t flags, uint8_t out[64]);207208extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,209size_t num_inputs, size_t blocks, const uint32_t key[8],210uint64_t counter, boolean_t increment_counter, uint8_t flags,211uint8_t flags_start, uint8_t flags_end, uint8_t *out);212213static void blake3_compress_in_place_avx512(uint32_t cv[8],214const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,215uint64_t counter, uint8_t flags) {216kfpu_begin();217zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,218flags);219kfpu_end();220}221222static void blake3_compress_xof_avx512(const uint32_t cv[8],223const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,224uint64_t counter, uint8_t flags, uint8_t out[64]) {225kfpu_begin();226zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,227out);228kfpu_end();229}230231static void blake3_hash_many_avx512(const uint8_t * const *inputs,232size_t num_inputs, size_t blocks, const uint32_t key[8],233uint64_t counter, boolean_t increment_counter, uint8_t flags,234uint8_t flags_start, uint8_t flags_end, uint8_t *out) {235kfpu_begin();236zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,237increment_counter, flags, flags_start, flags_end, out);238kfpu_end();239}240241static boolean_t blake3_is_avx512_supported(void)242{243return (kfpu_allowed() && zfs_avx512f_available() &&244zfs_avx512vl_available());245}246247const blake3_ops_t blake3_avx512_impl = {248.compress_in_place = blake3_compress_in_place_avx512,249.compress_xof = blake3_compress_xof_avx512,250.hash_many = blake3_hash_many_avx512,251.is_supported = blake3_is_avx512_supported,252.degree = 16,253.name = "avx512"254};255#endif256257extern const blake3_ops_t blake3_generic_impl;258259static const blake3_ops_t *const blake3_impls[] = {260&blake3_generic_impl,261#ifdef USE_SIMD262#if defined(__aarch64__) || \263(defined(__x86_64) && defined(HAVE_SSE2)) || \264(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))265&blake3_sse2_impl,266#endif267#if defined(__aarch64__) || \268(defined(__x86_64) && defined(HAVE_SSE4_1)) || \269(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))270&blake3_sse41_impl,271#endif272#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)273&blake3_avx2_impl,274#endif275#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)276&blake3_avx512_impl,277#endif278#endif279};280281/* use the generic implementation functions */282#define IMPL_NAME "blake3"283#define IMPL_OPS_T blake3_ops_t284#define IMPL_ARRAY blake3_impls285#define IMPL_GET_OPS blake3_get_ops286#define ZFS_IMPL_OPS zfs_blake3_ops287#include <generic_impl.c>288289#ifdef _KERNEL290void **blake3_per_cpu_ctx;291292void293blake3_per_cpu_ctx_init(void)294{295/*296* Create "The Godfather" ptr to hold all blake3 ctx297*/298blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP);299for (int i = 0; i < max_ncpus; i++) {300blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),301KM_SLEEP);302}303}304305void306blake3_per_cpu_ctx_fini(void)307{308for (int i = 0; i < max_ncpus; i++) {309memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX));310kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX));311}312memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));313kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));314}315316#define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ")317318#if defined(__linux__)319320static int321blake3_param_get(char *buffer, zfs_kernel_param_t *unused)322{323const uint32_t impl = IMPL_READ(generic_impl_chosen);324char *fmt;325int cnt = 0;326327/* cycling */328fmt = IMPL_FMT(impl, IMPL_CYCLE);329cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "cycle");330331/* list fastest */332fmt = IMPL_FMT(impl, IMPL_FASTEST);333cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest");334335/* list all supported implementations */336generic_impl_init();337for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {338fmt = IMPL_FMT(impl, i);339cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,340blake3_impls[i]->name);341}342343return (cnt);344}345346static int347blake3_param_set(const char *val, zfs_kernel_param_t *unused)348{349(void) unused;350return (generic_impl_setname(val));351}352353#elif defined(__FreeBSD__)354355#include <sys/sbuf.h>356357static int358blake3_param(ZFS_MODULE_PARAM_ARGS)359{360int err;361362generic_impl_init();363if (req->newptr == NULL) {364const uint32_t impl = IMPL_READ(generic_impl_chosen);365const int init_buflen = 64;366const char *fmt;367struct sbuf *s;368369s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);370371/* cycling */372fmt = IMPL_FMT(impl, IMPL_CYCLE);373(void) sbuf_printf(s, fmt, "cycle");374375/* list fastest */376fmt = IMPL_FMT(impl, IMPL_FASTEST);377(void) sbuf_printf(s, fmt, "fastest");378379/* list all supported implementations */380for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {381fmt = IMPL_FMT(impl, i);382(void) sbuf_printf(s, fmt, generic_supp_impls[i]->name);383}384385err = sbuf_finish(s);386sbuf_delete(s);387388return (err);389}390391char buf[16];392393err = sysctl_handle_string(oidp, buf, sizeof (buf), req);394if (err) {395return (err);396}397398return (-generic_impl_setname(buf));399}400#endif401402#undef IMPL_FMT403404ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl,405blake3_param_set, blake3_param_get, ZMOD_RW, \406"Select BLAKE3 implementation.");407#endif408409410