Path: blob/main/sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c
48676 views
// SPDX-License-Identifier: CDDL-1.01/*2* CDDL HEADER START3*4* The contents of this file are subject to the terms of the5* Common Development and Distribution License (the "License").6* You may not use this file except in compliance with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or https://opensource.org/licenses/CDDL-1.0.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*/2122/*23* Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE324* Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor25* Copyright (c) 2021-2022 Tino Reichardt <[email protected]>26*/2728#include <sys/simd.h>29#include <sys/zfs_context.h>30#include "blake3_impl.h"3132#define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))33static inline void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,34uint32_t x, uint32_t y)35{36state[a] = state[a] + state[b] + x;37state[d] = rotr32(state[d] ^ state[a], 16);38state[c] = state[c] + state[d];39state[b] = rotr32(state[b] ^ state[c], 12);40state[a] = state[a] + state[b] + y;41state[d] = rotr32(state[d] ^ state[a], 8);42state[c] = state[c] + state[d];43state[b] = rotr32(state[b] ^ state[c], 7);44}4546static inline void round_fn(uint32_t state[16], const uint32_t *msg,47size_t round)48{49/* Select the message schedule based on the round. */50const uint8_t *schedule = BLAKE3_MSG_SCHEDULE[round];5152/* Mix the columns. */53g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);54g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);55g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);56g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);5758/* Mix the rows. */59g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);60g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);61g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);62g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);63}6465static inline void compress_pre(uint32_t state[16], const uint32_t cv[8],66const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,67uint64_t counter, uint8_t flags)68{69uint32_t block_words[16];70block_words[0] = load32(block + 4 * 0);71block_words[1] = load32(block + 4 * 1);72block_words[2] = load32(block + 4 * 2);73block_words[3] = load32(block + 4 * 3);74block_words[4] = load32(block + 4 * 4);75block_words[5] = load32(block + 4 * 5);76block_words[6] = load32(block + 4 * 6);77block_words[7] = load32(block + 4 * 7);78block_words[8] = load32(block + 4 * 8);79block_words[9] = load32(block + 4 * 9);80block_words[10] = load32(block + 4 * 10);81block_words[11] = load32(block + 4 * 11);82block_words[12] = load32(block + 4 * 12);83block_words[13] = load32(block + 4 * 13);84block_words[14] = load32(block + 4 * 14);85block_words[15] = load32(block + 4 * 15);8687state[0] = cv[0];88state[1] = cv[1];89state[2] = cv[2];90state[3] = cv[3];91state[4] = cv[4];92state[5] = cv[5];93state[6] = cv[6];94state[7] = cv[7];95state[8] = BLAKE3_IV[0];96state[9] = BLAKE3_IV[1];97state[10] = BLAKE3_IV[2];98state[11] = BLAKE3_IV[3];99state[12] = counter_low(counter);100state[13] = counter_high(counter);101state[14] = (uint32_t)block_len;102state[15] = (uint32_t)flags;103104round_fn(state, &block_words[0], 0);105round_fn(state, &block_words[0], 1);106round_fn(state, &block_words[0], 2);107round_fn(state, &block_words[0], 3);108round_fn(state, &block_words[0], 4);109round_fn(state, &block_words[0], 5);110round_fn(state, &block_words[0], 6);111}112113static inline void blake3_compress_in_place_generic(uint32_t cv[8],114const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,115uint64_t counter, uint8_t flags)116{117uint32_t state[16];118compress_pre(state, cv, block, block_len, counter, flags);119cv[0] = state[0] ^ state[8];120cv[1] = state[1] ^ state[9];121cv[2] = state[2] ^ state[10];122cv[3] = state[3] ^ state[11];123cv[4] = state[4] ^ state[12];124cv[5] = state[5] ^ state[13];125cv[6] = state[6] ^ state[14];126cv[7] = state[7] ^ state[15];127}128129static inline void hash_one_generic(const uint8_t *input, size_t blocks,130const uint32_t key[8], uint64_t counter, uint8_t flags,131uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN])132{133uint32_t cv[8];134memcpy(cv, key, BLAKE3_KEY_LEN);135uint8_t block_flags = flags | flags_start;136while (blocks > 0) {137if (blocks == 1) {138block_flags |= flags_end;139}140blake3_compress_in_place_generic(cv, input, BLAKE3_BLOCK_LEN,141counter, block_flags);142input = &input[BLAKE3_BLOCK_LEN];143blocks -= 1;144block_flags = flags;145}146store_cv_words(out, cv);147}148149static inline void blake3_compress_xof_generic(const uint32_t cv[8],150const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,151uint64_t counter, uint8_t flags, uint8_t out[64])152{153uint32_t state[16];154compress_pre(state, cv, block, block_len, counter, flags);155156store32(&out[0 * 4], state[0] ^ state[8]);157store32(&out[1 * 4], state[1] ^ state[9]);158store32(&out[2 * 4], state[2] ^ state[10]);159store32(&out[3 * 4], state[3] ^ state[11]);160store32(&out[4 * 4], state[4] ^ state[12]);161store32(&out[5 * 4], state[5] ^ state[13]);162store32(&out[6 * 4], state[6] ^ state[14]);163store32(&out[7 * 4], state[7] ^ state[15]);164store32(&out[8 * 4], state[8] ^ cv[0]);165store32(&out[9 * 4], state[9] ^ cv[1]);166store32(&out[10 * 4], state[10] ^ cv[2]);167store32(&out[11 * 4], state[11] ^ cv[3]);168store32(&out[12 * 4], state[12] ^ cv[4]);169store32(&out[13 * 4], state[13] ^ cv[5]);170store32(&out[14 * 4], state[14] ^ cv[6]);171store32(&out[15 * 4], state[15] ^ cv[7]);172}173174static inline void blake3_hash_many_generic(const uint8_t * const *inputs,175size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter,176boolean_t increment_counter, uint8_t flags, uint8_t flags_start,177uint8_t flags_end, uint8_t *out)178{179while (num_inputs > 0) {180hash_one_generic(inputs[0], blocks, key, counter, flags,181flags_start, flags_end, out);182if (increment_counter) {183counter += 1;184}185inputs += 1;186num_inputs -= 1;187out = &out[BLAKE3_OUT_LEN];188}189}190191/* the generic implementation is always okay */192static boolean_t blake3_is_supported(void)193{194return (B_TRUE);195}196197const blake3_ops_t blake3_generic_impl = {198.compress_in_place = blake3_compress_in_place_generic,199.compress_xof = blake3_compress_xof_generic,200.hash_many = blake3_hash_many_generic,201.is_supported = blake3_is_supported,202.degree = 4,203.name = "generic"204};205206207