Path: blob/main/sys/contrib/openzfs/module/zcommon/zfs_fletcher_superscalar.c
48383 views
// SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only1/*2* Implement fast Fletcher4 using superscalar pipelines.3*4* Use regular C code to compute5* Fletcher4 in two incremental 64-bit parallel accumulator streams,6* and then combine the streams to form the final four checksum words.7* This implementation is a derivative of the AVX SIMD implementation by8* James Guilford and Jinshan Xiong from Intel (see zfs_fletcher_intel.c).9*10* Copyright (C) 2016 Romain Dolbeau.11*12* Authors:13* Romain Dolbeau <[email protected]>14*15* This software is available to you under a choice of one of two16* licenses. You may choose to be licensed under the terms of the GNU17* General Public License (GPL) Version 2, available from the file18* COPYING in the main directory of this source tree, or the19* OpenIB.org BSD license below:20*21* Redistribution and use in source and binary forms, with or22* without modification, are permitted provided that the following23* conditions are met:24*25* - Redistributions of source code must retain the above26* copyright notice, this list of conditions and the following27* disclaimer.28*29* - Redistributions in binary form must reproduce the above30* copyright notice, this list of conditions and the following31* disclaimer in the documentation and/or other materials32* provided with the distribution.33*34* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,35* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF36* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND37* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS38* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN39* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN40* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE41* SOFTWARE.42*/4344#include <sys/param.h>45#include <sys/byteorder.h>46#include <sys/spa_checksum.h>47#include <sys/string.h>48#include <zfs_fletcher.h>4950static void51fletcher_4_superscalar_init(fletcher_4_ctx_t *ctx)52{53memset(ctx->superscalar, 0, 4 * sizeof (zfs_fletcher_superscalar_t));54}5556static void57fletcher_4_superscalar_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)58{59uint64_t A, B, C, D;60A = ctx->superscalar[0].v[0] + ctx->superscalar[0].v[1];61B = 2 * ctx->superscalar[1].v[0] + 2 * ctx->superscalar[1].v[1] -62ctx->superscalar[0].v[1];63C = 4 * ctx->superscalar[2].v[0] - ctx->superscalar[1].v[0] +644 * ctx->superscalar[2].v[1] - 3 * ctx->superscalar[1].v[1];65D = 8 * ctx->superscalar[3].v[0] - 4 * ctx->superscalar[2].v[0] +668 * ctx->superscalar[3].v[1] - 8 * ctx->superscalar[2].v[1] +67ctx->superscalar[1].v[1];68ZIO_SET_CHECKSUM(zcp, A, B, C, D);69}7071static void72fletcher_4_superscalar_native(fletcher_4_ctx_t *ctx,73const void *buf, uint64_t size)74{75const uint32_t *ip = buf;76const uint32_t *ipend = ip + (size / sizeof (uint32_t));77uint64_t a, b, c, d;78uint64_t a2, b2, c2, d2;7980a = ctx->superscalar[0].v[0];81b = ctx->superscalar[1].v[0];82c = ctx->superscalar[2].v[0];83d = ctx->superscalar[3].v[0];84a2 = ctx->superscalar[0].v[1];85b2 = ctx->superscalar[1].v[1];86c2 = ctx->superscalar[2].v[1];87d2 = ctx->superscalar[3].v[1];8889do {90a += ip[0];91a2 += ip[1];92b += a;93b2 += a2;94c += b;95c2 += b2;96d += c;97d2 += c2;98} while ((ip += 2) < ipend);99100ctx->superscalar[0].v[0] = a;101ctx->superscalar[1].v[0] = b;102ctx->superscalar[2].v[0] = c;103ctx->superscalar[3].v[0] = d;104ctx->superscalar[0].v[1] = a2;105ctx->superscalar[1].v[1] = b2;106ctx->superscalar[2].v[1] = c2;107ctx->superscalar[3].v[1] = d2;108}109110static void111fletcher_4_superscalar_byteswap(fletcher_4_ctx_t *ctx,112const void *buf, uint64_t size)113{114const uint32_t *ip = buf;115const uint32_t *ipend = ip + (size / sizeof (uint32_t));116uint64_t a, b, c, d;117uint64_t a2, b2, c2, d2;118119a = ctx->superscalar[0].v[0];120b = ctx->superscalar[1].v[0];121c = ctx->superscalar[2].v[0];122d = ctx->superscalar[3].v[0];123a2 = ctx->superscalar[0].v[1];124b2 = ctx->superscalar[1].v[1];125c2 = ctx->superscalar[2].v[1];126d2 = ctx->superscalar[3].v[1];127128do {129a += BSWAP_32(ip[0]);130a2 += BSWAP_32(ip[1]);131b += a;132b2 += a2;133c += b;134c2 += b2;135d += c;136d2 += c2;137} while ((ip += 2) < ipend);138139ctx->superscalar[0].v[0] = a;140ctx->superscalar[1].v[0] = b;141ctx->superscalar[2].v[0] = c;142ctx->superscalar[3].v[0] = d;143ctx->superscalar[0].v[1] = a2;144ctx->superscalar[1].v[1] = b2;145ctx->superscalar[2].v[1] = c2;146ctx->superscalar[3].v[1] = d2;147}148149static boolean_t fletcher_4_superscalar_valid(void)150{151return (B_TRUE);152}153154const fletcher_4_ops_t fletcher_4_superscalar_ops = {155.init_native = fletcher_4_superscalar_init,156.compute_native = fletcher_4_superscalar_native,157.fini_native = fletcher_4_superscalar_fini,158.init_byteswap = fletcher_4_superscalar_init,159.compute_byteswap = fletcher_4_superscalar_byteswap,160.fini_byteswap = fletcher_4_superscalar_fini,161.valid = fletcher_4_superscalar_valid,162.uses_fpu = B_FALSE,163.name = "superscalar"164};165166167