/*-1* Copyright (c) 2014 The FreeBSD Foundation2*3* This software was developed by John-Mark Gurney under4* the sponsorship of the FreeBSD Foundation and5* Rubicon Communications, LLC (Netgate).6* Redistribution and use in source and binary forms, with or without7* modification, are permitted provided that the following conditions8* are met:9* 1. Redistributions of source code must retain the above copyright10* notice, this list of conditions and the following disclaimer.11* 2. Redistributions in binary form must reproduce the above copyright12* notice, this list of conditions and the following disclaimer in the13* documentation and/or other materials provided with the distribution.14*15* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND16* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE17* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE18* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE19* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL20* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS21* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)22* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT23* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY24* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF25* SUCH DAMAGE.26*27*/2829#include "gfmult.h"3031#define REV_POLY_REDUCT 0xe1 /* 0x87 bit reversed */3233/* reverse the bits of a nibble */34static const uint8_t nib_rev[] = {350x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,360x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,37};3839/* calculate v * 2 */40static inline struct gf12841gf128_mulalpha(struct gf128 v)42{43uint64_t mask;4445mask = !!(v.v[1] & 1);46mask = ~(mask - 1);47v.v[1] = (v.v[1] >> 1) | ((v.v[0] & 1) << 63);48v.v[0] = (v.v[0] >> 1) ^ ((mask & REV_POLY_REDUCT) << 56);4950return v;51}5253/*54* Generate a table for 0-16 * h. Store the results in the table w/ indexes55* bit reversed, and the words striped across the values.56*/57void58gf128_genmultable(struct gf128 h, struct gf128table *t)59{60struct gf128 tbl[16];61int i;6263tbl[0] = MAKE_GF128(0, 0);64tbl[1] = h;6566for (i = 2; i < 16; i += 2) {67tbl[i] = gf128_mulalpha(tbl[i / 2]);68tbl[i + 1] = gf128_add(tbl[i], h);69}7071for (i = 0; i < 16; i++) {72t->a[nib_rev[i]] = tbl[i].v[0] >> 32;73t->b[nib_rev[i]] = tbl[i].v[0];74t->c[nib_rev[i]] = tbl[i].v[1] >> 32;75t->d[nib_rev[i]] = tbl[i].v[1];76}77}7879/*80* Generate tables containing h, h^2, h^3 and h^4, starting at 0.81*/82void83gf128_genmultable4(struct gf128 h, struct gf128table4 *t)84{85struct gf128 h2, h3, h4;8687gf128_genmultable(h, &t->tbls[0]);8889h2 = gf128_mul(h, &t->tbls[0]);9091gf128_genmultable(h2, &t->tbls[1]);9293h3 = gf128_mul(h, &t->tbls[1]);94gf128_genmultable(h3, &t->tbls[2]);9596h4 = gf128_mul(h2, &t->tbls[1]);97gf128_genmultable(h4, &t->tbls[3]);98}99100/*101* Read a row from the table.102*/103static inline struct gf128104readrow(struct gf128table *tbl, unsigned bits)105{106struct gf128 r;107108bits = bits % 16;109110r.v[0] = ((uint64_t)tbl->a[bits] << 32) | tbl->b[bits];111r.v[1] = ((uint64_t)tbl->c[bits] << 32) | tbl->d[bits];112113return r;114}115116/*117* These are the reduction values. Since we are dealing with bit reversed118* version, the values need to be bit reversed, AND the indexes are also119* bit reversed to make lookups quicker.120*/121static uint16_t reduction[] = {1220x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,1230xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,124};125126/*127* Calculate:128* (x*2^4 + word[3,0]*h) *129* 2^4 + word[7,4]*h) *130* ...131* 2^4 + word[63,60]*h132*/133static struct gf128134gfmultword(uint64_t word, struct gf128 x, struct gf128table *tbl)135{136struct gf128 row;137unsigned bits;138unsigned redbits;139int i;140141for (i = 0; i < 64; i += 4) {142bits = word % 16;143144/* fetch row */145row = readrow(tbl, bits);146147/* x * 2^4 */148redbits = x.v[1] % 16;149x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;150x.v[0] >>= 4;151x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);152153word >>= 4;154155x = gf128_add(x, row);156}157158return x;159}160161/*162* Calculate163* (x*2^4 + worda[3,0]*h^4+wordb[3,0]*h^3+...+wordd[3,0]*h) *164* ...165* 2^4 + worda[63,60]*h^4+ ... + wordd[63,60]*h166*167* Passing/returning struct is .5% faster than passing in via pointer on168* amd64.169*/170static struct gf128171gfmultword4(uint64_t worda, uint64_t wordb, uint64_t wordc, uint64_t wordd,172struct gf128 x, struct gf128table4 *tbl)173{174struct gf128 rowa, rowb, rowc, rowd;175unsigned bitsa, bitsb, bitsc, bitsd;176unsigned redbits;177int i;178179/*180* XXX - nibble reverse words to save a shift? probably not as181* nibble reverse would take 20 ops (5 * 4) verse 16182*/183184for (i = 0; i < 64; i += 4) {185bitsa = worda % 16;186bitsb = wordb % 16;187bitsc = wordc % 16;188bitsd = wordd % 16;189190/* fetch row */191rowa = readrow(&tbl->tbls[3], bitsa);192rowb = readrow(&tbl->tbls[2], bitsb);193rowc = readrow(&tbl->tbls[1], bitsc);194rowd = readrow(&tbl->tbls[0], bitsd);195196/* x * 2^4 */197redbits = x.v[1] % 16;198x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;199x.v[0] >>= 4;200x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);201202worda >>= 4;203wordb >>= 4;204wordc >>= 4;205wordd >>= 4;206207x = gf128_add(x, gf128_add(rowa, gf128_add(rowb,208gf128_add(rowc, rowd))));209}210211return x;212}213214struct gf128215gf128_mul(struct gf128 v, struct gf128table *tbl)216{217struct gf128 ret;218219ret = MAKE_GF128(0, 0);220221ret = gfmultword(v.v[1], ret, tbl);222ret = gfmultword(v.v[0], ret, tbl);223224return ret;225}226227/*228* Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:229* (((a*h+b)*h+c)*h+d)*h230*/231struct gf128232gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, struct gf128 d,233struct gf128table4 *tbl)234{235struct gf128 tmp;236237tmp = MAKE_GF128(0, 0);238239tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);240tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);241242return tmp;243}244245/*246* a = data[0..15] + r247* b = data[16..31]248* c = data[32..47]249* d = data[48..63]250*251* Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:252* (((a*h+b)*h+c)*h+d)*h253*/254struct gf128255gf128_mul4b(struct gf128 r, const uint8_t *v, struct gf128table4 *tbl)256{257struct gf128 a, b, c, d;258struct gf128 tmp;259260tmp = MAKE_GF128(0, 0);261262a = gf128_add(r, gf128_read(&v[0*16]));263b = gf128_read(&v[1*16]);264c = gf128_read(&v[2*16]);265d = gf128_read(&v[3*16]);266267tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);268tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);269270return tmp;271}272273274