Path: sys/crypto/armv8/armv8_crypto_wrap.c
/*-
 * Copyright (c) 2016 The FreeBSD Foundation
 * Copyright (c) 2020 Ampere Computing
 * All rights reserved.
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file is derived from aesni_wrap.c:
 * Copyright (C) 2008 Damien Miller <[email protected]>
 * Copyright (c) 2010 Konstantin Belousov <[email protected]>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <[email protected]>
 * Copyright 2012-2013 John-Mark Gurney <[email protected]>
 * Copyright (c) 2014 The FreeBSD Foundation
 */

/*
 * This code is built with floating-point enabled. Make sure to have entered
 * into floating-point context before calling any of these functions.
 */
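
/*
 * Illustrative usage sketch (an assumption, not taken from this file): the
 * calling driver is expected to enter a kernel floating-point context before
 * invoking these routines, typically via fpu_kern_enter()/fpu_kern_leave().
 * The exact flags below are illustrative only:
 *
 *	fpu_kern_enter(curthread, NULL, FPU_KERN_NORMAL | FPU_KERN_NOCTX);
 *	armv8_aes_encrypt_cbc(key, len, fromc, toc, iv);
 *	fpu_kern_leave(curthread, NULL);
 */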

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/queue.h>

#include <opencrypto/cryptodev.h>
#include <opencrypto/gmac.h>
#include <crypto/rijndael/rijndael.h>
#include <crypto/armv8/armv8_crypto.h>

#include <arm_neon.h>

static uint8x16_t
armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaeseq_u8(tmp, keysched[i]);
		tmp = vaesmcq_u8(tmp);
		tmp = vaeseq_u8(tmp, keysched[i + 1]);
		tmp = vaesmcq_u8(tmp);
	}

	tmp = vaeseq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesmcq_u8(tmp);
	tmp = vaeseq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

static uint8x16_t
armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaesdq_u8(tmp, keysched[i]);
		tmp = vaesimcq_u8(tmp);
		tmp = vaesdq_u8(tmp, keysched[i + 1]);
		tmp = vaesimcq_u8(tmp);
	}

	tmp = vaesdq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesimcq_u8(tmp);
	tmp = vaesdq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

void
armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t tot, ivreg, tmp;
	uint8_t block[AES_BLOCK_LEN], *from, *to;
	size_t fromseglen, oseglen, seglen, toseglen;

	KASSERT(len % AES_BLOCK_LEN == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	ivreg = vld1q_u8(iv);
	for (; len > 0; len -= seglen) {
		from = crypto_cursor_segment(fromc, &fromseglen);
		to = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_BLOCK_LEN) {
			crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block);
			tmp = vld1q_u8(block);
			tot = armv8_aes_enc(key->aes_rounds - 1,
			    (const void *)key->aes_key, veorq_u8(tmp, ivreg));
			ivreg = tot;
			vst1q_u8(block, tot);
			crypto_cursor_copyback(toc, AES_BLOCK_LEN, block);
			seglen = AES_BLOCK_LEN;
		} else {
			for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
			    seglen -= AES_BLOCK_LEN) {
				tmp = vld1q_u8(from);
				tot = armv8_aes_enc(key->aes_rounds - 1,
				    (const void *)key->aes_key,
				    veorq_u8(tmp, ivreg));
				ivreg = tot;
				vst1q_u8(to, tot);
				from += AES_BLOCK_LEN;
				to += AES_BLOCK_LEN;
			}
			seglen = oseglen - seglen;
			crypto_cursor_advance(fromc, seglen);
			crypto_cursor_advance(toc, seglen);
		}
	}

	explicit_bzero(block, sizeof(block));
}

void
armv8_aes_decrypt_cbc(const AES_key_t *key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t ivreg, nextiv, tmp;
	uint8_t block[AES_BLOCK_LEN], *from, *to;
	size_t fromseglen, oseglen, seglen, toseglen;

	KASSERT(len % AES_BLOCK_LEN == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	ivreg = vld1q_u8(iv);
	for (; len > 0; len -= seglen) {
		from = crypto_cursor_segment(fromc, &fromseglen);
		to = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_BLOCK_LEN) {
			crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block);
			nextiv = vld1q_u8(block);
			tmp = armv8_aes_dec(key->aes_rounds - 1,
			    (const void *)key->aes_key, nextiv);
			vst1q_u8(block, veorq_u8(tmp, ivreg));
			ivreg = nextiv;
			crypto_cursor_copyback(toc, AES_BLOCK_LEN, block);
			seglen = AES_BLOCK_LEN;
		} else {
			for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
			    seglen -= AES_BLOCK_LEN) {
				nextiv = vld1q_u8(from);
				tmp = armv8_aes_dec(key->aes_rounds - 1,
				    (const void *)key->aes_key, nextiv);
				vst1q_u8(to, veorq_u8(tmp, ivreg));
				ivreg = nextiv;
				from += AES_BLOCK_LEN;
				to += AES_BLOCK_LEN;
			}
			crypto_cursor_advance(fromc, oseglen - seglen);
			crypto_cursor_advance(toc, oseglen - seglen);
			seglen = oseglen - seglen;
		}
	}

	explicit_bzero(block, sizeof(block));
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

static inline int32x4_t
xts_crank_lfsr(int32x4_t inp)
{
	const int32x4_t alphamask = {AES_XTS_ALPHA, 1, 1, 1};
	int32x4_t xtweak, ret;

	/* set up xor mask */
	xtweak = vextq_s32(inp, inp, 3);
	xtweak = vshrq_n_s32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = vshlq_n_s32(inp, 1);
	ret ^= xtweak;

	return ret;
}

static void
armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule,
    uint8x16_t *tweak, const uint8_t *from, uint8_t *to, int do_encrypt)
{
	uint8x16_t block;

	block = vld1q_u8(from) ^ *tweak;

	if (do_encrypt)
		block = armv8_aes_enc(rounds - 1, key_schedule, block);
	else
		block = armv8_aes_dec(rounds - 1, key_schedule, block);

	vst1q_u8(to, block ^ *tweak);

	*tweak = vreinterpretq_u8_s32(xts_crank_lfsr(vreinterpretq_s32_u8(*tweak)));
}

static void
armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
    const uint8x16_t *tweak_schedule, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
{
	uint8x16_t tweakreg;
	uint8_t block[AES_XTS_BLOCKSIZE] __aligned(16);
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	uint8_t *from, *to;
	size_t fromseglen, oseglen, seglen, toseglen;

	KASSERT(len % AES_XTS_BLOCKSIZE == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	/*
	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
	 * of a 64-bit block number which we allow to be passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = vld1q_u8(tweak);
	tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg);

	for (; len > 0; len -= seglen) {
		from = crypto_cursor_segment(fromc, &fromseglen);
		to = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_XTS_BLOCKSIZE) {
			crypto_cursor_copydata(fromc, AES_XTS_BLOCKSIZE, block);
			armv8_aes_crypt_xts_block(rounds, data_schedule,
			    &tweakreg, block, block, do_encrypt);
			crypto_cursor_copyback(toc, AES_XTS_BLOCKSIZE, block);
			seglen = AES_XTS_BLOCKSIZE;
		} else {
			for (oseglen = seglen; seglen >= AES_XTS_BLOCKSIZE;
			    seglen -= AES_XTS_BLOCKSIZE) {
				armv8_aes_crypt_xts_block(rounds, data_schedule,
				    &tweakreg, from, to, do_encrypt);
				from += AES_XTS_BLOCKSIZE;
				to += AES_XTS_BLOCKSIZE;
			}
			seglen = oseglen - seglen;
			crypto_cursor_advance(fromc, seglen);
			crypto_cursor_advance(toc, seglen);
		}
	}

	explicit_bzero(block, sizeof(block));
}

void
armv8_aes_encrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len, struct crypto_buffer_cursor *fromc,
    struct crypto_buffer_cursor *toc, const uint8_t iv[static AES_BLOCK_LEN])
{
	armv8_aes_crypt_xts(data_schedule->aes_rounds,
	    (const void *)&data_schedule->aes_key, tweak_schedule, len, fromc,
	    toc, iv, 1);
}

void
armv8_aes_decrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	armv8_aes_crypt_xts(data_schedule->aes_rounds,
	    (const void *)&data_schedule->aes_key, tweak_schedule, len, fromc,
	    toc, iv, 0);
}

#define	AES_INC_COUNTER(counter)				\
	do {							\
		for (int pos = AES_BLOCK_LEN - 1;		\
		    pos >= 0; pos--)				\
			if (++(counter)[pos])			\
				break;				\
	} while (0)

struct armv8_gcm_state {
	__uint128_val_t EK0;
	__uint128_val_t EKi;
	__uint128_val_t Xi;
	__uint128_val_t lenblock;
	uint8_t aes_counter[AES_BLOCK_LEN];
};

static void
armv8_aes_gmac_setup(struct armv8_gcm_state *s, AES_key_t *aes_key,
    const uint8_t *authdata, size_t authdatalen,
    const uint8_t iv[static AES_GCM_IV_LEN], const __uint128_val_t *Htable)
{
	uint8_t block[AES_BLOCK_LEN];
	size_t trailer;

	bzero(s->aes_counter, AES_BLOCK_LEN);
	memcpy(s->aes_counter, iv, AES_GCM_IV_LEN);

	/* Setup the counter */
	s->aes_counter[AES_BLOCK_LEN - 1] = 1;

	/* EK0 for a final GMAC round */
	aes_v8_encrypt(s->aes_counter, s->EK0.c, aes_key);

	/* GCM starts with 2 as counter, 1 is used for final xor of tag. */
	s->aes_counter[AES_BLOCK_LEN - 1] = 2;

	memset(s->Xi.c, 0, sizeof(s->Xi.c));
	trailer = authdatalen % AES_BLOCK_LEN;
	if (authdatalen - trailer > 0) {
		gcm_ghash_v8(s->Xi.u, Htable, authdata, authdatalen - trailer);
		authdata += authdatalen - trailer;
	}
	if (trailer > 0 || authdatalen == 0) {
		memset(block, 0, sizeof(block));
		memcpy(block, authdata, trailer);
		gcm_ghash_v8(s->Xi.u, Htable, block, AES_BLOCK_LEN);
	}
}

static void
armv8_aes_gmac_finish(struct armv8_gcm_state *s, size_t len,
    size_t authdatalen, const __uint128_val_t *Htable)
{
	/* Lengths block */
	s->lenblock.u[0] = s->lenblock.u[1] = 0;
	s->lenblock.d[1] = htobe32(authdatalen * 8);
	s->lenblock.d[3] = htobe32(len * 8);
	gcm_ghash_v8(s->Xi.u, Htable, s->lenblock.c, AES_BLOCK_LEN);

	s->Xi.u[0] ^= s->EK0.u[0];
	s->Xi.u[1] ^= s->EK0.u[1];
}

static void
armv8_aes_encrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
    const uint64_t *from, uint64_t *to)
{
	aes_v8_encrypt(s->aes_counter, s->EKi.c, aes_key);
	AES_INC_COUNTER(s->aes_counter);
	to[0] = from[0] ^ s->EKi.u[0];
	to[1] = from[1] ^ s->EKi.u[1];
}

static void
armv8_aes_decrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
    const uint64_t *from, uint64_t *to)
{
	armv8_aes_encrypt_gcm_block(s, aes_key, from, to);
}

void
armv8_aes_encrypt_gcm(AES_key_t *aes_key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    size_t authdatalen, const uint8_t *authdata,
    uint8_t tag[static GMAC_DIGEST_LEN],
    const uint8_t iv[static AES_GCM_IV_LEN],
    const __uint128_val_t *Htable)
{
	struct armv8_gcm_state s;
	uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN);
	uint64_t *from64, *to64;
	size_t fromseglen, i, olen, oseglen, seglen, toseglen;

	armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);

	for (olen = len; len > 0; len -= seglen) {
		from64 = crypto_cursor_segment(fromc, &fromseglen);
		to64 = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_BLOCK_LEN) {
			seglen = ulmin(len, AES_BLOCK_LEN);

			memset(block, 0, sizeof(block));
			crypto_cursor_copydata(fromc, (int)seglen, block);

			if (seglen == AES_BLOCK_LEN) {
				armv8_aes_encrypt_gcm_block(&s, aes_key,
				    (uint64_t *)block, (uint64_t *)block);
			} else {
				aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
				AES_INC_COUNTER(s.aes_counter);
				for (i = 0; i < seglen; i++)
					block[i] ^= s.EKi.c[i];
			}
			gcm_ghash_v8(s.Xi.u, Htable, block, seglen);

			crypto_cursor_copyback(toc, (int)seglen, block);
		} else {
			for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
			    seglen -= AES_BLOCK_LEN) {
				armv8_aes_encrypt_gcm_block(&s, aes_key, from64,
				    to64);
				gcm_ghash_v8(s.Xi.u, Htable, (uint8_t *)to64,
				    AES_BLOCK_LEN);

				from64 += 2;
				to64 += 2;
			}

			seglen = oseglen - seglen;
			crypto_cursor_advance(fromc, seglen);
			crypto_cursor_advance(toc, seglen);
		}
	}

	armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);
	memcpy(tag, s.Xi.c, GMAC_DIGEST_LEN);

	explicit_bzero(block, sizeof(block));
	explicit_bzero(&s, sizeof(s));
}

int
armv8_aes_decrypt_gcm(AES_key_t *aes_key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    size_t authdatalen, const uint8_t *authdata,
    const uint8_t tag[static GMAC_DIGEST_LEN],
    const uint8_t iv[static AES_GCM_IV_LEN],
    const __uint128_val_t *Htable)
{
	struct armv8_gcm_state s;
	struct crypto_buffer_cursor fromcc;
	uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN), *from;
	uint64_t *block64, *from64, *to64;
	size_t fromseglen, olen, oseglen, seglen, toseglen;
	int error;

	armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);

	crypto_cursor_copy(fromc, &fromcc);
	for (olen = len; len > 0; len -= seglen) {
		from = crypto_cursor_segment(&fromcc, &fromseglen);
		seglen = ulmin(len, fromseglen);
		seglen -= seglen % AES_BLOCK_LEN;
		if (seglen > 0) {
			gcm_ghash_v8(s.Xi.u, Htable, from, seglen);
			crypto_cursor_advance(&fromcc, seglen);
		} else {
			memset(block, 0, sizeof(block));
			seglen = ulmin(len, AES_BLOCK_LEN);
			crypto_cursor_copydata(&fromcc, seglen, block);
			gcm_ghash_v8(s.Xi.u, Htable, block, seglen);
		}
	}

	armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);

	if (timingsafe_bcmp(tag, s.Xi.c, GMAC_DIGEST_LEN) != 0) {
		error = EBADMSG;
		goto out;
	}

	block64 = (uint64_t *)block;
	for (len = olen; len > 0; len -= seglen) {
		from64 = crypto_cursor_segment(fromc, &fromseglen);
		to64 = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_BLOCK_LEN) {
			seglen = ulmin(len, AES_BLOCK_LEN);

			memset(block, 0, sizeof(block));
			crypto_cursor_copydata(fromc, seglen, block);

			armv8_aes_decrypt_gcm_block(&s, aes_key, block64,
			    block64);

			crypto_cursor_copyback(toc, (int)seglen, block);
		} else {
			for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
			    seglen -= AES_BLOCK_LEN) {
				armv8_aes_decrypt_gcm_block(&s, aes_key, from64,
				    to64);

				from64 += 2;
				to64 += 2;
			}

			seglen = oseglen - seglen;
			crypto_cursor_advance(fromc, seglen);
			crypto_cursor_advance(toc, seglen);
		}
	}

	error = 0;
out:
	explicit_bzero(block, sizeof(block));
	explicit_bzero(&s, sizeof(s));
	return (error);
}