/*-1* Copyright (c) 2014-2021 The FreeBSD Foundation2* Copyright (c) 2018 iXsystems, Inc3* All rights reserved.4*5* Portions of this software were developed by John-Mark Gurney6* under the sponsorship of the FreeBSD Foundation and7* Rubicon Communications, LLC (Netgate).8*9* Portions of this software were developed by Ararat River10* Consulting, LLC under sponsorship of the FreeBSD Foundation.11*12* Redistribution and use in source and binary forms, with or without13* modification, are permitted provided that the following conditions14* are met:15* 1. Redistributions of source code must retain the above copyright16* notice, this list of conditions and the following disclaimer.17* 2. Redistributions in binary form must reproduce the above copyright18* notice, this list of conditions and the following disclaimer in the19* documentation and/or other materials provided with the distribution.20*21* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND22* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE23* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE24* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE25* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL26* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS27* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)28* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT29* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY30* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF31* SUCH DAMAGE.32*33*34* This file implements AES-CCM+CBC-MAC, as described35* at https://tools.ietf.org/html/rfc3610, using Intel's36* AES-NI instructions.37*38*/3940#include <sys/types.h>41#include <sys/endian.h>42#include <sys/param.h>4344#include <sys/systm.h>45#include <crypto/aesni/aesni.h>46#include <crypto/aesni/aesni_os.h>47#include <crypto/aesni/aesencdec.h>48#define AESNI_ENC(d, k, nr) aesni_enc(nr-1, (const __m128i*)k, d)4950#include <wmmintrin.h>51#include <emmintrin.h>52#include <smmintrin.h>5354/*55* Encrypt a single 128-bit block after56* doing an xor. This is also used to57* decrypt (yay symmetric encryption).58*/59static inline __m128i60xor_and_encrypt(__m128i a, __m128i b, const unsigned char *k, int nr)61{62__m128i retval = _mm_xor_si128(a, b);6364retval = AESNI_ENC(retval, k, nr);65return (retval);66}6768/*69* Put value at the end of block, starting at offset.70* (This goes backwards, putting bytes in *until* it71* reaches offset.)72*/73static void74append_int(size_t value, __m128i *block, size_t offset)75{76int indx = sizeof(*block) - 1;77uint8_t *bp = (uint8_t*)block;7879while (indx > (sizeof(*block) - offset)) {80bp[indx] = value & 0xff;81indx--;82value >>= 8;83}84}8586/*87* Start the CBC-MAC process. This handles the auth data.88*/89static __m128i90cbc_mac_start(const unsigned char *auth_data, size_t auth_len,91const unsigned char *nonce, size_t nonce_len,92const unsigned char *key, int nr,93size_t data_len, size_t tag_len)94{95__m128i cbc_block, staging_block;96uint8_t *byte_ptr;97/* This defines where the message length goes */98int L = sizeof(__m128i) - 1 - nonce_len;99100/*101* Set up B0 here. This has the flags byte,102* followed by the nonce, followed by the103* length of the message.104*/105cbc_block = _mm_setzero_si128();106byte_ptr = (uint8_t*)&cbc_block;107byte_ptr[0] = ((auth_len > 0) ? 1 : 0) * 64 |108(((tag_len - 2) / 2) * 8) |109(L - 1);110bcopy(nonce, byte_ptr + 1, nonce_len);111append_int(data_len, &cbc_block, L+1);112cbc_block = AESNI_ENC(cbc_block, key, nr);113114if (auth_len != 0) {115/*116* We need to start by appending the length descriptor.117*/118uint32_t auth_amt;119size_t copy_amt;120const uint8_t *auth_ptr = auth_data;121122staging_block = _mm_setzero_si128();123124/*125* The current OCF calling convention means that126* there can never be more than 4g of authentication127* data, so we don't handle the 0xffff case.128*/129KASSERT(auth_len < (1ULL << 32),130("%s: auth_len (%zu) larger than 4GB",131__FUNCTION__, auth_len));132133if (auth_len < ((1 << 16) - (1 << 8))) {134/*135* If the auth data length is less than136* 0xff00, we don't need to encode a length137* specifier, just the length of the auth138* data.139*/140be16enc(&staging_block, auth_len);141auth_amt = 2;142} else if (auth_len < (1ULL << 32)) {143/*144* Two bytes for the length prefix, and then145* four bytes for the length. This makes a total146* of 6 bytes to describe the auth data length.147*/148be16enc(&staging_block, 0xfffe);149be32enc((char*)&staging_block + 2, auth_len);150auth_amt = 6;151} else152panic("%s: auth len too large", __FUNCTION__);153154/*155* Need to copy abytes into blocks. The first block is156* already partially filled, by auth_amt, so we need157* to handle that. The last block needs to be zero padded.158*/159copy_amt = MIN(auth_len,160sizeof(staging_block) - auth_amt);161byte_ptr = (uint8_t*)&staging_block;162bcopy(auth_ptr, &byte_ptr[auth_amt], copy_amt);163auth_ptr += copy_amt;164165cbc_block = xor_and_encrypt(cbc_block, staging_block, key, nr);166167while (auth_ptr < auth_data + auth_len) {168copy_amt = MIN((auth_data + auth_len) - auth_ptr,169sizeof(staging_block));170if (copy_amt < sizeof(staging_block))171bzero(&staging_block, sizeof(staging_block));172bcopy(auth_ptr, &staging_block, copy_amt);173cbc_block = xor_and_encrypt(cbc_block, staging_block,174key, nr);175auth_ptr += copy_amt;176}177}178return (cbc_block);179}180181/*182* Implement AES CCM+CBC-MAC encryption and authentication.183*184* A couple of notes:185* Since abytes is limited to a 32 bit value here, the AAD is186* limited to 4 gigabytes or less.187*/188void189AES_CCM_encrypt(const unsigned char *in, unsigned char *out,190const unsigned char *addt, const unsigned char *nonce,191unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,192int tag_length, const unsigned char *key, int nr)193{194int L;195int counter = 1; /* S0 has 0, S1 has 1 */196size_t copy_amt, total = 0;197uint8_t *byte_ptr;198__m128i s0, rolling_mac, s_x, staging_block;199200/* NIST 800-38c section A.1 says n is [7, 13]. */201if (nlen < 7 || nlen > 13)202panic("%s: bad nonce length %d", __FUNCTION__, nlen);203204/*205* We need to know how many bytes to use to describe206* the length of the data. Normally, nlen should be207* 12, which leaves us 3 bytes to do that -- 16mbytes of208* data to encrypt. But it can be longer or shorter;209* this impacts the length of the message.210*/211L = sizeof(__m128i) - 1 - nlen;212213/*214* Clear out the blocks215*/216s0 = _mm_setzero_si128();217218rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,219key, nr, nbytes, tag_length);220221/* s0 has flags, nonce, and then 0 */222byte_ptr = (uint8_t*)&s0;223byte_ptr[0] = L - 1; /* but the flags byte only has L' */224bcopy(nonce, &byte_ptr[1], nlen);225226/*227* Now to cycle through the rest of the data.228*/229bcopy(&s0, &s_x, sizeof(s0));230231while (total < nbytes) {232/*233* Copy the plain-text data into staging_block.234* This may need to be zero-padded.235*/236copy_amt = MIN(nbytes - total, sizeof(staging_block));237bcopy(in+total, &staging_block, copy_amt);238if (copy_amt < sizeof(staging_block)) {239byte_ptr = (uint8_t*)&staging_block;240bzero(&byte_ptr[copy_amt],241sizeof(staging_block) - copy_amt);242}243rolling_mac = xor_and_encrypt(rolling_mac, staging_block,244key, nr);245/* Put the counter into the s_x block */246append_int(counter++, &s_x, L+1);247/* Encrypt that */248__m128i X = AESNI_ENC(s_x, key, nr);249/* XOR the plain-text with the encrypted counter block */250staging_block = _mm_xor_si128(staging_block, X);251/* And copy it out */252bcopy(&staging_block, out+total, copy_amt);253total += copy_amt;254}255/*256* Allegedly done with it! Except for the tag.257*/258s0 = AESNI_ENC(s0, key, nr);259staging_block = _mm_xor_si128(s0, rolling_mac);260bcopy(&staging_block, tag, tag_length);261explicit_bzero(&s0, sizeof(s0));262explicit_bzero(&staging_block, sizeof(staging_block));263explicit_bzero(&s_x, sizeof(s_x));264explicit_bzero(&rolling_mac, sizeof(rolling_mac));265}266267/*268* Implement AES CCM+CBC-MAC decryption and authentication.269* Returns 0 on failure, 1 on success.270*271* The primary difference here is that each encrypted block272* needs to be hashed&encrypted after it is decrypted (since273* the CBC-MAC is based on the plain text). This means that274* we do the decryption twice -- first to verify the tag,275* and second to decrypt and copy it out.276*277* To avoid annoying code copying, we implement the main278* loop as a separate function.279*280* Call with out as NULL to not store the decrypted results;281* call with hashp as NULL to not run the authentication.282* Calling with neither as NULL does the decryption and283* authentication as a single pass (which is not allowed284* per the specification, really).285*286* If hashp is non-NULL, it points to the post-AAD computed287* checksum.288*/289static void290decrypt_loop(const unsigned char *in, unsigned char *out, size_t nbytes,291__m128i s0, size_t nonce_length, __m128i *macp,292const unsigned char *key, int nr)293{294size_t total = 0;295__m128i s_x = s0, mac_block;296int counter = 1;297const size_t L = sizeof(__m128i) - 1 - nonce_length;298__m128i pad_block, staging_block;299300/*301* The starting mac (post AAD, if any).302*/303if (macp != NULL)304mac_block = *macp;305306while (total < nbytes) {307size_t copy_amt = MIN(nbytes - total, sizeof(staging_block));308309if (copy_amt < sizeof(staging_block)) {310staging_block = _mm_setzero_si128();311}312bcopy(in+total, &staging_block, copy_amt);313314/*315* staging_block has the current block of input data,316* zero-padded if necessary. This is used in computing317* both the decrypted data, and the authentication tag.318*/319append_int(counter++, &s_x, L+1);320/*321* The tag is computed based on the decrypted data.322*/323pad_block = AESNI_ENC(s_x, key, nr);324if (copy_amt < sizeof(staging_block)) {325/*326* Need to pad out pad_block with 0.327* (staging_block was set to 0's above.)328*/329uint8_t *end_of_buffer = (uint8_t*)&pad_block;330bzero(end_of_buffer + copy_amt,331sizeof(pad_block) - copy_amt);332}333staging_block = _mm_xor_si128(staging_block, pad_block);334335if (out)336bcopy(&staging_block, out+total, copy_amt);337338if (macp)339mac_block = xor_and_encrypt(mac_block, staging_block,340key, nr);341total += copy_amt;342}343344if (macp)345*macp = mac_block;346347explicit_bzero(&pad_block, sizeof(pad_block));348explicit_bzero(&staging_block, sizeof(staging_block));349explicit_bzero(&mac_block, sizeof(mac_block));350}351352/*353* The exposed decryption routine. This is practically a354* copy of the encryption routine, except that the order355* in which the tag is created is changed.356* XXX combine the two functions at some point!357*/358int359AES_CCM_decrypt(const unsigned char *in, unsigned char *out,360const unsigned char *addt, const unsigned char *nonce,361const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,362int tag_length, const unsigned char *key, int nr)363{364int L;365__m128i s0, rolling_mac, staging_block;366uint8_t *byte_ptr;367368if (nlen < 0 || nlen > 15)369panic("%s: bad nonce length %d", __FUNCTION__, nlen);370371/*372* We need to know how many bytes to use to describe373* the length of the data. Normally, nlen should be374* 12, which leaves us 3 bytes to do that -- 16mbytes of375* data to encrypt. But it can be longer or shorter.376*/377L = sizeof(__m128i) - 1 - nlen;378379/*380* Clear out the blocks381*/382s0 = _mm_setzero_si128();383384rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,385key, nr, nbytes, tag_length);386/* s0 has flags, nonce, and then 0 */387byte_ptr = (uint8_t*)&s0;388byte_ptr[0] = L-1; /* but the flags byte only has L' */389bcopy(nonce, &byte_ptr[1], nlen);390391/*392* Now to cycle through the rest of the data.393*/394decrypt_loop(in, NULL, nbytes, s0, nlen, &rolling_mac, key, nr);395396/*397* Compare the tag.398*/399staging_block = _mm_xor_si128(AESNI_ENC(s0, key, nr), rolling_mac);400if (timingsafe_bcmp(&staging_block, tag, tag_length) != 0) {401return (0);402}403404/*405* Push out the decryption results this time.406*/407decrypt_loop(in, out, nbytes, s0, nlen, NULL, key, nr);408return (1);409}410411412