/*
 * Copyright (c) 2017 Thomas Pornin <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "inner.h"

/*
 * Implementation Notes
 * ====================
 *
 * The combined CTR + CBC-MAC functions can only handle full blocks,
 * so some buffering is necessary. Moreover, EAX has a special padding
 * rule for CBC-MAC, which implies that we cannot compute the MAC over
 * the last received full block until we know whether we are at the
 * end of the data or not.
 *
 *  - 'ptr' contains a value from 1 to 16, which is the number of bytes
 *    accumulated in buf[] that still needs to be processed with the
 *    current OMAC computation. Beware that this can go to 16: a
 *    complete block cannot be processed until it is known whether it
 *    is the last block or not.
However, it can never be 0, because41* OMAC^t works on an input that is at least one-block long.42*43* - When processing the message itself, CTR encryption/decryption is44* also done at the same time. The first 'ptr' bytes of buf[] then45* contains the encrypted bytes, while the last '16 - ptr' bytes of46* buf[] are the remnants of the stream block, to be used against47* the next input bytes, when available.48*49* - The current counter and running CBC-MAC values are kept in 'ctr'50* and 'cbcmac', respectively.51*52* - The derived keys for padding are kept in L2 and L4 (double and53* quadruple of Enc_K(0^n), in GF(2^128), respectively).54*/5556/*57* Start an OMAC computation; the first block is the big-endian58* representation of the provided value ('val' must fit on one byte).59* We make it a delayed block because it may also be the last one,60*/61static void62omac_start(br_eax_context *ctx, unsigned val)63{64memset(ctx->cbcmac, 0, sizeof ctx->cbcmac);65memset(ctx->buf, 0, sizeof ctx->buf);66ctx->buf[15] = val;67ctx->ptr = 16;68}6970/*71* Double a value in finite field GF(2^128), defined with modulus72* X^128+X^7+X^2+X+1.73*/74static void75double_gf128(unsigned char *dst, const unsigned char *src)76{77unsigned cc;78int i;7980cc = 0x87 & -((unsigned)src[0] >> 7);81for (i = 15; i >= 0; i --) {82unsigned z;8384z = (src[i] << 1) ^ cc;85cc = z >> 8;86dst[i] = (unsigned char)z;87}88}8990/*91* Apply padding to the last block, currently in ctx->buf (with92* ctx->ptr bytes), and finalize OMAC computation.93*/94static void95do_pad(br_eax_context *ctx)96{97unsigned char *pad;98size_t ptr, u;99100ptr = ctx->ptr;101if (ptr == 16) {102pad = ctx->L2;103} else {104ctx->buf[ptr ++] = 0x80;105memset(ctx->buf + ptr, 0x00, 16 - ptr);106pad = ctx->L4;107}108for (u = 0; u < sizeof ctx->buf; u ++) {109ctx->buf[u] ^= pad[u];110}111(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf);112}113114/*115* Apply CBC-MAC on the provided data, with buffering management.116*117* 
Upon entry, two situations are acceptable:118*119* ctx->ptr == 0: there is no data to process in ctx->buf120* ctx->ptr == 16: there is a full block of unprocessed data in ctx->buf121*122* Upon exit, ctx->ptr may be zero only if it was already zero on entry,123* and len == 0. In all other situations, ctx->ptr will be non-zero on124* exit (and may have value 16).125*/126static void127do_cbcmac_chunk(br_eax_context *ctx, const void *data, size_t len)128{129size_t ptr;130131if (len == 0) {132return;133}134ptr = len & (size_t)15;135if (ptr == 0) {136len -= 16;137ptr = 16;138} else {139len -= ptr;140}141if (ctx->ptr == 16) {142(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,143ctx->buf, sizeof ctx->buf);144}145(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, data, len);146memcpy(ctx->buf, (const unsigned char *)data + len, ptr);147ctx->ptr = ptr;148}149150/* see bearssl_aead.h */151void152br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx)153{154unsigned char tmp[16], iv[16];155156ctx->vtable = &br_eax_vtable;157ctx->bctx = bctx;158159/*160* Encrypt a whole-zero block to compute L2 and L4.161*/162memset(tmp, 0, sizeof tmp);163memset(iv, 0, sizeof iv);164(*bctx)->ctr(bctx, iv, tmp, sizeof tmp);165double_gf128(ctx->L2, tmp);166double_gf128(ctx->L4, ctx->L2);167}168169/* see bearssl_aead.h */170void171br_eax_capture(const br_eax_context *ctx, br_eax_state *st)172{173/*174* We capture the three OMAC* states _after_ processing the175* initial block (assuming that nonce, message and AAD are176* all non-empty).177*/178int i;179180memset(st->st, 0, sizeof st->st);181for (i = 0; i < 3; i ++) {182unsigned char tmp[16];183184memset(tmp, 0, sizeof tmp);185tmp[15] = (unsigned char)i;186(*ctx->bctx)->mac(ctx->bctx, st->st[i], tmp, sizeof tmp);187}188}189190/* see bearssl_aead.h */191void192br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len)193{194/*195* Process nonce with OMAC^0.196*/197omac_start(ctx, 0);198do_cbcmac_chunk(ctx, nonce, 
len);199do_pad(ctx);200memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);201202/*203* Start OMAC^1 for the AAD ("header" in the EAX specification).204*/205omac_start(ctx, 1);206207/*208* We use ctx->head[0] as temporary flag to mark that we are209* using a "normal" reset().210*/211ctx->head[0] = 0;212}213214/* see bearssl_aead.h */215void216br_eax_reset_pre_aad(br_eax_context *ctx, const br_eax_state *st,217const void *nonce, size_t len)218{219if (len == 0) {220omac_start(ctx, 0);221} else {222memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac);223ctx->ptr = 0;224do_cbcmac_chunk(ctx, nonce, len);225}226do_pad(ctx);227memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);228229memcpy(ctx->cbcmac, st->st[1], sizeof ctx->cbcmac);230ctx->ptr = 0;231232memcpy(ctx->ctr, st->st[2], sizeof ctx->ctr);233234/*235* We use ctx->head[0] as a flag to indicate that we use a236* a recorded state, with ctx->ctr containing the preprocessed237* first block for OMAC^2.238*/239ctx->head[0] = 1;240}241242/* see bearssl_aead.h */243void244br_eax_reset_post_aad(br_eax_context *ctx, const br_eax_state *st,245const void *nonce, size_t len)246{247if (len == 0) {248omac_start(ctx, 0);249} else {250memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac);251ctx->ptr = 0;252do_cbcmac_chunk(ctx, nonce, len);253}254do_pad(ctx);255memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);256memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce);257258memcpy(ctx->head, st->st[1], sizeof ctx->head);259260memcpy(ctx->cbcmac, st->st[2], sizeof ctx->cbcmac);261ctx->ptr = 0;262}263264/* see bearssl_aead.h */265void266br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len)267{268size_t ptr;269270ptr = ctx->ptr;271272/*273* If there is a partial block, first complete it.274*/275if (ptr < 16) {276size_t clen;277278clen = 16 - ptr;279if (len <= clen) {280memcpy(ctx->buf + ptr, data, len);281ctx->ptr = ptr + len;282return;283}284memcpy(ctx->buf + ptr, data, clen);285data = (const unsigned char *)data + clen;286len 
-= clen;287}288289/*290* We now have a full block in buf[], and this is not the last291* block.292*/293do_cbcmac_chunk(ctx, data, len);294}295296/* see bearssl_aead.h */297void298br_eax_flip(br_eax_context *ctx)299{300int from_capture;301302/*303* ctx->head[0] may be non-zero if the context was reset with304* a pre-AAD captured state. In that case, ctx->ctr[] contains305* the state for OMAC^2 _after_ processing the first block.306*/307from_capture = ctx->head[0];308309/*310* Complete the OMAC computation on the AAD.311*/312do_pad(ctx);313memcpy(ctx->head, ctx->cbcmac, sizeof ctx->cbcmac);314315/*316* Start OMAC^2 for the encrypted data.317* If the context was initialized from a captured state, then318* the OMAC^2 value is in the ctr[] array.319*/320if (from_capture) {321memcpy(ctx->cbcmac, ctx->ctr, sizeof ctx->cbcmac);322ctx->ptr = 0;323} else {324omac_start(ctx, 2);325}326327/*328* Initial counter value for CTR is the processed nonce.329*/330memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce);331}332333/* see bearssl_aead.h */334void335br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len)336{337unsigned char *dbuf;338size_t ptr;339340/*341* Ensure that there is actual data to process.342*/343if (len == 0) {344return;345}346347dbuf = data;348ptr = ctx->ptr;349350/*351* We may have ptr == 0 here if we initialized from a captured352* state. 
In that case, there is no partially consumed block353* or unprocessed data.354*/355if (ptr != 0 && ptr != 16) {356/*357* We have a partially consumed block.358*/359size_t u, clen;360361clen = 16 - ptr;362if (len <= clen) {363clen = len;364}365if (encrypt) {366for (u = 0; u < clen; u ++) {367ctx->buf[ptr + u] ^= dbuf[u];368}369memcpy(dbuf, ctx->buf + ptr, clen);370} else {371for (u = 0; u < clen; u ++) {372unsigned dx, sx;373374sx = ctx->buf[ptr + u];375dx = dbuf[u];376ctx->buf[ptr + u] = dx;377dbuf[u] = sx ^ dx;378}379}380381if (len <= clen) {382ctx->ptr = ptr + clen;383return;384}385dbuf += clen;386len -= clen;387}388389/*390* We now have a complete encrypted block in buf[] that must still391* be processed with OMAC, and this is not the final buf.392* Exception: when ptr == 0, no block has been produced yet.393*/394if (ptr != 0) {395(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,396ctx->buf, sizeof ctx->buf);397}398399/*400* Do CTR encryption or decryption and CBC-MAC for all full blocks401* except the last.402*/403ptr = len & (size_t)15;404if (ptr == 0) {405len -= 16;406ptr = 16;407} else {408len -= ptr;409}410if (encrypt) {411(*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,412dbuf, len);413} else {414(*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,415dbuf, len);416}417dbuf += len;418419/*420* Compute next block of CTR stream, and use it to finish421* encrypting or decrypting the data.422*/423memset(ctx->buf, 0, sizeof ctx->buf);424(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, ctx->buf, sizeof ctx->buf);425if (encrypt) {426size_t u;427428for (u = 0; u < ptr; u ++) {429ctx->buf[u] ^= dbuf[u];430}431memcpy(dbuf, ctx->buf, ptr);432} else {433size_t u;434435for (u = 0; u < ptr; u ++) {436unsigned dx, sx;437438sx = ctx->buf[u];439dx = dbuf[u];440ctx->buf[u] = dx;441dbuf[u] = sx ^ dx;442}443}444ctx->ptr = ptr;445}446447/*448* Complete tag computation. 
The final tag is written in ctx->cbcmac.449*/450static void451do_final(br_eax_context *ctx)452{453size_t u;454455do_pad(ctx);456457/*458* Authentication tag is the XOR of the three OMAC outputs for459* the nonce, AAD and encrypted data.460*/461for (u = 0; u < 16; u ++) {462ctx->cbcmac[u] ^= ctx->nonce[u] ^ ctx->head[u];463}464}465466/* see bearssl_aead.h */467void468br_eax_get_tag(br_eax_context *ctx, void *tag)469{470do_final(ctx);471memcpy(tag, ctx->cbcmac, sizeof ctx->cbcmac);472}473474/* see bearssl_aead.h */475void476br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len)477{478do_final(ctx);479memcpy(tag, ctx->cbcmac, len);480}481482/* see bearssl_aead.h */483uint32_t484br_eax_check_tag_trunc(br_eax_context *ctx, const void *tag, size_t len)485{486unsigned char tmp[16];487size_t u;488int x;489490br_eax_get_tag(ctx, tmp);491x = 0;492for (u = 0; u < len; u ++) {493x |= tmp[u] ^ ((const unsigned char *)tag)[u];494}495return EQ0(x);496}497498/* see bearssl_aead.h */499uint32_t500br_eax_check_tag(br_eax_context *ctx, const void *tag)501{502return br_eax_check_tag_trunc(ctx, tag, 16);503}504505/* see bearssl_aead.h */506const br_aead_class br_eax_vtable = {50716,508(void (*)(const br_aead_class **, const void *, size_t))509&br_eax_reset,510(void (*)(const br_aead_class **, const void *, size_t))511&br_eax_aad_inject,512(void (*)(const br_aead_class **))513&br_eax_flip,514(void (*)(const br_aead_class **, int, void *, size_t))515&br_eax_run,516(void (*)(const br_aead_class **, void *))517&br_eax_get_tag,518(uint32_t (*)(const br_aead_class **, const void *))519&br_eax_check_tag,520(void (*)(const br_aead_class **, void *, size_t))521&br_eax_get_tag_trunc,522(uint32_t (*)(const br_aead_class **, const void *, size_t))523&br_eax_check_tag_trunc524};525526527