GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/crypto/armv8/armv8_crypto_wrap.c
/*-
 * Copyright (c) 2016 The FreeBSD Foundation
 * Copyright (c) 2020 Ampere Computing
 * All rights reserved.
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file is derived from aesni_wrap.c:
 * Copyright (C) 2008 Damien Miller <[email protected]>
 * Copyright (c) 2010 Konstantin Belousov <[email protected]>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <[email protected]>
 * Copyright 2012-2013 John-Mark Gurney <[email protected]>
 * Copyright (c) 2014 The FreeBSD Foundation
 */

/*
 * This code is built with floating-point enabled.  Callers must have
 * entered a floating-point context before calling any of these functions.
 */

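/*
 * In-kernel consumers (e.g. the armv8crypto(4) driver glue in
 * armv8_crypto.c) are expected to bracket calls to these routines with
 * fpu_kern_enter()/fpu_kern_leave() so the NEON register state is saved
 * and restored around their use.
 */
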
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/queue.h>

#include <opencrypto/cryptodev.h>
#include <opencrypto/gmac.h>
#include <crypto/rijndael/rijndael.h>
#include <crypto/armv8/armv8_crypto.h>

#include <arm_neon.h>

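/*
 * Single-block AES primitives built on the ARMv8 Cryptography Extensions.
 * vaeseq_u8/vaesdq_u8 perform AddRoundKey together with (Inv)SubBytes and
 * (Inv)ShiftRows, and vaesmcq_u8/vaesimcq_u8 perform (Inv)MixColumns, so
 * each loop iteration below covers two AES rounds.  Callers pass
 * aes_rounds - 1 as "rounds", and keysched must hold rounds + 2 round keys.
 */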
static uint8x16_t
armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaeseq_u8(tmp, keysched[i]);
		tmp = vaesmcq_u8(tmp);
		tmp = vaeseq_u8(tmp, keysched[i + 1]);
		tmp = vaesmcq_u8(tmp);
	}

	tmp = vaeseq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesmcq_u8(tmp);
	tmp = vaeseq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

static uint8x16_t
armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaesdq_u8(tmp, keysched[i]);
		tmp = vaesimcq_u8(tmp);
		tmp = vaesdq_u8(tmp, keysched[i + 1]);
		tmp = vaesimcq_u8(tmp);
	}

	tmp = vaesdq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesimcq_u8(tmp);
	tmp = vaesdq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

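/*
 * Encrypt "len" bytes (a multiple of AES_BLOCK_LEN) from "fromc" to "toc"
 * in CBC mode, chaining from the supplied IV.  Whenever the current source
 * or destination segment is shorter than one block, the block is assembled
 * in a small on-stack bounce buffer via the cursor copy routines.
 */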
void
armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t tot, ivreg, tmp;
	uint8_t block[AES_BLOCK_LEN], *from, *to;
	size_t fromseglen, oseglen, seglen, toseglen;

	KASSERT(len % AES_BLOCK_LEN == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	ivreg = vld1q_u8(iv);
	for (; len > 0; len -= seglen) {
		from = crypto_cursor_segment(fromc, &fromseglen);
		to = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_BLOCK_LEN) {
			crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block);
			tmp = vld1q_u8(block);
			tot = armv8_aes_enc(key->aes_rounds - 1,
			    (const void *)key->aes_key, veorq_u8(tmp, ivreg));
			ivreg = tot;
			vst1q_u8(block, tot);
			crypto_cursor_copyback(toc, AES_BLOCK_LEN, block);
			seglen = AES_BLOCK_LEN;
		} else {
			for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
			    seglen -= AES_BLOCK_LEN) {
				tmp = vld1q_u8(from);
				tot = armv8_aes_enc(key->aes_rounds - 1,
				    (const void *)key->aes_key,
				    veorq_u8(tmp, ivreg));
				ivreg = tot;
				vst1q_u8(to, tot);
				from += AES_BLOCK_LEN;
				to += AES_BLOCK_LEN;
			}
			seglen = oseglen - seglen;
			crypto_cursor_advance(fromc, seglen);
			crypto_cursor_advance(toc, seglen);
		}
	}

	explicit_bzero(block, sizeof(block));
}

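/*
 * CBC decryption of "len" bytes (a multiple of AES_BLOCK_LEN).  Each
 * ciphertext block is saved in "nextiv" before it is decrypted, because
 * it becomes the chaining value for the following block.
 */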
void
armv8_aes_decrypt_cbc(const AES_key_t *key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t ivreg, nextiv, tmp;
	uint8_t block[AES_BLOCK_LEN], *from, *to;
	size_t fromseglen, oseglen, seglen, toseglen;

	KASSERT(len % AES_BLOCK_LEN == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	ivreg = vld1q_u8(iv);
	for (; len > 0; len -= seglen) {
		from = crypto_cursor_segment(fromc, &fromseglen);
		to = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_BLOCK_LEN) {
			crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block);
			nextiv = vld1q_u8(block);
			tmp = armv8_aes_dec(key->aes_rounds - 1,
			    (const void *)key->aes_key, nextiv);
			vst1q_u8(block, veorq_u8(tmp, ivreg));
			ivreg = nextiv;
			crypto_cursor_copyback(toc, AES_BLOCK_LEN, block);
			seglen = AES_BLOCK_LEN;
		} else {
			for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
			    seglen -= AES_BLOCK_LEN) {
				nextiv = vld1q_u8(from);
				tmp = armv8_aes_dec(key->aes_rounds - 1,
				    (const void *)key->aes_key, nextiv);
				vst1q_u8(to, veorq_u8(tmp, ivreg));
				ivreg = nextiv;
				from += AES_BLOCK_LEN;
				to += AES_BLOCK_LEN;
			}
			crypto_cursor_advance(fromc, oseglen - seglen);
			crypto_cursor_advance(toc, oseglen - seglen);
			seglen = oseglen - seglen;
		}
	}

	explicit_bzero(block, sizeof(block));
}

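/*
 * AES-XTS.  The 128-bit tweak is advanced from block to block by doubling
 * it in GF(2^128); AES_XTS_ALPHA is the low byte of the reduction
 * polynomial x^128 + x^7 + x^2 + x + 1 used for that multiplication.
 */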
#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

static inline int32x4_t
xts_crank_lfsr(int32x4_t inp)
{
	const int32x4_t alphamask = {AES_XTS_ALPHA, 1, 1, 1};
	int32x4_t xtweak, ret;

	/* set up xor mask */
	xtweak = vextq_s32(inp, inp, 3);
	xtweak = vshrq_n_s32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = vshlq_n_s32(inp, 1);
	ret ^= xtweak;

	return ret;
}

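/*
 * Encrypt or decrypt a single XTS block: XOR with the tweak, run the block
 * cipher, XOR with the tweak again, then advance the tweak for the next
 * block via xts_crank_lfsr().
 */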
static void
armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule,
    uint8x16_t *tweak, const uint8_t *from, uint8_t *to, int do_encrypt)
{
	uint8x16_t block;

	block = vld1q_u8(from) ^ *tweak;

	if (do_encrypt)
		block = armv8_aes_enc(rounds - 1, key_schedule, block);
	else
		block = armv8_aes_dec(rounds - 1, key_schedule, block);

	vst1q_u8(to, block ^ *tweak);

	*tweak = vreinterpretq_u8_s32(xts_crank_lfsr(vreinterpretq_s32_u8(*tweak)));
}

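/*
 * Common XTS worker: derive the initial tweak by encrypting the IV (the
 * little-endian 64-bit sector number) with the tweak key, then walk the
 * source and destination cursors block by block.  Blocks that straddle a
 * segment boundary are bounced through an on-stack buffer.
 */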
static void
armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
    const uint8x16_t *tweak_schedule, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
{
	uint8x16_t tweakreg;
	uint8_t block[AES_XTS_BLOCKSIZE] __aligned(16);
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	uint8_t *from, *to;
	size_t fromseglen, oseglen, seglen, toseglen;

	KASSERT(len % AES_XTS_BLOCKSIZE == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	/*
	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
	 * of a 64-bit block number which we allow to be passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = vld1q_u8(tweak);
	tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg);

	for (; len > 0; len -= seglen) {
		from = crypto_cursor_segment(fromc, &fromseglen);
		to = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_XTS_BLOCKSIZE) {
			crypto_cursor_copydata(fromc, AES_XTS_BLOCKSIZE, block);
			armv8_aes_crypt_xts_block(rounds, data_schedule,
			    &tweakreg, block, block, do_encrypt);
			crypto_cursor_copyback(toc, AES_XTS_BLOCKSIZE, block);
			seglen = AES_XTS_BLOCKSIZE;
		} else {
			for (oseglen = seglen; seglen >= AES_XTS_BLOCKSIZE;
			    seglen -= AES_XTS_BLOCKSIZE) {
				armv8_aes_crypt_xts_block(rounds, data_schedule,
				    &tweakreg, from, to, do_encrypt);
				from += AES_XTS_BLOCKSIZE;
				to += AES_XTS_BLOCKSIZE;
			}
			seglen = oseglen - seglen;
			crypto_cursor_advance(fromc, seglen);
			crypto_cursor_advance(toc, seglen);
		}
	}

	explicit_bzero(block, sizeof(block));
}

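/*
 * Public XTS entry points: thin wrappers that select the direction flag and
 * pass the expanded data and tweak key schedules to the common worker.
 */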
void
armv8_aes_encrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len, struct crypto_buffer_cursor *fromc,
    struct crypto_buffer_cursor *toc, const uint8_t iv[static AES_BLOCK_LEN])
{
	armv8_aes_crypt_xts(data_schedule->aes_rounds,
	    (const void *)&data_schedule->aes_key, tweak_schedule, len, fromc,
	    toc, iv, 1);
}

void
armv8_aes_decrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	armv8_aes_crypt_xts(data_schedule->aes_rounds,
	    (const void *)&data_schedule->aes_key, tweak_schedule, len, fromc,
	    toc, iv, 0);
}

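/*
 * Increment the 128-bit, big-endian CTR-mode counter in place, propagating
 * the carry from the least-significant byte upward.
 */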
#define	AES_INC_COUNTER(counter)			\
	do {						\
		for (int pos = AES_BLOCK_LEN - 1;	\
		    pos >= 0; pos--)			\
			if (++(counter)[pos])		\
				break;			\
	} while (0)

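/*
 * Per-request AES-GCM state: EK0 is the encryption of counter block 1 (used
 * to mask the final tag), EKi is the current keystream block, Xi is the
 * running GHASH accumulator, lenblock holds the AAD/payload bit lengths,
 * and aes_counter is the CTR-mode counter block.
 */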
struct armv8_gcm_state {
	__uint128_val_t EK0;
	__uint128_val_t EKi;
	__uint128_val_t Xi;
	__uint128_val_t lenblock;
	uint8_t aes_counter[AES_BLOCK_LEN];
};

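/*
 * Start a GCM/GMAC operation: build the initial counter block from the
 * 96-bit IV, precompute EK0 for the final tag masking, advance the counter
 * to 2 for the payload, and absorb the AAD into the GHASH accumulator,
 * zero-padding the final partial AAD block.
 */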
static void
armv8_aes_gmac_setup(struct armv8_gcm_state *s, AES_key_t *aes_key,
    const uint8_t *authdata, size_t authdatalen,
    const uint8_t iv[static AES_GCM_IV_LEN], const __uint128_val_t *Htable)
{
	uint8_t block[AES_BLOCK_LEN];
	size_t trailer;

	bzero(s->aes_counter, AES_BLOCK_LEN);
	memcpy(s->aes_counter, iv, AES_GCM_IV_LEN);

	/* Setup the counter */
	s->aes_counter[AES_BLOCK_LEN - 1] = 1;

	/* EK0 for a final GMAC round */
	aes_v8_encrypt(s->aes_counter, s->EK0.c, aes_key);

	/* GCM starts with 2 as counter, 1 is used for final xor of tag. */
	s->aes_counter[AES_BLOCK_LEN - 1] = 2;

	memset(s->Xi.c, 0, sizeof(s->Xi.c));
	trailer = authdatalen % AES_BLOCK_LEN;
	if (authdatalen - trailer > 0) {
		gcm_ghash_v8(s->Xi.u, Htable, authdata, authdatalen - trailer);
		authdata += authdatalen - trailer;
	}
	if (trailer > 0 || authdatalen == 0) {
		memset(block, 0, sizeof(block));
		memcpy(block, authdata, trailer);
		gcm_ghash_v8(s->Xi.u, Htable, block, AES_BLOCK_LEN);
	}
}

static void
362
armv8_aes_gmac_finish(struct armv8_gcm_state *s, size_t len,
363
size_t authdatalen, const __uint128_val_t *Htable)
364
{
365
/* Lengths block */
366
s->lenblock.u[0] = s->lenblock.u[1] = 0;
367
s->lenblock.d[1] = htobe32(authdatalen * 8);
368
s->lenblock.d[3] = htobe32(len * 8);
369
gcm_ghash_v8(s->Xi.u, Htable, s->lenblock.c, AES_BLOCK_LEN);
370
371
s->Xi.u[0] ^= s->EK0.u[0];
372
s->Xi.u[1] ^= s->EK0.u[1];
373
}
374
375
static void
376
armv8_aes_encrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
377
const uint64_t *from, uint64_t *to)
378
{
379
aes_v8_encrypt(s->aes_counter, s->EKi.c, aes_key);
380
AES_INC_COUNTER(s->aes_counter);
381
to[0] = from[0] ^ s->EKi.u[0];
382
to[1] = from[1] ^ s->EKi.u[1];
383
}
384
385
static void
386
armv8_aes_decrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
387
const uint64_t *from, uint64_t *to)
388
{
389
armv8_aes_encrypt_gcm_block(s, aes_key, from, to);
390
}
391
392
void
393
armv8_aes_encrypt_gcm(AES_key_t *aes_key, size_t len,
394
struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
395
size_t authdatalen, const uint8_t *authdata,
396
uint8_t tag[static GMAC_DIGEST_LEN],
397
const uint8_t iv[static AES_GCM_IV_LEN],
398
const __uint128_val_t *Htable)
399
{
400
struct armv8_gcm_state s;
401
uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN);
402
uint64_t *from64, *to64;
403
size_t fromseglen, i, olen, oseglen, seglen, toseglen;
404
405
armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);
406
407
for (olen = len; len > 0; len -= seglen) {
408
from64 = crypto_cursor_segment(fromc, &fromseglen);
409
to64 = crypto_cursor_segment(toc, &toseglen);
410
411
seglen = ulmin(len, ulmin(fromseglen, toseglen));
412
if (seglen < AES_BLOCK_LEN) {
413
seglen = ulmin(len, AES_BLOCK_LEN);
414
415
memset(block, 0, sizeof(block));
416
crypto_cursor_copydata(fromc, (int)seglen, block);
417
418
if (seglen == AES_BLOCK_LEN) {
419
armv8_aes_encrypt_gcm_block(&s, aes_key,
420
(uint64_t *)block, (uint64_t *)block);
421
} else {
422
aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
423
AES_INC_COUNTER(s.aes_counter);
424
for (i = 0; i < seglen; i++)
425
block[i] ^= s.EKi.c[i];
426
}
427
gcm_ghash_v8(s.Xi.u, Htable, block, seglen);
428
429
crypto_cursor_copyback(toc, (int)seglen, block);
430
} else {
431
for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
432
seglen -= AES_BLOCK_LEN) {
433
armv8_aes_encrypt_gcm_block(&s, aes_key, from64,
434
to64);
435
gcm_ghash_v8(s.Xi.u, Htable, (uint8_t *)to64,
436
AES_BLOCK_LEN);
437
438
from64 += 2;
439
to64 += 2;
440
}
441
442
seglen = oseglen - seglen;
443
crypto_cursor_advance(fromc, seglen);
444
crypto_cursor_advance(toc, seglen);
445
}
446
}
447
448
armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);
449
memcpy(tag, s.Xi.c, GMAC_DIGEST_LEN);
450
451
explicit_bzero(block, sizeof(block));
452
explicit_bzero(&s, sizeof(s));
453
}
454
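/*
 * AES-GCM decryption runs in two passes over the ciphertext: the first pass
 * (on a copy of the source cursor) computes GHASH and verifies the tag with
 * a constant-time comparison, and only if the tag matches is the payload
 * decrypted in counter mode on the second pass.  Returns 0 on success or
 * EBADMSG if authentication fails.
 */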
int
armv8_aes_decrypt_gcm(AES_key_t *aes_key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    size_t authdatalen, const uint8_t *authdata,
    const uint8_t tag[static GMAC_DIGEST_LEN],
    const uint8_t iv[static AES_GCM_IV_LEN],
    const __uint128_val_t *Htable)
{
	struct armv8_gcm_state s;
	struct crypto_buffer_cursor fromcc;
	uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN), *from;
	uint64_t *block64, *from64, *to64;
	size_t fromseglen, olen, oseglen, seglen, toseglen;
	int error;

	armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);

	crypto_cursor_copy(fromc, &fromcc);
	for (olen = len; len > 0; len -= seglen) {
		from = crypto_cursor_segment(&fromcc, &fromseglen);
		seglen = ulmin(len, fromseglen);
		seglen -= seglen % AES_BLOCK_LEN;
		if (seglen > 0) {
			gcm_ghash_v8(s.Xi.u, Htable, from, seglen);
			crypto_cursor_advance(&fromcc, seglen);
		} else {
			memset(block, 0, sizeof(block));
			seglen = ulmin(len, AES_BLOCK_LEN);
			crypto_cursor_copydata(&fromcc, seglen, block);
			gcm_ghash_v8(s.Xi.u, Htable, block, seglen);
		}
	}

	armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);

	if (timingsafe_bcmp(tag, s.Xi.c, GMAC_DIGEST_LEN) != 0) {
		error = EBADMSG;
		goto out;
	}

	block64 = (uint64_t *)block;
	for (len = olen; len > 0; len -= seglen) {
		from64 = crypto_cursor_segment(fromc, &fromseglen);
		to64 = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_BLOCK_LEN) {
			seglen = ulmin(len, AES_BLOCK_LEN);

			memset(block, 0, sizeof(block));
			crypto_cursor_copydata(fromc, seglen, block);

			armv8_aes_decrypt_gcm_block(&s, aes_key, block64,
			    block64);

			crypto_cursor_copyback(toc, (int)seglen, block);
		} else {
			for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
			    seglen -= AES_BLOCK_LEN) {
				armv8_aes_decrypt_gcm_block(&s, aes_key, from64,
				    to64);

				from64 += 2;
				to64 += 2;
			}

			seglen = oseglen - seglen;
			crypto_cursor_advance(fromc, seglen);
			crypto_cursor_advance(toc, seglen);
		}
	}

	error = 0;
out:
	explicit_bzero(block, sizeof(block));
	explicit_bzero(&s, sizeof(s));
	return (error);
}