GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/crypto/aesni/aesni_wrap.c
/*-
 * Copyright (C) 2008 Damien Miller <[email protected]>
 * Copyright (c) 2010 Konstantin Belousov <[email protected]>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <[email protected]>
 * Copyright 2012-2013 John-Mark Gurney <[email protected]>
 * Copyright (c) 2014 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by John-Mark Gurney
 * under sponsorship of the FreeBSD Foundation and
 * Rubicon Communications, LLC (Netgate).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <crypto/aesni/aesni.h>

#include <opencrypto/gmac.h>

#include "aesencdec.h"
#include <smmintrin.h>

MALLOC_DECLARE(M_AESNI);

struct blocks8 {
	__m128i	blk[8];
} __packed;
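
/*
 * CBC encryption cannot be parallelized: each plaintext block must be
 * XORed with the ciphertext of the previous block before it is
 * encrypted, so blocks are processed strictly one at a time.
 */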
void
aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
{
	__m128i tot, ivreg;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = _mm_loadu_si128((const __m128i *)iv);
	for (i = 0; i < len; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
		ivreg = tot;
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}
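
/*
 * CBC decryption, unlike encryption, is parallelizable: all ciphertext
 * blocks are available up front, so eight blocks are decrypted per
 * aesni_dec8() call and each result is then XORed with the preceding
 * ciphertext block.  Note that this routine operates in place on buf.
 */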
void
aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
    uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN])
{
	__m128i blocks[8];
	struct blocks8 *blks;
	__m128i ivreg, nextiv;
	size_t i, j, cnt;

	ivreg = _mm_loadu_si128((const __m128i *)iv);
	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (struct blocks8 *)buf;
		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], &blocks[0]);
		for (j = 0; j < 8; j++) {
			nextiv = blks->blk[j];
			blks->blk[j] = blocks[j] ^ ivreg;
			ivreg = nextiv;
		}
		buf += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		nextiv = _mm_loadu_si128((void *)buf);
		_mm_storeu_si128((void *)buf,
		    aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
		ivreg = nextiv;
		buf += AES_BLOCK_LEN;
	}
}
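
/*
 * ECB has no chaining, so both directions are trivially parallel: bulk
 * data is processed in batches of eight blocks, with a scalar loop for
 * the remaining (len / AES_BLOCK_LEN) % 8 blocks.
 */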
void
aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to)
{
	__m128i tot;
	__m128i tout[8];
	struct blocks8 *top;
	const struct blocks8 *blks;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (const struct blocks8 *)from;
		top = (struct blocks8 *)to;
		aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], tout);
		top->blk[0] = tout[0];
		top->blk[1] = tout[1];
		top->blk[2] = tout[2];
		top->blk[3] = tout[3];
		top->blk[4] = tout[4];
		top->blk[5] = tout[5];
		top->blk[6] = tout[6];
		top->blk[7] = tout[7];
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}
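
/* ECB decryption mirrors aesni_encrypt_ecb(), using the decrypt schedule. */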
void
aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to)
{
	__m128i tot;
	__m128i tout[8];
	const struct blocks8 *blks;
	struct blocks8 *top;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (const struct blocks8 *)from;
		top = (struct blocks8 *)to;
		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], tout);
		top->blk[0] = tout[0];
		top->blk[1] = tout[1];
		top->blk[2] = tout[2];
		top->blk[3] = tout[3];
		top->blk[4] = tout[4];
		top->blk[5] = tout[5];
		top->blk[6] = tout[6];
		top->blk[7] = tout[7];
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_dec(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

/*
 * Mixed-endian increment: the low 64 bits of the counter are kept in
 * the high quadword so that the result is compatible with the
 * BSWAP_EPI64 shuffle used by the _icm routines below.
 */
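/*
 * The increment works on that byte-swapped layout: ONE adds 1 to the
 * high quadword (which holds the counter's low 64 bits); if that
 * quadword wraps to zero, _mm_cmpeq_epi64() yields all-ones there,
 * which _mm_unpackhi_epi64() moves into the low quadword so the
 * subtraction carries 1 into the counter's high 64 bits.
 */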
static inline __m128i
nextc(__m128i x)
{
	const __m128i ONE = _mm_setr_epi32(0, 0, 1, 0);
	const __m128i ZERO = _mm_setzero_si128();

	x = _mm_add_epi64(x, ONE);
	__m128i t = _mm_cmpeq_epi64(x, ZERO);
	t = _mm_unpackhi_epi64(t, ZERO);
	x = _mm_sub_epi64(x, t);

	return x;
}
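
/*
 * ICM (integer counter mode) encryption: the keystream is the block
 * cipher applied to successive counter values, so encryption and
 * decryption are the same operation and both parallelize.  Eight
 * counter blocks are encrypted per aesni_enc8() call and XORed into
 * the payload; a scalar loop handles the remaining whole blocks, and
 * a copy through a stack buffer handles any trailing partial block.
 */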
void
aesni_encrypt_icm(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
{
	__m128i tot;
	__m128i tmp1, tmp2, tmp3, tmp4;
	__m128i tmp5, tmp6, tmp7, tmp8;
	__m128i ctr1, ctr2, ctr3, ctr4;
	__m128i ctr5, ctr6, ctr7, ctr8;
	__m128i BSWAP_EPI64;
	__m128i tout[8];
	__m128i block;
	struct blocks8 *top;
	const struct blocks8 *blks;
	size_t i, cnt, resid;

	BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7);

	ctr1 = _mm_loadu_si128((const __m128i *)iv);
	ctr1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
		ctr2 = nextc(ctr1);
		tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
		ctr3 = nextc(ctr2);
		tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
		ctr4 = nextc(ctr3);
		tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
		ctr5 = nextc(ctr4);
		tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64);
		ctr6 = nextc(ctr5);
		tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64);
		ctr7 = nextc(ctr6);
		tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64);
		ctr8 = nextc(ctr7);
		tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64);
		ctr1 = nextc(ctr8);

		blks = (const struct blocks8 *)from;
		top = (struct blocks8 *)to;
		aesni_enc8(rounds - 1, key_schedule, tmp1, tmp2, tmp3, tmp4,
		    tmp5, tmp6, tmp7, tmp8, tout);

		top->blk[0] = blks->blk[0] ^ tout[0];
		top->blk[1] = blks->blk[1] ^ tout[1];
		top->blk[2] = blks->blk[2] ^ tout[2];
		top->blk[3] = blks->blk[3] ^ tout[3];
		top->blk[4] = blks->blk[4] ^ tout[4];
		top->blk[5] = blks->blk[5] ^ tout[5];
		top->blk[6] = blks->blk[6] ^ tout[6];
		top->blk[7] = blks->blk[7] ^ tout[7];

		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
		ctr1 = nextc(ctr1);

		tot = aesni_enc(rounds - 1, key_schedule, tmp1);

		tot = tot ^ _mm_loadu_si128((const __m128i *)from);
		_mm_storeu_si128((__m128i *)to, tot);

		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}

	/*
	 * Handle the remaining partial block.  Copy the remaining payload
	 * onto the stack so that a full block can be loaded safely.
	 */
	resid = len % AES_BLOCK_LEN;
	if (resid != 0) {
		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
		tot = aesni_enc(rounds - 1, key_schedule, tmp1);
		block = _mm_setzero_si128();
		memcpy(&block, from, resid);
		tot = tot ^ _mm_loadu_si128(&block);
		memcpy(to, &tot, resid);
		explicit_bzero(&block, sizeof(block));
	}
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */
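
/*
 * Advance the XTS tweak by one block: multiply it by x in GF(2^128)
 * (little-endian convention).  Each 32-bit lane is shifted left by one
 * bit; the shuffle/srai pair turns each lane's outgoing sign bit into
 * a mask that re-injects it as the low bit of the next lane up, while
 * the bit shifted out of the top lane wraps around and XORs
 * AES_XTS_ALPHA into the low byte.
 */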
static inline __m128i
xts_crank_lfsr(__m128i inp)
{
	const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
	__m128i xtweak, ret;

	/* set up xor mask */
	xtweak = _mm_shuffle_epi32(inp, 0x93);
	xtweak = _mm_srai_epi32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = _mm_slli_epi32(inp, 1);
	ret ^= xtweak;

	return ret;
}
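
/*
 * One XTS block: output = E_k(input ^ tweak) ^ tweak (with D_k when
 * decrypting); the tweak is then advanced for the next block.
 */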
static void
aesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
    const uint8_t *from, uint8_t *to, int do_encrypt)
{
	__m128i block;

	block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;

	if (do_encrypt)
		block = aesni_enc(rounds - 1, key_schedule, block);
	else
		block = aesni_dec(rounds - 1, key_schedule, block);

	_mm_storeu_si128((__m128i *)to, block ^ *tweak);

	*tweak = xts_crank_lfsr(*tweak);
}
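
/*
 * Eight XTS blocks at a time: the per-block tweaks are generated up
 * front and saved, the eight blocks are ciphered with a single
 * aesni_enc8()/aesni_dec8() call, and each result is XORed with its
 * saved tweak on the way out.
 */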
static void
aesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
    const uint8_t *from, uint8_t *to, int do_encrypt)
{
	__m128i tmptweak;
	__m128i a, b, c, d, e, f, g, h;
	__m128i tweaks[8];
	__m128i tmp[8];
	__m128i *top;
	const __m128i *fromp;

	tmptweak = *tweak;

	/*
	 * Unroll the loop.  This lets gcc keep values directly in
	 * registers and saves memory accesses.
	 */
	fromp = (const __m128i *)from;
#define	PREPINP(v, pos) 					\
	do {							\
		tweaks[(pos)] = tmptweak;			\
		(v) = _mm_loadu_si128(&fromp[pos]) ^		\
		    tmptweak;					\
		tmptweak = xts_crank_lfsr(tmptweak);		\
	} while (0)
	PREPINP(a, 0);
	PREPINP(b, 1);
	PREPINP(c, 2);
	PREPINP(d, 3);
	PREPINP(e, 4);
	PREPINP(f, 5);
	PREPINP(g, 6);
	PREPINP(h, 7);
	*tweak = tmptweak;

	if (do_encrypt)
		aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);
	else
		aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);

	top = (__m128i *)to;
	_mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
	_mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
	_mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
	_mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
	_mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
	_mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
	_mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
	_mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
}
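
/*
 * Common XTS driver: compute the initial tweak as E_k2(IV), then run
 * the 8-block routine over the bulk of the data and the single-block
 * routine over the remainder.
 */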
static void
aesni_crypt_xts(int rounds, const __m128i *data_schedule,
    const __m128i *tweak_schedule, size_t len, const uint8_t *from,
    uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
{
	__m128i tweakreg;
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	size_t i, cnt;

	/*
	 * Prepare tweak as E_k2(IV).  The IV is specified as the LE
	 * representation of a 64-bit block number, which we allow to be
	 * passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
	tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);

	cnt = len / AES_XTS_BLOCKSIZE / 8;
	for (i = 0; i < cnt; i++) {
		aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE * 8;
		to += AES_XTS_BLOCKSIZE * 8;
	}
	i *= 8;
	cnt = len / AES_XTS_BLOCKSIZE;
	for (; i < cnt; i++) {
		aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE;
		to += AES_XTS_BLOCKSIZE;
	}
}
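
/* Public XTS entry points: thin wrappers that select the direction. */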
void
aesni_encrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 1);
}

void
aesni_decrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 0);
}
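
/*
 * Expand the key schedules for a session.  Counter-style modes (ICM,
 * GCM, CCM) only ever run the cipher forward, so they skip the decrypt
 * schedule.  XTS consumes a double-length key, half for the data key
 * (k1) and half for the tweak key (k2).  Per the AES spec, 128-, 192-
 * and 256-bit keys use 10, 12 and 14 rounds respectively.
 */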
void
aesni_cipher_setup_common(struct aesni_session *ses,
    const struct crypto_session_params *csp, const uint8_t *key, int keylen)
{
	int decsched;

	decsched = 1;

	switch (csp->csp_cipher_alg) {
	case CRYPTO_AES_ICM:
	case CRYPTO_AES_NIST_GCM_16:
	case CRYPTO_AES_CCM_16:
		decsched = 0;
		break;
	}

	if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
		keylen /= 2;

	switch (keylen * 8) {
	case 128:
		ses->rounds = AES128_ROUNDS;
		break;
	case 192:
		ses->rounds = AES192_ROUNDS;
		break;
	case 256:
		ses->rounds = AES256_ROUNDS;
		break;
	default:
		panic("shouldn't happen");
	}

	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
	if (decsched)
		aesni_set_deckey(ses->enc_schedule, ses->dec_schedule,
		    ses->rounds);

	if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
		aesni_set_enckey(key + keylen, ses->xts_schedule,
		    ses->rounds);
}