// Source: torvalds/linux, blob/master/lib/crypto/x86/aes-aesni.S
// (web-extraction header removed; interleaved line-number artifacts stripped)
/* SPDX-License-Identifier: GPL-2.0-or-later */
//
// AES block cipher using AES-NI instructions
//
// Copyright 2026 Google LLC
//
// The code in this file supports 32-bit and 64-bit CPUs, and it doesn't require
// AVX. It does use up to SSE4.1, which all CPUs with AES-NI have.
#include <linux/linkage.h>

.section .rodata
#ifdef __x86_64__
// 64-bit: static data must be addressed RIP-relative (position-independent).
#define RODATA(label) label(%rip)
#else
// 32-bit: plain absolute addressing of the label.
#define RODATA(label) label
#endif

// A mask for pshufb that extracts the last dword, rotates it right by 8
// bits, and copies the result to all four dwords.
.p2align 4
.Lmask:
	.byte	13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12

// The AES round constants, used during key expansion
.Lrcon:
	.long	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36

.text
// Transform four dwords [a0, a1, a2, a3] in \a into
// [a0, a0^a1, a0^a1^a2, a0^a1^a2^a3]. \tmp is a temporary xmm register.
//
// Note: this could be done in four instructions, shufps + pxor + shufps + pxor,
// if the temporary register were zero-initialized ahead of time. We instead do
// it in an easier-to-understand way that doesn't require zero-initialization
// and avoids the unusual shufps instruction. movdqa is usually "free" anyway.
.macro	_prefix_sum	a, tmp
	movdqa		\a, \tmp	// [a0, a1, a2, a3]
	pslldq		$4, \a		// [0, a0, a1, a2]
	pxor		\tmp, \a	// [a0, a0^a1, a1^a2, a2^a3]
	movdqa		\a, \tmp
	pslldq		$8, \a		// [0, 0, a0, a0^a1]
	pxor		\tmp, \a	// [a0, a0^a1, a0^a1^a2, a0^a1^a2^a3]
.endm
// Generate the next AES round key from the previous one(s) and store it at
// (RNDKEYS). Expects MASK, RCON, and RNDKEYS to be set up by the caller;
// clobbers %xmm2 and %xmm3 as temporaries.
.macro	_gen_round_key	a, b
	// Compute four copies of rcon[i] ^ SubBytes(ror32(w, 8)), where w is
	// the last dword of the previous round key (given in \b).
	//
	// 'aesenclast src, dst' does dst = src XOR SubBytes(ShiftRows(dst)).
	// It is used here solely for the SubBytes and the XOR. The ShiftRows
	// is a no-op because all four columns are the same here.
	//
	// Don't use the 'aeskeygenassist' instruction, since:
	//    - On most Intel CPUs it is microcoded, making it have a much
	//      higher latency and use more execution ports than 'aesenclast'.
	//    - It cannot be used in a loop, since it requires an immediate.
	//    - It doesn't do much more than 'aesenclast' in the first place.
	movdqa		\b, %xmm2
	pshufb		MASK, %xmm2
	aesenclast	RCON, %xmm2

	// XOR in the prefix sum of the four dwords of \a, which is the
	// previous round key (AES-128) or the first round key in the previous
	// pair of round keys (AES-256). The result is the next round key.
	_prefix_sum	\a, tmp=%xmm3
	pxor		%xmm2, \a

	// Store the next round key to memory. Also leave it in \a.
	movdqu		\a, (RNDKEYS)
.endm
// Expand a raw AES key into the round keys, and optionally also into the
// round keys for the Equivalent Inverse Cipher (when INV_RNDKEYS != NULL).
// \is_aes128 selects AES-128 (10 rounds) vs. AES-256 (14 rounds).
// Clobbers %xmm0-%xmm3 and %xmm5-%xmm7; on i386, %ebx and %esi are
// saved/restored since they are callee-saved in that ABI.
.macro	_aes_expandkey_aesni	is_aes128
#ifdef __x86_64__
	// Arguments
	.set	RNDKEYS, %rdi
	.set	INV_RNDKEYS, %rsi
	.set	IN_KEY, %rdx

	// Other local variables
	.set	RCON_PTR, %rcx
	.set	COUNTER, %eax
#else
	// Arguments, assuming -mregparm=3
	.set	RNDKEYS, %eax
	.set	INV_RNDKEYS, %edx
	.set	IN_KEY, %ecx

	// Other local variables
	.set	RCON_PTR, %ebx
	.set	COUNTER, %esi
#endif
	.set	RCON, %xmm6
	.set	MASK, %xmm7

#ifdef __i386__
	push	%ebx
	push	%esi
#endif

.if \is_aes128
	// AES-128: the first round key is simply a copy of the raw key.
	movdqu	(IN_KEY), %xmm0
	movdqu	%xmm0, (RNDKEYS)
.else
	// AES-256: the first two round keys are simply a copy of the raw key.
	movdqu	(IN_KEY), %xmm0
	movdqu	%xmm0, (RNDKEYS)
	movdqu	16(IN_KEY), %xmm1
	movdqu	%xmm1, 16(RNDKEYS)
	add	$32, RNDKEYS
.endif

	// Generate the remaining round keys.
	movdqa	RODATA(.Lmask), MASK
.if \is_aes128
	lea	RODATA(.Lrcon), RCON_PTR
	mov	$10, COUNTER
.Lgen_next_aes128_round_key:
	add	$16, RNDKEYS
	movd	(RCON_PTR), RCON
	pshufd	$0x00, RCON, RCON	// Broadcast rcon[i] to all four dwords
	add	$4, RCON_PTR
	_gen_round_key	%xmm0, %xmm0
	dec	COUNTER
	jnz	.Lgen_next_aes128_round_key
.else
	// AES-256: only the first 7 round constants are needed, so instead of
	// loading each one from memory, just start by loading [1, 1, 1, 1] and
	// then generate the rest by doubling.
	pshufd	$0x00, RODATA(.Lrcon), RCON
	pxor	%xmm5, %xmm5		// All-zeroes
	mov	$7, COUNTER
.Lgen_next_aes256_round_key_pair:
	// Generate the next AES-256 round key: either the first of a pair of
	// two, or the last one.
	_gen_round_key	%xmm0, %xmm1

	dec	COUNTER
	jz	.Lgen_aes256_round_keys_done

	// Generate the second AES-256 round key of the pair. Compared to the
	// first, there's no rotation and no XOR of a round constant.
	pshufd	$0xff, %xmm0, %xmm2	// Get four copies of last dword
	aesenclast	%xmm5, %xmm2	// Just does SubBytes
	_prefix_sum	%xmm1, tmp=%xmm3
	pxor	%xmm2, %xmm1
	movdqu	%xmm1, 16(RNDKEYS)
	add	$32, RNDKEYS
	paddd	RCON, RCON		// RCON <<= 1
	jmp	.Lgen_next_aes256_round_key_pair
.Lgen_aes256_round_keys_done:
.endif

	// If INV_RNDKEYS is non-NULL, write the round keys for the Equivalent
	// Inverse Cipher to it. To do that, reverse the standard round keys,
	// and apply aesimc (InvMixColumn) to each except the first and last.
	test	INV_RNDKEYS, INV_RNDKEYS
	jz	.Ldone\@
	movdqu	(RNDKEYS), %xmm0	// Last standard round key
	movdqu	%xmm0, (INV_RNDKEYS)	// => First inverse round key
.if \is_aes128
	mov	$9, COUNTER
.else
	mov	$13, COUNTER
.endif
.Lgen_next_inv_round_key\@:
	sub	$16, RNDKEYS
	add	$16, INV_RNDKEYS
	movdqu	(RNDKEYS), %xmm0
	aesimc	%xmm0, %xmm0
	movdqu	%xmm0, (INV_RNDKEYS)
	dec	COUNTER
	jnz	.Lgen_next_inv_round_key\@
	movdqu	-16(RNDKEYS), %xmm0	// First standard round key
	movdqu	%xmm0, 16(INV_RNDKEYS)	// => Last inverse round key

.Ldone\@:
#ifdef __i386__
	pop	%esi
	pop	%ebx
#endif
	RET
.endm
// void aes128_expandkey_aesni(u32 rndkeys[], u32 *inv_rndkeys,
//			       const u8 in_key[AES_KEYSIZE_128]);
SYM_FUNC_START(aes128_expandkey_aesni)
	_aes_expandkey_aesni	1
SYM_FUNC_END(aes128_expandkey_aesni)
// void aes256_expandkey_aesni(u32 rndkeys[], u32 *inv_rndkeys,
//			       const u8 in_key[AES_KEYSIZE_256]);
SYM_FUNC_START(aes256_expandkey_aesni)
	_aes_expandkey_aesni	0
SYM_FUNC_END(aes256_expandkey_aesni)
// Encrypt (\enc=1) or decrypt (\enc=0) one 16-byte block with the given
// expanded round keys. NROUNDS is the number of AES rounds (e.g. 10 or 14).
// Clobbers %xmm0 and %xmm1; on i386, %ebx is saved/restored around its use
// for the stack-passed fourth argument.
.macro	_aes_crypt_aesni	enc
#ifdef __x86_64__
	.set	RNDKEYS, %rdi
	.set	NROUNDS, %esi
	.set	OUT, %rdx
	.set	IN, %rcx
#else
	// Assuming -mregparm=3
	.set	RNDKEYS, %eax
	.set	NROUNDS, %edx
	.set	OUT, %ecx
	.set	IN, %ebx		// Passed on stack
#endif

#ifdef __i386__
	push	%ebx
	mov	8(%esp), %ebx		// Load 4th arg: 8 = saved ebx + retaddr
#endif

	// Zero-th round
	movdqu	(IN), %xmm0
	movdqu	(RNDKEYS), %xmm1
	pxor	%xmm1, %xmm0

	// Normal rounds
	add	$16, RNDKEYS
	dec	NROUNDS
.Lnext_round\@:
	movdqu	(RNDKEYS), %xmm1
.if \enc
	aesenc	%xmm1, %xmm0
.else
	aesdec	%xmm1, %xmm0
.endif
	add	$16, RNDKEYS
	dec	NROUNDS
	jne	.Lnext_round\@

	// Last round
	movdqu	(RNDKEYS), %xmm1
.if \enc
	aesenclast	%xmm1, %xmm0
.else
	aesdeclast	%xmm1, %xmm0
.endif
	movdqu	%xmm0, (OUT)

#ifdef __i386__
	pop	%ebx
#endif
	RET
.endm
// void aes_encrypt_aesni(const u32 rndkeys[], int nrounds,
//			  u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);
SYM_FUNC_START(aes_encrypt_aesni)
	_aes_crypt_aesni	1
SYM_FUNC_END(aes_encrypt_aesni)
// void aes_decrypt_aesni(const u32 inv_rndkeys[], int nrounds,
//			  u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);
SYM_FUNC_START(aes_decrypt_aesni)
	_aes_crypt_aesni	0
SYM_FUNC_END(aes_decrypt_aesni)