/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Function entry/exit annotations.  Each one forwards to SYM_FUNC_START() /
 * SYM_FUNC_END() after pasting the prefix "neon_" onto the given name, so
 * AES_FUNC_START(foo) opens the symbol neon_foo.
 */
#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)

/*
 * Symbolic names for NEON registers.  None of them is referenced by the
 * macros in this file; they are presumably consumed by the code included
 * from aes-modes.S (confirm there).  Note that xtsmask and cbciv both
 * name v7.
 */
	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4

/*
 * xts_reload_mask - forward to xts_load_mask
 * @tmp: operand handed through unchanged
 *
 * xts_load_mask is not defined in this file (presumably it comes from
 * aes-modes.S - confirm there).
 */
	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

/*
 * xts_cts_skip_tw - special case for the neon-bs driver calling into this
 * one for CTS
 * @reg: general-purpose register whose bit 1 is tested
 * @lbl: branch target
 *
 * Branches to @lbl when bit 1 of @reg is set and falls through otherwise.
 */
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl
	.endm

/*
 * mul_by_x - multiply by polynomial 'x' in GF(2^8)
 * @out:   destination vector, arrangement included (e.g. v9.16b)
 * @in:    source vector, arrangement included
 * @temp:  scratch vector, clobbered
 * @const: vector holding the reduction constant; the callers in this file
 *         pass v12, which prepare fills with 0x1b in every byte
 *
 * With byte lanes (every caller in this file uses .16b), the arithmetic
 * shift by 7 turns each byte into 0x00 or 0xff depending on its top bit,
 * so @const is XORed into the doubled value only where that bit was set.
 *
 * @temp is written before the last read of @in, and @out is written before
 * the last read of @temp, so @temp must not alias @in and @out must not
 * alias @temp.
 */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	shl		\out, \in, #1
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

/*
 * mul_by_x2 - multiply by polynomial 'x^2' in GF(2^8)
 * @out:   destination vector, arrangement included (e.g. v8.16b)
 * @in:    source vector, arrangement included
 * @temp:  scratch vector, clobbered
 * @const: vector holding the reduction constant; the caller in this file
 *         passes v12, which prepare fills with 0x1b in every byte
 *
 * With byte lanes, the logical shift by 6 keeps exactly the two bits that
 * the left shift by 2 pushes out of each byte.  Their polynomial product
 * with @const is the reduction term that gets XORed back in.
 *
 * @temp is written before the last read of @in, and @out is written before
 * the last read of @temp, so @temp must not alias @in and @out must not
 * alias @temp.
 */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6
	shl		\out, \in, #2
	pmul		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

/*
 * prepare - preload the entire Sbox and the per-round constants
 * @sbox:      symbol of the 256-byte table to load
 * @shiftrows: symbol of the 16-byte permutation to load into v13
 * @temp:      general-purpose scratch register, clobbered
 *
 * Register state established here and read by the round macros in this
 * file:
 *   v12     - 0x1b in every byte (constant operand of the mul_by_x family)
 *   v13     - permutation read from @shiftrows (ShiftRows step of do_block)
 *   v14     - permutation read from .Lror32by8 (used by mix_columns)
 *   v16-v31 - 256 bytes read from @sbox, 64 bytes per group of four
 *
 * ldr_l and adr_l are not defined in this file (presumably helpers from
 * <asm/assembler.h> - confirm there).
 */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	/* no post-increment on the last load: \temp is left at \sbox + 192 */
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

/*
 * enc_prepare - do preload for encryption
 * @ignore0: unused
 * @ignore1: unused
 * @temp:    general-purpose scratch register handed to prepare
 *
 * Expands prepare with crypto_aes_sbox and the .LForward_ShiftRows
 * permutation.
 */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
	.endm

/*
 * enc_switch_key - expands to no instructions
 * @ignore0: unused
 * @ignore1: unused
 * @temp:    unused
 *
 * NOTE(review): presumably kept so that shared code can invoke it
 * unconditionally - confirm against the users in aes-modes.S.
 */
	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

/*
 * dec_prepare - do preload for decryption
 * @ignore0: unused
 * @ignore1: unused
 * @temp:    general-purpose scratch register handed to prepare
 *
 * Expands prepare with crypto_aes_inv_sbox and the .LReverse_ShiftRows
 * permutation.
 */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
	.endm

/*
 * sub_bytes - apply SubBytes transformation using the preloaded Sbox
 * @in: vector register name without arrangement (e.g. v0), updated in place
 *
 * Expects the table in v16-v31 (see prepare) and, in v15, the per-byte
 * distance between two 64-entry quarters of that table (do_block sets
 * v15 to 0x40 in every byte before expanding this macro).
 *
 * The table is consulted one quarter at a time.  tbl writes zero for an
 * index that falls outside its four registers, and tbx leaves such a lane
 * untouched, so after rebasing the index by v15 for each further quarter
 * only the lookup whose quarter contains the index supplies the byte.
 *
 * Clobbers v9, v10 and v11.
 */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

/*
 * mix_columns - apply MixColumns transformation
 * @in:  vector register name without arrangement, updated in place
 * @enc: assemble-time flag; when it is 0 the extra pre-multiplication step
 *       for Inverse MixColumns is emitted ahead of the common sequence
 *
 * Expects v12 to hold 0x1b in every byte and v14 to hold the .Lror32by8
 * permutation (both are set up by prepare).
 *
 * Clobbers v8 and v9.
 */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	/* rev32 on .8h swaps the two 16-bit halves of every 32-bit word */
	rev32		v8.8h, \in\().8h
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	/* permute the bytes of \in through the v14 (.Lror32by8) table */
	tbl		\in\().16b, {\in\().16b}, v14.16b
	eor		\in\().16b, \in\().16b, v8.16b
	.endm

/*
 * do_block - run every AES round over a single state
 * @enc:    assemble-time flag forwarded to mix_columns (encrypt_block
 *          passes 1, decrypt_block passes 0)
 * @in:     vector register name without arrangement, updated in place
 * @rounds: round count operand, copied into @i; must be at least 1 because
 *          the counter is decremented before it is tested
 * @rk:     register pointing at the first round key; not modified
 * @rkp:    scratch register, walks the remaining round keys
 * @i:      scratch register, loop counter
 *
 * Each pass XORs the current round key into the state, applies the v13
 * permutation (ShiftRows) and sub_bytes, then fetches the next 16-byte
 * key.  All passes but the last continue with mix_columns; the last one
 * leaves the loop and only XORs in the final key.  In total @rounds + 1
 * round keys are read, starting at @rk.
 *
 * v15 is shared: it carries the round key for the XOR, is then overwritten
 * with the 0x40 constant that sub_bytes needs, and is reloaded with the
 * next key afterwards.
 *
 * The labels end in \@ so that each expansion gets its own pair.
 *
 * Clobbers v8-v11 and v15, in addition to @rkp and @i.
 */
	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
.La\@:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	sub		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	cbz		\i, .Lb\@
	mix_columns	\in, \enc
	b		.La\@
.Lb\@:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm

/*
 * encrypt_block - do_block with the enc flag fixed to 1
 * @in, @rounds, @rk, @rkp, @i: forwarded unchanged, see do_block
 */
	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

/*
 * decrypt_block - do_block with the enc flag fixed to 0
 * @in, @rounds, @rk, @rkp, @i: forwarded unchanged, see do_block
 */
	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to two or four AES states in parallel.
	 */

/*
 * sub_bytes_4x - SubBytes over four states, instructions interleaved
 * @in0, @in1, @in2, @in3: vector register names without arrangement, each
 *                         updated in place
 *
 * Same lookup scheme as sub_bytes: the table in v16-v31 is consulted 64
 * entries at a time, with tbl for the first quarter and tbx for the rest,
 * and v15 (set to 0x40 per byte by do_block_4x) is subtracted from the
 * index between quarters.  v8, v9, v10 and v11 hold the rebased indices
 * for @in0, @in1, @in2 and @in3 respectively.
 *
 * Clobbers v8-v11.
 */
	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

/*
 * mul_by_x_2x - mul_by_x over two vectors, instructions interleaved
 * @out0, @out1: destination vector register names
 * @in0, @in1:   source vector register names
 * @tmp0, @tmp1: scratch vector register names, clobbered
 * @const:       vector register name holding the reduction constant (the
 *               caller in this file passes v12)
 *
 * Unlike mul_by_x, every operand is a bare register name; the macro
 * appends the .16b arrangement itself.  Per pair (@outN, @inN, @tmpN) the
 * sequence and its aliasing limits are those of mul_by_x: @tmpN must not
 * alias @inN, and @outN must not alias @tmpN.
 */
	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

/*
 * mul_by_x2_2x - mul_by_x2 over two vectors, instructions interleaved
 * @out0, @out1: destination vector register names
 * @in0, @in1:   source vector register names
 * @tmp0, @tmp1: scratch vector register names, clobbered
 * @const:       vector register name holding the reduction constant (the
 *               caller in this file passes v12)
 *
 * Unlike mul_by_x2, every operand is a bare register name; the macro
 * appends the .16b arrangement itself.  Per pair (@outN, @inN, @tmpN) the
 * sequence and its aliasing limits are those of mul_by_x2: @tmpN must not
 * alias @inN, and @outN must not alias @tmpN.
 */
	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

/*
 * mix_columns_2x - MixColumns over two states, instructions interleaved
 * @in0, @in1: vector register names without arrangement, each updated in
 *             place
 * @enc:       assemble-time flag; when it is 0 the extra pre-multiplication
 *             step for Inverse MixColumns is emitted ahead of the common
 *             sequence
 *
 * Expects v12 to hold 0x1b in every byte and v14 to hold the .Lror32by8
 * permutation (both are set up by prepare).
 *
 * Clobbers v8-v11: v8/v9 receive the multiplied values, v10/v11 serve
 * first as scratch for the multiplications and then as the per-state
 * intermediate that is XORed in before and after the v14 permutation.
 */
	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	/* rev32 on .8h swaps the two 16-bit halves of every 32-bit word */
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	/* permute the bytes of each state through the v14 (.Lror32by8) table */
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

/*
 * do_block_4x - run every AES round over four states at once
 * @enc:    assemble-time flag forwarded to mix_columns_2x (encrypt_block4x
 *          passes 1, decrypt_block4x passes 0)
 * @in0, @in1, @in2, @in3:
 *          vector register names without arrangement, updated in place
 * @rounds: round count operand, copied into @i; must be at least 1 because
 *          the counter is decremented before it is tested
 * @rk:     register pointing at the first round key; not modified
 * @rkp:    scratch register, walks the remaining round keys
 * @i:      scratch register, loop counter
 *
 * Same round structure as do_block, with every step applied to all four
 * states: key XOR, the v13 permutation (ShiftRows), sub_bytes_4x, fetch of
 * the next 16-byte key, then mix_columns_2x on (@in0, @in1) and on
 * (@in2, @in3) for all passes but the last.  The last pass leaves the loop
 * and only XORs in the final key.  In total @rounds + 1 round keys are
 * read, starting at @rk.
 *
 * v15 is shared between the round key and the 0x40 constant that
 * sub_bytes_4x needs.  The labels end in \@ so that each expansion gets
 * its own pair.
 *
 * Clobbers v8-v11 and v15, in addition to @rkp and @i.
 */
	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
.La\@:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	sub		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	cbz		\i, .Lb\@
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		.La\@
.Lb\@:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm

/*
 * encrypt_block4x - do_block_4x with the enc flag fixed to 1
 * @in0, @in1, @in2, @in3, @rounds, @rk, @rkp, @i: forwarded unchanged, see
 *                                                  do_block_4x
 */
	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

/*
 * decrypt_block4x - do_block_4x with the enc flag fixed to 0
 * @in0, @in1, @in2, @in3, @rounds, @rk, @rkp, @i: forwarded unchanged, see
 *                                                  do_block_4x
 */
	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

/*
 * Textual include placed after all macro definitions above.
 * NOTE(review): presumably aes-modes.S expands its routines in terms of
 * those macros (AES_FUNC_START, enc_prepare, encrypt_block, ...) - confirm
 * there.
 */
#include "aes-modes.S"

/*
 * 16-byte tbl index vectors, loaded into NEON registers by prepare:
 *   .LForward_ShiftRows - goes into v13 via enc_prepare
 *   .LReverse_ShiftRows - goes into v13 via dec_prepare
 *   .Lror32by8          - goes into v14 in both cases, used by mix_columns
 *
 * NOTE(review): .align 4 is taken to mean 2^4 = 16-byte alignment, as on
 * targets where the .align operand is a power of two - confirm for the
 * assembler in use.
 */
	.section	".rodata", "a"
	.align		4
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201