GitHub Repository: torvalds/linux
Path: blob/master/lib/crypto/arm64/sha3-ce-core.S
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr
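/*
 * The .set directives above alias each vN.2d / vN.16b operand name to its
 * register number N, so the .inst macros below can splice register
 * encodings directly into the hand-assembled instruction words.
 */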

/*
 * ARMv8.2 Crypto Extensions instructions
 */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
	.endm
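/*
 * Emitting these FEAT_SHA3 instructions via .inst means the assembler does
 * not need to support the SHA-3 mnemonics itself. Their semantics are:
 *   eor3  Vd, Vn, Vm, Va            : Vd = Vn ^ Vm ^ Va
 *   rax1  Vd.2d, Vn.2d, Vm.2d       : Vd = Vn ^ rol64(Vm, 1)
 *   bcax  Vd, Vn, Vm, Va            : Vd = Vn ^ (Vm & ~Va)
 *   xar   Vd.2d, Vn.2d, Vm.2d, #imm : Vd = ror64(Vn ^ Vm, imm)
 * where the rotates act on each 64-bit element.
 */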

/*
 * size_t sha3_ce_transform(struct sha3_state *state, const u8 *data,
 *			    size_t nblocks, size_t block_size)
 *
 * block_size is assumed to be one of 72 (SHA3-512), 104 (SHA3-384), 136
 * (SHA3-256 and SHAKE256), 144 (SHA3-224), or 168 (SHAKE128).
 */
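/*
 * Arguments follow the AAPCS64 convention: x0 = state, x1 = data,
 * x2 = nblocks, x3 = block_size. The return value is the number of blocks
 * left unprocessed, which is non-zero only when the loop below exits early
 * via cond_yield.
 */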
	.text
SYM_FUNC_START(sha3_ce_transform)
	/* load state */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]
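	/*
	 * v0-v24 now hold the 25 64-bit lanes of the Keccak state, one lane
	 * in the low doubleword of each vector register.
	 */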

0:	sub	x2, x2, #1
	mov	w8, #24
	adr_l	x9, .Lsha3_rcon

	/* load input */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b

	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v5.8b, v5.8b, v25.8b
	eor	v6.8b, v6.8b, v26.8b
	eor	v7.8b, v7.8b, v27.8b
	eor	v8.8b, v8.8b, v28.8b
	cmp	x3, #72
	b.eq	3f		/* SHA3-512 (block_size=72)? */

	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v9.8b, v9.8b, v25.8b
	eor	v10.8b, v10.8b, v26.8b
	eor	v11.8b, v11.8b, v27.8b
	eor	v12.8b, v12.8b, v28.8b
	cmp	x3, #104
	b.eq	3f		/* SHA3-384 (block_size=104)? */

	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	cmp	x3, #144
	b.lt	3f		/* SHA3-256 or SHAKE256 (block_size=136)? */
	b.eq	2f		/* SHA3-224 (block_size=144)? */

	/* SHAKE128 (block_size=168) */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v17.8b, v17.8b, v25.8b
	eor	v18.8b, v18.8b, v26.8b
	eor	v19.8b, v19.8b, v27.8b
	eor	v20.8b, v20.8b, v28.8b
	b	3f
2:
	/* SHA3-224 (block_size=144) */
	ld1	{v25.8b}, [x1], #8
	eor	v17.8b, v17.8b, v25.8b
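	/*
	 * At this point block_size bytes of input have been absorbed: the
	 * data was XORed into the first block_size / 8 lanes of the state,
	 * with the branches above skipping the loads that lie beyond the
	 * rate of the selected algorithm.
	 */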

3:	sub	w8, w8, #1
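	/*
	 * 24 rounds of Keccak-f[1600]: w8 counts the remaining rounds and
	 * x9 walks the round constant table.
	 */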

	eor3	v29.16b, v4.16b, v9.16b, v14.16b
	eor3	v26.16b, v1.16b, v6.16b, v11.16b
	eor3	v28.16b, v3.16b, v8.16b, v13.16b
	eor3	v25.16b, v0.16b, v5.16b, v10.16b
	eor3	v27.16b, v2.16b, v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	rax1	v30.2d, v29.2d, v26.2d		// bc[0]
	rax1	v26.2d, v26.2d, v28.2d		// bc[2]
	rax1	v28.2d, v28.2d, v25.2d		// bc[4]
	rax1	v25.2d, v25.2d, v27.2d		// bc[1]
	rax1	v27.2d, v27.2d, v29.2d		// bc[3]
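	/*
	 * theta step: the eor3 instructions compute the five column
	 * parities, and rax1 folds them into bc[x] = parity[x-1] ^
	 * rol64(parity[x+1], 1) (indices mod 5), which the eor/xar
	 * instructions below apply to every lane.
	 */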

	eor	v0.16b, v0.16b, v30.16b
	xar	v29.2d, v1.2d, v25.2d, (64 - 1)
	xar	v1.2d, v6.2d, v25.2d, (64 - 44)
	xar	v6.2d, v9.2d, v28.2d, (64 - 20)
	xar	v9.2d, v22.2d, v26.2d, (64 - 61)
	xar	v22.2d, v14.2d, v28.2d, (64 - 39)
	xar	v14.2d, v20.2d, v30.2d, (64 - 18)
	xar	v31.2d, v2.2d, v26.2d, (64 - 62)
	xar	v2.2d, v12.2d, v26.2d, (64 - 43)
	xar	v12.2d, v13.2d, v27.2d, (64 - 25)
	xar	v13.2d, v19.2d, v28.2d, (64 - 8)
	xar	v19.2d, v23.2d, v27.2d, (64 - 56)
	xar	v23.2d, v15.2d, v30.2d, (64 - 41)
	xar	v15.2d, v4.2d, v28.2d, (64 - 27)
	xar	v28.2d, v24.2d, v28.2d, (64 - 14)
	xar	v24.2d, v21.2d, v25.2d, (64 - 2)
	xar	v8.2d, v8.2d, v27.2d, (64 - 55)
	xar	v4.2d, v16.2d, v25.2d, (64 - 45)
	xar	v16.2d, v5.2d, v30.2d, (64 - 36)
	xar	v5.2d, v3.2d, v27.2d, (64 - 28)
	xar	v27.2d, v18.2d, v27.2d, (64 - 21)
	xar	v3.2d, v17.2d, v26.2d, (64 - 15)
	xar	v25.2d, v11.2d, v25.2d, (64 - 10)
	xar	v26.2d, v7.2d, v26.2d, (64 - 6)
	xar	v30.2d, v10.2d, v30.2d, (64 - 3)
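	/*
	 * rho and pi steps, fused with the remainder of theta: each xar
	 * XORs a lane with the bc[] value for its column and rotates it by
	 * its rho offset; the destination registers are chosen so that the
	 * chi step below finds each lane at its pi-permuted position.
	 */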

	bcax	v20.16b, v31.16b, v22.16b, v8.16b
	bcax	v21.16b, v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b, v8.16b, v31.16b
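	/*
	 * chi step, row by row: a[x] ^= ~a[x + 1] & a[x + 2], computed by
	 * the bcax group above and the four groups below.
	 */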

	ld1r	{v31.2d}, [x9], #8

	bcax	v17.16b, v25.16b, v19.16b, v3.16b
	bcax	v18.16b, v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b, v3.16b, v25.16b

	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	bcax	v7.16b, v30.16b, v9.16b, v4.16b
	bcax	v8.16b, v4.16b, v5.16b, v9.16b
	bcax	v9.16b, v9.16b, v6.16b, v5.16b
	bcax	v5.16b, v5.16b, v30.16b, v6.16b
	bcax	v6.16b, v6.16b, v4.16b, v30.16b

	bcax	v3.16b, v27.16b, v0.16b, v28.16b
	bcax	v4.16b, v28.16b, v1.16b, v0.16b
	bcax	v0.16b, v0.16b, v2.16b, v1.16b
	bcax	v1.16b, v1.16b, v27.16b, v2.16b
	bcax	v2.16b, v2.16b, v28.16b, v27.16b

	eor	v0.16b, v0.16b, v31.16b
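	/*
	 * iota step: v31 holds the round constant preloaded by the ld1r
	 * above; XOR it into lane (0, 0).
	 */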

	cbnz	w8, 3b
	cond_yield 4f, x8, x9
	cbnz	x2, 0b
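	/*
	 * cond_yield (see asm/assembler.h) branches to 4f when the task
	 * should give up the CPU, so the state is saved and the number of
	 * unprocessed blocks is returned to the caller; otherwise keep
	 * looping until all blocks have been consumed.
	 */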

	/* save state */
4:	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
	mov	x0, x2
	ret
SYM_FUNC_END(sha3_ce_transform)
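/*
 * The table below holds the 24 Keccak-f[1600] round constants, consumed
 * one per round by the ld1r in the round loop above.
 */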

	.section	".rodata", "a"
	.align		8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008