Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/crypto/sha3-ce-core.S
26451 views
1
/* SPDX-License-Identifier: GPL-2.0 */
2
/*
3
* sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
4
*
5
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License version 2 as
9
* published by the Free Software Foundation.
10
*/
11
12
#include <linux/linkage.h>
13
#include <asm/assembler.h>
14
15
/*
 * Define one assembler symbol per vector register name, in both the .2d
 * and the .16b spelling, whose value is the register number:
 *
 *	.Lv<N>.2d = .Lv<N>.16b = <N>	for N = 0..31
 *
 * The instruction macros in this file prepend ".L" to each register
 * operand to look these symbols up, which turns an operand written as
 * e.g. "v7.16b" into the integer 7 for use in an .inst encoding.
 */
.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
.endr
19
20
/*
21
* ARMv8.2 Crypto Extensions instructions
22
*/
23
/*
 * eor3 rd, rn, rm, ra
 *
 * Hand-assembled EOR3 (three-way exclusive OR: rd = rn ^ rm ^ ra per the
 * Arm architecture reference), emitted with .inst so that the assembler
 * does not need to know the mnemonic.
 *
 * Each operand must be a vector register written with a .16b or .2d
 * suffix, because the register number is obtained from the matching
 * .Lv<N>.<suffix> symbol. Field placement in the instruction word:
 *	rd -> bits [4:0], rn -> bits [9:5], ra -> bits [14:10],
 *	rm -> bits [20:16]
 */
.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
.endm
26
27
/*
 * rax1 rd, rn, rm
 *
 * Hand-assembled RAX1 (rotate-and-XOR: rd = rn ^ rol64(rm, 1) on each
 * 64-bit lane per the Arm architecture reference), emitted with .inst.
 *
 * Each operand must be a vector register written with a .16b or .2d
 * suffix, because the register number is obtained from the matching
 * .Lv<N>.<suffix> symbol. Field placement in the instruction word:
 *	rd -> bits [4:0], rn -> bits [9:5], rm -> bits [20:16]
 */
.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
.endm
30
31
/*
 * bcax rd, rn, rm, ra
 *
 * Hand-assembled BCAX (bit-clear-and-XOR: rd = rn ^ (rm & ~ra) per the
 * Arm architecture reference), emitted with .inst.
 *
 * Each operand must be a vector register written with a .16b or .2d
 * suffix, because the register number is obtained from the matching
 * .Lv<N>.<suffix> symbol. Field placement in the instruction word:
 *	rd -> bits [4:0], rn -> bits [9:5], ra -> bits [14:10],
 *	rm -> bits [20:16]
 */
.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
.endm
34
35
/*
 * xar rd, rn, rm, imm6
 *
 * Hand-assembled XAR (XOR-and-rotate: rd = ror64(rn ^ rm, imm6) on each
 * 64-bit lane per the Arm architecture reference), emitted with .inst.
 * Because the rotation is to the right, a left rotation by n is written
 * as (64 - n) by the callers in this file.
 *
 * The register operands must be written with a .16b or .2d suffix,
 * because the register number is obtained from the matching
 * .Lv<N>.<suffix> symbol. imm6 is an assembly-time constant expression
 * and is not range-checked here. Field placement in the instruction word:
 *	rd -> bits [4:0], rn -> bits [9:5], imm6 -> bits [15:10],
 *	rm -> bits [20:16]
 */
.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
.endm
38
39
/*
 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
 *
 * XOR successive input blocks into the 25-lane (25 x 64-bit) state at st,
 * running 24 permutation rounds after each block.
 *
 * In:
 *	x0 = st		state, 25 consecutive 64-bit lanes, read and written
 *	x1 = data	input, consumed with post-incrementing loads
 *	w2 = blocks	number of blocks to process; must be >= 1, because
 *			the count is decremented before it is first tested
 *	x3 = dg_size	only bits 6, 4 and 2 are examined; they select how
 *			many 64-bit lanes of input are absorbed per block:
 *			  bit 6 set			 9 lanes ( 72 bytes)
 *			  bit 6 clear, bit 4 clear	17 lanes (136 bytes)
 *			  bit 4 set,   bit 2 clear	13 lanes (104 bytes)
 *			  bit 4 set,   bit 2 set	18 lanes (144 bytes)
 * Out:
 *	w0 = number of blocks left unprocessed (0 when the loop ran to
 *	     completion, non-zero if cond_yield left the loop early)
 *
 * Register use inside the function:
 *	v0-v24	state lanes; only the low 64-bit lane of each register is
 *		loaded from and stored back to memory
 *	v25-v31	input staging and per-round temporaries
 *	w8	round counter, counts 24 down to 0 for every block
 *	x9	walks the round-constant table .Lsha3_rcon
 *
 * Clobbers x0 (advanced while storing), x1, x2, x8, x9 and v0-v31. Nothing
 * is saved or restored here, including v8-v15.
 * NOTE(review): presumably every caller runs this inside a kernel-mode
 * NEON section - confirm in the C glue code.
 */
	.text
SYM_FUNC_START(sha3_ce_transform)
	/* load state */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

	/* per-block loop: w8 and x9 are re-initialised for every block */
0:	sub	w2, w2, #1
	mov	w8, #24
	adr_l	x9, .Lsha3_rcon

	/* load input: the first 7 lanes are absorbed for every dg_size */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v31.8b}, [x1], #24
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b
	eor	v5.8b, v5.8b, v30.8b
	eor	v6.8b, v6.8b, v31.8b

	tbnz	x3, #6, 2f		// SHA3-512

	/* lanes 7-12: absorbed by every variant except SHA3-512 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v30.8b}, [x1], #16
	eor	v7.8b, v7.8b, v25.8b
	eor	v8.8b, v8.8b, v26.8b
	eor	v9.8b, v9.8b, v27.8b
	eor	v10.8b, v10.8b, v28.8b
	eor	v11.8b, v11.8b, v29.8b
	eor	v12.8b, v12.8b, v30.8b

	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224

	// SHA3-256: lanes 13-16
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	b	3f

1:	tbz	x3, #2, 3f		// bit 2 cleared? SHA-384

	// SHA3-224: lanes 13-17
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	eor	v17.8b, v17.8b, v29.8b
	b	3f

	// SHA3-512: lanes 7-8
2:	ld1	{v25.8b-v26.8b}, [x1], #16
	eor	v7.8b, v7.8b, v25.8b
	eor	v8.8b, v8.8b, v26.8b

	/* round loop: one pass per round, w8 counts the rounds still to run */
3:	sub	w8, w8, #1

	/*
	 * Column parities: each of v25-v29 becomes the XOR of the five
	 * state lanes of one column (lane index i, i+5, i+10, i+15, i+20).
	 */
	eor3	v29.16b, v4.16b, v9.16b, v14.16b
	eor3	v26.16b, v1.16b, v6.16b, v11.16b
	eor3	v28.16b, v3.16b, v8.16b, v13.16b
	eor3	v25.16b, v0.16b, v5.16b, v10.16b
	eor3	v27.16b, v2.16b, v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	/*
	 * Combine each parity with the neighbouring one rotated left by 1.
	 * The order matters: every source is read before it is overwritten
	 * (v29 is consumed last, v30 receives the first result).
	 */
	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
	rax1	v27.2d, v27.2d, v29.2d	// bc[3]

	/*
	 * XOR the bc[] value of its column into every lane and, in the same
	 * instruction, rotate it and move it to its new register. xar
	 * rotates right, so (64 - n) is a left rotation by n. The sequence
	 * is a chain of in-place moves: each destination was already read
	 * as a source earlier in the chain, with v29, v31 and (after their
	 * last use as bc[]) v25-v28, v30 serving as spill registers.
	 */
	eor	v0.16b, v0.16b, v30.16b
	xar	v29.2d, v1.2d, v25.2d, (64 - 1)
	xar	v1.2d, v6.2d, v25.2d, (64 - 44)
	xar	v6.2d, v9.2d, v28.2d, (64 - 20)
	xar	v9.2d, v22.2d, v26.2d, (64 - 61)
	xar	v22.2d, v14.2d, v28.2d, (64 - 39)
	xar	v14.2d, v20.2d, v30.2d, (64 - 18)
	xar	v31.2d, v2.2d, v26.2d, (64 - 62)
	xar	v2.2d, v12.2d, v26.2d, (64 - 43)
	xar	v12.2d, v13.2d, v27.2d, (64 - 25)
	xar	v13.2d, v19.2d, v28.2d, (64 - 8)
	xar	v19.2d, v23.2d, v27.2d, (64 - 56)
	xar	v23.2d, v15.2d, v30.2d, (64 - 41)
	xar	v15.2d, v4.2d, v28.2d, (64 - 27)
	xar	v28.2d, v24.2d, v28.2d, (64 - 14)
	xar	v24.2d, v21.2d, v25.2d, (64 - 2)
	xar	v8.2d, v8.2d, v27.2d, (64 - 55)
	xar	v4.2d, v16.2d, v25.2d, (64 - 45)
	xar	v16.2d, v5.2d, v30.2d, (64 - 36)
	xar	v5.2d, v3.2d, v27.2d, (64 - 28)
	xar	v27.2d, v18.2d, v27.2d, (64 - 21)
	xar	v3.2d, v17.2d, v26.2d, (64 - 15)
	xar	v25.2d, v11.2d, v25.2d, (64 - 10)
	xar	v26.2d, v7.2d, v26.2d, (64 - 6)
	xar	v30.2d, v10.2d, v30.2d, (64 - 3)

	/*
	 * Non-linear step, one group of five bcax per row of the state:
	 * lane = a ^ (b & ~c), where a, b and c are the moved lanes produced
	 * by the xar sequence above. Rows are processed from v20-v24 down
	 * to v0-v4 so that spilled inputs are consumed before their
	 * registers are reused as outputs.
	 */
	bcax	v20.16b, v31.16b, v22.16b, v8.16b
	bcax	v21.16b, v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b, v8.16b, v31.16b

	/* v31 is free from here on: fetch this round's constant, advance x9 */
	ld1r	{v31.2d}, [x9], #8

	bcax	v17.16b, v25.16b, v19.16b, v3.16b
	bcax	v18.16b, v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b, v3.16b, v25.16b

	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	bcax	v7.16b, v30.16b, v9.16b, v4.16b
	bcax	v8.16b, v4.16b, v5.16b, v9.16b
	bcax	v9.16b, v9.16b, v6.16b, v5.16b
	bcax	v5.16b, v5.16b, v30.16b, v6.16b
	bcax	v6.16b, v6.16b, v4.16b, v30.16b

	bcax	v3.16b, v27.16b, v0.16b, v28.16b
	bcax	v4.16b, v28.16b, v1.16b, v0.16b
	bcax	v0.16b, v0.16b, v2.16b, v1.16b
	bcax	v1.16b, v1.16b, v27.16b, v2.16b
	bcax	v2.16b, v2.16b, v28.16b, v27.16b

	/* XOR the round constant into lane 0 */
	eor	v0.16b, v0.16b, v31.16b

	cbnz	w8, 3b			// more rounds for this block?

	/*
	 * cond_yield is defined outside this file.
	 * NOTE(review): presumably it branches to 4f when the task should
	 * give up the CPU, using x8/x9 as scratch - confirm against
	 * <asm/assembler.h>. Both registers are dead here: they are set up
	 * again at label 0. Leaving through 4f with w2 != 0 makes the
	 * function return the number of blocks still to be processed.
	 */
	cond_yield 4f, x8, x9
	cbnz	w2, 0b			// more blocks?

	/* save state */
4:	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
	mov	w0, w2			// return the remaining block count
	ret
SYM_FUNC_END(sha3_ce_transform)
201
202
/*
 * Round constants: 24 64-bit values, one per permutation round, stored in
 * the order the rounds execute. sha3_ce_transform points x9 at the start
 * of this table for every block and fetches one entry per round with a
 * post-incrementing 8-byte replicating load.
 *
 * NOTE(review): GNU as treats the .align operand on AArch64 as a
 * power-of-two exponent, so ".align 8" requests 256-byte alignment. The
 * 8-byte loads from this table would be satisfied by ".align 3"; confirm
 * whether the larger alignment is intentional before changing it.
 */
	.section	".rodata", "a"
	.align		8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008
213
214