Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/lib/crypto/arm64/ghash-neon-core.S
170891 views
1
/* SPDX-License-Identifier: GPL-2.0-only */
2
/*
3
* Accelerated GHASH implementation with ARMv8 ASIMD instructions.
4
*
5
* Copyright (C) 2014 - 2018 Linaro Ltd. <[email protected]>
6
*/
7
8
#include <linux/linkage.h>
9
#include <asm/assembler.h>
10
11
// Register aliases for the GHASH state and scratch registers.
//
// SHASH holds the hash key (loaded from x3 in pmull_ghash_update_p8);
// SHASH2 is derived from it in __pmull_pre_p8 as the xor of its two
// 64-bit halves, replicated.  XL/XM/XH accumulate the low/middle/high
// partial products of the 128x128->256 bit carry-less multiply.
//
// NOTE: XH and IN1 both alias v7.  This is safe in the main loop:
// IN1 is fully consumed (eor'ed into XL) before __pmull2_p8 writes XH.
SHASH .req v0
SHASH2 .req v1
T1 .req v2
T2 .req v3
XM .req v5
XL .req v6
XH .req v7
IN1 .req v7

// Byte masks used by __pmull_p8_tail to clear the bytes shifted out
// of the partial products (values computed in __pmull_pre_p8).
k00_16 .req v8
k32_48 .req v9

// Scratch registers for the 8-bit polynomial-multiply emulation.
t3 .req v10
t4 .req v11
t5 .req v12
t6 .req v13
t7 .req v14
t8 .req v15
t9 .req v16

// TBL permutation vectors: per-64-bit-lane byte rotations of the
// source register, computed once in __pmull_pre_p8.
perm1 .req v17
perm2 .req v18
perm3 .req v19

// Precomputed byte rotations of SHASH (used by the __pmull*_SHASH tails).
sh1 .req v20
sh2 .req v21
sh3 .req v22
sh4 .req v23

// Precomputed byte rotations of SHASH2 (used by __pmull_p8_SHASH2).
ss1 .req v24
ss2 .req v25
ss3 .req v26
ss4 .req v27

.text
// __pmull_p8 rq, ad, bd
//
// 64x64 -> 128 bit carry-less multiply of the low 64-bit halves of
// \ad and \bd, emulated with 8x8-bit polynomial multiplies (pmull on
// .8b lanes) instead of the 64-bit crypto-extension PMULL.
// \bd must be SHASH or SHASH2: the name selects the __pmull_p8_<bd>
// helper, which supplies the matching precomputed rotations of the
// multiplicand.  Result in \rq; clobbers t3-t9 (via the tail).
.macro __pmull_p8, rq, ad, bd
// A1/A2/A3: \ad rotated by 1, 2 and 3 bytes.
ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1
ext t5.8b, \ad\().8b, \ad\().8b, #2 // A2
ext t7.8b, \ad\().8b, \ad\().8b, #3 // A3

__pmull_p8_\bd \rq, \ad
.endm
// __pmull2_p8 rq, ad, bd
//
// As __pmull_p8, but multiplies the *upper* 64-bit halves of \ad and
// \bd (pmull2 forms, selected via the "2" argument to the tail).
// The byte rotations are produced with TBL and the perm1-3 vectors,
// which rotate each 64-bit lane of \ad the same way ext #1/#2/#3
// rotates an 8-byte vector.  Result in \rq; clobbers t3-t9.
.macro __pmull2_p8, rq, ad, bd
tbl t3.16b, {\ad\().16b}, perm1.16b // A1
tbl t5.16b, {\ad\().16b}, perm2.16b // A2
tbl t7.16b, {\ad\().16b}, perm3.16b // A3

__pmull2_p8_\bd \rq, \ad
.endm
// Tail of __pmull_p8 for \bd == SHASH: low-half multiply using the
// precomputed rotations sh1-sh4 of the hash key.
.macro __pmull_p8_SHASH, rq, ad
__pmull_p8_tail \rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
.endm
// Tail of __pmull_p8 for \bd == SHASH2: low-half multiply using the
// precomputed rotations ss1-ss4 of the folded key.
.macro __pmull_p8_SHASH2, rq, ad
__pmull_p8_tail \rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
.endm
// Tail of __pmull2_p8 for \bd == SHASH: upper-half multiply (pmull2,
// via the "2" argument) using the rotations sh1-sh4, whose upper
// lanes were produced by the full-width TBL in __pmull_pre_p8.
// (No __pmull2_p8_SHASH2 exists: that combination is never used.)
.macro __pmull2_p8_SHASH, rq, ad
__pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
.endm
// __pmull_p8_tail rq, ad, bd, nb, t, b1, b2, b3, b4
//
// Shared tail of the 64x64-bit carry-less multiply emulation.
// Multiplies the rotated copies of the multiplier (t3/t5/t7, prepared
// by the caller) and of the multiplicand (\b1-\b4, the precomputed
// sh*/ss* invariants) using 8x8-bit polynomial multiplies, then
// masks, shifts and xors the partial products into \rq, the 128-bit
// polynomial product A*B.
//
// \ad, \bd:  multiplier / multiplicand operands (with lane suffix)
// \nb:       lane arrangement: 8b (low halves) or 16b (upper halves)
// \t:        empty or "2", selecting pmull vs pmull2
// \b1-\b4:   \bd rotated by 1-4 bytes
// Clobbers t3-t9.
.macro __pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
pmull\t t3.8h, t3.\nb, \bd // F = A1*B
pmull\t t4.8h, \ad, \b1\().\nb // E = A*B1
pmull\t t5.8h, t5.\nb, \bd // H = A2*B
pmull\t t6.8h, \ad, \b2\().\nb // G = A*B2
pmull\t t7.8h, t7.\nb, \bd // J = A3*B
pmull\t t8.8h, \ad, \b3\().\nb // I = A*B3
pmull\t t9.8h, \ad, \b4\().\nb // K = A*B4
pmull\t \rq\().8h, \ad, \bd // D = A*B

// Combine the symmetric partial products pairwise.
eor t3.16b, t3.16b, t4.16b // L = E + F
eor t5.16b, t5.16b, t6.16b // M = G + H
eor t7.16b, t7.16b, t8.16b // N = I + J

// De-interleave the even/odd 64-bit halves of each combined term.
uzp1 t4.2d, t3.2d, t5.2d
uzp2 t3.2d, t3.2d, t5.2d
uzp1 t6.2d, t7.2d, t9.2d
uzp2 t7.2d, t7.2d, t9.2d

// t3 = (L) (P0 + P1) << 8
// t5 = (M) (P2 + P3) << 16
eor t4.16b, t4.16b, t3.16b
and t3.16b, t3.16b, k32_48.16b // mask bytes that would shift out

// t7 = (N) (P4 + P5) << 24
// t9 = (K) (P6 + P7) << 32
eor t6.16b, t6.16b, t7.16b
and t7.16b, t7.16b, k00_16.16b // mask bytes that would shift out

eor t4.16b, t4.16b, t3.16b
eor t6.16b, t6.16b, t7.16b

// Re-interleave the corrected halves.
zip2 t5.2d, t4.2d, t3.2d
zip1 t3.2d, t4.2d, t3.2d
zip2 t9.2d, t6.2d, t7.2d
zip1 t7.2d, t6.2d, t7.2d

// Byte-rotate each term into place; the ext amounts realise the
// << 8/16/24/32 shifts noted above.
ext t3.16b, t3.16b, t3.16b, #15
ext t5.16b, t5.16b, t5.16b, #14
ext t7.16b, t7.16b, t7.16b, #13
ext t9.16b, t9.16b, t9.16b, #12

// Accumulate all shifted terms into the result D.
eor t3.16b, t3.16b, t5.16b
eor t7.16b, t7.16b, t9.16b
eor \rq\().16b, \rq\().16b, t3.16b
eor \rq\().16b, \rq\().16b, t7.16b
.endm
// __pmull_pre_p8
//
// One-time precomputation of the loop invariants used by the
// emulated multiply: SHASH2, the k00_16/k32_48 byte masks, the TBL
// permutation vectors perm1-3, and the rotated copies of
// SHASH/SHASH2 (sh1-sh4 / ss1-ss4).
// Expects SHASH to be loaded; clobbers x5 and T1.
.macro __pmull_pre_p8
// SHASH2 = xor of the two 64-bit halves of SHASH, in both halves.
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
eor SHASH2.16b, SHASH2.16b, SHASH.16b

// k00_16 := 0x0000000000000000_000000000000ffff
// k32_48 := 0x00000000ffffffff_0000ffffffffffff
movi k32_48.2d, #0xffffffff
mov k32_48.h[2], k32_48.h[0]
ushr k00_16.2d, k32_48.2d, #32

// prepare the permutation vectors
mov_q x5, 0x080f0e0d0c0b0a09
movi T1.8b, #8 // .8b form: T1's upper 64 bits stay zero
dup perm1.2d, x5
// The eor flips bit 3 of the *low* eight indices only (T1's upper
// half is zero), so each half of perm1 indexes its own 64-bit lane:
// TBL with perm1 rotates each lane by one byte, exactly as ext #1
// does for an 8-byte vector.
eor perm1.16b, perm1.16b, T1.16b
// perm2, perm3 and T1: rotations by 2, 3 and 4 bytes, derived from
// perm1 by shifting the index bytes around within each lane.
ushr perm2.2d, perm1.2d, #8
ushr perm3.2d, perm1.2d, #16
ushr T1.2d, perm1.2d, #24
sli perm2.2d, perm1.2d, #56
sli perm3.2d, perm1.2d, #48
sli T1.2d, perm1.2d, #40

// precompute loop invariants:
// sh1-sh4 = SHASH rotated by 1-4 bytes per 64-bit lane (full width,
// so both pmull and pmull2 tails can use them);
// ss1-ss4 = low half of SHASH2 rotated by 1-4 bytes.
tbl sh1.16b, {SHASH.16b}, perm1.16b
tbl sh2.16b, {SHASH.16b}, perm2.16b
tbl sh3.16b, {SHASH.16b}, perm3.16b
tbl sh4.16b, {SHASH.16b}, T1.16b
ext ss1.8b, SHASH2.8b, SHASH2.8b, #1
ext ss2.8b, SHASH2.8b, SHASH2.8b, #2
ext ss3.8b, SHASH2.8b, SHASH2.8b, #3
ext ss4.8b, SHASH2.8b, SHASH2.8b, #4
.endm
// __pmull_reduce_p8
//
// Fold the 256-bit product held in XL (low), XH (high) and XM
// (middle, with the caller's T1 still to be xor'ed in) back down to
// 128 bits.  The shift amounts (57 = 64-7, 62 = 64-2, 63 = 64-1 and
// the mirrored 1/2/6-bit right shifts) correspond to reduction
// modulo the GHASH polynomial x^128 + x^7 + x^2 + x + 1.
// On exit the caller completes the fold with two further eors of
// T2/XH into XL (see the main loop).  Clobbers T1, T2.
.macro __pmull_reduce_p8
eor XM.16b, XM.16b, T1.16b // finish the Karatsuba middle term

// Merge the middle term into the low/high halves of the product.
mov XL.d[1], XM.d[0]
mov XH.d[0], XM.d[1]

// First folding step: multiply by the low part of the polynomial.
shl T1.2d, XL.2d, #57
shl T2.2d, XL.2d, #62
eor T2.16b, T2.16b, T1.16b
shl T1.2d, XL.2d, #63
eor T2.16b, T2.16b, T1.16b
ext T1.16b, XL.16b, XH.16b, #8
eor T2.16b, T2.16b, T1.16b

mov XL.d[1], T2.d[0]
mov XH.d[0], T2.d[1]

// Second folding step with the mirrored shift amounts.
ushr T2.2d, XL.2d, #1
eor XH.16b, XH.16b, XL.16b
eor XL.16b, XL.16b, T2.16b
ushr T2.2d, T2.2d, #6
ushr XL.2d, XL.2d, #1
.endm
/*
 * void pmull_ghash_update_p8(size_t blocks, struct polyval_elem *dg,
 *                            const u8 *src,
 *                            const struct polyval_elem *h)
 *
 * Fold 'blocks' 16-byte blocks from 'src' into the GHASH digest at
 * 'dg' using the hash key at 'h', via the 8-bit-pmull emulation
 * defined above.
 *
 *   x0 = blocks  -- assumed >= 1: the loop body executes once before
 *                   the cbnz test, so blocks == 0 would wrap around
 *   x1 = dg      -- digest in/out (loaded into XL, stored at the end)
 *   x2 = src     -- input, advanced 16 bytes per iteration
 *   x3 = h       -- hash key, loaded into SHASH
 *
 * NOTE(review): clobbers v8-v15, whose low 64 bits are callee-saved
 * under AAPCS64 -- presumably callers bracket this with
 * kernel_neon_begin()/kernel_neon_end(); confirm at the call sites.
 */
SYM_FUNC_START(pmull_ghash_update_p8)
ld1 {SHASH.2d}, [x3] // SHASH = hash key
ld1 {XL.2d}, [x1] // XL = current digest

__pmull_pre_p8 // masks, perm vectors, rotated keys

0: ld1 {T1.2d}, [x2], #16 // T1 = next input block
sub x0, x0, #1

/* multiply XL by SHASH in GF(2^128) */
rev64 T1.16b, T1.16b // byte-swap each 64-bit half of the input

ext T2.16b, XL.16b, XL.16b, #8
ext IN1.16b, T1.16b, T1.16b, #8
eor T1.16b, T1.16b, T2.16b
eor XL.16b, XL.16b, IN1.16b // IN1 (v7) dead before XH (v7) is written

__pmull2_p8 XH, XL, SHASH // a1 * b1
eor T1.16b, T1.16b, XL.16b
__pmull_p8 XL, XL, SHASH // a0 * b0
__pmull_p8 XM, T1, SHASH2 // (a1 + a0)(b1 + b0)

// Karatsuba fix-up: xor the low and high products into the middle.
eor T2.16b, XL.16b, XH.16b
ext T1.16b, XL.16b, XH.16b, #8
eor XM.16b, XM.16b, T2.16b

__pmull_reduce_p8 // fold 256 -> 128 bits

// Final fold steps left to the caller by __pmull_reduce_p8.
eor T2.16b, T2.16b, XH.16b
eor XL.16b, XL.16b, T2.16b

cbnz x0, 0b

st1 {XL.2d}, [x1] // store updated digest
ret
SYM_FUNC_END(pmull_ghash_update_p8)