Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/lib/crc/arm64/crc32-core.S
26285 views
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated CRC32(C) using AArch64 CRC and PMULL instructions
 *
 * Copyright (C) 2016 - 2018 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <[email protected]>
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	/* Let the assembler accept the CRC32* and PMULL instructions used below. */
	.cpu		generic+crc+crypto
/*
 * bitle: "le" flavour of the bit<order> fix-up that the CRC macros in this
 * file apply to the CRC accumulator (w0) on entry and again before ret.
 * It intentionally emits no instructions.
 */
	.macro		bitle, reg
	.endm
/*
 * bitbe: "be" flavour of the bit<order> fix-up: reverse the bit order of
 * \reg in place.  The CRC macros in this file apply it to the CRC
 * accumulator (w0) on entry and again before ret.
 */
	.macro		bitbe, reg
	rbit		\reg, \reg
	.endm
/*
 * bytele: "le" flavour of the byte<order> fix-up applied to a single input
 * byte after it was loaded with ldrb.  It intentionally emits no
 * instructions.
 */
	.macro		bytele, reg
	.endm
/*
 * bytebe: "be" flavour of the byte<order> fix-up: bit-reverse a single byte
 * that sits zero-extended in a 32-bit register (the caller in this file
 * passes w3 right after an ldrb).
 */
	.macro		bytebe, reg
	rbit		\reg, \reg		// bits 7:0 end up reversed in bits 31:24
	lsr		\reg, \reg, #24		// move them back down to bits 7:0
	.endm
/*
 * hwordle: "le" flavour of the hword<order> fix-up applied to a 16-bit value
 * after it was loaded with ldrh.
 *
 * The rev16 (swap the two bytes of each halfword) is wrapped in CPU_BE(),
 * which is defined outside this file.  NOTE(review): it is presumably
 * provided via <asm/assembler.h> and emits its argument on big-endian
 * builds only -- confirm against that header.
 */
	.macro		hwordle, reg
CPU_BE(	rev16		\reg, \reg	)
	.endm
/*
 * hwordbe: "be" flavour of the hword<order> fix-up applied to a 16-bit value
 * after it was loaded with ldrh into a 32-bit register.
 *
 * The unconditional rbit reverses all bits of \reg.  The surrounding rev and
 * lsr are wrapped in CPU_LE()/CPU_BE(), which are defined outside this file.
 * NOTE(review): they presumably emit their argument on little-/big-endian
 * builds only, so exactly one of the two is assembled -- confirm against
 * <asm/assembler.h>.
 */
	.macro		hwordbe, reg
CPU_LE(	rev		\reg, \reg	)
	rbit		\reg, \reg
CPU_BE(	lsr		\reg, \reg, #16	)
	.endm
/*
 * le: "le" flavour of the <order> fix-up applied to freshly loaded data
 * words.  Takes any number of registers and emits, for each one, a byte
 * swap (rev) wrapped in CPU_BE().
 *
 * NOTE(review): CPU_BE() is defined outside this file and presumably emits
 * its argument on big-endian builds only -- confirm.
 */
	.macro		le, regs:vararg
	.irp		r, \regs
CPU_BE(	rev		\r, \r		)
	.endr
	.endm
/*
 * be: "be" flavour of the <order> fix-up applied to freshly loaded data
 * words.  Takes any number of registers and emits two passes over the list:
 * first a byte swap (rev) per register wrapped in CPU_LE(), then an
 * unconditional bit reversal (rbit) per register.
 *
 * NOTE(review): CPU_LE() is defined outside this file and presumably emits
 * its argument on little-endian builds only -- confirm.
 */
	.macro		be, regs:vararg
	.irp		r, \regs
CPU_LE(	rev		\r, \r		)
	.endr
	.irp		r, \regs
	rbit		\r, \r
	.endr
	.endm
/*
 * __crc32 - complete body of the scalar CRC32 / CRC32C routines
 *
 * Macro arguments:
 *   c      spliced into the mnemonic: empty selects crc32{b,h,w,x},
 *          "c" selects crc32c{b,h,w,x}
 *   order  "le" (default) or "be"; selects which of the bit<order>,
 *          byte<order>, hword<order> and <order> fix-up macros are applied
 *          to the CRC accumulator and to the loaded data
 *
 * Registers:
 *   w0     CRC accumulator on entry, result on exit
 *   x1     pointer to the input buffer (modified)
 *   x2     number of input bytes (modified)
 *   x3-x8  scratch; the condition flags are clobbered as well
 *
 * The expansion contains its own ret instructions.
 */
	.macro		__crc32, c, order=le
	bit\order	w0
	cmp		x2, #16
	b.lt		8f			// less than 16 bytes

	and		x7, x2, #0x1f		// x7 := len % 32, the head bytes
	and		x2, x2, #~0x1f		// x2 := bytes left for the 32-byte loop
	cbz		x7, 32f			// multiple of 32 bytes

	/*
	 * Consume the x7 (1..31) head bytes without branching.  x3/x4 receive
	 * the first 16 bytes of the buffer, x5/x6 the 16 bytes that start
	 * (x7 % 16) bytes into it.  At least 16 bytes are available on this
	 * path, so both loads stay inside the buffer.
	 */
	and		x8, x7, #0xf
	ldp		x3, x4, [x1]
	add		x8, x8, x1
	add		x1, x1, x7		// x1 := first byte after the head
	ldp		x5, x6, [x8]
	\order		x3, x4, x5, x6

	/*
	 * For each of the bits 8, 4, 2 and 1 of x7 the CRC update is computed
	 * into w8 unconditionally.  The csel pairs then keep the old state if
	 * the bit is clear (eq), or take the new CRC and move on to the
	 * not yet consumed data if it is set.
	 */
	tst		x7, #8
	crc32\c\()x	w8, w0, x3
	csel		x3, x3, x4, eq
	csel		w0, w0, w8, eq
	tst		x7, #4
	lsr		x4, x3, #32
	crc32\c\()w	w8, w0, w3
	csel		x3, x3, x4, eq
	csel		w0, w0, w8, eq
	tst		x7, #2
	lsr		w4, w3, #16
	crc32\c\()h	w8, w0, w3
	csel		w3, w3, w4, eq
	csel		w0, w0, w8, eq
	tst		x7, #1
	crc32\c\()b	w8, w0, w3
	csel		w0, w0, w8, eq
	tst		x7, #16			// 16 more head bytes, held in x5/x6
	crc32\c\()x	w8, w0, x5
	crc32\c\()x	w8, w8, x6
	csel		w0, w0, w8, eq
	cbz		x2, 0f			// no 32-byte blocks left

	/* Main loop: 32 bytes per iteration until x2 reaches zero */
32:	ldp		x3, x4, [x1], #32
	sub		x2, x2, #32
	ldp		x5, x6, [x1, #-16]
	\order		x3, x4, x5, x6
	crc32\c\()x	w0, w0, x3
	crc32\c\()x	w0, w0, x4
	crc32\c\()x	w0, w0, x5
	crc32\c\()x	w0, w0, x6
	cbnz		x2, 32b
0:	bit\order	w0
	ret

	/*
	 * Short input path: bits 3..0 of x2 select an 8-, 4-, 2- and 1-byte
	 * step, taken in that order.
	 */
8:	tbz		x2, #3, 4f
	ldr		x3, [x1], #8
	\order		x3
	crc32\c\()x	w0, w0, x3
4:	tbz		x2, #2, 2f
	ldr		w3, [x1], #4
	\order		w3
	crc32\c\()w	w0, w0, w3
2:	tbz		x2, #1, 1f
	ldrh		w3, [x1], #2
	hword\order	w3
	crc32\c\()h	w0, w0, w3
1:	tbz		x2, #0, 0f
	ldrb		w3, [x1]
	byte\order	w3
	crc32\c\()b	w0, w0, w3
0:	bit\order	w0
	ret
	.endm
/*
 * crc32_le_arm64: the __crc32 macro expanded with its defaults, i.e. the
 * crc32{b,h,w,x} instructions and the "le" order.
 *
 * Register roles are those of __crc32: w0 = CRC in/out, x1 = input buffer,
 * x2 = length in bytes.  The ret is part of the macro expansion.
 */
	.align		5
SYM_FUNC_START(crc32_le_arm64)
	__crc32
SYM_FUNC_END(crc32_le_arm64)
/*
 * crc32c_le_arm64: the __crc32 macro expanded with c = "c", i.e. the
 * crc32c{b,h,w,x} instructions, and the default "le" order.
 *
 * Register roles are those of __crc32: w0 = CRC in/out, x1 = input buffer,
 * x2 = length in bytes.  The ret is part of the macro expansion.
 */
	.align		5
SYM_FUNC_START(crc32c_le_arm64)
	__crc32		c
SYM_FUNC_END(crc32c_le_arm64)
/*
 * crc32_be_arm64: the __crc32 macro expanded with the crc32{b,h,w,x}
 * instructions and order = "be", so the CRC accumulator and all loaded data
 * go through the "be" fix-up macros (bitbe, bytebe, hwordbe, be).
 *
 * Register roles are those of __crc32: w0 = CRC in/out, x1 = input buffer,
 * x2 = length in bytes.  The ret is part of the macro expansion.
 */
	.align		5
SYM_FUNC_START(crc32_be_arm64)
	__crc32		order=be
SYM_FUNC_END(crc32_be_arm64)
/* Symbolic names for the pointer and length registers used by crc4way */
	in		.req	x1
	len		.req	x2
/*
 * crc4way - complete body of the 4-way interleaved CRC routines
 *
 * Macro arguments:
 *   insn   64-bit CRC instruction to use (crc32x or crc32cx in this file)
 *   table  label of the matching table of folding coefficients
 *   order  "le" (default) or "be"; selects the bit<order> and <order>
 *          fix-up macros
 *
 * Registers:
 *   w0: input CRC at entry, output CRC at exit
 *   x1: pointer to input buffer (in); points past the consumed data at exit
 *   x2: length of input in bytes (len)
 *   x3-x17, v0-v5 and the condition flags are clobbered
 *
 * Only whole 64-byte blocks are consumed: the low six bits of the length
 * are discarded and any remaining tail is left to the caller.  If the input
 * holds no complete 64-byte block, the CRC is returned unchanged.
 *
 * The expansion contains its own ret instruction.
 */
	.macro		crc4way, insn, table, order=le
	bit\order	w0
	lsr		len, len, #6		// len := # of 64-byte blocks
	cbz		len, .Lc\@		// no full block: leave the CRC as is

	/* Process up to 64 blocks of 64 bytes at a time */
.La\@:	mov		x3, #64
	cmp		len, #64
	csel		x3, x3, len, hi		// x3 := min(len, 64)
	sub		len, len, x3

	/* Divide the input into 4 contiguous blocks */
	add		x4, x3, x3, lsl #1	// x4 := 3 * x3
	add		x7, in, x3, lsl #4	// x7 := in + 16 * x3
	add		x8, in, x3, lsl #5	// x8 := in + 32 * x3
	add		x9, in, x4, lsl #4	// x9 := in + 16 * x4

	/* Load the folding coefficients from the lookup table */
	adr_l		x5, \table - 12		// entry 0 omitted
	add		x5, x5, x4, lsl #2	// x5 += 12 * x3
	ldp		s0, s1, [x5]
	ldr		s2, [x5, #8]

	/* Zero init partial CRCs for this iteration */
	mov		w4, wzr
	mov		w5, wzr
	mov		w6, wzr
	mov		x17, xzr

	/*
	 * One iteration consumes 16 bytes from each of the four streams.
	 * The second word of the fourth stream (x17) is not fed into w6 in
	 * the iteration that loads it but at the top of the next one; the
	 * word loaded last is folded in after the loop instead.
	 */
.Lb\@:	sub		x3, x3, #1
	\insn		w6, w6, x17
	ldp		x10, x11, [in], #16
	ldp		x12, x13, [x7], #16
	ldp		x14, x15, [x8], #16
	ldp		x16, x17, [x9], #16

	\order		x10, x11, x12, x13, x14, x15, x16, x17

	/* Apply the CRC transform to 4 16-byte blocks in parallel */
	\insn		w0, w0, x10
	\insn		w4, w4, x12
	\insn		w5, w5, x14
	\insn		w6, w6, x16
	\insn		w0, w0, x11
	\insn		w4, w4, x13
	\insn		w5, w5, x15
	cbnz		x3, .Lb\@

	/* Combine the 4 partial results into w0 */
	mov		v3.d[0], x0
	mov		v4.d[0], x4
	mov		v5.d[0], x5
	pmull		v0.1q, v0.1d, v3.1d
	pmull		v1.1q, v1.1d, v4.1d
	pmull		v2.1q, v2.1d, v5.1d
	eor		v0.8b, v0.8b, v1.8b
	eor		v0.8b, v0.8b, v2.8b
	mov		x5, v0.d[0]
	eor		x5, x5, x17		// merge with the pending last word
	\insn		w0, w6, x5

	mov		in, x9			// x9 ended up just past this chunk
	cbnz		len, .La\@

.Lc\@:	bit\order	w0
	ret
	.endm
/*
 * crc32c_le_arm64_4way: the crc4way macro expanded with the crc32cx
 * instruction, the coefficient table at .L0 and the default "le" order.
 *
 * Register roles are those of crc4way: w0 = CRC in/out, x1 = input buffer,
 * x2 = length in bytes.  The ret is part of the macro expansion.
 */
	.align		5
SYM_FUNC_START(crc32c_le_arm64_4way)
	crc4way		crc32cx, .L0
SYM_FUNC_END(crc32c_le_arm64_4way)
/*
 * crc32_le_arm64_4way: the crc4way macro expanded with the crc32x
 * instruction, the coefficient table at .L1 and the default "le" order.
 *
 * Register roles are those of crc4way: w0 = CRC in/out, x1 = input buffer,
 * x2 = length in bytes.  The ret is part of the macro expansion.
 */
	.align		5
SYM_FUNC_START(crc32_le_arm64_4way)
	crc4way		crc32x, .L1
SYM_FUNC_END(crc32_le_arm64_4way)
/*
 * crc32_be_arm64_4way: the crc4way macro expanded with the crc32x
 * instruction and the coefficient table at .L1 (the same table as the "le"
 * CRC32 variant), but with order = "be".
 *
 * Register roles are those of crc4way: w0 = CRC in/out, x1 = input buffer,
 * x2 = length in bytes.  The ret is part of the macro expansion.
 */
	.align		5
SYM_FUNC_START(crc32_be_arm64_4way)
	crc4way		crc32x, .L1, be
SYM_FUNC_END(crc32_be_arm64_4way)
/*
 * .L0: folding coefficients for crc4way when it is expanded with crc32cx
 * (crc32c_le_arm64_4way).
 *
 * Each row is one 12-byte entry of three 32-bit words, which crc4way loads
 * into s0, s1 and s2 and multiplies (pmull) with the partial CRCs of the
 * first three input streams.  Row n, counted from 1, is the entry for a
 * chunk of n 64-byte blocks; crc4way addresses it as .L0 - 12 + 12 * n, so
 * there is no row for n = 0.  The table has 64 rows.
 */
	.section	.rodata, "a", %progbits
	.align		6
.L0:	.long		0xddc0152b, 0xba4fc28e, 0x493c7d27
	.long		0x0715ce53, 0x9e4addf8, 0xba4fc28e
	.long		0xc96cfdc0, 0x0715ce53, 0xddc0152b
	.long		0xab7aff2a, 0x0d3b6092, 0x9e4addf8
	.long		0x299847d5, 0x878a92a7, 0x39d3b296
	.long		0xb6dd949b, 0xab7aff2a, 0x0715ce53
	.long		0xa60ce07b, 0x83348832, 0x47db8317
	.long		0xd270f1a2, 0xb9e02b86, 0x0d3b6092
	.long		0x65863b64, 0xb6dd949b, 0xc96cfdc0
	.long		0xb3e32c28, 0xbac2fd7b, 0x878a92a7
	.long		0xf285651c, 0xce7f39f4, 0xdaece73e
	.long		0x271d9844, 0xd270f1a2, 0xab7aff2a
	.long		0x6cb08e5c, 0x2b3cac5d, 0x2162d385
	.long		0xcec3662e, 0x1b03397f, 0x83348832
	.long		0x8227bb8a, 0xb3e32c28, 0x299847d5
	.long		0xd7a4825c, 0xdd7e3b0c, 0xb9e02b86
	.long		0xf6076544, 0x10746f3c, 0x18b33a4e
	.long		0x98d8d9cb, 0x271d9844, 0xb6dd949b
	.long		0x57a3d037, 0x93a5f730, 0x78d9ccb7
	.long		0x3771e98f, 0x6b749fb2, 0xbac2fd7b
	.long		0xe0ac139e, 0xcec3662e, 0xa60ce07b
	.long		0x6f345e45, 0xe6fc4e6a, 0xce7f39f4
	.long		0xa2b73df1, 0xb0cd4768, 0x61d82e56
	.long		0x86d8e4d2, 0xd7a4825c, 0xd270f1a2
	.long		0xa90fd27a, 0x0167d312, 0xc619809d
	.long		0xca6ef3ac, 0x26f6a60a, 0x2b3cac5d
	.long		0x4597456a, 0x98d8d9cb, 0x65863b64
	.long		0xc9c8b782, 0x68bce87a, 0x1b03397f
	.long		0x62ec6c6d, 0x6956fc3b, 0xebb883bd
	.long		0x2342001e, 0x3771e98f, 0xb3e32c28
	.long		0xe8b6368b, 0x2178513a, 0x064f7f26
	.long		0x9ef68d35, 0x170076fa, 0xdd7e3b0c
	.long		0x0b0bf8ca, 0x6f345e45, 0xf285651c
	.long		0x02ee03b2, 0xff0dba97, 0x10746f3c
	.long		0x135c83fd, 0xf872e54c, 0xc7a68855
	.long		0x00bcf5f6, 0x86d8e4d2, 0x271d9844
	.long		0x58ca5f00, 0x5bb8f1bc, 0x8e766a0c
	.long		0xded288f8, 0xb3af077a, 0x93a5f730
	.long		0x37170390, 0xca6ef3ac, 0x6cb08e5c
	.long		0xf48642e9, 0xdd66cbbb, 0x6b749fb2
	.long		0xb25b29f2, 0xe9e28eb4, 0x1393e203
	.long		0x45cddf4e, 0xc9c8b782, 0xcec3662e
	.long		0xdfd94fb2, 0x93e106a4, 0x96c515bb
	.long		0x021ac5ef, 0xd813b325, 0xe6fc4e6a
	.long		0x8e1450f7, 0x2342001e, 0x8227bb8a
	.long		0xe0cdcf86, 0x6d9a4957, 0xb0cd4768
	.long		0x613eee91, 0xd2c3ed1a, 0x39c7ff35
	.long		0xbedc6ba1, 0x9ef68d35, 0xd7a4825c
	.long		0x0cd1526a, 0xf2271e60, 0x0ab3844b
	.long		0xd6c3a807, 0x2664fd8b, 0x0167d312
	.long		0x1d31175f, 0x02ee03b2, 0xf6076544
	.long		0x4be7fd90, 0x363bd6b3, 0x26f6a60a
	.long		0x6eeed1c9, 0x5fabe670, 0xa741c1bf
	.long		0xb3a6da94, 0x00bcf5f6, 0x98d8d9cb
	.long		0x2e7d11a7, 0x17f27698, 0x49c3cc9c
	.long		0x889774e1, 0xaa7c7ad5, 0x68bce87a
	.long		0x8a074012, 0xded288f8, 0x57a3d037
	.long		0xbd0bb25f, 0x6d390dec, 0x6956fc3b
	.long		0x3be3c09b, 0x6353c1cc, 0x42d98888
	.long		0x465a4eee, 0xf48642e9, 0x3771e98f
	.long		0x2e5f3c8c, 0xdd35bc8d, 0xb42ae3d9
	.long		0xa52f58ec, 0x9a5ede41, 0x2178513a
	.long		0x47972100, 0x45cddf4e, 0xe0ac139e
	.long		0x359674f7, 0xa51b6135, 0x170076fa
/*
 * .L1: folding coefficients for crc4way when it is expanded with crc32x
 * (crc32_le_arm64_4way and crc32_be_arm64_4way).
 *
 * Each row is one 12-byte entry of three 32-bit words, which crc4way loads
 * into s0, s1 and s2 and multiplies (pmull) with the partial CRCs of the
 * first three input streams.  Row n, counted from 1, is the entry for a
 * chunk of n 64-byte blocks; crc4way addresses it as .L1 - 12 + 12 * n, so
 * there is no row for n = 0.  The table has 64 rows.
 */
.L1:	.long		0xaf449247, 0x81256527, 0xccaa009e
	.long		0x57c54819, 0x1d9513d7, 0x81256527
	.long		0x3f41287a, 0x57c54819, 0xaf449247
	.long		0xf5e48c85, 0x910eeec1, 0x1d9513d7
	.long		0x1f0c2cdd, 0x9026d5b1, 0xae0b5394
	.long		0x71d54a59, 0xf5e48c85, 0x57c54819
	.long		0x1c63267b, 0xfe807bbd, 0x0cbec0ed
	.long		0xd31343ea, 0xe95c1271, 0x910eeec1
	.long		0xf9d9c7ee, 0x71d54a59, 0x3f41287a
	.long		0x9ee62949, 0xcec97417, 0x9026d5b1
	.long		0xa55d1514, 0xf183c71b, 0xd1df2327
	.long		0x21aa2b26, 0xd31343ea, 0xf5e48c85
	.long		0x9d842b80, 0xeea395c4, 0x3c656ced
	.long		0xd8110ff1, 0xcd669a40, 0xfe807bbd
	.long		0x3f9e9356, 0x9ee62949, 0x1f0c2cdd
	.long		0x1d6708a0, 0x0c30f51d, 0xe95c1271
	.long		0xef82aa68, 0xdb3935ea, 0xb918a347
	.long		0xd14bcc9b, 0x21aa2b26, 0x71d54a59
	.long		0x99cce860, 0x356d209f, 0xff6f2fc2
	.long		0xd8af8e46, 0xc352f6de, 0xcec97417
	.long		0xf1996890, 0xd8110ff1, 0x1c63267b
	.long		0x631bc508, 0xe95c7216, 0xf183c71b
	.long		0x8511c306, 0x8e031a19, 0x9b9bdbd0
	.long		0xdb3839f3, 0x1d6708a0, 0xd31343ea
	.long		0x7a92fffb, 0xf7003835, 0x4470ac44
	.long		0x6ce68f2a, 0x00eba0c8, 0xeea395c4
	.long		0x4caaa263, 0xd14bcc9b, 0xf9d9c7ee
	.long		0xb46f7cff, 0x9a1b53c8, 0xcd669a40
	.long		0x60290934, 0x81b6f443, 0x6d40f445
	.long		0x8e976a7d, 0xd8af8e46, 0x9ee62949
	.long		0xdcf5088a, 0x9dbdc100, 0x145575d5
	.long		0x1753ab84, 0xbbf2f6d6, 0x0c30f51d
	.long		0x255b139e, 0x631bc508, 0xa55d1514
	.long		0xd784eaa8, 0xce26786c, 0xdb3935ea
	.long		0x6d2c864a, 0x8068c345, 0x2586d334
	.long		0x02072e24, 0xdb3839f3, 0x21aa2b26
	.long		0x06689b0a, 0x5efd72f5, 0xe0575528
	.long		0x1e52f5ea, 0x4117915b, 0x356d209f
	.long		0x1d3d1db6, 0x6ce68f2a, 0x9d842b80
	.long		0x3796455c, 0xb8e0e4a8, 0xc352f6de
	.long		0xdf3a4eb3, 0xc55a2330, 0xb84ffa9c
	.long		0x28ae0976, 0xb46f7cff, 0xd8110ff1
	.long		0x9764bc8d, 0xd7e7a22c, 0x712510f0
	.long		0x13a13e18, 0x3e9a43cd, 0xe95c7216
	.long		0xb8ee242e, 0x8e976a7d, 0x3f9e9356
	.long		0x0c540e7b, 0x753c81ff, 0x8e031a19
	.long		0x9924c781, 0xb9220208, 0x3edcde65
	.long		0x3954de39, 0x1753ab84, 0x1d6708a0
	.long		0xf32238b5, 0xbec81497, 0x9e70b943
	.long		0xbbd2cd2c, 0x0925d861, 0xf7003835
	.long		0xcc401304, 0xd784eaa8, 0xef82aa68
	.long		0x4987e684, 0x6044fbb0, 0x00eba0c8
	.long		0x3aa11427, 0x18fe3b4a, 0x87441142
	.long		0x297aad60, 0x02072e24, 0xd14bcc9b
	.long		0xf60c5e51, 0x6ef6f487, 0x5b7fdd0a
	.long		0x632d78c5, 0x3fc33de4, 0x9a1b53c8
	.long		0x25b8822a, 0x1e52f5ea, 0x99cce860
	.long		0xd4fc84bc, 0x1af62fb8, 0x81b6f443
	.long		0x5690aa32, 0xa91fdefb, 0x688a110e
	.long		0x1357a093, 0x3796455c, 0xd8af8e46
	.long		0x798fdd33, 0xaaa18a37, 0x357b9517
	.long		0xc2815395, 0x54d42691, 0x9dbdc100
	.long		0x21cfc0f7, 0x28ae0976, 0xf1996890
	.long		0xa0decef3, 0x7b4aa8b7, 0xbbf2f6d6