// Source: GitHub repository torvalds/linux, path blob/master/lib/crypto/arm64/nh-neon-core.S
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <[email protected]>
 */

#include <linux/linkage.h>
11
12
KEY .req x0
13
MESSAGE .req x1
14
MESSAGE_LEN .req x2
15
HASH .req x3
16
17
PASS0_SUMS .req v0
18
PASS1_SUMS .req v1
19
PASS2_SUMS .req v2
20
PASS3_SUMS .req v3
21
K0 .req v4
22
K1 .req v5
23
K2 .req v6
24
K3 .req v7
25
T0 .req v8
26
T1 .req v9
27
T2 .req v10
28
T3 .req v11
29
T4 .req v12
30
T5 .req v13
31
T6 .req v14
32
T7 .req v15
33
34
/*
 * _nh_stride k0, k1, k2, k3
 *
 * Process one 16-byte message stride for all four passes.
 *
 * k0, k1, k2:	registers already holding key strides, used for passes 0-2.
 * k3:		register that receives the next 16 bytes loaded from KEY and is
 *		then used for pass 3.
 *
 * Side effects: MESSAGE and KEY are each post-incremented by 16, T0-T7 are
 * overwritten, and PASS0_SUMS-PASS3_SUMS are accumulated into.
 */
	.macro		_nh_stride	k0, k1, k2, k3

	// Load next message stride
	ld1		{T3.16b}, [MESSAGE], #16

	// Load next key stride
	ld1		{\k3\().4s}, [KEY], #16

	// Add message words to key words (per-lane 32-bit adds).  T3 is
	// both the message source and the pass-3 destination, so it must be
	// written last.
	add		T0.4s, T3.4s, \k0\().4s
	add		T1.4s, T3.4s, \k1\().4s
	add		T2.4s, T3.4s, \k2\().4s
	add		T3.4s, T3.4s, \k3\().4s

	// Multiply 32x32 => 64 and accumulate.
	// umlal on .2s operands reads only the low 64 bits of each source, so
	// the upper half of every sum vector is first copied into the low half
	// of a second register.  The result pairs word 0 with word 2 and
	// word 1 with word 3.
	mov		T4.d[0], T0.d[1]
	mov		T5.d[0], T1.d[1]
	mov		T6.d[0], T2.d[1]
	mov		T7.d[0], T3.d[1]
	umlal		PASS0_SUMS.2d, T0.2s, T4.2s
	umlal		PASS1_SUMS.2d, T1.2s, T5.2s
	umlal		PASS2_SUMS.2d, T2.2s, T6.2s
	umlal		PASS3_SUMS.2d, T3.2s, T7.2s
	.endm

/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * In:	x0 = key, x1 = message, x2 = message_len, x3 = hash
 * Out:	32 bytes stored at hash: the four pass sums, pass 0 first
 * Clobbers: x0-x2, v0-v15, condition flags
 *
 * The key is consumed 16 bytes per message stride, after an initial 48 bytes
 * that are preloaded before the first stride.
 */
SYM_FUNC_START(nh_neon)

	// Preload the first three key strides and zero the accumulators.
	ld1		{K0.4s,K1.4s}, [KEY], #32
	  movi		PASS0_SUMS.2d, #0
	  movi		PASS1_SUMS.2d, #0
	ld1		{K2.4s}, [KEY], #16
	  movi		PASS2_SUMS.2d, #0
	  movi		PASS3_SUMS.2d, #0

	// MESSAGE_LEN is kept biased by -64 while in the main loop: a
	// negative value means fewer than 64 bytes remain.
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Lloop4_done
.Lloop4:
	// Four strides per iteration.  The key registers rotate one position
	// per stride, so they are back in K0..K3 order for the next iteration.
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lloop4

.Lloop4_done:
	// Undo the -64 bias: the low six bits are the number of bytes left,
	// which is 0, 16, 32 or 48 given message_len % 16 == 0.
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Ldone
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K2, K3, K0, K1

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'.
	// addp adds the two 64-bit lanes of each source, so T0 = {pass0, pass1}
	// and T1 = {pass2, pass3}.
	addp		T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1		{T0.16b,T1.16b}, [HASH]
	ret
SYM_FUNC_END(nh_neon)