Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-utils/src/clmul.rs
6939 views
1
#[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
2
fn intel_clmul64(x: u64, y: u64) -> u64 {
3
use core::arch::x86_64::*;
4
unsafe {
5
// SAFETY: we have the target feature.
6
_mm_cvtsi128_si64(_mm_clmulepi64_si128(
7
_mm_cvtsi64_si128(x as i64),
8
_mm_cvtsi64_si128(y as i64),
9
0,
10
)) as u64
11
}
12
}
13
14
#[cfg(all(
15
target_arch = "aarch64",
16
target_feature = "neon",
17
target_feature = "aes"
18
))]
19
fn arm_clmul64(x: u64, y: u64) -> u64 {
20
unsafe {
21
// SAFETY: we have the target feature.
22
use core::arch::aarch64::*;
23
vmull_p64(x, y) as u64
24
}
25
}
26
27
#[inline]
28
pub fn portable_clmul64(x: u64, mut y: u64) -> u64 {
29
let mut out = 0;
30
while y > 0 {
31
let lsb = y & y.wrapping_neg();
32
out ^= x.wrapping_mul(lsb);
33
y ^= lsb;
34
}
35
out
36
}
37
38
// Computes the carryless multiplication of x and y.
39
#[inline]
40
pub fn clmul64(x: u64, y: u64) -> u64 {
41
#[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
42
return intel_clmul64(x, y);
43
44
#[cfg(all(
45
target_arch = "aarch64",
46
target_feature = "neon",
47
target_feature = "aes"
48
))]
49
return arm_clmul64(x, y);
50
51
#[allow(unreachable_code)]
52
portable_clmul64(x, y)
53
}
54
55
#[inline]
56
pub fn portable_prefix_xorsum(x: u64) -> u64 {
57
portable_prefix_xorsum_inclusive(x << 1)
58
}
59
60
// Computes for each bit i the XOR of bits[0..i].
61
#[inline]
62
pub fn prefix_xorsum(x: u64) -> u64 {
63
#[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
64
return intel_clmul64(x, u64::MAX ^ 1);
65
66
#[cfg(all(
67
target_arch = "aarch64",
68
target_feature = "neon",
69
target_feature = "aes"
70
))]
71
return arm_clmul64(x, u64::MAX ^ 1);
72
73
#[allow(unreachable_code)]
74
portable_prefix_xorsum(x)
75
}
76
77
#[inline]
78
pub fn portable_prefix_xorsum_inclusive(mut x: u64) -> u64 {
79
for i in 0..6 {
80
x ^= x << (1 << i);
81
}
82
x
83
}
84
85
// Computes for each bit i the XOR of bits[0..=i].
86
#[inline]
87
pub fn prefix_xorsum_inclusive(x: u64) -> u64 {
88
#[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
89
return intel_clmul64(x, u64::MAX);
90
91
#[cfg(all(
92
target_arch = "aarch64",
93
target_feature = "neon",
94
target_feature = "aes"
95
))]
96
return arm_clmul64(x, u64::MAX);
97
98
#[allow(unreachable_code)]
99
portable_prefix_xorsum_inclusive(x)
100
}
101
102
#[cfg(test)]
103
mod test {
104
use rand::prelude::*;
105
106
use super::*;
107
108
#[test]
109
fn test_clmul() {
110
// Verify platform-specific clmul to portable.
111
let mut rng = StdRng::seed_from_u64(0xdeadbeef);
112
for _ in 0..100 {
113
let x = rng.random();
114
let y = rng.random();
115
assert_eq!(portable_clmul64(x, y), clmul64(x, y));
116
}
117
118
// Verify portable clmul for known test vectors.
119
assert_eq!(
120
portable_clmul64(0x8b44729195dde0ef, 0xb976c5ae2726fab0),
121
0x4ae14eae84899290
122
);
123
assert_eq!(
124
portable_clmul64(0x399b6ed00c44b301, 0x693341db5acb2ff0),
125
0x48dfa88344823ff0
126
);
127
assert_eq!(
128
portable_clmul64(0xdf4c9f6e60deb640, 0x6d4bcdb217ac4880),
129
0x7300ffe474792000
130
);
131
assert_eq!(
132
portable_clmul64(0xa7adf3c53a200a51, 0x818cb40fe11b431e),
133
0x6a280181d521797e
134
);
135
assert_eq!(
136
portable_clmul64(0x5e78e12b744f228c, 0x4225ff19e9273266),
137
0xa48b73cafb9665a8
138
);
139
}
140
141
#[test]
142
fn test_prefix_xorsum() {
143
// Verify platform-specific prefix_xorsum to portable.
144
let mut rng = StdRng::seed_from_u64(0xdeadbeef);
145
for _ in 0..100 {
146
let x = rng.random();
147
assert_eq!(portable_prefix_xorsum(x), prefix_xorsum(x));
148
}
149
150
// Verify portable prefix_xorsum for known test vectors.
151
assert_eq!(
152
portable_prefix_xorsum(0x8b44729195dde0ef),
153
0x0d87a31ee696bf4a
154
);
155
assert_eq!(
156
portable_prefix_xorsum(0xb976c5ae2726fab0),
157
0x2e5b79343a3b5320
158
);
159
assert_eq!(
160
portable_prefix_xorsum(0x399b6ed00c44b301),
161
0xd1124b600878ddfe
162
);
163
assert_eq!(
164
portable_prefix_xorsum(0x693341db5acb2ff0),
165
0x4e227e926c8dcaa0
166
);
167
assert_eq!(
168
portable_prefix_xorsum(0xdf4c9f6e60deb640),
169
0x6a7715b44094db80
170
);
171
}
172
}
173
174