Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-compute/src/comparisons/view.rs
6939 views
1
use arrow::array::{BinaryViewArray, Utf8ViewArray};
2
use arrow::bitmap::Bitmap;
3
4
use super::TotalEqKernel;
5
use crate::comparisons::TotalOrdKernel;
6
7
// If s fits in 12 bytes, returns the view encoding it would have in a
8
// BinaryViewArray.
9
fn small_view_encoding(s: &[u8]) -> Option<u128> {
10
if s.len() > 12 {
11
return None;
12
}
13
14
let mut tmp = [0u8; 16];
15
tmp[0] = s.len() as u8;
16
tmp[4..4 + s.len()].copy_from_slice(s);
17
Some(u128::from_le_bytes(tmp))
18
}
19
20
// Loads (up to) the first 4 bytes of s as little-endian, padded with zeros.
21
fn load_prefix(s: &[u8]) -> u32 {
22
let start = &s[..s.len().min(4)];
23
let mut tmp = [0u8; 4];
24
tmp[..start.len()].copy_from_slice(start);
25
u32::from_le_bytes(tmp)
26
}
27
28
fn broadcast_inequality(
29
arr: &BinaryViewArray,
30
scalar: &[u8],
31
cmp_prefix: impl Fn(u32, u32) -> bool,
32
cmp_str: impl Fn(&[u8], &[u8]) -> bool,
33
) -> Bitmap {
34
let views = arr.views().as_slice();
35
let prefix = load_prefix(scalar);
36
let be_prefix = prefix.to_be();
37
Bitmap::from_trusted_len_iter((0..arr.len()).map(|i| unsafe {
38
let v_prefix = (views.get_unchecked(i).as_u128() >> 32) as u32;
39
if v_prefix != prefix {
40
cmp_prefix(v_prefix.to_be(), be_prefix)
41
} else {
42
cmp_str(arr.value_unchecked(i), scalar)
43
}
44
}))
45
}
46
47
impl TotalEqKernel for BinaryViewArray {
48
type Scalar = [u8];
49
50
fn tot_eq_kernel(&self, other: &Self) -> Bitmap {
51
debug_assert!(self.len() == other.len());
52
53
let slf_views = self.views().as_slice();
54
let other_views = other.views().as_slice();
55
56
Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
57
let av = slf_views.get_unchecked(i).as_u128();
58
let bv = other_views.get_unchecked(i).as_u128();
59
60
// First 64 bits contain length and prefix.
61
let a_len_prefix = av as u64;
62
let b_len_prefix = bv as u64;
63
if a_len_prefix != b_len_prefix {
64
return false;
65
}
66
67
let alen = av as u32;
68
if alen <= 12 {
69
// String is fully inlined, compare top 64 bits. Bottom bits were
70
// tested equal before, which also ensures the lengths are equal.
71
(av >> 64) as u64 == (bv >> 64) as u64
72
} else {
73
self.value_unchecked(i) == other.value_unchecked(i)
74
}
75
}))
76
}
77
78
fn tot_ne_kernel(&self, other: &Self) -> Bitmap {
79
debug_assert!(self.len() == other.len());
80
81
let slf_views = self.views().as_slice();
82
let other_views = other.views().as_slice();
83
84
Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
85
let av = slf_views.get_unchecked(i).as_u128();
86
let bv = other_views.get_unchecked(i).as_u128();
87
88
// First 64 bits contain length and prefix.
89
let a_len_prefix = av as u64;
90
let b_len_prefix = bv as u64;
91
if a_len_prefix != b_len_prefix {
92
return true;
93
}
94
95
let alen = av as u32;
96
if alen <= 12 {
97
// String is fully inlined, compare top 64 bits. Bottom bits were
98
// tested equal before, which also ensures the lengths are equal.
99
(av >> 64) as u64 != (bv >> 64) as u64
100
} else {
101
self.value_unchecked(i) != other.value_unchecked(i)
102
}
103
}))
104
}
105
106
fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
107
if let Some(val) = small_view_encoding(other) {
108
Bitmap::from_trusted_len_iter(self.views().iter().map(|v| v.as_u128() == val))
109
} else {
110
let slf_views = self.views().as_slice();
111
let prefix = u32::from_le_bytes(other[..4].try_into().unwrap());
112
let prefix_len = ((prefix as u64) << 32) | other.len() as u64;
113
Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
114
let v_prefix_len = slf_views.get_unchecked(i).as_u128() as u64;
115
if v_prefix_len != prefix_len {
116
false
117
} else {
118
self.value_unchecked(i) == other
119
}
120
}))
121
}
122
}
123
124
fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
125
if let Some(val) = small_view_encoding(other) {
126
Bitmap::from_trusted_len_iter(self.views().iter().map(|v| v.as_u128() != val))
127
} else {
128
let slf_views = self.views().as_slice();
129
let prefix = u32::from_le_bytes(other[..4].try_into().unwrap());
130
let prefix_len = ((prefix as u64) << 32) | other.len() as u64;
131
Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
132
let v_prefix_len = slf_views.get_unchecked(i).as_u128() as u64;
133
if v_prefix_len != prefix_len {
134
true
135
} else {
136
self.value_unchecked(i) != other
137
}
138
}))
139
}
140
}
141
}
142
143
impl TotalOrdKernel for BinaryViewArray {
144
type Scalar = [u8];
145
146
fn tot_lt_kernel(&self, other: &Self) -> Bitmap {
147
debug_assert!(self.len() == other.len());
148
149
let slf_views = self.views().as_slice();
150
let other_views = other.views().as_slice();
151
152
Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
153
let av = slf_views.get_unchecked(i).as_u128();
154
let bv = other_views.get_unchecked(i).as_u128();
155
156
// First 64 bits contain length and prefix.
157
// Only check prefix.
158
let a_prefix = (av >> 32) as u32;
159
let b_prefix = (bv >> 32) as u32;
160
if a_prefix != b_prefix {
161
a_prefix.to_be() < b_prefix.to_be()
162
} else {
163
self.value_unchecked(i) < other.value_unchecked(i)
164
}
165
}))
166
}
167
168
fn tot_le_kernel(&self, other: &Self) -> Bitmap {
169
debug_assert!(self.len() == other.len());
170
171
let slf_views = self.views().as_slice();
172
let other_views = other.views().as_slice();
173
174
Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
175
let av = slf_views.get_unchecked(i).as_u128();
176
let bv = other_views.get_unchecked(i).as_u128();
177
178
// First 64 bits contain length and prefix.
179
// Only check prefix.
180
let a_prefix = (av >> 32) as u32;
181
let b_prefix = (bv >> 32) as u32;
182
if a_prefix != b_prefix {
183
a_prefix.to_be() < b_prefix.to_be()
184
} else {
185
self.value_unchecked(i) <= other.value_unchecked(i)
186
}
187
}))
188
}
189
190
fn tot_lt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
191
broadcast_inequality(self, other, |a, b| a < b, |a, b| a < b)
192
}
193
194
fn tot_le_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
195
broadcast_inequality(self, other, |a, b| a <= b, |a, b| a <= b)
196
}
197
198
fn tot_gt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
199
broadcast_inequality(self, other, |a, b| a > b, |a, b| a > b)
200
}
201
202
fn tot_ge_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
203
broadcast_inequality(self, other, |a, b| a >= b, |a, b| a >= b)
204
}
205
}
206
207
impl TotalEqKernel for Utf8ViewArray {
208
type Scalar = str;
209
210
fn tot_eq_kernel(&self, other: &Self) -> Bitmap {
211
self.to_binview().tot_eq_kernel(&other.to_binview())
212
}
213
214
fn tot_ne_kernel(&self, other: &Self) -> Bitmap {
215
self.to_binview().tot_ne_kernel(&other.to_binview())
216
}
217
218
fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
219
self.to_binview().tot_eq_kernel_broadcast(other.as_bytes())
220
}
221
222
fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
223
self.to_binview().tot_ne_kernel_broadcast(other.as_bytes())
224
}
225
}
226
227
impl TotalOrdKernel for Utf8ViewArray {
228
type Scalar = str;
229
230
fn tot_lt_kernel(&self, other: &Self) -> Bitmap {
231
self.to_binview().tot_lt_kernel(&other.to_binview())
232
}
233
234
fn tot_le_kernel(&self, other: &Self) -> Bitmap {
235
self.to_binview().tot_le_kernel(&other.to_binview())
236
}
237
238
fn tot_lt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
239
self.to_binview().tot_lt_kernel_broadcast(other.as_bytes())
240
}
241
242
fn tot_le_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
243
self.to_binview().tot_le_kernel_broadcast(other.as_bytes())
244
}
245
246
fn tot_gt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
247
self.to_binview().tot_gt_kernel_broadcast(other.as_bytes())
248
}
249
250
fn tot_ge_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
251
self.to_binview().tot_ge_kernel_broadcast(other.as_bytes())
252
}
253
}
254
255