Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/bitmap/bitmap_ops.rs
6939 views
1
use std::ops::{BitAnd, BitOr, BitXor, Not};
2
3
use super::Bitmap;
4
use super::utils::{BitChunk, BitChunkIterExact, BitChunksExact};
5
use crate::bitmap::MutableBitmap;
6
use crate::trusted_len::TrustedLen;
7
8
#[inline(always)]
9
pub(crate) fn push_bitchunk<T: BitChunk>(buffer: &mut Vec<u8>, value: T) {
10
buffer.extend(value.to_ne_bytes())
11
}
12
13
/// Creates a [`Vec<u8>`] from a [`TrustedLen`] of [`BitChunk`].
14
pub fn chunk_iter_to_vec<T: BitChunk, I: TrustedLen<Item = T>>(iter: I) -> Vec<u8> {
15
let cap = iter.size_hint().0 * size_of::<T>();
16
let mut buffer = Vec::with_capacity(cap);
17
for v in iter {
18
push_bitchunk(&mut buffer, v)
19
}
20
buffer
21
}
22
23
fn chunk_iter_to_vec_and_remainder<T: BitChunk, I: TrustedLen<Item = T>>(
24
iter: I,
25
remainder: T,
26
) -> Vec<u8> {
27
let cap = (iter.size_hint().0 + 1) * size_of::<T>();
28
let mut buffer = Vec::with_capacity(cap);
29
for v in iter {
30
push_bitchunk(&mut buffer, v)
31
}
32
push_bitchunk(&mut buffer, remainder);
33
debug_assert_eq!(buffer.len(), cap);
34
buffer
35
}
36
37
/// Apply a bitwise operation `op` to four inputs and return the result as a [`Bitmap`].
38
pub fn quaternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, a4: &Bitmap, op: F) -> Bitmap
39
where
40
F: Fn(u64, u64, u64, u64) -> u64,
41
{
42
assert_eq!(a1.len(), a2.len());
43
assert_eq!(a1.len(), a3.len());
44
assert_eq!(a1.len(), a4.len());
45
let a1_chunks = a1.chunks();
46
let a2_chunks = a2.chunks();
47
let a3_chunks = a3.chunks();
48
let a4_chunks = a4.chunks();
49
50
let rem_a1 = a1_chunks.remainder();
51
let rem_a2 = a2_chunks.remainder();
52
let rem_a3 = a3_chunks.remainder();
53
let rem_a4 = a4_chunks.remainder();
54
55
let chunks = a1_chunks
56
.zip(a2_chunks)
57
.zip(a3_chunks)
58
.zip(a4_chunks)
59
.map(|(((a1, a2), a3), a4)| op(a1, a2, a3, a4));
60
61
let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3, rem_a4));
62
let length = a1.len();
63
64
Bitmap::from_u8_vec(buffer, length)
65
}
66
67
/// Apply a bitwise operation `op` to three inputs and return the result as a [`Bitmap`].
68
pub fn ternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, op: F) -> Bitmap
69
where
70
F: Fn(u64, u64, u64) -> u64,
71
{
72
assert_eq!(a1.len(), a2.len());
73
assert_eq!(a1.len(), a3.len());
74
let a1_chunks = a1.chunks();
75
let a2_chunks = a2.chunks();
76
let a3_chunks = a3.chunks();
77
78
let rem_a1 = a1_chunks.remainder();
79
let rem_a2 = a2_chunks.remainder();
80
let rem_a3 = a3_chunks.remainder();
81
82
let chunks = a1_chunks
83
.zip(a2_chunks)
84
.zip(a3_chunks)
85
.map(|((a1, a2), a3)| op(a1, a2, a3));
86
87
let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3));
88
let length = a1.len();
89
90
Bitmap::from_u8_vec(buffer, length)
91
}
92
93
/// Apply a bitwise operation `op` to two inputs and return the result as a [`Bitmap`].
94
pub fn binary<F>(lhs: &Bitmap, rhs: &Bitmap, op: F) -> Bitmap
95
where
96
F: Fn(u64, u64) -> u64,
97
{
98
assert_eq!(lhs.len(), rhs.len());
99
let lhs_chunks = lhs.chunks();
100
let rhs_chunks = rhs.chunks();
101
let rem_lhs = lhs_chunks.remainder();
102
let rem_rhs = rhs_chunks.remainder();
103
104
let chunks = lhs_chunks
105
.zip(rhs_chunks)
106
.map(|(left, right)| op(left, right));
107
108
let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_lhs, rem_rhs));
109
let length = lhs.len();
110
111
Bitmap::from_u8_vec(buffer, length)
112
}
113
114
/// Apply a bitwise operation `op` to two inputs and fold the result.
115
pub fn binary_fold<B, F, R>(lhs: &Bitmap, rhs: &Bitmap, op: F, init: B, fold: R) -> B
116
where
117
F: Fn(u64, u64) -> B,
118
R: Fn(B, B) -> B,
119
{
120
assert_eq!(lhs.len(), rhs.len());
121
let lhs_chunks = lhs.chunks();
122
let rhs_chunks = rhs.chunks();
123
let rem_lhs = lhs_chunks.remainder();
124
let rem_rhs = rhs_chunks.remainder();
125
126
let result = lhs_chunks
127
.zip(rhs_chunks)
128
.fold(init, |prev, (left, right)| fold(prev, op(left, right)));
129
130
fold(result, op(rem_lhs, rem_rhs))
131
}
132
133
/// Apply a bitwise operation `op` to two inputs and fold the result.
134
pub fn binary_fold_mut<B, F, R>(
135
lhs: &MutableBitmap,
136
rhs: &MutableBitmap,
137
op: F,
138
init: B,
139
fold: R,
140
) -> B
141
where
142
F: Fn(u64, u64) -> B,
143
R: Fn(B, B) -> B,
144
{
145
assert_eq!(lhs.len(), rhs.len());
146
let lhs_chunks = lhs.chunks();
147
let rhs_chunks = rhs.chunks();
148
let rem_lhs = lhs_chunks.remainder();
149
let rem_rhs = rhs_chunks.remainder();
150
151
let result = lhs_chunks
152
.zip(rhs_chunks)
153
.fold(init, |prev, (left, right)| fold(prev, op(left, right)));
154
155
fold(result, op(rem_lhs, rem_rhs))
156
}
157
158
fn unary_impl<F, I>(iter: I, op: F, length: usize) -> Bitmap
159
where
160
I: BitChunkIterExact<u64>,
161
F: Fn(u64) -> u64,
162
{
163
let rem = op(iter.remainder());
164
let buffer = chunk_iter_to_vec_and_remainder(iter.map(op), rem);
165
166
Bitmap::from_u8_vec(buffer, length)
167
}
168
169
/// Apply a bitwise operation `op` to one input and return the result as a [`Bitmap`].
170
pub fn unary<F>(lhs: &Bitmap, op: F) -> Bitmap
171
where
172
F: Fn(u64) -> u64,
173
{
174
let (slice, offset, length) = lhs.as_slice();
175
if offset == 0 {
176
let iter = BitChunksExact::<u64>::new(slice, length);
177
unary_impl(iter, op, lhs.len())
178
} else {
179
let iter = lhs.chunks::<u64>();
180
unary_impl(iter, op, lhs.len())
181
}
182
}
183
184
// create a new [`Bitmap`] semantically equal to ``bitmap`` but with an offset equal to ``offset``
185
pub(crate) fn align(bitmap: &Bitmap, new_offset: usize) -> Bitmap {
186
let length = bitmap.len();
187
188
let bitmap: Bitmap = std::iter::repeat_n(false, new_offset)
189
.chain(bitmap.iter())
190
.collect();
191
192
bitmap.sliced(new_offset, length)
193
}
194
195
/// Compute bitwise A AND B operation.
196
pub fn and(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
197
if lhs.unset_bits() == lhs.len() || rhs.unset_bits() == rhs.len() {
198
assert_eq!(lhs.len(), rhs.len());
199
Bitmap::new_zeroed(lhs.len())
200
} else {
201
binary(lhs, rhs, |x, y| x & y)
202
}
203
}
204
205
/// Compute bitwise A AND NOT B operation.
206
pub fn and_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
207
binary(lhs, rhs, |x, y| x & !y)
208
}
209
210
/// Compute bitwise A OR B operation.
211
pub fn or(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
212
if lhs.unset_bits() == 0 || rhs.unset_bits() == 0 {
213
assert_eq!(lhs.len(), rhs.len());
214
let mut mutable = MutableBitmap::with_capacity(lhs.len());
215
mutable.extend_constant(lhs.len(), true);
216
mutable.into()
217
} else {
218
binary(lhs, rhs, |x, y| x | y)
219
}
220
}
221
222
/// Compute bitwise A OR NOT B operation.
223
pub fn or_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
224
binary(lhs, rhs, |x, y| x | !y)
225
}
226
227
/// Compute bitwise XOR operation.
228
pub fn xor(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
229
let lhs_nulls = lhs.unset_bits();
230
let rhs_nulls = rhs.unset_bits();
231
232
// all false or all true
233
if lhs_nulls == rhs_nulls && rhs_nulls == rhs.len() || lhs_nulls == 0 && rhs_nulls == 0 {
234
assert_eq!(lhs.len(), rhs.len());
235
Bitmap::new_zeroed(rhs.len())
236
}
237
// all false and all true or vice versa
238
else if (lhs_nulls == 0 && rhs_nulls == rhs.len())
239
|| (lhs_nulls == lhs.len() && rhs_nulls == 0)
240
{
241
assert_eq!(lhs.len(), rhs.len());
242
let mut mutable = MutableBitmap::with_capacity(lhs.len());
243
mutable.extend_constant(lhs.len(), true);
244
mutable.into()
245
} else {
246
binary(lhs, rhs, |x, y| x ^ y)
247
}
248
}
249
250
/// Compute bitwise equality (not XOR) operation.
251
fn eq(lhs: &Bitmap, rhs: &Bitmap) -> bool {
252
if lhs.len() != rhs.len() {
253
return false;
254
}
255
256
let mut lhs_chunks = lhs.chunks::<u64>();
257
let mut rhs_chunks = rhs.chunks::<u64>();
258
259
let equal_chunks = lhs_chunks
260
.by_ref()
261
.zip(rhs_chunks.by_ref())
262
.all(|(left, right)| left == right);
263
264
if !equal_chunks {
265
return false;
266
}
267
let lhs_remainder = lhs_chunks.remainder_iter();
268
let rhs_remainder = rhs_chunks.remainder_iter();
269
lhs_remainder.zip(rhs_remainder).all(|(x, y)| x == y)
270
}
271
272
pub fn num_intersections_with(lhs: &Bitmap, rhs: &Bitmap) -> usize {
273
binary_fold(
274
lhs,
275
rhs,
276
|lhs, rhs| (lhs & rhs).count_ones() as usize,
277
0,
278
|lhs, rhs| lhs + rhs,
279
)
280
}
281
282
pub fn intersects_with(lhs: &Bitmap, rhs: &Bitmap) -> bool {
283
binary_fold(
284
lhs,
285
rhs,
286
|lhs, rhs| lhs & rhs != 0,
287
false,
288
|lhs, rhs| lhs || rhs,
289
)
290
}
291
292
pub fn intersects_with_mut(lhs: &MutableBitmap, rhs: &MutableBitmap) -> bool {
293
binary_fold_mut(
294
lhs,
295
rhs,
296
|lhs, rhs| lhs & rhs != 0,
297
false,
298
|lhs, rhs| lhs || rhs,
299
)
300
}
301
302
pub fn num_edges(lhs: &Bitmap) -> usize {
303
if lhs.is_empty() {
304
return 0;
305
}
306
307
// @TODO: If is probably quite inefficient to do it like this because now either one is not
308
// aligned. Maybe, we can implement a smarter way to do this.
309
binary_fold(
310
&unsafe { lhs.clone().sliced_unchecked(0, lhs.len() - 1) },
311
&unsafe { lhs.clone().sliced_unchecked(1, lhs.len() - 1) },
312
|l, r| (l ^ r).count_ones() as usize,
313
0,
314
|acc, v| acc + v,
315
)
316
}
317
318
/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy }`.
319
pub fn select_constant(selector: &Bitmap, truthy: &Bitmap, falsy: bool) -> Bitmap {
320
let falsy_mask: u64 = if falsy {
321
0xFFFF_FFFF_FFFF_FFFF
322
} else {
323
0x0000_0000_0000_0000
324
};
325
326
binary(selector, truthy, |s, t| (s & t) | (!s & falsy_mask))
327
}
328
329
/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy[i] }`.
330
pub fn select(selector: &Bitmap, truthy: &Bitmap, falsy: &Bitmap) -> Bitmap {
331
ternary(selector, truthy, falsy, |s, t, f| (s & t) | (!s & f))
332
}
333
334
impl PartialEq for Bitmap {
335
fn eq(&self, other: &Self) -> bool {
336
eq(self, other)
337
}
338
}
339
340
impl<'b> BitOr<&'b Bitmap> for &Bitmap {
341
type Output = Bitmap;
342
343
fn bitor(self, rhs: &'b Bitmap) -> Bitmap {
344
or(self, rhs)
345
}
346
}
347
348
impl<'b> BitAnd<&'b Bitmap> for &Bitmap {
349
type Output = Bitmap;
350
351
fn bitand(self, rhs: &'b Bitmap) -> Bitmap {
352
and(self, rhs)
353
}
354
}
355
356
impl<'b> BitXor<&'b Bitmap> for &Bitmap {
357
type Output = Bitmap;
358
359
fn bitxor(self, rhs: &'b Bitmap) -> Bitmap {
360
xor(self, rhs)
361
}
362
}
363
364
impl Not for &Bitmap {
365
type Output = Bitmap;
366
367
fn not(self) -> Bitmap {
368
unary(self, |a| !a)
369
}
370
}
371
372
#[cfg(test)]
373
mod tests {
374
use proptest::prelude::*;
375
376
use super::*;
377
use crate::bitmap::proptest::bitmap;
378
379
fn two_equal_length_bitmaps() -> impl Strategy<Value = (Bitmap, Bitmap)> {
380
(1..=250usize).prop_flat_map(|length| {
381
(
382
bitmap(length..300),
383
bitmap(length..300),
384
0..length,
385
0..length,
386
)
387
.prop_flat_map(move |(lhs, rhs, lhs_offset, rhs_offset)| {
388
(0..usize::min(length - lhs_offset, length - rhs_offset)).prop_map(
389
move |slice_length| {
390
(
391
lhs.clone().sliced(lhs_offset, slice_length),
392
rhs.clone().sliced(rhs_offset, slice_length),
393
)
394
},
395
)
396
})
397
})
398
}
399
400
proptest! {
401
#[test]
402
fn test_num_intersections_with(
403
(lhs, rhs) in two_equal_length_bitmaps()
404
) {
405
let kernel_out = num_intersections_with(&lhs, &rhs);
406
let mut reference_out = 0;
407
for (l, r) in lhs.iter().zip(rhs.iter()) {
408
reference_out += usize::from(l & r);
409
}
410
411
prop_assert_eq!(kernel_out, reference_out);
412
}
413
}
414
}
415
416