Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/bitmap/bitmap_ops.rs
8418 views
1
use std::ops::{BitAnd, BitOr, BitXor, Not};
2
3
use super::Bitmap;
4
use super::bitmask::BitMask;
5
use super::utils::{BitChunk, BitChunkIterExact, BitChunksExact};
6
use crate::bitmap::MutableBitmap;
7
use crate::trusted_len::TrustedLen;
8
9
#[inline(always)]
10
pub(crate) fn push_bitchunk<T: BitChunk>(buffer: &mut Vec<u8>, value: T) {
11
buffer.extend(value.to_ne_bytes())
12
}
13
14
/// Creates a [`Vec<u8>`] from a [`TrustedLen`] of [`BitChunk`].
15
pub fn chunk_iter_to_vec<T: BitChunk, I: TrustedLen<Item = T>>(iter: I) -> Vec<u8> {
16
let cap = iter.size_hint().0 * size_of::<T>();
17
let mut buffer = Vec::with_capacity(cap);
18
for v in iter {
19
push_bitchunk(&mut buffer, v)
20
}
21
buffer
22
}
23
24
fn chunk_iter_to_vec_and_remainder<T: BitChunk, I: TrustedLen<Item = T>>(
25
iter: I,
26
remainder: T,
27
) -> Vec<u8> {
28
let cap = (iter.size_hint().0 + 1) * size_of::<T>();
29
let mut buffer = Vec::with_capacity(cap);
30
for v in iter {
31
push_bitchunk(&mut buffer, v)
32
}
33
push_bitchunk(&mut buffer, remainder);
34
debug_assert_eq!(buffer.len(), cap);
35
buffer
36
}
37
38
/// Apply a bitwise operation `op` to four inputs and return the result as a [`Bitmap`].
39
pub fn quaternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, a4: &Bitmap, op: F) -> Bitmap
40
where
41
F: Fn(u64, u64, u64, u64) -> u64,
42
{
43
assert_eq!(a1.len(), a2.len());
44
assert_eq!(a1.len(), a3.len());
45
assert_eq!(a1.len(), a4.len());
46
let a1_chunks = a1.chunks();
47
let a2_chunks = a2.chunks();
48
let a3_chunks = a3.chunks();
49
let a4_chunks = a4.chunks();
50
51
let rem_a1 = a1_chunks.remainder();
52
let rem_a2 = a2_chunks.remainder();
53
let rem_a3 = a3_chunks.remainder();
54
let rem_a4 = a4_chunks.remainder();
55
56
let chunks = a1_chunks
57
.zip(a2_chunks)
58
.zip(a3_chunks)
59
.zip(a4_chunks)
60
.map(|(((a1, a2), a3), a4)| op(a1, a2, a3, a4));
61
62
let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3, rem_a4));
63
let length = a1.len();
64
65
Bitmap::from_u8_vec(buffer, length)
66
}
67
68
/// Apply a bitwise operation `op` to three inputs and return the result as a [`Bitmap`].
69
pub fn ternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, op: F) -> Bitmap
70
where
71
F: Fn(u64, u64, u64) -> u64,
72
{
73
assert_eq!(a1.len(), a2.len());
74
assert_eq!(a1.len(), a3.len());
75
let a1_chunks = a1.chunks();
76
let a2_chunks = a2.chunks();
77
let a3_chunks = a3.chunks();
78
79
let rem_a1 = a1_chunks.remainder();
80
let rem_a2 = a2_chunks.remainder();
81
let rem_a3 = a3_chunks.remainder();
82
83
let chunks = a1_chunks
84
.zip(a2_chunks)
85
.zip(a3_chunks)
86
.map(|((a1, a2), a3)| op(a1, a2, a3));
87
88
let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3));
89
let length = a1.len();
90
91
Bitmap::from_u8_vec(buffer, length)
92
}
93
94
/// Apply a bitwise operation `op` to two inputs and return the result as a [`Bitmap`].
95
pub fn binary<F>(lhs: &Bitmap, rhs: &Bitmap, op: F) -> Bitmap
96
where
97
F: Fn(u64, u64) -> u64,
98
{
99
assert_eq!(lhs.len(), rhs.len());
100
let lhs_chunks = lhs.chunks();
101
let rhs_chunks = rhs.chunks();
102
let rem_lhs = lhs_chunks.remainder();
103
let rem_rhs = rhs_chunks.remainder();
104
105
let chunks = lhs_chunks
106
.zip(rhs_chunks)
107
.map(|(left, right)| op(left, right));
108
109
let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_lhs, rem_rhs));
110
let length = lhs.len();
111
112
Bitmap::from_u8_vec(buffer, length)
113
}
114
115
/// Apply a bitwise operation `op` to two inputs and fold the result.
116
pub fn binary_fold<B, F, R>(lhs: &Bitmap, rhs: &Bitmap, op: F, init: B, fold: R) -> B
117
where
118
F: Fn(u64, u64) -> B,
119
R: Fn(B, B) -> B,
120
{
121
assert_eq!(lhs.len(), rhs.len());
122
let lhs_chunks = lhs.chunks();
123
let rhs_chunks = rhs.chunks();
124
let rem_lhs = lhs_chunks.remainder();
125
let rem_rhs = rhs_chunks.remainder();
126
127
let result = lhs_chunks
128
.zip(rhs_chunks)
129
.fold(init, |prev, (left, right)| fold(prev, op(left, right)));
130
131
fold(result, op(rem_lhs, rem_rhs))
132
}
133
134
/// Apply a bitwise operation `op` to two inputs and fold the result.
135
pub fn binary_mask_fold<B, F, R>(lhs: BitMask<'_>, rhs: BitMask<'_>, op: F, init: B, fold: R) -> B
136
where
137
F: Fn(u64, u64) -> B,
138
R: Fn(B, B) -> B,
139
{
140
assert_eq!(lhs.len(), rhs.len());
141
let lhs_chunks = lhs.chunks();
142
let rhs_chunks = rhs.chunks();
143
let rem_lhs = lhs_chunks.remainder();
144
let rem_rhs = rhs_chunks.remainder();
145
146
let result = lhs_chunks
147
.zip(rhs_chunks)
148
.fold(init, |prev, (left, right)| fold(prev, op(left, right)));
149
150
fold(result, op(rem_lhs, rem_rhs))
151
}
152
153
/// Apply a bitwise operation `op` to two inputs and fold the result.
154
pub fn binary_fold_mut<B, F, R>(
155
lhs: &MutableBitmap,
156
rhs: &MutableBitmap,
157
op: F,
158
init: B,
159
fold: R,
160
) -> B
161
where
162
F: Fn(u64, u64) -> B,
163
R: Fn(B, B) -> B,
164
{
165
assert_eq!(lhs.len(), rhs.len());
166
let lhs_chunks = lhs.chunks();
167
let rhs_chunks = rhs.chunks();
168
let rem_lhs = lhs_chunks.remainder();
169
let rem_rhs = rhs_chunks.remainder();
170
171
let result = lhs_chunks
172
.zip(rhs_chunks)
173
.fold(init, |prev, (left, right)| fold(prev, op(left, right)));
174
175
fold(result, op(rem_lhs, rem_rhs))
176
}
177
178
fn unary_impl<F, I>(iter: I, op: F, length: usize) -> Bitmap
179
where
180
I: BitChunkIterExact<u64>,
181
F: Fn(u64) -> u64,
182
{
183
let rem = op(iter.remainder());
184
let buffer = chunk_iter_to_vec_and_remainder(iter.map(op), rem);
185
186
Bitmap::from_u8_vec(buffer, length)
187
}
188
189
/// Apply a bitwise operation `op` to one input and return the result as a [`Bitmap`].
190
pub fn unary<F>(lhs: &Bitmap, op: F) -> Bitmap
191
where
192
F: Fn(u64) -> u64,
193
{
194
let (slice, offset, length) = lhs.as_slice();
195
if offset == 0 {
196
let iter = BitChunksExact::<u64>::new(slice, length);
197
unary_impl(iter, op, lhs.len())
198
} else {
199
let iter = lhs.chunks::<u64>();
200
unary_impl(iter, op, lhs.len())
201
}
202
}
203
204
// create a new [`Bitmap`] semantically equal to ``bitmap`` but with an offset equal to ``offset``
205
pub(crate) fn align(bitmap: &Bitmap, new_offset: usize) -> Bitmap {
206
let length = bitmap.len();
207
208
let bitmap: Bitmap = std::iter::repeat_n(false, new_offset)
209
.chain(bitmap.iter())
210
.collect();
211
212
bitmap.sliced(new_offset, length)
213
}
214
215
/// Compute bitwise A AND B operation.
216
pub fn and(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
217
if lhs.unset_bits() == lhs.len() || rhs.unset_bits() == rhs.len() {
218
assert_eq!(lhs.len(), rhs.len());
219
Bitmap::new_zeroed(lhs.len())
220
} else {
221
binary(lhs, rhs, |x, y| x & y)
222
}
223
}
224
225
/// Compute bitwise A AND NOT B operation.
226
pub fn and_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
227
binary(lhs, rhs, |x, y| x & !y)
228
}
229
230
/// Compute bitwise A OR B operation.
231
pub fn or(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
232
if lhs.unset_bits() == 0 || rhs.unset_bits() == 0 {
233
assert_eq!(lhs.len(), rhs.len());
234
let mut mutable = MutableBitmap::with_capacity(lhs.len());
235
mutable.extend_constant(lhs.len(), true);
236
mutable.into()
237
} else {
238
binary(lhs, rhs, |x, y| x | y)
239
}
240
}
241
242
/// Compute bitwise A OR NOT B operation.
243
pub fn or_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
244
binary(lhs, rhs, |x, y| x | !y)
245
}
246
247
/// Compute bitwise XOR operation.
248
pub fn xor(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
249
let lhs_nulls = lhs.unset_bits();
250
let rhs_nulls = rhs.unset_bits();
251
252
// all false or all true
253
if lhs_nulls == rhs_nulls && rhs_nulls == rhs.len() || lhs_nulls == 0 && rhs_nulls == 0 {
254
assert_eq!(lhs.len(), rhs.len());
255
Bitmap::new_zeroed(rhs.len())
256
}
257
// all false and all true or vice versa
258
else if (lhs_nulls == 0 && rhs_nulls == rhs.len())
259
|| (lhs_nulls == lhs.len() && rhs_nulls == 0)
260
{
261
assert_eq!(lhs.len(), rhs.len());
262
let mut mutable = MutableBitmap::with_capacity(lhs.len());
263
mutable.extend_constant(lhs.len(), true);
264
mutable.into()
265
} else {
266
binary(lhs, rhs, |x, y| x ^ y)
267
}
268
}
269
270
/// Compute bitwise equality (not XOR) operation.
271
fn eq(lhs: &Bitmap, rhs: &Bitmap) -> bool {
272
if lhs.len() != rhs.len() {
273
return false;
274
}
275
276
let mut lhs_chunks = lhs.chunks::<u64>();
277
let mut rhs_chunks = rhs.chunks::<u64>();
278
279
let equal_chunks = lhs_chunks
280
.by_ref()
281
.zip(rhs_chunks.by_ref())
282
.all(|(left, right)| left == right);
283
284
if !equal_chunks {
285
return false;
286
}
287
let lhs_remainder = lhs_chunks.remainder_iter();
288
let rhs_remainder = rhs_chunks.remainder_iter();
289
lhs_remainder.zip(rhs_remainder).all(|(x, y)| x == y)
290
}
291
292
pub fn num_intersections_with(lhs: BitMask<'_>, rhs: BitMask<'_>) -> usize {
293
binary_mask_fold(
294
lhs,
295
rhs,
296
|lhs, rhs| (lhs & rhs).count_ones() as usize,
297
0,
298
|lhs, rhs| lhs + rhs,
299
)
300
}
301
302
pub fn intersects_with(lhs: &Bitmap, rhs: &Bitmap) -> bool {
303
binary_fold(
304
lhs,
305
rhs,
306
|lhs, rhs| lhs & rhs != 0,
307
false,
308
|lhs, rhs| lhs || rhs,
309
)
310
}
311
312
pub fn intersects_with_mut(lhs: &MutableBitmap, rhs: &MutableBitmap) -> bool {
313
binary_fold_mut(
314
lhs,
315
rhs,
316
|lhs, rhs| lhs & rhs != 0,
317
false,
318
|lhs, rhs| lhs || rhs,
319
)
320
}
321
322
pub fn num_edges(lhs: &Bitmap) -> usize {
323
if lhs.is_empty() {
324
return 0;
325
}
326
327
// @TODO: If is probably quite inefficient to do it like this because now either one is not
328
// aligned. Maybe, we can implement a smarter way to do this.
329
binary_fold(
330
&unsafe { lhs.clone().sliced_unchecked(0, lhs.len() - 1) },
331
&unsafe { lhs.clone().sliced_unchecked(1, lhs.len() - 1) },
332
|l, r| (l ^ r).count_ones() as usize,
333
0,
334
|acc, v| acc + v,
335
)
336
}
337
338
/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy }`.
339
pub fn select_constant(selector: &Bitmap, truthy: &Bitmap, falsy: bool) -> Bitmap {
340
let falsy_mask: u64 = if falsy {
341
0xFFFF_FFFF_FFFF_FFFF
342
} else {
343
0x0000_0000_0000_0000
344
};
345
346
binary(selector, truthy, |s, t| (s & t) | (!s & falsy_mask))
347
}
348
349
/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy[i] }`.
350
pub fn select(selector: &Bitmap, truthy: &Bitmap, falsy: &Bitmap) -> Bitmap {
351
ternary(selector, truthy, falsy, |s, t, f| (s & t) | (!s & f))
352
}
353
354
impl PartialEq for Bitmap {
355
fn eq(&self, other: &Self) -> bool {
356
eq(self, other)
357
}
358
}
359
360
impl<'b> BitOr<&'b Bitmap> for &Bitmap {
361
type Output = Bitmap;
362
363
fn bitor(self, rhs: &'b Bitmap) -> Bitmap {
364
or(self, rhs)
365
}
366
}
367
368
impl<'b> BitAnd<&'b Bitmap> for &Bitmap {
369
type Output = Bitmap;
370
371
fn bitand(self, rhs: &'b Bitmap) -> Bitmap {
372
and(self, rhs)
373
}
374
}
375
376
impl<'b> BitXor<&'b Bitmap> for &Bitmap {
377
type Output = Bitmap;
378
379
fn bitxor(self, rhs: &'b Bitmap) -> Bitmap {
380
xor(self, rhs)
381
}
382
}
383
384
impl Not for &Bitmap {
385
type Output = Bitmap;
386
387
fn not(self) -> Bitmap {
388
unary(self, |a| !a)
389
}
390
}
391
392
#[cfg(test)]
393
mod tests {
394
use proptest::prelude::*;
395
396
use super::*;
397
use crate::bitmap::proptest::bitmap;
398
399
fn two_equal_length_bitmaps() -> impl Strategy<Value = (Bitmap, Bitmap)> {
400
(1..=250usize).prop_flat_map(|length| {
401
(
402
bitmap(length..300),
403
bitmap(length..300),
404
0..length,
405
0..length,
406
)
407
.prop_flat_map(move |(lhs, rhs, lhs_offset, rhs_offset)| {
408
(0..usize::min(length - lhs_offset, length - rhs_offset)).prop_map(
409
move |slice_length| {
410
(
411
lhs.clone().sliced(lhs_offset, slice_length),
412
rhs.clone().sliced(rhs_offset, slice_length),
413
)
414
},
415
)
416
})
417
})
418
}
419
420
proptest! {
421
#[test]
422
fn test_num_intersections_with(
423
(lhs, rhs) in two_equal_length_bitmaps()
424
) {
425
let kernel_out = num_intersections_with(BitMask::from_bitmap(&lhs), BitMask::from_bitmap(&rhs));
426
let mut reference_out = 0;
427
for (l, r) in lhs.iter().zip(rhs.iter()) {
428
reference_out += usize::from(l & r);
429
}
430
431
prop_assert_eq!(kernel_out, reference_out);
432
}
433
}
434
}
435
436