Path: blob/main/crates/polars-arrow/src/bitmap/bitmap_ops.rs
6939 views
use std::ops::{BitAnd, BitOr, BitXor, Not};12use super::Bitmap;3use super::utils::{BitChunk, BitChunkIterExact, BitChunksExact};4use crate::bitmap::MutableBitmap;5use crate::trusted_len::TrustedLen;67#[inline(always)]8pub(crate) fn push_bitchunk<T: BitChunk>(buffer: &mut Vec<u8>, value: T) {9buffer.extend(value.to_ne_bytes())10}1112/// Creates a [`Vec<u8>`] from a [`TrustedLen`] of [`BitChunk`].13pub fn chunk_iter_to_vec<T: BitChunk, I: TrustedLen<Item = T>>(iter: I) -> Vec<u8> {14let cap = iter.size_hint().0 * size_of::<T>();15let mut buffer = Vec::with_capacity(cap);16for v in iter {17push_bitchunk(&mut buffer, v)18}19buffer20}2122fn chunk_iter_to_vec_and_remainder<T: BitChunk, I: TrustedLen<Item = T>>(23iter: I,24remainder: T,25) -> Vec<u8> {26let cap = (iter.size_hint().0 + 1) * size_of::<T>();27let mut buffer = Vec::with_capacity(cap);28for v in iter {29push_bitchunk(&mut buffer, v)30}31push_bitchunk(&mut buffer, remainder);32debug_assert_eq!(buffer.len(), cap);33buffer34}3536/// Apply a bitwise operation `op` to four inputs and return the result as a [`Bitmap`].37pub fn quaternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, a4: &Bitmap, op: F) -> Bitmap38where39F: Fn(u64, u64, u64, u64) -> u64,40{41assert_eq!(a1.len(), a2.len());42assert_eq!(a1.len(), a3.len());43assert_eq!(a1.len(), a4.len());44let a1_chunks = a1.chunks();45let a2_chunks = a2.chunks();46let a3_chunks = a3.chunks();47let a4_chunks = a4.chunks();4849let rem_a1 = a1_chunks.remainder();50let rem_a2 = a2_chunks.remainder();51let rem_a3 = a3_chunks.remainder();52let rem_a4 = a4_chunks.remainder();5354let chunks = a1_chunks55.zip(a2_chunks)56.zip(a3_chunks)57.zip(a4_chunks)58.map(|(((a1, a2), a3), a4)| op(a1, a2, a3, a4));5960let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3, rem_a4));61let length = a1.len();6263Bitmap::from_u8_vec(buffer, length)64}6566/// Apply a bitwise operation `op` to three inputs and return the result as a [`Bitmap`].67pub fn ternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, op: F) -> Bitmap68where69F: Fn(u64, u64, u64) -> u64,70{71assert_eq!(a1.len(), a2.len());72assert_eq!(a1.len(), a3.len());73let a1_chunks = a1.chunks();74let a2_chunks = a2.chunks();75let a3_chunks = a3.chunks();7677let rem_a1 = a1_chunks.remainder();78let rem_a2 = a2_chunks.remainder();79let rem_a3 = a3_chunks.remainder();8081let chunks = a1_chunks82.zip(a2_chunks)83.zip(a3_chunks)84.map(|((a1, a2), a3)| op(a1, a2, a3));8586let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3));87let length = a1.len();8889Bitmap::from_u8_vec(buffer, length)90}9192/// Apply a bitwise operation `op` to two inputs and return the result as a [`Bitmap`].93pub fn binary<F>(lhs: &Bitmap, rhs: &Bitmap, op: F) -> Bitmap94where95F: Fn(u64, u64) -> u64,96{97assert_eq!(lhs.len(), rhs.len());98let lhs_chunks = lhs.chunks();99let rhs_chunks = rhs.chunks();100let rem_lhs = lhs_chunks.remainder();101let rem_rhs = rhs_chunks.remainder();102103let chunks = lhs_chunks104.zip(rhs_chunks)105.map(|(left, right)| op(left, right));106107let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_lhs, rem_rhs));108let length = lhs.len();109110Bitmap::from_u8_vec(buffer, length)111}112113/// Apply a bitwise operation `op` to two inputs and fold the result.114pub fn binary_fold<B, F, R>(lhs: &Bitmap, rhs: &Bitmap, op: F, init: B, fold: R) -> B115where116F: Fn(u64, u64) -> B,117R: Fn(B, B) -> B,118{119assert_eq!(lhs.len(), rhs.len());120let lhs_chunks = lhs.chunks();121let rhs_chunks = rhs.chunks();122let rem_lhs = lhs_chunks.remainder();123let rem_rhs = rhs_chunks.remainder();124125let result = lhs_chunks126.zip(rhs_chunks)127.fold(init, |prev, (left, right)| fold(prev, op(left, right)));128129fold(result, op(rem_lhs, rem_rhs))130}131132/// Apply a bitwise operation `op` to two inputs and fold the result.133pub fn binary_fold_mut<B, F, R>(134lhs: &MutableBitmap,135rhs: &MutableBitmap,136op: F,137init: B,138fold: R,139) -> B140where141F: Fn(u64, u64) -> B,142R: Fn(B, B) -> B,143{144assert_eq!(lhs.len(), rhs.len());145let lhs_chunks = lhs.chunks();146let rhs_chunks = rhs.chunks();147let rem_lhs = lhs_chunks.remainder();148let rem_rhs = rhs_chunks.remainder();149150let result = lhs_chunks151.zip(rhs_chunks)152.fold(init, |prev, (left, right)| fold(prev, op(left, right)));153154fold(result, op(rem_lhs, rem_rhs))155}156157fn unary_impl<F, I>(iter: I, op: F, length: usize) -> Bitmap158where159I: BitChunkIterExact<u64>,160F: Fn(u64) -> u64,161{162let rem = op(iter.remainder());163let buffer = chunk_iter_to_vec_and_remainder(iter.map(op), rem);164165Bitmap::from_u8_vec(buffer, length)166}167168/// Apply a bitwise operation `op` to one input and return the result as a [`Bitmap`].169pub fn unary<F>(lhs: &Bitmap, op: F) -> Bitmap170where171F: Fn(u64) -> u64,172{173let (slice, offset, length) = lhs.as_slice();174if offset == 0 {175let iter = BitChunksExact::<u64>::new(slice, length);176unary_impl(iter, op, lhs.len())177} else {178let iter = lhs.chunks::<u64>();179unary_impl(iter, op, lhs.len())180}181}182183// create a new [`Bitmap`] semantically equal to ``bitmap`` but with an offset equal to ``offset``184pub(crate) fn align(bitmap: &Bitmap, new_offset: usize) -> Bitmap {185let length = bitmap.len();186187let bitmap: Bitmap = std::iter::repeat_n(false, new_offset)188.chain(bitmap.iter())189.collect();190191bitmap.sliced(new_offset, length)192}193194/// Compute bitwise A AND B operation.195pub fn and(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {196if lhs.unset_bits() == lhs.len() || rhs.unset_bits() == rhs.len() {197assert_eq!(lhs.len(), rhs.len());198Bitmap::new_zeroed(lhs.len())199} else {200binary(lhs, rhs, |x, y| x & y)201}202}203204/// Compute bitwise A AND NOT B operation.205pub fn and_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {206binary(lhs, rhs, |x, y| x & !y)207}208209/// Compute bitwise A OR B operation.210pub fn or(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {211if lhs.unset_bits() == 0 || rhs.unset_bits() == 0 {212assert_eq!(lhs.len(), rhs.len());213let mut mutable = MutableBitmap::with_capacity(lhs.len());214mutable.extend_constant(lhs.len(), true);215mutable.into()216} else {217binary(lhs, rhs, |x, y| x | y)218}219}220221/// Compute bitwise A OR NOT B operation.222pub fn or_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {223binary(lhs, rhs, |x, y| x | !y)224}225226/// Compute bitwise XOR operation.227pub fn xor(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {228let lhs_nulls = lhs.unset_bits();229let rhs_nulls = rhs.unset_bits();230231// all false or all true232if lhs_nulls == rhs_nulls && rhs_nulls == rhs.len() || lhs_nulls == 0 && rhs_nulls == 0 {233assert_eq!(lhs.len(), rhs.len());234Bitmap::new_zeroed(rhs.len())235}236// all false and all true or vice versa237else if (lhs_nulls == 0 && rhs_nulls == rhs.len())238|| (lhs_nulls == lhs.len() && rhs_nulls == 0)239{240assert_eq!(lhs.len(), rhs.len());241let mut mutable = MutableBitmap::with_capacity(lhs.len());242mutable.extend_constant(lhs.len(), true);243mutable.into()244} else {245binary(lhs, rhs, |x, y| x ^ y)246}247}248249/// Compute bitwise equality (not XOR) operation.250fn eq(lhs: &Bitmap, rhs: &Bitmap) -> bool {251if lhs.len() != rhs.len() {252return false;253}254255let mut lhs_chunks = lhs.chunks::<u64>();256let mut rhs_chunks = rhs.chunks::<u64>();257258let equal_chunks = lhs_chunks259.by_ref()260.zip(rhs_chunks.by_ref())261.all(|(left, right)| left == right);262263if !equal_chunks {264return false;265}266let lhs_remainder = lhs_chunks.remainder_iter();267let rhs_remainder = rhs_chunks.remainder_iter();268lhs_remainder.zip(rhs_remainder).all(|(x, y)| x == y)269}270271pub fn num_intersections_with(lhs: &Bitmap, rhs: &Bitmap) -> usize {272binary_fold(273lhs,274rhs,275|lhs, rhs| (lhs & rhs).count_ones() as usize,2760,277|lhs, rhs| lhs + rhs,278)279}280281pub fn intersects_with(lhs: &Bitmap, rhs: &Bitmap) -> bool {282binary_fold(283lhs,284rhs,285|lhs, rhs| lhs & rhs != 0,286false,287|lhs, rhs| lhs || rhs,288)289}290291pub fn intersects_with_mut(lhs: &MutableBitmap, rhs: &MutableBitmap) -> bool {292binary_fold_mut(293lhs,294rhs,295|lhs, rhs| lhs & rhs != 0,296false,297|lhs, rhs| lhs || rhs,298)299}300301pub fn num_edges(lhs: &Bitmap) -> usize {302if lhs.is_empty() {303return 0;304}305306// @TODO: If is probably quite inefficient to do it like this because now either one is not307// aligned. Maybe, we can implement a smarter way to do this.308binary_fold(309&unsafe { lhs.clone().sliced_unchecked(0, lhs.len() - 1) },310&unsafe { lhs.clone().sliced_unchecked(1, lhs.len() - 1) },311|l, r| (l ^ r).count_ones() as usize,3120,313|acc, v| acc + v,314)315}316317/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy }`.318pub fn select_constant(selector: &Bitmap, truthy: &Bitmap, falsy: bool) -> Bitmap {319let falsy_mask: u64 = if falsy {3200xFFFF_FFFF_FFFF_FFFF321} else {3220x0000_0000_0000_0000323};324325binary(selector, truthy, |s, t| (s & t) | (!s & falsy_mask))326}327328/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy[i] }`.329pub fn select(selector: &Bitmap, truthy: &Bitmap, falsy: &Bitmap) -> Bitmap {330ternary(selector, truthy, falsy, |s, t, f| (s & t) | (!s & f))331}332333impl PartialEq for Bitmap {334fn eq(&self, other: &Self) -> bool {335eq(self, other)336}337}338339impl<'b> BitOr<&'b Bitmap> for &Bitmap {340type Output = Bitmap;341342fn bitor(self, rhs: &'b Bitmap) -> Bitmap {343or(self, rhs)344}345}346347impl<'b> BitAnd<&'b Bitmap> for &Bitmap {348type Output = Bitmap;349350fn bitand(self, rhs: &'b Bitmap) -> Bitmap {351and(self, rhs)352}353}354355impl<'b> BitXor<&'b Bitmap> for &Bitmap {356type Output = Bitmap;357358fn bitxor(self, rhs: &'b Bitmap) -> Bitmap {359xor(self, rhs)360}361}362363impl Not for &Bitmap {364type Output = Bitmap;365366fn not(self) -> Bitmap {367unary(self, |a| !a)368}369}370371#[cfg(test)]372mod tests {373use proptest::prelude::*;374375use super::*;376use crate::bitmap::proptest::bitmap;377378fn two_equal_length_bitmaps() -> impl Strategy<Value = (Bitmap, Bitmap)> {379(1..=250usize).prop_flat_map(|length| {380(381bitmap(length..300),382bitmap(length..300),3830..length,3840..length,385)386.prop_flat_map(move |(lhs, rhs, lhs_offset, rhs_offset)| {387(0..usize::min(length - lhs_offset, length - rhs_offset)).prop_map(388move |slice_length| {389(390lhs.clone().sliced(lhs_offset, slice_length),391rhs.clone().sliced(rhs_offset, slice_length),392)393},394)395})396})397}398399proptest! {400#[test]401fn test_num_intersections_with(402(lhs, rhs) in two_equal_length_bitmaps()403) {404let kernel_out = num_intersections_with(&lhs, &rhs);405let mut reference_out = 0;406for (l, r) in lhs.iter().zip(rhs.iter()) {407reference_out += usize::from(l & r);408}409410prop_assert_eq!(kernel_out, reference_out);411}412}413}414415416