Path: blob/main/crates/polars-arrow/src/bitmap/bitmap_ops.rs
8418 views
use std::ops::{BitAnd, BitOr, BitXor, Not};12use super::Bitmap;3use super::bitmask::BitMask;4use super::utils::{BitChunk, BitChunkIterExact, BitChunksExact};5use crate::bitmap::MutableBitmap;6use crate::trusted_len::TrustedLen;78#[inline(always)]9pub(crate) fn push_bitchunk<T: BitChunk>(buffer: &mut Vec<u8>, value: T) {10buffer.extend(value.to_ne_bytes())11}1213/// Creates a [`Vec<u8>`] from a [`TrustedLen`] of [`BitChunk`].14pub fn chunk_iter_to_vec<T: BitChunk, I: TrustedLen<Item = T>>(iter: I) -> Vec<u8> {15let cap = iter.size_hint().0 * size_of::<T>();16let mut buffer = Vec::with_capacity(cap);17for v in iter {18push_bitchunk(&mut buffer, v)19}20buffer21}2223fn chunk_iter_to_vec_and_remainder<T: BitChunk, I: TrustedLen<Item = T>>(24iter: I,25remainder: T,26) -> Vec<u8> {27let cap = (iter.size_hint().0 + 1) * size_of::<T>();28let mut buffer = Vec::with_capacity(cap);29for v in iter {30push_bitchunk(&mut buffer, v)31}32push_bitchunk(&mut buffer, remainder);33debug_assert_eq!(buffer.len(), cap);34buffer35}3637/// Apply a bitwise operation `op` to four inputs and return the result as a [`Bitmap`].38pub fn quaternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, a4: &Bitmap, op: F) -> Bitmap39where40F: Fn(u64, u64, u64, u64) -> u64,41{42assert_eq!(a1.len(), a2.len());43assert_eq!(a1.len(), a3.len());44assert_eq!(a1.len(), a4.len());45let a1_chunks = a1.chunks();46let a2_chunks = a2.chunks();47let a3_chunks = a3.chunks();48let a4_chunks = a4.chunks();4950let rem_a1 = a1_chunks.remainder();51let rem_a2 = a2_chunks.remainder();52let rem_a3 = a3_chunks.remainder();53let rem_a4 = a4_chunks.remainder();5455let chunks = a1_chunks56.zip(a2_chunks)57.zip(a3_chunks)58.zip(a4_chunks)59.map(|(((a1, a2), a3), a4)| op(a1, a2, a3, a4));6061let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3, rem_a4));62let length = a1.len();6364Bitmap::from_u8_vec(buffer, length)65}6667/// Apply a bitwise operation `op` to three inputs and return the result as a [`Bitmap`].68pub fn ternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, op: F) -> Bitmap69where70F: Fn(u64, u64, u64) -> u64,71{72assert_eq!(a1.len(), a2.len());73assert_eq!(a1.len(), a3.len());74let a1_chunks = a1.chunks();75let a2_chunks = a2.chunks();76let a3_chunks = a3.chunks();7778let rem_a1 = a1_chunks.remainder();79let rem_a2 = a2_chunks.remainder();80let rem_a3 = a3_chunks.remainder();8182let chunks = a1_chunks83.zip(a2_chunks)84.zip(a3_chunks)85.map(|((a1, a2), a3)| op(a1, a2, a3));8687let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3));88let length = a1.len();8990Bitmap::from_u8_vec(buffer, length)91}9293/// Apply a bitwise operation `op` to two inputs and return the result as a [`Bitmap`].94pub fn binary<F>(lhs: &Bitmap, rhs: &Bitmap, op: F) -> Bitmap95where96F: Fn(u64, u64) -> u64,97{98assert_eq!(lhs.len(), rhs.len());99let lhs_chunks = lhs.chunks();100let rhs_chunks = rhs.chunks();101let rem_lhs = lhs_chunks.remainder();102let rem_rhs = rhs_chunks.remainder();103104let chunks = lhs_chunks105.zip(rhs_chunks)106.map(|(left, right)| op(left, right));107108let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_lhs, rem_rhs));109let length = lhs.len();110111Bitmap::from_u8_vec(buffer, length)112}113114/// Apply a bitwise operation `op` to two inputs and fold the result.115pub fn binary_fold<B, F, R>(lhs: &Bitmap, rhs: &Bitmap, op: F, init: B, fold: R) -> B116where117F: Fn(u64, u64) -> B,118R: Fn(B, B) -> B,119{120assert_eq!(lhs.len(), rhs.len());121let lhs_chunks = lhs.chunks();122let rhs_chunks = rhs.chunks();123let rem_lhs = lhs_chunks.remainder();124let rem_rhs = rhs_chunks.remainder();125126let result = lhs_chunks127.zip(rhs_chunks)128.fold(init, |prev, (left, right)| fold(prev, op(left, right)));129130fold(result, op(rem_lhs, rem_rhs))131}132133/// Apply a bitwise operation `op` to two inputs and fold the result.134pub fn binary_mask_fold<B, F, R>(lhs: BitMask<'_>, rhs: BitMask<'_>, op: F, init: B, fold: R) -> B135where136F: Fn(u64, u64) -> B,137R: Fn(B, B) -> B,138{139assert_eq!(lhs.len(), rhs.len());140let lhs_chunks = lhs.chunks();141let rhs_chunks = rhs.chunks();142let rem_lhs = lhs_chunks.remainder();143let rem_rhs = rhs_chunks.remainder();144145let result = lhs_chunks146.zip(rhs_chunks)147.fold(init, |prev, (left, right)| fold(prev, op(left, right)));148149fold(result, op(rem_lhs, rem_rhs))150}151152/// Apply a bitwise operation `op` to two inputs and fold the result.153pub fn binary_fold_mut<B, F, R>(154lhs: &MutableBitmap,155rhs: &MutableBitmap,156op: F,157init: B,158fold: R,159) -> B160where161F: Fn(u64, u64) -> B,162R: Fn(B, B) -> B,163{164assert_eq!(lhs.len(), rhs.len());165let lhs_chunks = lhs.chunks();166let rhs_chunks = rhs.chunks();167let rem_lhs = lhs_chunks.remainder();168let rem_rhs = rhs_chunks.remainder();169170let result = lhs_chunks171.zip(rhs_chunks)172.fold(init, |prev, (left, right)| fold(prev, op(left, right)));173174fold(result, op(rem_lhs, rem_rhs))175}176177fn unary_impl<F, I>(iter: I, op: F, length: usize) -> Bitmap178where179I: BitChunkIterExact<u64>,180F: Fn(u64) -> u64,181{182let rem = op(iter.remainder());183let buffer = chunk_iter_to_vec_and_remainder(iter.map(op), rem);184185Bitmap::from_u8_vec(buffer, length)186}187188/// Apply a bitwise operation `op` to one input and return the result as a [`Bitmap`].189pub fn unary<F>(lhs: &Bitmap, op: F) -> Bitmap190where191F: Fn(u64) -> u64,192{193let (slice, offset, length) = lhs.as_slice();194if offset == 0 {195let iter = BitChunksExact::<u64>::new(slice, length);196unary_impl(iter, op, lhs.len())197} else {198let iter = lhs.chunks::<u64>();199unary_impl(iter, op, lhs.len())200}201}202203// create a new [`Bitmap`] semantically equal to ``bitmap`` but with an offset equal to ``offset``204pub(crate) fn align(bitmap: &Bitmap, new_offset: usize) -> Bitmap {205let length = bitmap.len();206207let bitmap: Bitmap = std::iter::repeat_n(false, new_offset)208.chain(bitmap.iter())209.collect();210211bitmap.sliced(new_offset, length)212}213214/// Compute bitwise A AND B operation.215pub fn and(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {216if lhs.unset_bits() == lhs.len() || rhs.unset_bits() == rhs.len() {217assert_eq!(lhs.len(), rhs.len());218Bitmap::new_zeroed(lhs.len())219} else {220binary(lhs, rhs, |x, y| x & y)221}222}223224/// Compute bitwise A AND NOT B operation.225pub fn and_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {226binary(lhs, rhs, |x, y| x & !y)227}228229/// Compute bitwise A OR B operation.230pub fn or(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {231if lhs.unset_bits() == 0 || rhs.unset_bits() == 0 {232assert_eq!(lhs.len(), rhs.len());233let mut mutable = MutableBitmap::with_capacity(lhs.len());234mutable.extend_constant(lhs.len(), true);235mutable.into()236} else {237binary(lhs, rhs, |x, y| x | y)238}239}240241/// Compute bitwise A OR NOT B operation.242pub fn or_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {243binary(lhs, rhs, |x, y| x | !y)244}245246/// Compute bitwise XOR operation.247pub fn xor(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {248let lhs_nulls = lhs.unset_bits();249let rhs_nulls = rhs.unset_bits();250251// all false or all true252if lhs_nulls == rhs_nulls && rhs_nulls == rhs.len() || lhs_nulls == 0 && rhs_nulls == 0 {253assert_eq!(lhs.len(), rhs.len());254Bitmap::new_zeroed(rhs.len())255}256// all false and all true or vice versa257else if (lhs_nulls == 0 && rhs_nulls == rhs.len())258|| (lhs_nulls == lhs.len() && rhs_nulls == 0)259{260assert_eq!(lhs.len(), rhs.len());261let mut mutable = MutableBitmap::with_capacity(lhs.len());262mutable.extend_constant(lhs.len(), true);263mutable.into()264} else {265binary(lhs, rhs, |x, y| x ^ y)266}267}268269/// Compute bitwise equality (not XOR) operation.270fn eq(lhs: &Bitmap, rhs: &Bitmap) -> bool {271if lhs.len() != rhs.len() {272return false;273}274275let mut lhs_chunks = lhs.chunks::<u64>();276let mut rhs_chunks = rhs.chunks::<u64>();277278let equal_chunks = lhs_chunks279.by_ref()280.zip(rhs_chunks.by_ref())281.all(|(left, right)| left == right);282283if !equal_chunks {284return false;285}286let lhs_remainder = lhs_chunks.remainder_iter();287let rhs_remainder = rhs_chunks.remainder_iter();288lhs_remainder.zip(rhs_remainder).all(|(x, y)| x == y)289}290291pub fn num_intersections_with(lhs: BitMask<'_>, rhs: BitMask<'_>) -> usize {292binary_mask_fold(293lhs,294rhs,295|lhs, rhs| (lhs & rhs).count_ones() as usize,2960,297|lhs, rhs| lhs + rhs,298)299}300301pub fn intersects_with(lhs: &Bitmap, rhs: &Bitmap) -> bool {302binary_fold(303lhs,304rhs,305|lhs, rhs| lhs & rhs != 0,306false,307|lhs, rhs| lhs || rhs,308)309}310311pub fn intersects_with_mut(lhs: &MutableBitmap, rhs: &MutableBitmap) -> bool {312binary_fold_mut(313lhs,314rhs,315|lhs, rhs| lhs & rhs != 0,316false,317|lhs, rhs| lhs || rhs,318)319}320321pub fn num_edges(lhs: &Bitmap) -> usize {322if lhs.is_empty() {323return 0;324}325326// @TODO: If is probably quite inefficient to do it like this because now either one is not327// aligned. Maybe, we can implement a smarter way to do this.328binary_fold(329&unsafe { lhs.clone().sliced_unchecked(0, lhs.len() - 1) },330&unsafe { lhs.clone().sliced_unchecked(1, lhs.len() - 1) },331|l, r| (l ^ r).count_ones() as usize,3320,333|acc, v| acc + v,334)335}336337/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy }`.338pub fn select_constant(selector: &Bitmap, truthy: &Bitmap, falsy: bool) -> Bitmap {339let falsy_mask: u64 = if falsy {3400xFFFF_FFFF_FFFF_FFFF341} else {3420x0000_0000_0000_0000343};344345binary(selector, truthy, |s, t| (s & t) | (!s & falsy_mask))346}347348/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy[i] }`.349pub fn select(selector: &Bitmap, truthy: &Bitmap, falsy: &Bitmap) -> Bitmap {350ternary(selector, truthy, falsy, |s, t, f| (s & t) | (!s & f))351}352353impl PartialEq for Bitmap {354fn eq(&self, other: &Self) -> bool {355eq(self, other)356}357}358359impl<'b> BitOr<&'b Bitmap> for &Bitmap {360type Output = Bitmap;361362fn bitor(self, rhs: &'b Bitmap) -> Bitmap {363or(self, rhs)364}365}366367impl<'b> BitAnd<&'b Bitmap> for &Bitmap {368type Output = Bitmap;369370fn bitand(self, rhs: &'b Bitmap) -> Bitmap {371and(self, rhs)372}373}374375impl<'b> BitXor<&'b Bitmap> for &Bitmap {376type Output = Bitmap;377378fn bitxor(self, rhs: &'b Bitmap) -> Bitmap {379xor(self, rhs)380}381}382383impl Not for &Bitmap {384type Output = Bitmap;385386fn not(self) -> Bitmap {387unary(self, |a| !a)388}389}390391#[cfg(test)]392mod tests {393use proptest::prelude::*;394395use super::*;396use crate::bitmap::proptest::bitmap;397398fn two_equal_length_bitmaps() -> impl Strategy<Value = (Bitmap, Bitmap)> {399(1..=250usize).prop_flat_map(|length| {400(401bitmap(length..300),402bitmap(length..300),4030..length,4040..length,405)406.prop_flat_map(move |(lhs, rhs, lhs_offset, rhs_offset)| {407(0..usize::min(length - lhs_offset, length - rhs_offset)).prop_map(408move |slice_length| {409(410lhs.clone().sliced(lhs_offset, slice_length),411rhs.clone().sliced(rhs_offset, slice_length),412)413},414)415})416})417}418419proptest! {420#[test]421fn test_num_intersections_with(422(lhs, rhs) in two_equal_length_bitmaps()423) {424let kernel_out = num_intersections_with(BitMask::from_bitmap(&lhs), BitMask::from_bitmap(&rhs));425let mut reference_out = 0;426for (l, r) in lhs.iter().zip(rhs.iter()) {427reference_out += usize::from(l & r);428}429430prop_assert_eq!(kernel_out, reference_out);431}432}433}434435436