Path: blob/main/crates/polars-arrow/src/bitmap/utils/chunk_iterator/mod.rs
6940 views
mod chunks_exact;1mod merge;23pub use chunks_exact::BitChunksExact;4pub(crate) use merge::merge_reversed;56use crate::trusted_len::TrustedLen;7pub use crate::types::BitChunk;8use crate::types::BitChunkIter;910/// Trait representing an exact iterator over bytes in [`BitChunk`].11pub trait BitChunkIterExact<B: BitChunk>: TrustedLen<Item = B> {12/// The remainder of the iterator.13fn remainder(&self) -> B;1415/// The number of items in the remainder16fn remainder_len(&self) -> usize;1718/// An iterator over individual items of the remainder19#[inline]20fn remainder_iter(&self) -> BitChunkIter<B> {21BitChunkIter::new(self.remainder(), self.remainder_len())22}23}2425/// This struct is used to efficiently iterate over bit masks by loading bytes on26/// the stack with alignments of `uX`. This allows efficient iteration over bitmaps.27#[derive(Debug)]28pub struct BitChunks<'a, T: BitChunk> {29chunk_iterator: std::slice::ChunksExact<'a, u8>,30current: T,31remainder_bytes: &'a [u8],32last_chunk: T,33remaining: usize,34/// offset inside a byte35bit_offset: usize,36len: usize,37phantom: std::marker::PhantomData<T>,38}3940/// writes `bytes` into `dst`.41#[inline]42fn copy_with_merge<T: BitChunk>(dst: &mut T::Bytes, bytes: &[u8], bit_offset: usize) {43bytes44.windows(2)45.chain(std::iter::once([bytes[bytes.len() - 1], 0].as_ref()))46.take(size_of::<T>())47.enumerate()48.for_each(|(i, w)| {49let val = merge_reversed(w[0], w[1], bit_offset);50dst[i] = val;51});52}5354impl<'a, T: BitChunk> BitChunks<'a, T> {55/// Creates a [`BitChunks`].56pub fn new(slice: &'a [u8], offset: usize, len: usize) -> Self {57assert!(offset + len <= slice.len() * 8);5859let slice = &slice[offset / 8..];60let bit_offset = offset % 8;61let size_of = size_of::<T>();6263let bytes_len = len / 8;64let bytes_upper_len = (len + bit_offset).div_ceil(8);65let mut chunks = slice[..bytes_len].chunks_exact(size_of);6667let remainder = &slice[bytes_len - chunks.remainder().len()..bytes_upper_len];6869let remainder_bytes = if chunks.len() == 0 { slice } else { remainder };7071let last_chunk = remainder_bytes72.first()73.map(|first| {74let mut last = T::zero().to_ne_bytes();75last[0] = *first;76T::from_ne_bytes(last)77})78.unwrap_or_else(T::zero);7980let remaining = chunks.size_hint().0;8182let current = chunks83.next()84.map(|x| match x.try_into() {85Ok(a) => T::from_ne_bytes(a),86Err(_) => unreachable!(),87})88.unwrap_or_else(T::zero);8990Self {91chunk_iterator: chunks,92len,93current,94remaining,95remainder_bytes,96last_chunk,97bit_offset,98phantom: std::marker::PhantomData,99}100}101102#[inline]103fn load_next(&mut self) {104self.current = match self.chunk_iterator.next().unwrap().try_into() {105Ok(a) => T::from_ne_bytes(a),106Err(_) => unreachable!(),107};108}109110/// Returns the remainder [`BitChunk`].111pub fn remainder(&self) -> T {112// remaining bytes may not fit in `size_of::<T>()`. We complement113// them to fit by allocating T and writing to it byte by byte114let mut remainder = T::zero().to_ne_bytes();115116let remainder = match (self.remainder_bytes.is_empty(), self.bit_offset == 0) {117(true, _) => remainder,118(false, true) => {119// all remaining bytes120self.remainder_bytes121.iter()122.take(size_of::<T>())123.enumerate()124.for_each(|(i, val)| remainder[i] = *val);125126remainder127},128(false, false) => {129// all remaining bytes130copy_with_merge::<T>(&mut remainder, self.remainder_bytes, self.bit_offset);131remainder132},133};134let mut remainder = T::from_ne_bytes(remainder);135let mask = (T::one() << self.remainder_len()) - T::one();136remainder &= mask;137remainder138}139140/// Returns the remainder bits in [`BitChunks::remainder`].141pub fn remainder_len(&self) -> usize {142self.len - (size_of::<T>() * ((self.len / 8) / size_of::<T>()) * 8)143}144}145146impl<T: BitChunk> Iterator for BitChunks<'_, T> {147type Item = T;148149#[inline]150fn next(&mut self) -> Option<T> {151if self.remaining == 0 {152return None;153}154155let current = self.current;156let combined = if self.bit_offset == 0 {157// fast case where there is no offset. In this case, there is bit-alignment158// at byte boundary and thus the bytes correspond exactly.159if self.remaining >= 2 {160self.load_next();161}162current163} else {164let next = if self.remaining >= 2 {165// case where `next` is complete and thus we can take it all166self.load_next();167self.current168} else {169// case where the `next` is incomplete and thus we take the remaining170self.last_chunk171};172merge_reversed(current, next, self.bit_offset)173};174175self.remaining -= 1;176Some(combined)177}178179#[inline]180fn size_hint(&self) -> (usize, Option<usize>) {181// it contains always one more than the chunk_iterator, which is the last182// one where the remainder is merged into current.183(self.remaining, Some(self.remaining))184}185}186187impl<T: BitChunk> BitChunkIterExact<T> for BitChunks<'_, T> {188#[inline]189fn remainder(&self) -> T {190self.remainder()191}192193#[inline]194fn remainder_len(&self) -> usize {195self.remainder_len()196}197}198199impl<T: BitChunk> ExactSizeIterator for BitChunks<'_, T> {200#[inline]201fn len(&self) -> usize {202self.chunk_iterator.len()203}204}205206unsafe impl<T: BitChunk> TrustedLen for BitChunks<'_, T> {}207208209