Path: blob/main/crates/polars-arrow/src/bitmap/aligned.rs
6939 views
use std::iter::Copied;1use std::slice::Iter;23use crate::bitmap::utils::BitChunk;45fn load_chunk_le<T: BitChunk>(src: &[u8]) -> T {6if let Ok(chunk) = src.try_into() {7return T::from_le_bytes(chunk);8}910let mut chunk = T::Bytes::default();11let len = src.len().min(chunk.as_ref().len());12chunk.as_mut()[..len].copy_from_slice(&src[..len]);13T::from_le_bytes(chunk)14}1516/// Represents a bitmap split in three portions, a prefix, a suffix and an17/// aligned bulk section in the middle.18#[derive(Default, Clone, Debug)]19pub struct AlignedBitmapSlice<'a, T: BitChunk> {20prefix: T,21prefix_len: u32,22bulk: &'a [T],23suffix: T,24suffix_len: u32,25}2627impl<'a, T: BitChunk> AlignedBitmapSlice<'a, T> {28#[inline(always)]29pub fn prefix(&self) -> T {30self.prefix31}3233#[inline(always)]34pub fn bulk_iter(&self) -> Copied<Iter<'a, T>> {35self.bulk.iter().copied()36}3738#[inline(always)]39pub fn bulk(&self) -> &'a [T] {40self.bulk41}4243#[inline(always)]44pub fn suffix(&self) -> T {45self.suffix46}4748/// The length (in bits) of the portion of the bitmap found in prefix.49#[inline(always)]50pub fn prefix_bitlen(&self) -> usize {51self.prefix_len as usize52}5354/// The length (in bits) of the portion of the bitmap found in bulk.55#[inline(always)]56pub fn bulk_bitlen(&self) -> usize {578 * size_of::<T>() * self.bulk.len()58}5960/// The length (in bits) of the portion of the bitmap found in suffix.61#[inline(always)]62pub fn suffix_bitlen(&self) -> usize {63self.suffix_len as usize64}6566pub fn new(mut bytes: &'a [u8], mut offset: usize, len: usize) -> Self {67if len == 0 {68return Self::default();69}7071assert!(bytes.len() * 8 >= offset + len);7273// Strip off useless bytes from start.74let start_byte_idx = offset / 8;75bytes = &bytes[start_byte_idx..];76offset %= 8;7778// Fast-path: fits entirely in one chunk.79let chunk_len = size_of::<T>();80let chunk_len_bits = 8 * chunk_len;81if offset + len <= chunk_len_bits {82let mut prefix = load_chunk_le::<T>(bytes) >> offset;83if len < chunk_len_bits {84prefix &= (T::one() << len) - T::one();85}86return Self {87prefix,88prefix_len: len as u32,89..Self::default()90};91}9293// Find how many bytes from the start our aligned section would start.94let mut align_offset = bytes.as_ptr().align_offset(chunk_len);95let mut align_offset_bits = 8 * align_offset;9697// Oops, the original pointer was already aligned, but our offset means98// we can't start there, start one chunk later.99if offset > align_offset_bits {100align_offset_bits += chunk_len_bits;101align_offset += chunk_len;102}103104// Calculate based on this the lengths of our sections (in bits).105let prefix_len = (align_offset_bits - offset).min(len);106let rest_len = len - prefix_len;107let suffix_len = rest_len % chunk_len_bits;108let bulk_len = rest_len - suffix_len;109debug_assert!(prefix_len < chunk_len_bits);110debug_assert!(bulk_len.is_multiple_of(chunk_len_bits));111debug_assert!(suffix_len < chunk_len_bits);112113// Now we just have to load.114let (prefix_bytes, rest_bytes) = bytes.split_at(align_offset);115let (bulk_bytes, suffix_bytes) = rest_bytes.split_at(bulk_len / 8);116let mut prefix = load_chunk_le::<T>(prefix_bytes) >> offset;117let mut suffix = load_chunk_le::<T>(suffix_bytes);118prefix &= (T::one() << prefix_len) - T::one();119suffix &= (T::one() << suffix_len) - T::one();120Self {121prefix,122bulk: bytemuck::cast_slice(bulk_bytes),123suffix,124prefix_len: prefix_len as u32,125suffix_len: suffix_len as u32,126}127}128}129130131