Path: blob/main/crates/polars-arrow/src/bitmap/utils/slice_iterator.rs
6939 views
use crate::bitmap::Bitmap;12/// Internal state of [`SlicesIterator`]3#[derive(Debug, Clone, PartialEq)]4enum State {5// normal iteration6Nominal,7// nothing more to iterate.8Finished,9}1011/// Iterator over a bitmap that returns slices of set regions.12///13/// This is the most efficient method to extract slices of values from arrays14/// with a validity bitmap.15/// For example, the bitmap `00101111` returns `[(0,4), (6,1)]`16#[derive(Debug, Clone)]17pub struct SlicesIterator<'a> {18values: std::slice::Iter<'a, u8>,19count: usize,20mask: u8,21max_len: usize,22current_byte: &'a u8,23state: State,24len: usize,25start: usize,26on_region: bool,27}2829impl<'a> SlicesIterator<'a> {30/// Creates a new [`SlicesIterator`]31pub fn new(values: &'a Bitmap) -> Self {32let (buffer, offset, _) = values.as_slice();33let mut iter = buffer.iter();3435let (current_byte, state) = match iter.next() {36Some(b) => (b, State::Nominal),37None => (&0, State::Finished),38};3940Self {41state,42count: values.len() - values.unset_bits(),43max_len: values.len(),44values: iter,45mask: 1u8.rotate_left(offset as u32),46current_byte,47len: 0,48start: 0,49on_region: false,50}51}5253#[inline]54fn finish(&mut self) -> Option<(usize, usize)> {55self.state = State::Finished;56if self.on_region {57Some((self.start, self.len))58} else {59None60}61}6263#[inline]64fn current_len(&self) -> usize {65self.start + self.len66}6768/// Returns the total number of slots.69/// It corresponds to the sum of all lengths of all slices.70#[inline]71pub fn slots(&self) -> usize {72self.count73}74}7576impl Iterator for SlicesIterator<'_> {77type Item = (usize, usize);7879#[inline]80fn next(&mut self) -> Option<Self::Item> {81loop {82if self.state == State::Finished {83return None;84}85if self.current_len() == self.max_len {86return self.finish();87}8889if self.mask == 1 {90// at the beginning of a byte => try to skip it all together91match (self.on_region, self.current_byte) {92(true, &255u8) => {93self.len = std::cmp::min(self.max_len - self.start, self.len + 8);94if let Some(v) = self.values.next() {95self.current_byte = v;96};97continue;98},99(false, &0) => {100self.len = std::cmp::min(self.max_len - self.start, self.len + 8);101if let Some(v) = self.values.next() {102self.current_byte = v;103};104continue;105},106_ => (), // we need to run over all bits of this byte107}108};109110let value = (self.current_byte & self.mask) != 0;111self.mask = self.mask.rotate_left(1);112113match (self.on_region, value) {114(true, true) => self.len += 1,115(false, false) => self.len += 1,116(true, false) => {117self.on_region = false;118let result = (self.start, self.len);119self.start += self.len;120self.len = 1;121if self.mask == 1 {122// reached a new byte => try to fetch it from the iterator123if let Some(v) = self.values.next() {124self.current_byte = v;125};126}127return Some(result);128},129(false, true) => {130self.start += self.len;131self.len = 1;132self.on_region = true;133},134}135136if self.mask == 1 {137// reached a new byte => try to fetch it from the iterator138match self.values.next() {139Some(v) => self.current_byte = v,140None => return self.finish(),141};142}143}144}145}146147148