Path: blob/main/crates/polars-arrow/src/legacy/kernels/mod.rs
8430 views
use std::iter::Enumerate;12use crate::array::BooleanArray;3use crate::bitmap::utils::BitChunks;4pub mod set;5pub mod sort_partition;6#[cfg(feature = "performant")]7pub mod sorted_join;8#[cfg(feature = "strings")]9pub mod string;10pub mod take_agg;11mod time;1213pub use time::{Ambiguous, NonExistent};14#[cfg(feature = "timezones")]15pub use time::{convert_to_naive_local, convert_to_naive_local_opt};1617/// Internal state of [SlicesIterator]18#[derive(Debug, PartialEq)]19enum State {20// it is iterating over bits of a mask (`u64`, steps of size of 1 slot)21Bits(u64),22// it is iterating over chunks (steps of size of 64 slots)23Chunks,24// it is iterating over the remaining bits (steps of size of 1 slot)25Remainder,26// nothing more to iterate.27Finish,28}2930/// Forked and modified from arrow crate.31///32/// An iterator of `(usize, usize)` each representing an interval `[start,end[` whose33/// slots of a [BooleanArray] are true. Each interval corresponds to a contiguous region of memory to be34/// "taken" from an array to be filtered.35#[derive(Debug)]36struct MaskedSlicesIterator<'a> {37iter: Enumerate<BitChunks<'a, u64>>,38state: State,39remainder_mask: u64,40remainder_len: usize,41chunk_len: usize,42len: usize,43start: usize,44on_region: bool,45current_chunk: usize,46current_bit: usize,47total_len: usize,48}4950impl<'a> MaskedSlicesIterator<'a> {51pub(crate) fn new(mask: &'a BooleanArray) -> Self {52let chunks = mask.values().chunks::<u64>();5354let chunk_len = mask.len() / 64;55let remainder_len = chunks.remainder_len();56let remainder_mask = chunks.remainder();5758Self {59iter: chunks.enumerate(),60state: State::Chunks,61remainder_len,62chunk_len,63remainder_mask,64len: 0,65start: 0,66on_region: false,67current_chunk: 0,68current_bit: 0,69total_len: mask.len(),70}71}7273#[inline]74fn current_start(&self) -> usize {75self.current_chunk * 64 + self.current_bit76}7778#[inline]79fn iterate_bits(&mut self, mask: u64, max: usize) -> Option<(usize, usize)> {80while self.current_bit < max {81if (mask & (1 << self.current_bit)) != 0 {82if !self.on_region {83self.start = self.current_start();84self.on_region = true;85}86self.len += 1;87} else if self.on_region {88let result = (self.start, self.start + self.len);89self.len = 0;90self.on_region = false;91self.current_bit += 1;92return Some(result);93}94self.current_bit += 1;95}96self.current_bit = 0;97None98}99100/// iterates over chunks.101#[inline]102fn iterate_chunks(&mut self) -> Option<(usize, usize)> {103while let Some((i, mask)) = self.iter.next() {104self.current_chunk = i;105if mask == 0 {106if self.on_region {107let result = (self.start, self.start + self.len);108self.len = 0;109self.on_region = false;110return Some(result);111}112} else if mask == u64::MAX {113// = !0u64114if !self.on_region {115self.start = self.current_start();116self.on_region = true;117}118self.len += 64;119} else {120// there is a chunk that has a non-trivial mask => iterate over bits.121self.state = State::Bits(mask);122return None;123}124}125// no more chunks => start iterating over the remainder126self.current_chunk = self.chunk_len;127self.state = State::Remainder;128None129}130}131132impl Iterator for MaskedSlicesIterator<'_> {133type Item = (usize, usize);134135fn next(&mut self) -> Option<Self::Item> {136match self.state {137State::Chunks => {138match self.iterate_chunks() {139None => {140// iterating over chunks does not yield any new slice => continue to the next141self.current_bit = 0;142self.next()143},144other => other,145}146},147State::Bits(mask) => {148match self.iterate_bits(mask, 64) {149None => {150// iterating over bits does not yield any new slice => change back151// to chunks and continue to the next152self.state = State::Chunks;153self.next()154},155other => other,156}157},158State::Remainder => match self.iterate_bits(self.remainder_mask, self.remainder_len) {159None => {160self.state = State::Finish;161if self.on_region {162Some((self.start, self.start + self.len))163} else {164None165}166},167other => other,168},169State::Finish => None,170}171}172}173174#[derive(Eq, PartialEq, Debug)]175enum BinaryMaskedState {176Start,177// Last masks were false values178LastFalse,179// Last masks were true values180LastTrue,181Finish,182}183184pub(crate) struct BinaryMaskedSliceIterator<'a> {185slice_iter: MaskedSlicesIterator<'a>,186filled: usize,187low: usize,188high: usize,189state: BinaryMaskedState,190}191192impl<'a> BinaryMaskedSliceIterator<'a> {193pub(crate) fn new(mask: &'a BooleanArray) -> Self {194Self {195slice_iter: MaskedSlicesIterator::new(mask),196filled: 0,197low: 0,198high: 0,199state: BinaryMaskedState::Start,200}201}202}203204impl Iterator for BinaryMaskedSliceIterator<'_> {205type Item = (usize, usize, bool);206207fn next(&mut self) -> Option<Self::Item> {208use BinaryMaskedState::*;209210match self.state {211Start => {212// first iteration213if self.low == 0 && self.high == 0 {214match self.slice_iter.next() {215Some((low, high)) => {216self.low = low;217self.high = high;218219if low > 0 {220// do another start iteration.221Some((0, low, false))222} else {223self.state = LastTrue;224self.filled = high;225Some((low, high, true))226}227},228None => {229self.state = Finish;230Some((self.filled, self.slice_iter.total_len, false))231},232}233} else {234self.filled = self.high;235self.state = LastTrue;236Some((self.low, self.high, true))237}238},239LastFalse => {240self.state = LastTrue;241self.filled = self.high;242Some((self.low, self.high, true))243},244LastTrue => match self.slice_iter.next() {245Some((low, high)) => {246self.low = low;247self.high = high;248self.state = LastFalse;249let last_filled = self.filled;250self.filled = low;251Some((last_filled, low, false))252},253None => {254self.state = Finish;255if self.filled != self.slice_iter.total_len {256Some((self.filled, self.slice_iter.total_len, false))257} else {258None259}260},261},262Finish => None,263}264}265}266267#[cfg(test)]268mod test {269use super::*;270271#[test]272fn test_binary_masked_slice_iter() {273let mask = BooleanArray::from_slice([false, false, true, true, true, false, false]);274275let out = BinaryMaskedSliceIterator::new(&mask)276.map(|(_, _, b)| b)277.collect::<Vec<_>>();278assert_eq!(out, &[false, true, false]);279let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();280assert_eq!(out, &[(0, 2, false), (2, 5, true), (5, 7, false)]);281let mask = BooleanArray::from_slice([true, true, false, true]);282let out = BinaryMaskedSliceIterator::new(&mask)283.map(|(_, _, b)| b)284.collect::<Vec<_>>();285assert_eq!(out, &[true, false, true]);286let mask = BooleanArray::from_slice([true, true, false, true, true]);287let out = BinaryMaskedSliceIterator::new(&mask)288.map(|(_, _, b)| b)289.collect::<Vec<_>>();290assert_eq!(out, &[true, false, true]);291}292293#[test]294fn test_binary_slice_mask_iter_with_false() {295let mask = BooleanArray::from_slice([false, false]);296let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();297assert_eq!(out, &[(0, 2, false)]);298}299}300301302