Path: blob/main/crates/polars-arrow/src/legacy/kernels/mod.rs
6939 views
use std::iter::Enumerate;12use crate::array::BooleanArray;3use crate::bitmap::utils::BitChunks;4pub mod ewm;5pub mod set;6pub mod sort_partition;7#[cfg(feature = "performant")]8pub mod sorted_join;9#[cfg(feature = "strings")]10pub mod string;11pub mod take_agg;12mod time;1314pub use time::{Ambiguous, NonExistent};15#[cfg(feature = "timezones")]16pub use time::{convert_to_naive_local, convert_to_naive_local_opt};1718/// Internal state of [SlicesIterator]19#[derive(Debug, PartialEq)]20enum State {21// it is iterating over bits of a mask (`u64`, steps of size of 1 slot)22Bits(u64),23// it is iterating over chunks (steps of size of 64 slots)24Chunks,25// it is iterating over the remaining bits (steps of size of 1 slot)26Remainder,27// nothing more to iterate.28Finish,29}3031/// Forked and modified from arrow crate.32///33/// An iterator of `(usize, usize)` each representing an interval `[start,end[` whose34/// slots of a [BooleanArray] are true. Each interval corresponds to a contiguous region of memory to be35/// "taken" from an array to be filtered.36#[derive(Debug)]37struct MaskedSlicesIterator<'a> {38iter: Enumerate<BitChunks<'a, u64>>,39state: State,40remainder_mask: u64,41remainder_len: usize,42chunk_len: usize,43len: usize,44start: usize,45on_region: bool,46current_chunk: usize,47current_bit: usize,48total_len: usize,49}5051impl<'a> MaskedSlicesIterator<'a> {52pub(crate) fn new(mask: &'a BooleanArray) -> Self {53let chunks = mask.values().chunks::<u64>();5455let chunk_len = mask.len() / 64;56let remainder_len = chunks.remainder_len();57let remainder_mask = chunks.remainder();5859Self {60iter: chunks.enumerate(),61state: State::Chunks,62remainder_len,63chunk_len,64remainder_mask,65len: 0,66start: 0,67on_region: false,68current_chunk: 0,69current_bit: 0,70total_len: mask.len(),71}72}7374#[inline]75fn current_start(&self) -> usize {76self.current_chunk * 64 + self.current_bit77}7879#[inline]80fn iterate_bits(&mut self, mask: u64, max: usize) -> Option<(usize, usize)> {81while self.current_bit < max {82if (mask & (1 << self.current_bit)) != 0 {83if !self.on_region {84self.start = self.current_start();85self.on_region = true;86}87self.len += 1;88} else if self.on_region {89let result = (self.start, self.start + self.len);90self.len = 0;91self.on_region = false;92self.current_bit += 1;93return Some(result);94}95self.current_bit += 1;96}97self.current_bit = 0;98None99}100101/// iterates over chunks.102#[inline]103fn iterate_chunks(&mut self) -> Option<(usize, usize)> {104while let Some((i, mask)) = self.iter.next() {105self.current_chunk = i;106if mask == 0 {107if self.on_region {108let result = (self.start, self.start + self.len);109self.len = 0;110self.on_region = false;111return Some(result);112}113} else if mask == u64::MAX {114// = !0u64115if !self.on_region {116self.start = self.current_start();117self.on_region = true;118}119self.len += 64;120} else {121// there is a chunk that has a non-trivial mask => iterate over bits.122self.state = State::Bits(mask);123return None;124}125}126// no more chunks => start iterating over the remainder127self.current_chunk = self.chunk_len;128self.state = State::Remainder;129None130}131}132133impl Iterator for MaskedSlicesIterator<'_> {134type Item = (usize, usize);135136fn next(&mut self) -> Option<Self::Item> {137match self.state {138State::Chunks => {139match self.iterate_chunks() {140None => {141// iterating over chunks does not yield any new slice => continue to the next142self.current_bit = 0;143self.next()144},145other => other,146}147},148State::Bits(mask) => {149match self.iterate_bits(mask, 64) {150None => {151// iterating over bits does not yield any new slice => change back152// to chunks and continue to the next153self.state = State::Chunks;154self.next()155},156other => other,157}158},159State::Remainder => match self.iterate_bits(self.remainder_mask, self.remainder_len) {160None => {161self.state = State::Finish;162if self.on_region {163Some((self.start, self.start + self.len))164} else {165None166}167},168other => other,169},170State::Finish => None,171}172}173}174175#[derive(Eq, PartialEq, Debug)]176enum BinaryMaskedState {177Start,178// Last masks were false values179LastFalse,180// Last masks were true values181LastTrue,182Finish,183}184185pub(crate) struct BinaryMaskedSliceIterator<'a> {186slice_iter: MaskedSlicesIterator<'a>,187filled: usize,188low: usize,189high: usize,190state: BinaryMaskedState,191}192193impl<'a> BinaryMaskedSliceIterator<'a> {194pub(crate) fn new(mask: &'a BooleanArray) -> Self {195Self {196slice_iter: MaskedSlicesIterator::new(mask),197filled: 0,198low: 0,199high: 0,200state: BinaryMaskedState::Start,201}202}203}204205impl Iterator for BinaryMaskedSliceIterator<'_> {206type Item = (usize, usize, bool);207208fn next(&mut self) -> Option<Self::Item> {209use BinaryMaskedState::*;210211match self.state {212Start => {213// first iteration214if self.low == 0 && self.high == 0 {215match self.slice_iter.next() {216Some((low, high)) => {217self.low = low;218self.high = high;219220if low > 0 {221// do another start iteration.222Some((0, low, false))223} else {224self.state = LastTrue;225self.filled = high;226Some((low, high, true))227}228},229None => {230self.state = Finish;231Some((self.filled, self.slice_iter.total_len, false))232},233}234} else {235self.filled = self.high;236self.state = LastTrue;237Some((self.low, self.high, true))238}239},240LastFalse => {241self.state = LastTrue;242self.filled = self.high;243Some((self.low, self.high, true))244},245LastTrue => match self.slice_iter.next() {246Some((low, high)) => {247self.low = low;248self.high = high;249self.state = LastFalse;250let last_filled = self.filled;251self.filled = low;252Some((last_filled, low, false))253},254None => {255self.state = Finish;256if self.filled != self.slice_iter.total_len {257Some((self.filled, self.slice_iter.total_len, false))258} else {259None260}261},262},263Finish => None,264}265}266}267268#[cfg(test)]269mod test {270use super::*;271272#[test]273fn test_binary_masked_slice_iter() {274let mask = BooleanArray::from_slice([false, false, true, true, true, false, false]);275276let out = BinaryMaskedSliceIterator::new(&mask)277.map(|(_, _, b)| b)278.collect::<Vec<_>>();279assert_eq!(out, &[false, true, false]);280let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();281assert_eq!(out, &[(0, 2, false), (2, 5, true), (5, 7, false)]);282let mask = BooleanArray::from_slice([true, true, false, true]);283let out = BinaryMaskedSliceIterator::new(&mask)284.map(|(_, _, b)| b)285.collect::<Vec<_>>();286assert_eq!(out, &[true, false, true]);287let mask = BooleanArray::from_slice([true, true, false, true, true]);288let out = BinaryMaskedSliceIterator::new(&mask)289.map(|(_, _, b)| b)290.collect::<Vec<_>>();291assert_eq!(out, &[true, false, true]);292}293294#[test]295fn test_binary_slice_mask_iter_with_false() {296let mask = BooleanArray::from_slice([false, false]);297let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();298assert_eq!(out, &[(0, 2, false)]);299}300}301302303