Path: blob/main/crates/polars-ops/src/chunked_array/list/count.rs
6939 views
use arrow::array::{Array, BooleanArray};1use arrow::bitmap::Bitmap;2use arrow::bitmap::utils::count_zeros;3use arrow::legacy::utils::CustomIterTools;45use super::*;67fn count_bits_set_by_offsets(values: &Bitmap, offset: &[i64]) -> Vec<IdxSize> {8// Fast path where all bits are either set or unset.9if values.unset_bits() == values.len() {10return vec![0 as IdxSize; offset.len() - 1];11} else if values.unset_bits() == 0 {12let mut start = offset[0];13let v = (offset[1..])14.iter()15.map(|end| {16let current_offset = start;17start = *end;18(end - current_offset) as IdxSize19})20.collect_trusted();21return v;22}2324let (bits, bitmap_offset, _) = values.as_slice();2526let mut running_offset = offset[0];2728(offset[1..])29.iter()30.map(|end| {31let current_offset = running_offset;32running_offset = *end;3334let len = (end - current_offset) as usize;3536let set_ones = len - count_zeros(bits, bitmap_offset + current_offset as usize, len);37set_ones as IdxSize38})39.collect_trusted()40}4142#[cfg(feature = "list_count")]43pub fn list_count_matches(ca: &ListChunked, value: AnyValue) -> PolarsResult<Series> {44let value = Series::new(PlSmallStr::EMPTY, [value]);4546let ca = ca.apply_to_inner(&|s| {47ChunkCompareEq::<&Series>::equal_missing(&s, &value).map(|ca| ca.into_series())48})?;49let out = count_boolean_bits(&ca);50Ok(out.into_series())51}5253pub(super) fn count_boolean_bits(ca: &ListChunked) -> IdxCa {54let chunks = ca.downcast_iter().map(|arr| {55let inner_arr = arr.values();56let mask = inner_arr.as_any().downcast_ref::<BooleanArray>().unwrap();57assert_eq!(mask.null_count(), 0);58let out = count_bits_set_by_offsets(mask.values(), arr.offsets().as_slice());59IdxArr::from_data_default(out.into(), arr.validity().cloned())60});61IdxCa::from_chunk_iter(ca.name().clone(), chunks)62}636465