Path: blob/main/crates/polars-ops/src/series/ops/is_last_distinct.rs
6939 views
use std::hash::Hash;12use arrow::array::BooleanArray;3use arrow::bitmap::MutableBitmap;4use arrow::legacy::utils::CustomIterTools;5use polars_core::prelude::*;6use polars_core::utils::NoNull;7use polars_core::with_match_physical_numeric_polars_type;8use polars_utils::total_ord::{ToTotalOrd, TotalEq, TotalHash};910pub fn is_last_distinct(s: &Series) -> PolarsResult<BooleanChunked> {11// fast path.12if s.is_empty() {13return Ok(BooleanChunked::full_null(s.name().clone(), 0));14} else if s.len() == 1 {15return Ok(BooleanChunked::new(s.name().clone(), &[true]));16}1718let s = s.to_physical_repr();1920use DataType::*;21let out = match s.dtype() {22Boolean => {23let ca = s.bool().unwrap();24is_last_distinct_boolean(ca)25},26Binary => {27let ca = s.binary().unwrap();28is_last_distinct_bin(ca)29},30String => {31let s = s.cast(&Binary).unwrap();32return is_last_distinct(&s);33},34dt if dt.is_primitive_numeric() => {35with_match_physical_numeric_polars_type!(s.dtype(), |$T| {36let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();37is_last_distinct_numeric(ca)38})39},40#[cfg(feature = "dtype-struct")]41Struct(_) => return is_last_distinct_struct(&s),42List(inner) => {43polars_ensure!(44!inner.is_nested(),45InvalidOperation: "`is_last_distinct` on list type is only allowed if the inner type is not nested."46);47let ca = s.list().unwrap();48return is_last_distinct_list(ca);49},50dt => polars_bail!(opq = is_last_distinct, dt),51};52Ok(out)53}5455fn is_last_distinct_boolean(ca: &BooleanChunked) -> BooleanChunked {56let mut out = MutableBitmap::with_capacity(ca.len());57out.extend_constant(ca.len(), false);5859if ca.null_count() == ca.len() {60out.set(ca.len() - 1, true);61}62// TODO supports fast path.63else {64let mut first_true_found = false;65let mut first_false_found = false;66let mut first_null_found = false;67let mut all_found = false;68let ca = ca.rechunk();69ca.downcast_as_array()70.iter()71.enumerate()72.rev()73.find_map(|(idx, val)| match val {74Some(true) if !first_true_found => {75first_true_found = true;76all_found &= first_true_found;77out.set(idx, true);78if all_found { Some(()) } else { None }79},80Some(false) if !first_false_found => {81first_false_found = true;82all_found &= first_false_found;83out.set(idx, true);84if all_found { Some(()) } else { None }85},86None if !first_null_found => {87first_null_found = true;88all_found &= first_null_found;89out.set(idx, true);90if all_found { Some(()) } else { None }91},92_ => None,93});94}9596let arr = BooleanArray::new(ArrowDataType::Boolean, out.into(), None);97BooleanChunked::with_chunk(ca.name().clone(), arr)98}99100fn is_last_distinct_bin(ca: &BinaryChunked) -> BooleanChunked {101let tmp = ca.rechunk();102let arr = tmp.downcast_as_array();103let mut unique = PlHashSet::new();104arr.iter()105.rev()106.map(|opt_v| unique.insert(opt_v))107.collect_reversed::<NoNull<BooleanChunked>>()108.into_inner()109.with_name(ca.name().clone())110}111112fn is_last_distinct_numeric<T>(ca: &ChunkedArray<T>) -> BooleanChunked113where114T: PolarsNumericType,115T::Native: TotalHash + TotalEq + ToTotalOrd,116<T::Native as ToTotalOrd>::TotalOrdItem: Hash + Eq,117{118let tmp = ca.rechunk();119let arr = tmp.downcast_as_array();120let mut unique = PlHashSet::new();121arr.iter()122.rev()123.map(|opt_v| unique.insert(opt_v.to_total_ord()))124.collect_reversed::<NoNull<BooleanChunked>>()125.into_inner()126.with_name(ca.name().clone())127}128129#[cfg(feature = "dtype-struct")]130fn is_last_distinct_struct(s: &Series) -> PolarsResult<BooleanChunked> {131let groups = s.group_tuples(true, false)?;132// SAFETY: all groups have at least a single member133let last = unsafe { groups.take_group_lasts() };134let mut out = MutableBitmap::with_capacity(s.len());135out.extend_constant(s.len(), false);136137for idx in last {138// Group tuples are always in bounds139unsafe { out.set_unchecked(idx as usize, true) }140}141142let arr = BooleanArray::new(ArrowDataType::Boolean, out.into(), None);143Ok(BooleanChunked::with_chunk(s.name().clone(), arr))144}145146fn is_last_distinct_list(ca: &ListChunked) -> PolarsResult<BooleanChunked> {147let groups = ca.group_tuples(true, false)?;148// SAFETY: all groups have at least a single member149let last = unsafe { groups.take_group_lasts() };150let mut out = MutableBitmap::with_capacity(ca.len());151out.extend_constant(ca.len(), false);152153for idx in last {154// Group tuples are always in bounds155unsafe { out.set_unchecked(idx as usize, true) }156}157158let arr = BooleanArray::new(ArrowDataType::Boolean, out.into(), None);159Ok(BooleanChunked::with_chunk(ca.name().clone(), arr))160}161162163