Path: blob/main/crates/polars-ops/src/series/ops/is_first_distinct.rs
6939 views
use std::hash::Hash;12use arrow::array::BooleanArray;3use arrow::bitmap::MutableBitmap;4use arrow::legacy::bit_util::*;5use arrow::legacy::utils::CustomIterTools;6use polars_core::prelude::*;7use polars_core::with_match_physical_numeric_polars_type;8use polars_utils::total_ord::{ToTotalOrd, TotalEq, TotalHash};9fn is_first_distinct_numeric<T>(ca: &ChunkedArray<T>) -> BooleanChunked10where11T: PolarsNumericType,12T::Native: TotalHash + TotalEq + ToTotalOrd,13<T::Native as ToTotalOrd>::TotalOrdItem: Hash + Eq,14{15let mut unique = PlHashSet::new();16let chunks = ca.downcast_iter().map(|arr| -> BooleanArray {17arr.into_iter()18.map(|opt_v| unique.insert(opt_v.to_total_ord()))19.collect_trusted()20});2122BooleanChunked::from_chunk_iter(ca.name().clone(), chunks)23}2425fn is_first_distinct_bin(ca: &BinaryChunked) -> BooleanChunked {26let mut unique = PlHashSet::new();27let chunks = ca.downcast_iter().map(|arr| -> BooleanArray {28arr.into_iter()29.map(|opt_v| unique.insert(opt_v))30.collect_trusted()31});3233BooleanChunked::from_chunk_iter(ca.name().clone(), chunks)34}3536fn is_first_distinct_boolean(ca: &BooleanChunked) -> BooleanChunked {37let mut out = MutableBitmap::with_capacity(ca.len());38out.extend_constant(ca.len(), false);3940if ca.null_count() == ca.len() {41out.set(0, true);42} else {43let ca = ca.rechunk();44let arr = ca.downcast_as_array();45if ca.null_count() == 0 {46let (true_index, false_index) =47find_first_true_false_no_null(arr.values().chunks::<u64>());48if let Some(idx) = true_index {49out.set(idx, true)50}51if let Some(idx) = false_index {52out.set(idx, true)53}54} else {55let (true_index, false_index, null_index) = find_first_true_false_null(56arr.values().chunks::<u64>(),57arr.validity().unwrap().chunks::<u64>(),58);59if let Some(idx) = true_index {60out.set(idx, true)61}62if let Some(idx) = false_index {63out.set(idx, true)64}65if let Some(idx) = null_index {66out.set(idx, true)67}68}69}70let arr = BooleanArray::new(ArrowDataType::Boolean, out.into(), None);71BooleanChunked::with_chunk(ca.name().clone(), arr)72}7374#[cfg(feature = "dtype-struct")]75fn is_first_distinct_struct(s: &Series) -> PolarsResult<BooleanChunked> {76let groups = s.group_tuples(true, false)?;77let first = groups.take_group_firsts();78let mut out = MutableBitmap::with_capacity(s.len());79out.extend_constant(s.len(), false);8081for idx in first {82// Group tuples are always in bounds83unsafe { out.set_unchecked(idx as usize, true) }84}8586let arr = BooleanArray::new(ArrowDataType::Boolean, out.into(), None);87Ok(BooleanChunked::with_chunk(s.name().clone(), arr))88}8990fn is_first_distinct_list(ca: &ListChunked) -> PolarsResult<BooleanChunked> {91let groups = ca.group_tuples(true, false)?;92let first = groups.take_group_firsts();93let mut out = MutableBitmap::with_capacity(ca.len());94out.extend_constant(ca.len(), false);9596for idx in first {97// Group tuples are always in bounds98unsafe { out.set_unchecked(idx as usize, true) }99}100101let arr = BooleanArray::new(ArrowDataType::Boolean, out.into(), None);102Ok(BooleanChunked::with_chunk(ca.name().clone(), arr))103}104105pub fn is_first_distinct(s: &Series) -> PolarsResult<BooleanChunked> {106// fast path.107if s.is_empty() {108return Ok(BooleanChunked::full_null(s.name().clone(), 0));109} else if s.len() == 1 {110return Ok(BooleanChunked::new(s.name().clone(), &[true]));111}112113let s = s.to_physical_repr();114115use DataType::*;116let out = match s.dtype() {117Boolean => {118let ca = s.bool().unwrap();119is_first_distinct_boolean(ca)120},121Binary => {122let ca = s.binary().unwrap();123is_first_distinct_bin(ca)124},125String => {126let s = s.cast(&Binary).unwrap();127return is_first_distinct(&s);128},129dt if dt.is_primitive_numeric() => {130with_match_physical_numeric_polars_type!(s.dtype(), |$T| {131let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();132is_first_distinct_numeric(ca)133})134},135#[cfg(feature = "dtype-struct")]136Struct(_) => return is_first_distinct_struct(&s),137List(inner) => {138polars_ensure!(139!inner.is_nested(),140InvalidOperation: "`is_first_distinct` on list type is only allowed if the inner type is not nested."141);142let ca = s.list().unwrap();143return is_first_distinct_list(ca);144},145dt => polars_bail!(opq = is_first_distinct, dt),146};147Ok(out)148}149150151