Path: blob/main/crates/polars-ops/src/series/ops/index_of.rs
8475 views
use arrow::array::{BinaryArray, BinaryViewArray, PrimitiveArray};1use polars_core::downcast_as_macro_arg_physical;2use polars_core::prelude::*;3use polars_utils::total_ord::TotalEq;4use row_encode::encode_rows_unordered;56/// Find the index of the value, or ``None`` if it can't be found.7fn index_of_value<'a, DT, AR>(ca: &'a ChunkedArray<DT>, value: AR::ValueT<'a>) -> Option<usize>8where9DT: PolarsDataType<Array = AR>,10AR: StaticArray,11AR::ValueT<'a>: TotalEq,12{13let req_value = &value;14let mut index = 0;15for chunk in ca.downcast_iter() {16if chunk.validity().is_some() {17for maybe_value in chunk.iter() {18if maybe_value.map(|v| v.tot_eq(req_value)) == Some(true) {19return Some(index);20} else {21index += 1;22}23}24} else {25// A lack of a validity bitmap means there are no nulls, so we26// can simplify our logic and use a faster code path:27for value in chunk.values_iter() {28if value.tot_eq(req_value) {29return Some(index);30} else {31index += 1;32}33}34}35}36None37}3839fn index_of_numeric_value<T>(ca: &ChunkedArray<T>, value: T::Native) -> Option<usize>40where41T: PolarsNumericType,42{43index_of_value::<_, PrimitiveArray<T::Native>>(ca, value)44}4546/// Try casting the value to the correct type, then call47/// index_of_numeric_value().48macro_rules! try_index_of_numeric_ca {49($ca:expr, $value:expr) => {{50let ca = $ca;51let value = $value;52// extract() returns None if casting failed, so consider an extract()53// failure as not finding the value. Nulls should have been handled54// earlier.55let value = value.into_value().to_physical().extract().unwrap();56index_of_numeric_value(ca, value)57}};58}5960/// Find the index of a given value (the first and only entry in `value_series`)61/// within the series.62pub fn index_of(series: &Series, needle: Scalar) -> PolarsResult<Option<usize>> {63polars_ensure!(64series.dtype() == needle.dtype(),65InvalidOperation: "Cannot perform index_of with mismatching datatypes: {:?} and {:?}",66series.dtype(),67needle.dtype(),68);6970if series.is_empty() {71return Ok(None);72}7374// Series is not null, and the value is null:75if needle.is_null() {76let null_count = series.null_count();77if null_count == 0 {78return Ok(None);79} else if null_count == series.len() {80return Ok(Some(0));81}8283let mut offset = 0;84for chunk in series.chunks() {85let length = chunk.len();86if let Some(bitmap) = chunk.validity() {87let leading_ones = bitmap.leading_ones();88if leading_ones < length {89return Ok(Some(offset + leading_ones));90}91}92offset += length;93}94return Ok(None);95}9697use DataType as DT;98match series.dtype().to_physical() {99DT::Null => unreachable!("handled above"),100#[cfg(feature = "dtype-extension")]101DT::Extension(..) => unreachable!("handled above"),102DT::Boolean => Ok(if needle.value().extract_bool().unwrap() {103series.bool().unwrap().first_true_idx()104} else {105series.bool().unwrap().first_false_idx()106}),107dt if dt.is_primitive_numeric() => {108let series = series.to_physical_repr();109Ok(downcast_as_macro_arg_physical!(110series,111try_index_of_numeric_ca,112needle113))114},115DT::String => Ok(index_of_value::<_, BinaryViewArray>(116&series.str()?.as_binary(),117needle.value().extract_str().unwrap().as_bytes(),118)),119DT::Binary => Ok(index_of_value::<_, BinaryViewArray>(120series.binary()?,121needle.value().extract_bytes().unwrap(),122)),123DT::BinaryOffset => Ok(index_of_value::<_, BinaryArray<i64>>(124series.binary_offset()?,125needle.value().extract_bytes().unwrap(),126)),127DT::Array(_, _) | DT::List(_) | DT::Struct(_) => {128// For non-numeric dtypes, we convert to row-encoding, which essentially has129// us searching the physical representation of the data as a series of130// bytes.131let value_as_column = Column::new_scalar(PlSmallStr::EMPTY, needle, 1);132let value_as_row_encoded_ca = encode_rows_unordered(&[value_as_column])?;133let value = value_as_row_encoded_ca134.first()135.expect("Shouldn't have nulls in a row-encoded result");136let ca = encode_rows_unordered(&[series.clone().into_column()])?;137Ok(index_of_value::<_, BinaryArray<i64>>(&ca, value))138},139140DT::UInt8141| DT::UInt16142| DT::UInt32143| DT::UInt64144| DT::UInt128145| DT::Int8146| DT::Int16147| DT::Int32148| DT::Int64149| DT::Int128150| DT::Float16151| DT::Float32152| DT::Float64 => unreachable!("primitive numeric"),153154// to_physical155#[cfg(feature = "dtype-decimal")]156DT::Decimal(..) => unreachable!(),157#[cfg(feature = "dtype-categorical")]158DT::Categorical(..) | DT::Enum(..) => unreachable!(),159DT::Date | DT::Datetime(..) | DT::Duration(..) | DT::Time => unreachable!(),160161#[cfg(feature = "object")]162DT::Object(_) => polars_bail!(op = "index_of", series.dtype()),163164DT::Unknown(_) => polars_bail!(op = "index_of", series.dtype()),165}166}167168169