Path: blob/main/crates/polars-ops/src/series/ops/index_of.rs
6939 views
use arrow::array::{BinaryArray, BinaryViewArray, PrimitiveArray};1use polars_core::downcast_as_macro_arg_physical;2use polars_core::prelude::*;3use polars_utils::total_ord::TotalEq;4use row_encode::encode_rows_unordered;56/// Find the index of the value, or ``None`` if it can't be found.7fn index_of_value<'a, DT, AR>(ca: &'a ChunkedArray<DT>, value: AR::ValueT<'a>) -> Option<usize>8where9DT: PolarsDataType<Array = AR>,10AR: StaticArray,11AR::ValueT<'a>: TotalEq,12{13let req_value = &value;14let mut index = 0;15for chunk in ca.downcast_iter() {16if chunk.validity().is_some() {17for maybe_value in chunk.iter() {18if maybe_value.map(|v| v.tot_eq(req_value)) == Some(true) {19return Some(index);20} else {21index += 1;22}23}24} else {25// A lack of a validity bitmap means there are no nulls, so we26// can simplify our logic and use a faster code path:27for value in chunk.values_iter() {28if value.tot_eq(req_value) {29return Some(index);30} else {31index += 1;32}33}34}35}36None37}3839fn index_of_numeric_value<T>(ca: &ChunkedArray<T>, value: T::Native) -> Option<usize>40where41T: PolarsNumericType,42{43index_of_value::<_, PrimitiveArray<T::Native>>(ca, value)44}4546/// Try casting the value to the correct type, then call47/// index_of_numeric_value().48macro_rules! try_index_of_numeric_ca {49($ca:expr, $value:expr) => {{50let ca = $ca;51let value = $value;52// extract() returns None if casting failed, so consider an extract()53// failure as not finding the value. Nulls should have been handled54// earlier.55let value = value.into_value().to_physical().extract().unwrap();56index_of_numeric_value(ca, value)57}};58}5960/// Find the index of a given value (the first and only entry in `value_series`)61/// within the series.62pub fn index_of(series: &Series, needle: Scalar) -> PolarsResult<Option<usize>> {63polars_ensure!(64series.dtype() == needle.dtype(),65InvalidOperation: "Cannot perform index_of with mismatching datatypes: {:?} and {:?}",66series.dtype(),67needle.dtype(),68);6970if series.is_empty() {71return Ok(None);72}7374// Series is not null, and the value is null:75if needle.is_null() {76let null_count = series.null_count();77if null_count == 0 {78return Ok(None);79} else if null_count == series.len() {80return Ok(Some(0));81}8283let mut offset = 0;84for chunk in series.chunks() {85let length = chunk.len();86if let Some(bitmap) = chunk.validity() {87let leading_ones = bitmap.leading_ones();88if leading_ones < length {89return Ok(Some(offset + leading_ones));90}91}92offset += length;93}94return Ok(None);95}9697use DataType as DT;98match series.dtype().to_physical() {99DT::Null => unreachable!("handled above"),100DT::Boolean => Ok(if needle.value().extract_bool().unwrap() {101series.bool().unwrap().first_true_idx()102} else {103series.bool().unwrap().first_false_idx()104}),105dt if dt.is_primitive_numeric() => {106let series = series.to_physical_repr();107Ok(downcast_as_macro_arg_physical!(108series,109try_index_of_numeric_ca,110needle111))112},113DT::String => Ok(index_of_value::<_, BinaryViewArray>(114&series.str()?.as_binary(),115needle.value().extract_str().unwrap().as_bytes(),116)),117DT::Binary => Ok(index_of_value::<_, BinaryViewArray>(118series.binary()?,119needle.value().extract_bytes().unwrap(),120)),121DT::BinaryOffset => Ok(index_of_value::<_, BinaryArray<i64>>(122series.binary_offset()?,123needle.value().extract_bytes().unwrap(),124)),125DT::Array(_, _) | DT::List(_) | DT::Struct(_) => {126// For non-numeric dtypes, we convert to row-encoding, which essentially has127// us searching the physical representation of the data as a series of128// bytes.129let value_as_column = Column::new_scalar(PlSmallStr::EMPTY, needle, 1);130let value_as_row_encoded_ca = encode_rows_unordered(&[value_as_column])?;131let value = value_as_row_encoded_ca132.first()133.expect("Shouldn't have nulls in a row-encoded result");134let ca = encode_rows_unordered(&[series.clone().into_column()])?;135Ok(index_of_value::<_, BinaryArray<i64>>(&ca, value))136},137138DT::UInt8139| DT::UInt16140| DT::UInt32141| DT::UInt64142| DT::Int8143| DT::Int16144| DT::Int32145| DT::Int64146| DT::Int128147| DT::Float32148| DT::Float64 => unreachable!("primitive numeric"),149150// to_physical151#[cfg(feature = "dtype-decimal")]152DT::Decimal(..) => unreachable!(),153#[cfg(feature = "dtype-categorical")]154DT::Categorical(..) | DT::Enum(..) => unreachable!(),155DT::Date | DT::Datetime(..) | DT::Duration(..) | DT::Time => unreachable!(),156157#[cfg(feature = "object")]158DT::Object(_) => polars_bail!(op = "index_of", series.dtype()),159160DT::Unknown(_) => polars_bail!(op = "index_of", series.dtype()),161}162}163164165