Path: blob/main/crates/polars-core/src/series/series_trait.rs
8475 views
use std::any::Any;
use std::borrow::Cow;

use arrow::bitmap::{Bitmap, BitmapBuilder};
use polars_compute::rolling::QuantileMethod;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::chunked_array::cast::CastOptions;
#[cfg(feature = "object")]
use crate::chunked_array::object::PolarsObjectSafe;
use crate::prelude::*;
use crate::utils::{first_non_null, last_non_null};

/// Sortedness state tracked for a `Series`, used as an optimization hint.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum IsSorted {
    Ascending,
    Descending,
    Not,
}

impl IsSorted {
    /// Flip the sort order; `Not` maps to itself.
    pub fn reverse(self) -> Self {
        use IsSorted::*;
        match self {
            Ascending => Descending,
            Descending => Ascending,
            Not => Not,
        }
    }
}

/// An unsigned-integer `ChunkedArray` view over a series' underlying bits.
pub enum BitRepr {
    U8(UInt8Chunked),
    U16(UInt16Chunked),
    U32(UInt32Chunked),
    U64(UInt64Chunked),
    #[cfg(feature = "dtype-u128")]
    U128(UInt128Chunked),
}

pub(crate) mod private {
    use polars_utils::aliases::PlSeedableRandomStateQuality;

    use super::*;
    use crate::chunked_array::flags::StatisticsFlags;
    use crate::chunked_array::ops::compare_inner::{TotalEqInner, TotalOrdInner};

    pub trait PrivateSeriesNumeric {
        /// Return a bit representation
        ///
        /// If there is no available bit representation this returns `None`.
        fn bit_repr(&self) -> Option<BitRepr>;
    }

    pub trait PrivateSeries {
        #[cfg(feature = "object")]
        fn get_list_builder(
            &self,
            _name: PlSmallStr,
            _values_capacity: usize,
            _list_capacity: usize,
        ) -> Box<dyn ListBuilderTrait> {
            invalid_operation_panic!(get_list_builder, self)
        }

        /// Get field (used in schema)
        fn _field(&self) -> Cow<'_, Field>;

        fn _dtype(&self) -> &DataType;

        fn compute_len(&mut self);

        fn _get_flags(&self) -> StatisticsFlags;

        fn _set_flags(&mut self, flags: StatisticsFlags);

        unsafe fn equal_element(
            &self,
            _idx_self: usize,
            _idx_other: usize,
            _other: &Series,
        ) -> bool {
            invalid_operation_panic!(equal_element, self)
        }
        #[expect(clippy::wrong_self_convention)]
        fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a>;
        #[expect(clippy::wrong_self_convention)]
        fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a>;

        fn vec_hash(
            &self,
            _build_hasher: PlSeedableRandomStateQuality,
            _buf: &mut Vec<u64>,
        ) -> PolarsResult<()>;
        fn vec_hash_combine(
            &self,
            _build_hasher: PlSeedableRandomStateQuality,
            _hashes: &mut [u64],
        ) -> PolarsResult<()>;

        /// # Safety
        ///
        /// Does no bounds checks, groups must be correct.
        #[cfg(feature = "algorithm_group_by")]
        unsafe fn agg_min(&self, groups: &GroupsType) -> Series {
            Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
        }
        /// # Safety
        ///
        /// Does no bounds checks, groups must be correct.
        #[cfg(feature = "algorithm_group_by")]
        unsafe fn agg_max(&self, groups: &GroupsType) -> Series {
            Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
        }
        /// # Safety
        ///
        /// Does no bounds checks, groups must be correct.
        #[cfg(feature = "algorithm_group_by")]
        unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Series {
            Series::full_null(self._field().name().clone(), groups.len(), &IDX_DTYPE)
        }

        /// # Safety
        ///
        /// Does no bounds checks, groups must be correct.
        #[cfg(feature = "algorithm_group_by")]
        unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Series {
            Series::full_null(self._field().name().clone(), groups.len(), &IDX_DTYPE)
        }

        /// # Safety
        ///
        /// Does no bounds checks, groups must be correct.
        ///
        /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
        /// first cast to `Int64` to prevent overflow issues.
        #[cfg(feature = "algorithm_group_by")]
        unsafe fn agg_sum(&self, groups: &GroupsType) -> Series {
            Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
        }
        /// # Safety
        ///
        /// Does no bounds checks, groups must be correct.
        #[cfg(feature = "algorithm_group_by")]
        unsafe fn agg_std(&self, groups: &GroupsType, _ddof: u8) -> Series {
            Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
        }
        /// # Safety
        ///
        /// Does no bounds checks, groups must be correct.
        #[cfg(feature = "algorithm_group_by")]
        unsafe fn agg_var(&self, groups: &GroupsType, _ddof: u8) -> Series {
            Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
        }
        /// # Safety
        ///
        /// Does no bounds checks, groups must be correct.
        #[cfg(feature = "algorithm_group_by")]
        unsafe fn agg_list(&self, groups: &GroupsType) -> Series {
            Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
        }

        /// # Safety
        ///
        /// Does no bounds checks, groups must be correct.
        #[cfg(feature = "bitwise")]
        unsafe fn agg_and(&self, groups: &GroupsType) -> Series {
            Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
        }

        /// # Safety
        ///
        /// Does no bounds checks, groups must be correct.
        #[cfg(feature = "bitwise")]
        unsafe fn agg_or(&self, groups: &GroupsType) -> Series {
            Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
        }

        /// # Safety
        ///
        /// Does no bounds checks, groups must be correct.
        #[cfg(feature = "bitwise")]
        unsafe fn agg_xor(&self, groups: &GroupsType) -> Series {
            Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
        }

        fn subtract(&self, _rhs: &Series) -> PolarsResult<Series> {
            polars_bail!(opq = subtract, self._dtype());
        }
        fn add_to(&self, _rhs: &Series) -> PolarsResult<Series> {
            polars_bail!(opq = add, self._dtype());
        }
        fn multiply(&self, _rhs: &Series) -> PolarsResult<Series> {
            polars_bail!(opq = multiply, self._dtype());
        }
        fn divide(&self, _rhs: &Series) -> PolarsResult<Series> {
            polars_bail!(opq = divide, self._dtype());
        }
        fn remainder(&self, _rhs: &Series) -> PolarsResult<Series> {
            polars_bail!(opq = remainder, self._dtype());
        }
        #[cfg(feature = "algorithm_group_by")]
        fn group_tuples(&self, _multithreaded: bool, _sorted: bool) -> PolarsResult<GroupsType> {
            polars_bail!(opq = group_tuples, self._dtype());
        }
        #[cfg(feature = "zip_with")]
        fn zip_with_same_type(
            &self,
            _mask: &BooleanChunked,
            _other: &Series,
        ) -> PolarsResult<Series> {
            polars_bail!(opq = zip_with_same_type, self._dtype());
        }

        #[allow(unused_variables)]
        fn arg_sort_multiple(
            &self,
            by: &[Column],
            _options: &SortMultipleOptions,
        ) -> PolarsResult<IdxCa> {
            polars_bail!(opq = arg_sort_multiple, self._dtype());
        }
    }
}

pub trait SeriesTrait:
    Send + Sync + private::PrivateSeries + private::PrivateSeriesNumeric
{
    /// Rename the Series.
    fn rename(&mut self, name: PlSmallStr);

    /// Get the lengths of the underlying chunks
    fn chunk_lengths(&self) -> ChunkLenIter<'_>;

    /// Name of series.
    fn name(&self) -> &PlSmallStr;

    /// Get field (used in schema)
    fn field(&self) -> Cow<'_, Field> {
        self._field()
    }

    /// Get datatype of series.
    fn dtype(&self) -> &DataType {
        self._dtype()
    }

    /// Underlying chunks.
    fn chunks(&self) -> &Vec<ArrayRef>;

    /// Underlying chunks.
    ///
    /// # Safety
    /// The caller must ensure the length and the data types of `ArrayRef` does not change.
    unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef>;

    /// Number of chunks in this Series
    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    /// Shrink the capacity of this array to fit its length.
    fn shrink_to_fit(&mut self) {
        // no-op
    }

    /// Take `num_elements` from the top as a zero copy view.
    fn limit(&self, num_elements: usize) -> Series {
        self.slice(0, num_elements)
    }

    /// Get a zero copy view of the data.
    ///
    /// When offset is negative the offset is counted from the
    /// end of the array
    fn slice(&self, _offset: i64, _length: usize) -> Series;

    /// Get a zero copy view of the data.
    ///
    /// When offset is negative the offset is counted from the
    /// end of the array
    fn split_at(&self, _offset: i64) -> (Series, Series);

    fn append(&mut self, other: &Series) -> PolarsResult<()>;
    fn append_owned(&mut self, other: Series) -> PolarsResult<()>;

    #[doc(hidden)]
    fn extend(&mut self, _other: &Series) -> PolarsResult<()>;

    /// Filter by boolean mask. This operation clones data.
    fn filter(&self, _filter: &BooleanChunked) -> PolarsResult<Series>;

    /// Take from `self` at the indexes given by `idx`.
    ///
    /// Null values in `idx` become null values in the output array.
    ///
    /// This operation is clone.
    fn take(&self, _indices: &IdxCa) -> PolarsResult<Series>;

    /// Take from `self` at the indexes given by `idx`.
    ///
    /// Null values in `idx` become null values in the output array.
    ///
    /// # Safety
    /// This doesn't check any bounds.
    unsafe fn take_unchecked(&self, _idx: &IdxCa) -> Series;

    /// Take from `self` at the indexes given by `idx`.
    ///
    /// This operation is clone.
    fn take_slice(&self, _indices: &[IdxSize]) -> PolarsResult<Series>;

    /// Take from `self` at the indexes given by `idx`.
    ///
    /// # Safety
    /// This doesn't check any bounds.
    unsafe fn take_slice_unchecked(&self, _idx: &[IdxSize]) -> Series;

    /// Get length of series.
    fn len(&self) -> usize;

    /// Check if Series is empty.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Aggregate all chunks to a contiguous array of memory.
    fn rechunk(&self) -> Series;

    /// Combine the validity (null) bitmaps of all chunks into one bitmap.
    ///
    /// Returns the single chunk's validity directly when there is only one
    /// chunk; `None` when there are no nulls or the series is empty.
    fn rechunk_validity(&self) -> Option<Bitmap> {
        if self.chunks().len() == 1 {
            return self.chunks()[0].validity().cloned();
        }

        if !self.has_nulls() || self.is_empty() {
            return None;
        }

        // Concatenate per-chunk validity; chunks without a validity bitmap
        // are all-valid, so extend with `true`.
        let mut bm = BitmapBuilder::with_capacity(self.len());
        for arr in self.chunks() {
            if let Some(v) = arr.validity() {
                bm.extend_from_bitmap(v);
            } else {
                bm.extend_constant(arr.len(), true);
            }
        }
        bm.into_opt_validity()
    }

    /// Drop all null values and return a new Series.
    fn drop_nulls(&self) -> Series {
        if self.null_count() == 0 {
            Series(self.clone_inner())
        } else {
            self.filter(&self.is_not_null()).unwrap()
        }
    }

    /// Returns the sum of the array as an f64.
    fn _sum_as_f64(&self) -> f64 {
        invalid_operation_panic!(_sum_as_f64, self)
    }

    /// Returns the mean value in the array
    /// Returns an option because the array is nullable.
    fn mean(&self) -> Option<f64> {
        None
    }

    /// Returns the std value in the array
    /// Returns an option because the array is nullable.
    fn std(&self, _ddof: u8) -> Option<f64> {
        None
    }

    /// Returns the var value in the array
    /// Returns an option because the array is nullable.
    fn var(&self, _ddof: u8) -> Option<f64> {
        None
    }

    /// Returns the median value in the array
    /// Returns an option because the array is nullable.
    fn median(&self) -> Option<f64> {
        None
    }

    /// Create a new Series filled with values from the given index.
    ///
    /// # Example
    ///
    /// ```rust
    /// use polars_core::prelude::*;
    /// let s = Series::new("a".into(), [0i32, 1, 8]);
    /// let s2 = s.new_from_index(2, 4);
    /// assert_eq!(Vec::from(s2.i32().unwrap()), &[Some(8), Some(8), Some(8), Some(8)])
    /// ```
    fn new_from_index(&self, _index: usize, _length: usize) -> Series;

    /// Trim all lists of unused start and end elements recursively.
    ///
    /// - `None` if nothing needed to be done.
    /// - `Some(series)` if something changed.
    fn trim_lists_to_normalized_offsets(&self) -> Option<Series> {
        None
    }

    /// Propagate down nulls in nested types.
    ///
    /// - `None` if nothing needed to be done.
    /// - `Some(series)` if something changed.
    fn propagate_nulls(&self) -> Option<Series> {
        None
    }

    // NOTE(review): presumably applies `validity` as the null mask of the
    // result — semantics not evident from this file; confirm against implementors.
    fn deposit(&self, validity: &Bitmap) -> Series;

    /// Find the indices of elements where the null masks are different recursively.
    fn find_validity_mismatch(&self, other: &Series, idxs: &mut Vec<IdxSize>);

    fn cast(&self, _dtype: &DataType, options: CastOptions) -> PolarsResult<Series>;

    /// Get a single value by index. Don't use this operation for loops as a runtime cast is
    /// needed for every iteration.
    fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {
        polars_ensure!(index < self.len(), oob = index, self.len());
        // SAFETY: Just did bounds check
        let value = unsafe { self.get_unchecked(index) };
        Ok(value)
    }

    /// Get a single value by index. Don't use this operation for loops as a runtime cast is
    /// needed for every iteration.
    ///
    /// This may refer to physical types
    ///
    /// # Safety
    /// Does not do any bounds checking
    unsafe fn get_unchecked(&self, _index: usize) -> AnyValue<'_>;

    fn sort_with(&self, _options: SortOptions) -> PolarsResult<Series> {
        polars_bail!(opq = sort_with, self._dtype());
    }

    /// Retrieve the indexes needed for a sort.
    #[allow(unused)]
    fn arg_sort(&self, options: SortOptions) -> IdxCa {
        invalid_operation_panic!(arg_sort, self)
    }

    /// Count the null values.
    fn null_count(&self) -> usize;

    /// Return whether any of the chunks in this [`ChunkedArray`] have nulls.
    fn has_nulls(&self) -> bool;

    /// Get unique values in the Series.
    fn unique(&self) -> PolarsResult<Series> {
        polars_bail!(opq = unique, self._dtype());
    }

    /// Get the number of unique values in the Series.
    ///
    /// A `null` value also counts as a unique value.
    fn n_unique(&self) -> PolarsResult<usize> {
        polars_bail!(opq = n_unique, self._dtype());
    }

    /// Get first indexes of unique values.
    fn arg_unique(&self) -> PolarsResult<IdxCa> {
        polars_bail!(opq = arg_unique, self._dtype());
    }

    /// Get dense ids for each unique value.
    ///
    /// Returns: (n_unique, unique_ids)
    fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)>;

    /// Get a mask of the null values.
    fn is_null(&self) -> BooleanChunked;

    /// Get a mask of the non-null values.
    fn is_not_null(&self) -> BooleanChunked;

    /// Return a Series in reversed order
    fn reverse(&self) -> Series;

    /// Rechunk and return a pointer to the start of the Series.
    /// Only implemented for numeric types
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        polars_bail!(opq = as_single_ptr, self._dtype());
    }

    /// Shift the values by a given period and fill the parts that will be empty due to this operation
    /// with `Nones`.
    ///
    /// *NOTE: If you want to fill the Nones with a value use the
    /// [`shift` operation on `ChunkedArray<T>`](../chunked_array/ops/trait.ChunkShift.html).*
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example() -> PolarsResult<()> {
    ///     let s = Series::new("series".into(), &[1, 2, 3]);
    ///
    ///     let shifted = s.shift(1);
    ///     assert_eq!(Vec::from(shifted.i32()?), &[None, Some(1), Some(2)]);
    ///
    ///     let shifted = s.shift(-1);
    ///     assert_eq!(Vec::from(shifted.i32()?), &[Some(2), Some(3), None]);
    ///
    ///     let shifted = s.shift(2);
    ///     assert_eq!(Vec::from(shifted.i32()?), &[None, None, Some(1)]);
    ///
    ///     Ok(())
    /// }
    /// example();
    /// ```
    fn shift(&self, _periods: i64) -> Series;

    /// Get the sum of the Series as a new Scalar.
    ///
    /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
    /// first cast to `Int64` to prevent overflow issues.
    fn sum_reduce(&self) -> PolarsResult<Scalar> {
        polars_bail!(opq = sum, self._dtype());
    }
    /// Get the max of the Series as a new Series of length 1.
    fn max_reduce(&self) -> PolarsResult<Scalar> {
        polars_bail!(opq = max, self._dtype());
    }
    /// Get the min of the Series as a new Series of length 1.
    fn min_reduce(&self) -> PolarsResult<Scalar> {
        polars_bail!(opq = min, self._dtype());
    }
    /// Get the median of the Series as a new Series of length 1.
    fn median_reduce(&self) -> PolarsResult<Scalar> {
        polars_bail!(opq = median, self._dtype());
    }
    /// Get the mean of the Series as a new Scalar.
    fn mean_reduce(&self) -> PolarsResult<Scalar> {
        polars_bail!(opq = mean, self._dtype());
    }
    /// Get the variance of the Series as a new Series of length 1.
    fn var_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
        polars_bail!(opq = var, self._dtype());
    }
    /// Get the standard deviation of the Series as a new Series of length 1.
    fn std_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
        polars_bail!(opq = std, self._dtype());
    }
    /// Get the quantile of the Series as a new Series of length 1.
    fn quantile_reduce(&self, _quantile: f64, _method: QuantileMethod) -> PolarsResult<Scalar> {
        polars_bail!(opq = quantile, self._dtype());
    }
    /// Get multiple quantiles of the ChunkedArray as a new `List` Scalar
    fn quantiles_reduce(
        &self,
        _quantiles: &[f64],
        _method: QuantileMethod,
    ) -> PolarsResult<Scalar> {
        polars_bail!(opq = quantiles, self._dtype());
    }
    /// Get the bitwise AND of the Series as a new Series of length 1,
    fn and_reduce(&self) -> PolarsResult<Scalar> {
        polars_bail!(opq = and_reduce, self._dtype());
    }
    /// Get the bitwise OR of the Series as a new Series of length 1,
    fn or_reduce(&self) -> PolarsResult<Scalar> {
        polars_bail!(opq = or_reduce, self._dtype());
    }
    /// Get the bitwise XOR of the Series as a new Series of length 1,
    fn xor_reduce(&self) -> PolarsResult<Scalar> {
        polars_bail!(opq = xor_reduce, self._dtype());
    }

    /// Get the first element of the [`Series`] as a [`Scalar`]
    ///
    /// If the [`Series`] is empty, a [`Scalar`] with a [`AnyValue::Null`] is returned.
    fn first(&self) -> Scalar {
        let dt = self.dtype();
        let av = self.get(0).map_or(AnyValue::Null, AnyValue::into_static);

        Scalar::new(dt.clone(), av)
    }

    /// Get the first non-null element of the [`Series`] as a [`Scalar`]
    ///
    /// If the [`Series`] is empty, a [`Scalar`] with a [`AnyValue::Null`] is returned.
    fn first_non_null(&self) -> Scalar {
        let av = if self.len() == 0 {
            AnyValue::Null
        } else {
            // Only scan the chunks when nulls can actually occur.
            let idx = if self.has_nulls() {
                first_non_null(self.chunks().iter().map(|c| c.as_ref())).unwrap_or(0)
            } else {
                0
            };
            self.get(idx).map_or(AnyValue::Null, AnyValue::into_static)
        };
        Scalar::new(self.dtype().clone(), av)
    }

    /// Get the last element of the [`Series`] as a [`Scalar`]
    ///
    /// If the [`Series`] is empty, a [`Scalar`] with a [`AnyValue::Null`] is returned.
    fn last(&self) -> Scalar {
        let dt = self.dtype();
        let av = if self.len() == 0 {
            AnyValue::Null
        } else {
            // SAFETY: len-1 < len if len != 0
            unsafe { self.get_unchecked(self.len() - 1) }.into_static()
        };

        Scalar::new(dt.clone(), av)
    }

    /// Get the last non-null element of the [`Series`] as a [`Scalar`]
    ///
    /// If the [`Series`] is empty, a [`Scalar`] with a [`AnyValue::Null`] is returned.
    fn last_non_null(&self) -> Scalar {
        let n = self.len();
        let av = if n == 0 {
            AnyValue::Null
        } else {
            // Only scan the chunks when nulls can actually occur.
            let idx = if self.has_nulls() {
                last_non_null(self.chunks().iter().map(|c| c.as_ref()), n).unwrap_or(n - 1)
            } else {
                n - 1
            };
            // SAFETY: len-1 < len if len != 0
            unsafe { self.get_unchecked(idx) }.into_static()
        };
        Scalar::new(self.dtype().clone(), av)
    }

    #[cfg(feature = "approx_unique")]
    fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
        polars_bail!(opq = approx_n_unique, self._dtype());
    }

    /// Clone inner ChunkedArray and wrap in a new Arc
    fn clone_inner(&self) -> Arc<dyn SeriesTrait>;

    #[cfg(feature = "object")]
    /// Get the value at this index as a downcastable Any trait ref.
    fn get_object(&self, _index: usize) -> Option<&dyn PolarsObjectSafe> {
        invalid_operation_panic!(get_object, self)
    }

    #[cfg(feature = "object")]
    /// Get the value at this index as a downcastable Any trait ref.
    ///
    /// # Safety
    /// This function doesn't do any bound checks.
    unsafe fn get_object_chunked_unchecked(
        &self,
        _chunk: usize,
        _index: usize,
    ) -> Option<&dyn PolarsObjectSafe> {
        invalid_operation_panic!(get_object_chunked_unchecked, self)
    }

    /// Get a hold of the [`ChunkedArray`], [`Logical`] or `NullChunked` as an `Any` trait
    /// reference.
    fn as_any(&self) -> &dyn Any;

    /// Get a hold of the [`ChunkedArray`], [`Logical`] or `NullChunked` as an `Any` trait mutable
    /// reference.
    fn as_any_mut(&mut self) -> &mut dyn Any;

    /// Get a hold of the [`ChunkedArray`] or `NullChunked` as an `Any` trait reference. This
    /// pierces through `Logical` types to get the underlying physical array.
    fn as_phys_any(&self) -> &dyn Any;

    fn as_arc_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    #[cfg(feature = "checked_arithmetic")]
    fn checked_div(&self, _rhs: &Series) -> PolarsResult<Series> {
        polars_bail!(opq = checked_div, self._dtype());
    }

    #[cfg(feature = "rolling_window")]
    /// Apply a custom function over a rolling/ moving window of the array.
    /// This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
    fn rolling_map(
        &self,
        _f: &dyn Fn(&Series) -> PolarsResult<Series>,
        _options: RollingOptionsFixedWindow,
    ) -> PolarsResult<Series> {
        polars_bail!(opq = rolling_map, self._dtype());
    }
}

impl dyn SeriesTrait + '_ {
    /// Downcast to a concrete [`ChunkedArray<T>`].
    ///
    /// Errors when `T`'s static dtype does not match this series' dtype.
    pub fn unpack<T: PolarsPhysicalType>(&self) -> PolarsResult<&ChunkedArray<T>> {
        polars_ensure!(&T::get_static_dtype() == self.dtype(), unpack);
        Ok(self.as_ref())
    }
}