// Path: blob/main/crates/polars-core/src/frame/column/mod.rs
// (source-mirror metadata: 8446 views)
use std::borrow::Cow;12use arrow::bitmap::{Bitmap, BitmapBuilder};3use arrow::trusted_len::TrustMyLength;4use num_traits::{Num, NumCast};5use polars_compute::rolling::QuantileMethod;6use polars_error::PolarsResult;7use polars_utils::aliases::PlSeedableRandomStateQuality;8use polars_utils::index::check_bounds;9use polars_utils::pl_str::PlSmallStr;10pub use scalar::ScalarColumn;1112use self::compare_inner::{TotalEqInner, TotalOrdInner};13use self::gather::check_bounds_ca;14use self::series::SeriesColumn;15use crate::chunked_array::cast::CastOptions;16use crate::chunked_array::flags::StatisticsFlags;17use crate::datatypes::ReshapeDimension;18use crate::prelude::*;19use crate::series::{BitRepr, IsSorted, SeriesPhysIter};20use crate::utils::{Container, slice_offsets};21use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};2223mod arithmetic;24mod compare;25mod scalar;26mod series;2728/// A column within a [`DataFrame`].29///30/// This is lazily initialized to a [`Series`] with methods like31/// [`as_materialized_series`][Column::as_materialized_series] and32/// [`take_materialized_series`][Column::take_materialized_series].33///34/// Currently, there are two ways to represent a [`Column`].35/// 1. A [`Series`] of values36/// 2. 
A [`ScalarColumn`] that repeats a single [`Scalar`]37#[derive(Debug, Clone)]38#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]39#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]40pub enum Column {41Series(SeriesColumn),42Scalar(ScalarColumn),43}4445/// Convert `Self` into a [`Column`]46pub trait IntoColumn: Sized {47fn into_column(self) -> Column;48}4950impl Column {51#[inline]52#[track_caller]53pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self54where55Phantom: ?Sized,56Series: NamedFrom<T, Phantom>,57{58Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))59}6061#[inline]62pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {63Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)64}6566#[inline]67pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {68Self::Scalar(ScalarColumn::new(name, scalar, length))69}7071pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {72let Ok(length) = IdxSize::try_from(length) else {73polars_bail!(74ComputeError:75"row index length {} overflows IdxSize::MAX ({})",76length,77IdxSize::MAX,78)79};8081if offset.checked_add(length).is_none() {82polars_bail!(83ComputeError:84"row index with offset {} overflows on dataframe with height {}",85offset, length86)87}8889let range = offset..offset + length;9091let mut ca = IdxCa::from_vec(name, range.collect());92ca.set_sorted_flag(IsSorted::Ascending);93let col = ca.into_series().into();9495Ok(col)96}9798// # Materialize99/// Get a reference to a [`Series`] for this [`Column`]100///101/// This may need to materialize the [`Series`] on the first invocation for a specific column.102#[inline]103pub fn as_materialized_series(&self) -> &Series {104match self {105Column::Series(s) => s,106Column::Scalar(s) => s.as_materialized_series(),107}108}109110/// If the memory repr of this Column is a scalar, a unit-length Series will111/// be 
returned.112#[inline]113pub fn as_materialized_series_maintain_scalar(&self) -> Series {114match self {115Column::Scalar(s) => s.as_single_value_series(),116v => v.as_materialized_series().clone(),117}118}119120/// Returns the backing `Series` for the values of this column.121///122/// * For `Column::Series` columns, simply returns the inner `Series`.123/// * For `Column::Scalar` columns, returns an empty or unit length series.124///125/// # Note126/// This method is safe to use. However, care must be taken when operating on the returned127/// `Series` to ensure result correctness. E.g. It is suitable to perform elementwise operations128/// on it, however e.g. aggregations will return unspecified results.129pub fn _get_backing_series(&self) -> Series {130match self {131Column::Series(s) => (**s).clone(),132Column::Scalar(s) => s.as_single_value_series(),133}134}135136/// Constructs a new `Column` of the same variant as `self` from a backing `Series` representing137/// the values.138///139/// # Panics140/// Panics if:141/// * `self` is `Column::Series` and the length of `new_s` does not match that of `self`.142/// * `self` is `Column::Scalar` and if either:143/// * `self` is not empty and `new_s` is not of unit length.144/// * `self` is empty and `new_s` is not empty.145pub fn _to_new_from_backing(&self, new_s: Series) -> Self {146match self {147Column::Series(s) => {148assert_eq!(new_s.len(), s.len());149Column::Series(SeriesColumn::new(new_s))150},151Column::Scalar(s) => {152assert_eq!(new_s.len(), s.as_single_value_series().len());153Column::Scalar(ScalarColumn::from_single_value_series(new_s, self.len()))154},155}156}157158/// Turn [`Column`] into a [`Column::Series`].159///160/// This may need to materialize the [`Series`] on the first invocation for a specific column.161#[inline]162pub fn into_materialized_series(&mut self) -> &mut Series {163match self {164Column::Series(s) => s,165Column::Scalar(s) => {166let series = 
std::mem::replace(167s,168ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),169)170.take_materialized_series();171*self = Column::Series(series.into());172let Column::Series(s) = self else {173unreachable!();174};175s176},177}178}179/// Take [`Series`] from a [`Column`]180///181/// This may need to materialize the [`Series`] on the first invocation for a specific column.182#[inline]183pub fn take_materialized_series(self) -> Series {184match self {185Column::Series(s) => s.take(),186Column::Scalar(s) => s.take_materialized_series(),187}188}189190#[inline]191pub fn dtype(&self) -> &DataType {192match self {193Column::Series(s) => s.dtype(),194Column::Scalar(s) => s.dtype(),195}196}197198#[inline]199pub fn field(&self) -> Cow<'_, Field> {200match self {201Column::Series(s) => s.field(),202Column::Scalar(s) => match s.lazy_as_materialized_series() {203None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),204Some(s) => s.field(),205},206}207}208209#[inline]210pub fn name(&self) -> &PlSmallStr {211match self {212Column::Series(s) => s.name(),213Column::Scalar(s) => s.name(),214}215}216217#[inline]218pub fn len(&self) -> usize {219match self {220Column::Series(s) => s.len(),221Column::Scalar(s) => s.len(),222}223}224225#[inline]226pub fn with_name(mut self, name: PlSmallStr) -> Column {227self.rename(name);228self229}230231#[inline]232pub fn rename(&mut self, name: PlSmallStr) {233match self {234Column::Series(s) => _ = s.rename(name),235Column::Scalar(s) => _ = s.rename(name),236}237}238239// # Downcasting240#[inline]241pub fn as_series(&self) -> Option<&Series> {242match self {243Column::Series(s) => Some(s),244_ => None,245}246}247#[inline]248pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {249match self {250Column::Scalar(s) => Some(s),251_ => None,252}253}254#[inline]255pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {256match self {257Column::Scalar(s) => Some(s),258_ => None,259}260}261262// # Try to Chunked 
Arrays263pub fn try_bool(&self) -> Option<&BooleanChunked> {264self.as_materialized_series().try_bool()265}266pub fn try_i8(&self) -> Option<&Int8Chunked> {267self.as_materialized_series().try_i8()268}269pub fn try_i16(&self) -> Option<&Int16Chunked> {270self.as_materialized_series().try_i16()271}272pub fn try_i32(&self) -> Option<&Int32Chunked> {273self.as_materialized_series().try_i32()274}275pub fn try_i64(&self) -> Option<&Int64Chunked> {276self.as_materialized_series().try_i64()277}278pub fn try_u8(&self) -> Option<&UInt8Chunked> {279self.as_materialized_series().try_u8()280}281pub fn try_u16(&self) -> Option<&UInt16Chunked> {282self.as_materialized_series().try_u16()283}284pub fn try_u32(&self) -> Option<&UInt32Chunked> {285self.as_materialized_series().try_u32()286}287pub fn try_u64(&self) -> Option<&UInt64Chunked> {288self.as_materialized_series().try_u64()289}290#[cfg(feature = "dtype-u128")]291pub fn try_u128(&self) -> Option<&UInt128Chunked> {292self.as_materialized_series().try_u128()293}294#[cfg(feature = "dtype-f16")]295pub fn try_f16(&self) -> Option<&Float16Chunked> {296self.as_materialized_series().try_f16()297}298pub fn try_f32(&self) -> Option<&Float32Chunked> {299self.as_materialized_series().try_f32()300}301pub fn try_f64(&self) -> Option<&Float64Chunked> {302self.as_materialized_series().try_f64()303}304pub fn try_str(&self) -> Option<&StringChunked> {305self.as_materialized_series().try_str()306}307pub fn try_list(&self) -> Option<&ListChunked> {308self.as_materialized_series().try_list()309}310pub fn try_binary(&self) -> Option<&BinaryChunked> {311self.as_materialized_series().try_binary()312}313pub fn try_idx(&self) -> Option<&IdxCa> {314self.as_materialized_series().try_idx()315}316pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {317self.as_materialized_series().try_binary_offset()318}319#[cfg(feature = "dtype-datetime")]320pub fn try_datetime(&self) -> Option<&DatetimeChunked> 
{321self.as_materialized_series().try_datetime()322}323#[cfg(feature = "dtype-struct")]324pub fn try_struct(&self) -> Option<&StructChunked> {325self.as_materialized_series().try_struct()326}327#[cfg(feature = "dtype-decimal")]328pub fn try_decimal(&self) -> Option<&DecimalChunked> {329self.as_materialized_series().try_decimal()330}331#[cfg(feature = "dtype-array")]332pub fn try_array(&self) -> Option<&ArrayChunked> {333self.as_materialized_series().try_array()334}335#[cfg(feature = "dtype-categorical")]336pub fn try_cat<T: PolarsCategoricalType>(&self) -> Option<&CategoricalChunked<T>> {337self.as_materialized_series().try_cat::<T>()338}339#[cfg(feature = "dtype-categorical")]340pub fn try_cat8(&self) -> Option<&Categorical8Chunked> {341self.as_materialized_series().try_cat8()342}343#[cfg(feature = "dtype-categorical")]344pub fn try_cat16(&self) -> Option<&Categorical16Chunked> {345self.as_materialized_series().try_cat16()346}347#[cfg(feature = "dtype-categorical")]348pub fn try_cat32(&self) -> Option<&Categorical32Chunked> {349self.as_materialized_series().try_cat32()350}351#[cfg(feature = "dtype-date")]352pub fn try_date(&self) -> Option<&DateChunked> {353self.as_materialized_series().try_date()354}355#[cfg(feature = "dtype-duration")]356pub fn try_duration(&self) -> Option<&DurationChunked> {357self.as_materialized_series().try_duration()358}359360// # To Chunked Arrays361pub fn bool(&self) -> PolarsResult<&BooleanChunked> {362self.as_materialized_series().bool()363}364pub fn i8(&self) -> PolarsResult<&Int8Chunked> {365self.as_materialized_series().i8()366}367pub fn i16(&self) -> PolarsResult<&Int16Chunked> {368self.as_materialized_series().i16()369}370pub fn i32(&self) -> PolarsResult<&Int32Chunked> {371self.as_materialized_series().i32()372}373pub fn i64(&self) -> PolarsResult<&Int64Chunked> {374self.as_materialized_series().i64()375}376#[cfg(feature = "dtype-i128")]377pub fn i128(&self) -> PolarsResult<&Int128Chunked> 
{378self.as_materialized_series().i128()379}380pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {381self.as_materialized_series().u8()382}383pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {384self.as_materialized_series().u16()385}386pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {387self.as_materialized_series().u32()388}389pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {390self.as_materialized_series().u64()391}392#[cfg(feature = "dtype-u128")]393pub fn u128(&self) -> PolarsResult<&UInt128Chunked> {394self.as_materialized_series().u128()395}396#[cfg(feature = "dtype-f16")]397pub fn f16(&self) -> PolarsResult<&Float16Chunked> {398self.as_materialized_series().f16()399}400pub fn f32(&self) -> PolarsResult<&Float32Chunked> {401self.as_materialized_series().f32()402}403pub fn f64(&self) -> PolarsResult<&Float64Chunked> {404self.as_materialized_series().f64()405}406pub fn str(&self) -> PolarsResult<&StringChunked> {407self.as_materialized_series().str()408}409pub fn list(&self) -> PolarsResult<&ListChunked> {410self.as_materialized_series().list()411}412pub fn binary(&self) -> PolarsResult<&BinaryChunked> {413self.as_materialized_series().binary()414}415pub fn idx(&self) -> PolarsResult<&IdxCa> {416self.as_materialized_series().idx()417}418pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {419self.as_materialized_series().binary_offset()420}421#[cfg(feature = "dtype-datetime")]422pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {423self.as_materialized_series().datetime()424}425#[cfg(feature = "dtype-struct")]426pub fn struct_(&self) -> PolarsResult<&StructChunked> {427self.as_materialized_series().struct_()428}429#[cfg(feature = "dtype-decimal")]430pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {431self.as_materialized_series().decimal()432}433#[cfg(feature = "dtype-array")]434pub fn array(&self) -> PolarsResult<&ArrayChunked> {435self.as_materialized_series().array()436}437#[cfg(feature = "dtype-categorical")]438pub fn cat<T: 
PolarsCategoricalType>(&self) -> PolarsResult<&CategoricalChunked<T>> {439self.as_materialized_series().cat::<T>()440}441#[cfg(feature = "dtype-categorical")]442pub fn cat8(&self) -> PolarsResult<&Categorical8Chunked> {443self.as_materialized_series().cat8()444}445#[cfg(feature = "dtype-categorical")]446pub fn cat16(&self) -> PolarsResult<&Categorical16Chunked> {447self.as_materialized_series().cat16()448}449#[cfg(feature = "dtype-categorical")]450pub fn cat32(&self) -> PolarsResult<&Categorical32Chunked> {451self.as_materialized_series().cat32()452}453#[cfg(feature = "dtype-date")]454pub fn date(&self) -> PolarsResult<&DateChunked> {455self.as_materialized_series().date()456}457#[cfg(feature = "dtype-duration")]458pub fn duration(&self) -> PolarsResult<&DurationChunked> {459self.as_materialized_series().duration()460}461462// # Casting463pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {464match self {465Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),466Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),467}468}469pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {470match self {471Column::Series(s) => s.strict_cast(dtype).map(Column::from),472Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),473}474}475pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {476match self {477Column::Series(s) => s.cast(dtype).map(Column::from),478Column::Scalar(s) => s.cast(dtype).map(Column::from),479}480}481/// # Safety482///483/// This can lead to invalid memory access in downstream code.484pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {485match self {486Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),487Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),488}489}490491#[must_use]492pub fn clear(&self) -> Self {493match self {494Column::Series(s) => 
s.clear().into(),495Column::Scalar(s) => s.resize(0).into(),496}497}498499#[inline]500pub fn shrink_to_fit(&mut self) {501match self {502Column::Series(s) => s.shrink_to_fit(),503Column::Scalar(_) => {},504}505}506507#[inline]508pub fn new_from_index(&self, index: usize, length: usize) -> Self {509if index >= self.len() {510return Self::full_null(self.name().clone(), length, self.dtype());511}512513match self {514Column::Series(s) => {515// SAFETY: Bounds check done before.516let av = unsafe { s.get_unchecked(index) };517let scalar = Scalar::new(self.dtype().clone(), av.into_static());518Self::new_scalar(self.name().clone(), scalar, length)519},520Column::Scalar(s) => s.resize(length).into(),521}522}523524#[inline]525pub fn has_nulls(&self) -> bool {526match self {527Self::Series(s) => s.has_nulls(),528Self::Scalar(s) => s.has_nulls(),529}530}531532#[inline]533pub fn is_null(&self) -> BooleanChunked {534match self {535Self::Series(s) => s.is_null(),536Self::Scalar(s) => {537BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())538},539}540}541#[inline]542pub fn is_not_null(&self) -> BooleanChunked {543match self {544Self::Series(s) => s.is_not_null(),545Self::Scalar(s) => {546BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())547},548}549}550551pub fn to_physical_repr(&self) -> Column {552// @scalar-opt553self.as_materialized_series()554.to_physical_repr()555.into_owned()556.into()557}558/// # Safety559///560/// This can lead to invalid memory access in downstream code.561pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {562// @scalar-opt563self.as_materialized_series()564.from_physical_unchecked(dtype)565.map(Column::from)566}567568pub fn head(&self, length: Option<usize>) -> Column {569let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);570let len = usize::min(len, self.len());571self.slice(0, len)572}573pub fn tail(&self, length: Option<usize>) -> Column {574let len = 
length.unwrap_or(TAIL_DEFAULT_LENGTH);575let len = usize::min(len, self.len());576debug_assert!(len <= i64::MAX as usize);577self.slice(-(len as i64), len)578}579pub fn slice(&self, offset: i64, length: usize) -> Column {580match self {581Column::Series(s) => s.slice(offset, length).into(),582Column::Scalar(s) => {583let (_, length) = slice_offsets(offset, length, s.len());584s.resize(length).into()585},586}587}588589pub fn split_at(&self, offset: i64) -> (Column, Column) {590// @scalar-opt591let (l, r) = self.as_materialized_series().split_at(offset);592(l.into(), r.into())593}594595#[inline]596pub fn null_count(&self) -> usize {597match self {598Self::Series(s) => s.null_count(),599Self::Scalar(s) if s.scalar().is_null() => s.len(),600Self::Scalar(_) => 0,601}602}603604pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {605check_bounds_ca(indices, self.len() as IdxSize)?;606Ok(unsafe { self.take_unchecked(indices) })607}608pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {609check_bounds(indices, self.len() as IdxSize)?;610Ok(unsafe { self.take_slice_unchecked(indices) })611}612/// # Safety613///614/// No bounds on the indexes are performed.615pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {616debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());617618match self {619Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),620Self::Scalar(s) => {621let idxs_length = indices.len();622let idxs_null_count = indices.null_count();623624let scalar = ScalarColumn::from_single_value_series(625s.as_single_value_series().take_unchecked(&IdxCa::new(626indices.name().clone(),627&[0][..s.len().min(1)],628)),629idxs_length,630);631632// We need to make sure that null values in `idx` become null values in the result633if idxs_null_count == 0 || scalar.has_nulls() {634scalar.into_column()635} else if idxs_null_count == idxs_length {636scalar.into_nulls().into_column()637} else {638let validity = 
indices.rechunk_validity();639let series = scalar.take_materialized_series();640let name = series.name().clone();641let dtype = series.dtype().clone();642let mut chunks = series.into_chunks();643assert_eq!(chunks.len(), 1);644chunks[0] = chunks[0].with_validity(validity);645unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }646.into_column()647}648},649}650}651/// # Safety652///653/// No bounds on the indexes are performed.654pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {655debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());656657match self {658Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),659Self::Scalar(s) => ScalarColumn::from_single_value_series(660s.as_single_value_series()661.take_slice_unchecked(&[0][..s.len().min(1)]),662indices.len(),663)664.into(),665}666}667668/// General implementation for aggregation where a non-missing scalar would map to itself.669#[inline(always)]670#[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]671fn agg_with_scalar_identity(672&self,673groups: &GroupsType,674series_agg: impl Fn(&Series, &GroupsType) -> Series,675) -> Column {676match self {677Column::Series(s) => series_agg(s, groups).into_column(),678Column::Scalar(s) => {679if s.is_empty() {680return series_agg(s.as_materialized_series(), groups).into_column();681}682683// We utilize the aggregation on Series to see:684// 1. the output datatype of the aggregation685// 2. 
whether this aggregation is even defined686let series_aggregation = series_agg(687&s.as_single_value_series(),688// @NOTE: this group is always valid since s is non-empty.689&GroupsType::new_slice(vec![[0, 1]], false, true),690);691692// If the aggregation is not defined, just return all nulls.693if series_aggregation.has_nulls() {694return Self::new_scalar(695series_aggregation.name().clone(),696Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),697groups.len(),698);699}700701let mut scalar_col = s.resize(groups.len());702// The aggregation might change the type (e.g. mean changes int -> float), so we do703// a cast here to the output type.704if series_aggregation.dtype() != s.dtype() {705scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();706}707708let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {709// Fast path: no empty groups. keep the scalar intact.710return scalar_col.into_column();711};712713// All empty groups produce a *missing* or `null` value.714let mut validity = BitmapBuilder::with_capacity(groups.len());715validity.extend_constant(first_empty_idx, true);716// SAFETY: We trust the length of this iterator.717let iter = unsafe {718TrustMyLength::new(719groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),720groups.len() - first_empty_idx,721)722};723validity.extend_trusted_len_iter(iter);724725let mut s = scalar_col.take_materialized_series().rechunk();726// SAFETY: We perform a compute_len afterwards.727let chunks = unsafe { s.chunks_mut() };728let arr = &mut chunks[0];729*arr = arr.with_validity(validity.into_opt_validity());730s.compute_len();731732s.into_column()733},734}735}736737/// # Safety738///739/// Does no bounds checks, groups must be correct.740#[cfg(feature = "algorithm_group_by")]741pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {742self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_min(g) })743}744745/// # Safety746///747/// Does no bounds checks, groups must 
be correct.748#[cfg(feature = "algorithm_group_by")]749pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {750self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_max(g) })751}752753/// # Safety754///755/// Does no bounds checks, groups must be correct.756#[cfg(feature = "algorithm_group_by")]757pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {758self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_mean(g) })759}760761/// # Safety762///763/// Does no bounds checks, groups must be correct.764#[cfg(feature = "algorithm_group_by")]765pub unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Self {766match self {767Column::Series(s) => unsafe { Column::from(s.agg_arg_min(groups)) },768Column::Scalar(sc) => {769let scalar = if sc.is_empty() || sc.has_nulls() {770Scalar::null(IDX_DTYPE)771} else {772Scalar::new_idxsize(0)773};774Column::new_scalar(self.name().clone(), scalar, 1)775},776}777}778779/// # Safety780///781/// Does no bounds checks, groups must be correct.782#[cfg(feature = "algorithm_group_by")]783pub unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Self {784match self {785Column::Series(s) => unsafe { Column::from(s.agg_arg_max(groups)) },786Column::Scalar(sc) => {787let scalar = if sc.is_empty() || sc.has_nulls() {788Scalar::null(IDX_DTYPE)789} else {790Scalar::new_idxsize(0)791};792Column::new_scalar(self.name().clone(), scalar, 1)793},794}795}796797/// # Safety798///799/// Does no bounds checks, groups must be correct.800#[cfg(feature = "algorithm_group_by")]801pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {802// @scalar-opt803unsafe { self.as_materialized_series().agg_sum(groups) }.into()804}805806/// # Safety807///808/// Does no bounds checks, groups must be correct.809#[cfg(feature = "algorithm_group_by")]810pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {811self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_first(g) })812}813814/// # Safety815///816/// Does no bounds checks, groups 
must be correct.817#[cfg(feature = "algorithm_group_by")]818pub unsafe fn agg_first_non_null(&self, groups: &GroupsType) -> Self {819self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_first_non_null(g) })820}821822/// # Safety823///824/// Does no bounds checks, groups must be correct.825#[cfg(feature = "algorithm_group_by")]826pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {827self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_last(g) })828}829830/// # Safety831///832/// Does no bounds checks, groups must be correct.833#[cfg(feature = "algorithm_group_by")]834pub unsafe fn agg_last_non_null(&self, groups: &GroupsType) -> Self {835self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_last_non_null(g) })836}837838/// # Safety839///840/// Does no bounds checks, groups must be correct.841#[cfg(feature = "algorithm_group_by")]842pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {843// @scalar-opt844unsafe { self.as_materialized_series().agg_n_unique(groups) }.into()845}846847/// # Safety848///849/// Does no bounds checks, groups must be correct.850#[cfg(feature = "algorithm_group_by")]851pub unsafe fn agg_quantile(852&self,853groups: &GroupsType,854quantile: f64,855method: QuantileMethod,856) -> Self {857// @scalar-opt858859unsafe {860self.as_materialized_series()861.agg_quantile(groups, quantile, method)862}863.into()864}865866/// # Safety867///868/// Does no bounds checks, groups must be correct.869#[cfg(feature = "algorithm_group_by")]870pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {871self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_median(g) })872}873874/// # Safety875///876/// Does no bounds checks, groups must be correct.877#[cfg(feature = "algorithm_group_by")]878pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {879// @scalar-opt880unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into()881}882883/// # Safety884///885/// Does no bounds checks, groups must 
be correct.886#[cfg(feature = "algorithm_group_by")]887pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {888// @scalar-opt889unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()890}891892/// # Safety893///894/// Does no bounds checks, groups must be correct.895#[cfg(feature = "algorithm_group_by")]896pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {897// @scalar-opt898unsafe { self.as_materialized_series().agg_list(groups) }.into()899}900901/// # Safety902///903/// Does no bounds checks, groups must be correct.904#[cfg(feature = "algorithm_group_by")]905pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {906// @scalar-opt907unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()908}909910/// # Safety911///912/// Does no bounds checks, groups must be correct.913#[cfg(feature = "bitwise")]914pub unsafe fn agg_and(&self, groups: &GroupsType) -> Self {915self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_and(g) })916}917/// # Safety918///919/// Does no bounds checks, groups must be correct.920#[cfg(feature = "bitwise")]921pub unsafe fn agg_or(&self, groups: &GroupsType) -> Self {922self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_or(g) })923}924/// # Safety925///926/// Does no bounds checks, groups must be correct.927#[cfg(feature = "bitwise")]928pub unsafe fn agg_xor(&self, groups: &GroupsType) -> Self {929// @scalar-opt930unsafe { self.as_materialized_series().agg_xor(groups) }.into()931}932933pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {934Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)935}936937pub fn is_empty(&self) -> bool {938self.len() == 0939}940941pub fn reverse(&self) -> Column {942match self {943Column::Series(s) => s.reverse().into(),944Column::Scalar(_) => self.clone(),945}946}947948pub fn equals(&self, other: &Column) -> bool {949// 
@scalar-opt950self.as_materialized_series()951.equals(other.as_materialized_series())952}953954pub fn equals_missing(&self, other: &Column) -> bool {955// @scalar-opt956self.as_materialized_series()957.equals_missing(other.as_materialized_series())958}959960pub fn set_sorted_flag(&mut self, sorted: IsSorted) {961// @scalar-opt962match self {963Column::Series(s) => s.set_sorted_flag(sorted),964Column::Scalar(_) => {},965}966}967968pub fn get_flags(&self) -> StatisticsFlags {969match self {970Column::Series(s) => s.get_flags(),971Column::Scalar(_) => {972StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST973},974}975}976977/// Returns whether the flags were set978pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {979match self {980Column::Series(s) => {981s.set_flags(flags);982true983},984Column::Scalar(_) => false,985}986}987988pub fn vec_hash(989&self,990build_hasher: PlSeedableRandomStateQuality,991buf: &mut Vec<u64>,992) -> PolarsResult<()> {993// @scalar-opt?994self.as_materialized_series().vec_hash(build_hasher, buf)995}996997pub fn vec_hash_combine(998&self,999build_hasher: PlSeedableRandomStateQuality,1000hashes: &mut [u64],1001) -> PolarsResult<()> {1002// @scalar-opt?1003self.as_materialized_series()1004.vec_hash_combine(build_hasher, hashes)1005}10061007pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {1008// @scalar-opt1009self.into_materialized_series()1010.append(other.as_materialized_series())?;1011Ok(self)1012}1013pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {1014self.into_materialized_series()1015.append_owned(other.take_materialized_series())?;1016Ok(self)1017}10181019pub fn arg_sort(&self, options: SortOptions) -> IdxCa {1020if self.is_empty() {1021return IdxCa::from_vec(self.name().clone(), Vec::new());1022}10231024if self.null_count() == self.len() {1025// We might need to maintain order so just respect the descending parameter.1026let values = if options.descending 
{
                (0..self.len() as IdxSize).rev().collect()
            } else {
                (0..self.len() as IdxSize).collect()
            };

            return IdxCa::from_vec(self.name().clone(), values);
        }

        // Only take the fast path below when the column carries a sortedness flag.
        let is_sorted = self.is_sorted_flag();
        if matches!(is_sorted, IsSorted::Not) {
            return self.as_materialized_series().arg_sort(options);
        }

        // Fast path: the data is sorted.
        let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
        let invert = options.descending != is_sorted_dsc;

        let mut values = Vec::with_capacity(self.len());

        // Push the indices `start..end` (possibly inverted) onto `values`.
        #[inline(never)]
        fn extend(
            start: IdxSize,
            end: IdxSize,
            slf: &Column,
            values: &mut Vec<IdxSize>,
            is_only_nulls: bool,
            invert: bool,
            maintain_order: bool,
        ) {
            debug_assert!(start <= end);
            debug_assert!(start as usize <= slf.len());
            debug_assert!(end as usize <= slf.len());

            if !invert || is_only_nulls {
                values.extend(start..end);
                return;
            }

            // If we don't have to maintain order but we have to invert. Just flip it around.
            if !maintain_order {
                values.extend((start..end).rev());
                return;
            }

            // If we want to maintain order but we also need to invert, we need to invert
            // per group of items.
            //
            // @NOTE: Since the column is sorted, arg_unique can also take a fast path and
            // just do a single traversal.
            let arg_unique = slf
                .slice(start as i64, (end - start) as usize)
                .arg_unique()
                .unwrap();

            assert!(!arg_unique.has_nulls());

            let num_unique = arg_unique.len();

            // Fast path: all items are unique.
            if num_unique == (end - start) as usize {
                values.extend((start..end).rev());
                return;
            }

            // Fast path: all items are equal, so the identity order already maintains order.
            if num_unique == 1 {
                values.extend(start..end);
                return;
            }

            // Walk the unique-value start offsets back-to-front, emitting each
            // run of equal values in forward order so ties keep their original
            // relative order.
            let mut prev_idx = end - start;
            for chunk in arg_unique.downcast_iter() {
                for &idx in chunk.values().as_slice().iter().rev() {
                    values.extend(start + idx..start + prev_idx);
                    prev_idx = idx;
                }
            }
        }
        macro_rules! extend {
            ($start:expr, $end:expr) => {
                extend!($start, $end, is_only_nulls = false);
            };
            ($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
                extend(
                    $start,
                    $end,
                    self,
                    &mut values,
                    $is_only_nulls,
                    invert,
                    options.maintain_order,
                );
            };
        }

        let length = self.len() as IdxSize;
        let null_count = self.null_count() as IdxSize;

        if null_count == 0 {
            extend!(0, length);
        } else {
            // Sorted data keeps its nulls contiguous at one end; probe the last
            // element to learn which end.
            let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
            match (options.nulls_last, has_nulls_last) {
                (true, true) => {
                    // Current: Nulls last, Wanted: Nulls last
                    extend!(0, length - null_count);
                    extend!(length - null_count, length, is_only_nulls = true);
                },
                (true, false) => {
                    // Current: Nulls first, Wanted: Nulls last
                    extend!(null_count, length);
                    extend!(0, null_count, is_only_nulls = true);
                },
                (false, true) => {
                    // Current: Nulls last, Wanted: Nulls first
                    extend!(length - null_count, length, is_only_nulls = true);
                    extend!(0, length - null_count);
                },
                (false, false) => {
                    // Current: Nulls first, Wanted: Nulls first
                    extend!(0, null_count, is_only_nulls = true);
                    extend!(null_count, length);
                },
            }
        }

        // @NOTE: This can theoretically be pushed into the previous operation but is it
        // really worth it... probably not...
        if let Some(limit) = options.limit {
            let limit = limit.min(length);
            values.truncate(limit as usize);
        }

        IdxCa::from_vec(self.name().clone(), values)
    }

    /// Indices that would sort this column as the first of the `by` keys.
    pub fn arg_sort_multiple(
        &self,
        by: &[Column],
        options: &SortMultipleOptions,
    ) -> PolarsResult<IdxCa> {
        // @scalar-opt
        self.as_materialized_series().arg_sort_multiple(by, options)
    }

    /// Index of the first occurrence of every unique value.
    pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
        match self {
            // An empty column has no unique values, so no indices either;
            // `vec![0]` would point out of bounds for a length-0 column.
            Column::Scalar(s) if s.is_empty() => {
                Ok(IdxCa::new_vec(s.name().clone(), Vec::new()))
            },
            // A non-empty scalar column has exactly one unique value, at index 0.
            Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
            _ => self.as_materialized_series().arg_unique(),
        }
    }

    /// The bit representation of the physical values, if one exists.
    pub fn bit_repr(&self) -> Option<BitRepr> {
        // @scalar-opt
        self.as_materialized_series().bit_repr()
    }

    /// Wrap this column into a single-column [`DataFrame`].
    pub fn into_frame(self) -> DataFrame {
        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
        unsafe { DataFrame::new_unchecked(self.len(), vec![self]) }
    }

    /// Extend this column in place with the values of `other`.
    pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
        // @scalar-opt
        self.into_materialized_series()
            .extend(other.as_materialized_series())?;
        Ok(self)
    }

    /// Return a single-chunk version of this column.
    pub fn rechunk(&self) -> Column {
        match self {
            Column::Series(s) => s.rechunk().into(),
            Column::Scalar(s) => {
                // A scalar column may cache a materialized series with more
                // than one chunk; drop that cache by rebuilding the scalar.
                if s.lazy_as_materialized_series()
                    .filter(|x| x.n_chunks() > 1)
                    .is_some()
                {
                    Column::Scalar(ScalarColumn::new(
                        s.name().clone(),
                        s.scalar().clone(),
                        s.len(),
                    ))
                } else {
                    self.clone()
                }
            },
        }
    }

    /// Explode (flatten) list/array elements into rows.
    pub fn explode(&self, options: ExplodeOptions) -> PolarsResult<Column> {
        self.as_materialized_series()
            .explode(options)
            .map(Column::from)
    }
    pub fn implode(&self)
-> PolarsResult<ListChunked> {
        self.as_materialized_series().implode()
    }

    /// Fill nulls using the given strategy.
    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .fill_null(strategy)
            .map(Column::from)
    }

    /// Element-wise division by `rhs`.
    pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .divide(rhs.as_materialized_series())
            .map(Column::from)
    }

    /// Shift values by `periods`.
    pub fn shift(&self, periods: i64) -> Column {
        // @scalar-opt
        self.as_materialized_series().shift(periods).into()
    }

    /// Where `mask` is true take `self`, otherwise take `other`.
    #[cfg(feature = "zip_with")]
    pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .zip_with(mask, other.as_materialized_series())
            .map(Self::from)
    }

    /// Like [`Column::zip_with`], but both sides must have the same dtype.
    #[cfg(feature = "zip_with")]
    pub fn zip_with_same_type(
        &self,
        mask: &ChunkedArray<BooleanType>,
        other: &Column,
    ) -> PolarsResult<Column> {
        // @scalar-opt
        self.as_materialized_series()
            .zip_with_same_type(mask, other.as_materialized_series())
            .map(Column::from)
    }

    /// Drop all null values.
    pub fn drop_nulls(&self) -> Column {
        match self {
            Column::Series(s) => s.drop_nulls().into_column(),
            Column::Scalar(s) => s.drop_nulls().into_column(),
        }
    }

    /// Packs every element into a list.
    pub fn as_list(&self) -> ListChunked {
        // @scalar-opt
        self.as_materialized_series().as_list()
    }

    /// The sortedness flag. Scalar columns are trivially sorted ascending.
    pub fn is_sorted_flag(&self) -> IsSorted {
        match self {
            Column::Series(s) => s.is_sorted_flag(),
            Column::Scalar(_) => IsSorted::Ascending,
        }
    }

    /// The unique values of this column.
    pub fn unique(&self) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.unique().map(Column::from),
            Column::Scalar(s) => {
                // Run on a unit-length series so the same errors surface as
                // for the series path.
                _ = s.as_single_value_series().unique()?;
                if s.is_empty() {
                    return Ok(s.clone().into_column());
                }

                Ok(s.resize(1).into_column())
            },
        }
    }
    /// The unique values of this column, keeping first-seen order.
    pub fn unique_stable(&self) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.unique_stable().map(Column::from),
            Column::Scalar(s) => {
                // Run on a unit-length series so the same errors surface as
                // for the series path.
                _ = s.as_single_value_series().unique_stable()?;
                if s.is_empty() {
                    return Ok(s.clone().into_column());
                }

                Ok(s.resize(1).into_column())
            },
        }
    }

    /// Reshape into a list column according to `dimensions`.
    pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .reshape_list(dimensions)
            .map(Self::from)
    }

    /// Reshape into a fixed-size-array column according to `dimensions`.
    #[cfg(feature = "dtype-array")]
    pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .reshape_array(dimensions)
            .map(Self::from)
    }

    /// Return this column sorted according to `sort_options`.
    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .sort(sort_options)
            .map(Self::from)
    }

    /// Keep the rows where `filter` is true; a unit-length `filter` is
    /// broadcast over the whole column.
    pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.filter(filter).map(Column::from),
            Column::Scalar(s) => {
                if s.is_empty() {
                    return Ok(s.clone().into_column());
                }

                // Broadcasting
                if filter.len() == 1 {
                    return match filter.get(0) {
                        Some(true) => Ok(s.clone().into_column()),
                        _ => Ok(s.resize(0).into_column()),
                    };
                }

                // Keeping n copies of the same scalar is just a resize to n.
                Ok(s.resize(filter.sum().unwrap() as usize).into_column())
            },
        }
    }

    /// Shuffle values with an optional seed.
    #[cfg(feature = "random")]
    pub fn shuffle(&self, seed: Option<u64>) -> Self {
        // @scalar-opt
        self.as_materialized_series().shuffle(seed).into()
    }

    /// Sample a fraction `frac` of the rows.
    #[cfg(feature = "random")]
    pub fn sample_frac(
        &self,
        frac: f64,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<Self> {
        self.as_materialized_series()
            .sample_frac(frac, with_replacement, shuffle, seed)
            .map(Self::from)
    }

    /// Sample `n` rows.
    #[cfg(feature = "random")]
    pub fn sample_n(
        &self,
        n: usize,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) ->
PolarsResult<Self> {
        self.as_materialized_series()
            .sample_n(n, with_replacement, shuffle, seed)
            .map(Self::from)
    }

    /// Take every `n`-th value starting at `offset`. Errors when `n == 0`.
    pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Column> {
        polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive");
        if self.len().saturating_sub(offset) == 0 {
            return Ok(self.clear());
        }

        match self {
            Column::Series(s) => Ok(s.gather_every(n, offset)?.into()),
            Column::Scalar(s) => {
                // `total > 0` here because of the early return above.
                let total = s.len() - offset;
                Ok(s.resize(1 + (total - 1) / n).into())
            },
        }
    }

    /// Append `n` copies of `value`; an empty column becomes a scalar column.
    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
        if self.is_empty() {
            return Ok(Self::new_scalar(
                self.name().clone(),
                Scalar::new(self.dtype().clone(), value.into_static()),
                n,
            ));
        }

        match self {
            Column::Series(s) => s.extend_constant(value, n).map(Column::from),
            Column::Scalar(s) => {
                // Same value: stay in the cheap scalar representation.
                if s.scalar().as_any_value() == value {
                    Ok(s.resize(s.len() + n).into())
                } else {
                    s.as_materialized_series()
                        .extend_constant(value, n)
                        .map(Column::from)
                }
            },
        }
    }

    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_finite())
    }
    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_infinite())
    }
    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_nan())
    }
    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan())
    }

    /// Truncating division by a numeric scalar with wrap-around semantics.
    pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
    where
        T: Num + NumCast,
    {
        // @scalar-opt
        self.as_materialized_series()
            .wrapping_trunc_div_scalar(rhs)
            .into()
    }

    /// The product of all values.
    pub fn product(&self) -> PolarsResult<Scalar> {
        // @scalar-opt
        self.as_materialized_series().product()
    }

    /// Iterator over the physical values.
    pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
        // @scalar-opt
        self.as_materialized_series().phys_iter()
    }

    /// Get the value at `index`, with a bounds check.
    #[inline]
    pub fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {
        polars_ensure!(index < self.len(), oob = index, self.len());

        // SAFETY: Bounds check done just before.
        Ok(unsafe { self.get_unchecked(index) })
    }
    /// # Safety
    ///
    /// Does not perform bounds check on `index`
    #[inline(always)]
    pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
        debug_assert!(index < self.len());

        match self {
            Column::Series(s) => unsafe { s.get_unchecked(index) },
            // Every row of a scalar column holds the same value.
            Column::Scalar(s) => s.scalar().as_any_value(),
        }
    }

    #[cfg(feature = "object")]
    pub fn get_object(
        &self,
        index: usize,
    ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
        self.as_materialized_series().get_object(index)
    }

    pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r)
    }
    pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r)
    }
    pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r)
    }

    /// Add, consuming both sides so series buffers can be reused.
    pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => {
                lhs.take().try_add_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs + rhs,
        }
    }
    /// Subtract, consuming both sides so series buffers can be reused.
    pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => {
                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs - rhs,
        }
    }
    /// Multiply, consuming both sides so series buffers can be reused.
    pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) =>
{
                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs * rhs,
        }
    }

    /// Render the value at `index` as a string.
    pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
        Ok(self.get(index)?.str_value())
    }

    /// Reduce to the minimum value.
    pub fn min_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.min_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().min_reduce()
            },
        }
    }
    /// Reduce to the maximum value.
    pub fn max_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.max_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().max_reduce()
            },
        }
    }
    /// Reduce to the median value.
    pub fn median_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.median_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().median_reduce()
            },
        }
    }
    /// Reduce to the mean value.
    pub fn mean_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.mean_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().mean_reduce()
            },
        }
    }
    /// Reduce to the standard deviation with `ddof` delta degrees of freedom.
    pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.std_reduce(ddof),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a small series. This is a tiny bit wasteful, but probably fine.
                // `ddof + 1` repeated values suffice for the statistic to be defined.
                let n = s.len().min(ddof as usize + 1);
                s.as_n_values_series(n).std_reduce(ddof)
            },
        }
    }
    /// Reduce to the variance with `ddof` delta degrees of freedom.
    pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.var_reduce(ddof),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a small series. This is a tiny bit wasteful, but probably fine.
                let n = s.len().min(ddof as usize + 1);
                s.as_n_values_series(n).var_reduce(ddof)
            },
        }
    }
    /// Reduce to the sum of all values.
    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
        // @scalar-opt
        self.as_materialized_series().sum_reduce()
    }
    /// Reduce with bitwise/logical AND.
    pub fn and_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.and_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().and_reduce()
            },
        }
    }
    /// Reduce with bitwise/logical OR.
    pub fn or_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.or_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().or_reduce()
            },
        }
    }
    /// Reduce with bitwise/logical XOR.
    pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.xor_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with handling the full semantics here so we just
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
                //
                // We have to deal with the fact that xor is 0 if there is an even number of
                // elements and the value if there is an odd number of elements.
If there are zero1618// elements the result should be `null`.1619s.as_n_values_series(2 - s.len() % 2).xor_reduce()1620},1621}1622}1623pub fn n_unique(&self) -> PolarsResult<usize> {1624match self {1625Column::Series(s) => s.n_unique(),1626Column::Scalar(s) => s.as_single_value_series().n_unique(),1627}1628}16291630pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {1631self.as_materialized_series()1632.quantile_reduce(quantile, method)1633}16341635pub fn quantiles_reduce(1636&self,1637quantiles: &[f64],1638method: QuantileMethod,1639) -> PolarsResult<Scalar> {1640self.as_materialized_series()1641.quantiles_reduce(quantiles, method)1642}16431644pub(crate) fn estimated_size(&self) -> usize {1645// @scalar-opt1646self.as_materialized_series().estimated_size()1647}16481649pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {1650match self {1651Column::Series(s) => s.sort_with(options).map(Self::from),1652Column::Scalar(s) => {1653// This makes this function throw the same errors as Series::sort_with1654_ = s.as_single_value_series().sort_with(options)?;16551656Ok(self.clone())1657},1658}1659}16601661pub fn map_unary_elementwise_to_bool(1662&self,1663f: impl Fn(&Series) -> BooleanChunked,1664) -> BooleanChunked {1665self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))1666.unwrap()1667}1668pub fn try_map_unary_elementwise_to_bool(1669&self,1670f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,1671) -> PolarsResult<BooleanChunked> {1672match self {1673Column::Series(s) => f(s),1674Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),1675}1676}16771678pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {1679self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()1680}1681pub fn try_apply_unary_elementwise(1682&self,1683f: impl Fn(&Series) -> PolarsResult<Series>,1684) -> PolarsResult<Column> {1685match self {1686Column::Series(s) => 
f(s).map(Column::from),1687Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(1688f(&s.as_single_value_series())?,1689s.len(),1690)1691.into()),1692}1693}16941695pub fn apply_broadcasting_binary_elementwise(1696&self,1697other: &Self,1698op: impl Fn(&Series, &Series) -> Series,1699) -> PolarsResult<Column> {1700self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))1701}1702pub fn try_apply_broadcasting_binary_elementwise(1703&self,1704other: &Self,1705op: impl Fn(&Series, &Series) -> PolarsResult<Series>,1706) -> PolarsResult<Column> {1707fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {1708match (a.len(), b.len()) {1709// broadcasting1710(1, o) | (o, 1) => Ok(o),1711// equal1712(a, b) if a == b => Ok(a),1713// unequal1714(a, b) => {1715polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)1716},1717}1718}17191720// Here we rely on the underlying broadcast operations.1721let length = output_length(self, other)?;1722match (self, other) {1723(Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),1724(Column::Series(lhs), Column::Scalar(rhs)) => {1725op(lhs, &rhs.as_single_value_series()).map(Column::from)1726},1727(Column::Scalar(lhs), Column::Series(rhs)) => {1728op(&lhs.as_single_value_series(), rhs).map(Column::from)1729},1730(Column::Scalar(lhs), Column::Scalar(rhs)) => {1731let lhs = lhs.as_single_value_series();1732let rhs = rhs.as_single_value_series();17331734Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())1735},1736}1737}17381739pub fn apply_binary_elementwise(1740&self,1741other: &Self,1742f: impl Fn(&Series, &Series) -> Series,1743f_lb: impl Fn(&Scalar, &Series) -> Series,1744f_rb: impl Fn(&Series, &Scalar) -> Series,1745) -> Column {1746self.try_apply_binary_elementwise(1747other,1748|lhs, rhs| Ok(f(lhs, rhs)),1749|lhs, rhs| Ok(f_lb(lhs, rhs)),1750|lhs, rhs| Ok(f_rb(lhs, 
rhs)),
        )
        .unwrap()
    }
    /// Apply a binary element-wise operation with dedicated paths for a
    /// scalar on either side. Lengths must already match (`debug_assert` only).
    pub fn try_apply_binary_elementwise(
        &self,
        other: &Self,
        f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
        f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
        f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
    ) -> PolarsResult<Column> {
        debug_assert_eq!(self.len(), other.len());

        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
            (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
            (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
                let lhs = lhs.as_single_value_series();
                let rhs = rhs.as_single_value_series();

                Ok(
                    ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
                        .into_column(),
                )
            },
        }
    }

    /// Approximate count of unique values; exact (1) for scalar columns.
    #[cfg(feature = "approx_unique")]
    pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
        match self {
            Column::Series(s) => s.approx_n_unique(),
            Column::Scalar(s) => {
                // @NOTE: We do this for the error handling.
                s.as_single_value_series().approx_n_unique()?;
                Ok(1)
            },
        }
    }

    /// Number of chunks backing this column.
    pub fn n_chunks(&self) -> usize {
        match self {
            Column::Series(s) => s.n_chunks(),
            // A scalar column only has real chunks if it cached a materialization.
            Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
        }
    }

    #[expect(clippy::wrong_self_convention)]
    pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
        // @scalar-opt
        self.as_materialized_series().into_total_ord_inner()
    }
    #[expect(unused, clippy::wrong_self_convention)]
    pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
        // @scalar-opt
        self.as_materialized_series().into_total_eq_inner()
    }

    /// Convert into a single Arrow array, rechunking first if needed.
    pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {
        // Rechunk to one chunk if necessary
        let mut series = self.take_materialized_series();
        if series.n_chunks() > 1 {
            series = series.rechunk();
        }
        series.to_arrow(0, compat_level)
    }

    pub fn trim_lists_to_normalized_offsets(&self) -> Option<Column> {
        self.as_materialized_series()
            .trim_lists_to_normalized_offsets()
            .map(Column::from)
    }

    pub fn propagate_nulls(&self) -> Option<Column> {
        self.as_materialized_series()
            .propagate_nulls()
            .map(Column::from)
    }

    pub fn deposit(&self, validity: &Bitmap) -> Column {
        self.as_materialized_series()
            .deposit(validity)
            .into_column()
    }

    pub fn rechunk_validity(&self) -> Option<Bitmap> {
        // @scalar-opt
        self.as_materialized_series().rechunk_validity()
    }

    pub fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
        self.as_materialized_series().unique_id()
    }
}

impl Default for Column {
    /// An empty (length-0) `Int64` scalar column with an empty name.
    fn default() -> Self {
        Self::new_scalar(
            PlSmallStr::EMPTY,
            Scalar::new(DataType::Int64, AnyValue::Null),
            0,
        )
    }
}

impl PartialEq for Column {
    fn eq(&self, other: &Self) -> bool {
        // @scalar-opt
        self.as_materialized_series()
            .eq(other.as_materialized_series())
    }
}

impl From<Series> for Column {
    #[inline]
    fn from(series: Series) -> Self {
        // We instantiate a Scalar Column if the Series length is 1. This makes it possible for
        // future operations to be faster.
        if series.len() == 1 {
            return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
        }

        Self::Series(SeriesColumn::new(series))
    }
}

impl<T: IntoSeries> IntoColumn for T {
    #[inline]
    fn into_column(self) -> Column {
        self.into_series().into()
    }
}

impl IntoColumn for Column {
    #[inline(always)]
    fn into_column(self) -> Column {
        self
    }
}

/// We don't want to serialize the scalar columns.
So this helps pretend that columns are always1892/// initialized without implementing From<Column> for Series.1893///1894/// Those casts should be explicit.1895#[derive(Clone)]1896#[cfg_attr(feature = "serde", derive(serde::Serialize))]1897#[cfg_attr(feature = "serde", serde(into = "Series"))]1898struct _SerdeSeries(Series);18991900impl From<Column> for _SerdeSeries {1901#[inline]1902fn from(value: Column) -> Self {1903Self(value.take_materialized_series())1904}1905}19061907impl From<_SerdeSeries> for Series {1908#[inline]1909fn from(value: _SerdeSeries) -> Self {1910value.01911}1912}191319141915