Path: blob/main/crates/polars-core/src/frame/column/mod.rs
6940 views
use std::borrow::Cow;12use arrow::bitmap::BitmapBuilder;3use arrow::trusted_len::TrustMyLength;4use num_traits::{Num, NumCast};5use polars_compute::rolling::QuantileMethod;6use polars_error::PolarsResult;7use polars_utils::aliases::PlSeedableRandomStateQuality;8use polars_utils::index::check_bounds;9use polars_utils::pl_str::PlSmallStr;10pub use scalar::ScalarColumn;1112use self::compare_inner::{TotalEqInner, TotalOrdInner};13use self::gather::check_bounds_ca;14use self::partitioned::PartitionedColumn;15use self::series::SeriesColumn;16use crate::chunked_array::cast::CastOptions;17use crate::chunked_array::flags::StatisticsFlags;18use crate::datatypes::ReshapeDimension;19use crate::prelude::*;20use crate::series::{BitRepr, IsSorted, SeriesPhysIter};21use crate::utils::{Container, slice_offsets};22use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};2324mod arithmetic;25mod compare;26mod partitioned;27mod scalar;28mod series;2930/// A column within a [`DataFrame`].31///32/// This is lazily initialized to a [`Series`] with methods like33/// [`as_materialized_series`][Column::as_materialized_series] and34/// [`take_materialized_series`][Column::take_materialized_series].35///36/// Currently, there are two ways to represent a [`Column`].37/// 1. A [`Series`] of values38/// 2. A [`ScalarColumn`] that repeats a single [`Scalar`]39#[derive(Debug, Clone)]40#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]41#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]42pub enum Column {43Series(SeriesColumn),44Partitioned(PartitionedColumn),45Scalar(ScalarColumn),46}4748/// Convert `Self` into a [`Column`]49pub trait IntoColumn: Sized {50fn into_column(self) -> Column;51}5253impl Column {54#[inline]55#[track_caller]56pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self57where58Phantom: ?Sized,59Series: NamedFrom<T, Phantom>,60{61Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))62}6364#[inline]65pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {66Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)67}6869#[inline]70pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {71Self::Scalar(ScalarColumn::new(name, scalar, length))72}7374#[inline]75pub fn new_partitioned(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {76Self::Scalar(ScalarColumn::new(name, scalar, length))77}7879pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {80let Ok(length) = IdxSize::try_from(length) else {81polars_bail!(82ComputeError:83"row index length {} overflows IdxSize::MAX ({})",84length,85IdxSize::MAX,86)87};8889if offset.checked_add(length).is_none() {90polars_bail!(91ComputeError:92"row index with offset {} overflows on dataframe with height {}",93offset, length94)95}9697let range = offset..offset + length;9899let mut ca = IdxCa::from_vec(name, range.collect());100ca.set_sorted_flag(IsSorted::Ascending);101let col = ca.into_series().into();102103Ok(col)104}105106// # Materialize107/// Get a reference to a [`Series`] for this [`Column`]108///109/// This may need to materialize the [`Series`] on the first invocation for a specific column.110#[inline]111pub fn as_materialized_series(&self) -> &Series {112match self {113Column::Series(s) => s,114Column::Partitioned(s) => s.as_materialized_series(),115Column::Scalar(s) => s.as_materialized_series(),116}117}118119/// If the memory repr of this Column is a scalar, a unit-length Series will120/// be returned.121#[inline]122pub fn as_materialized_series_maintain_scalar(&self) -> Series {123match self {124Column::Scalar(s) => s.as_single_value_series(),125v => v.as_materialized_series().clone(),126}127}128129/// Returns the backing `Series` for the values of this column.130///131/// * For `Column::Series` columns, simply returns the inner `Series`.132/// * For `Column::Partitioned` columns, returns the series representing the values.133/// * For `Column::Scalar` columns, returns an empty or unit length series.134///135/// # Note136/// This method is safe to use. However, care must be taken when operating on the returned137/// `Series` to ensure result correctness. E.g. It is suitable to perform elementwise operations138/// on it, however e.g. aggregations will return unspecified results.139pub fn _get_backing_series(&self) -> Series {140match self {141Column::Series(s) => (**s).clone(),142Column::Partitioned(s) => s.partitions().clone(),143Column::Scalar(s) => s.as_single_value_series(),144}145}146147/// Constructs a new `Column` of the same variant as `self` from a backing `Series` representing148/// the values.149///150/// # Panics151/// Panics if:152/// * `self` is `Column::Series` and the length of `new_s` does not match that of `self`.153/// * `self` is `Column::Partitioned` and the length of `new_s` does not match that of the existing partitions.154/// * `self` is `Column::Scalar` and if either:155/// * `self` is not empty and `new_s` is not of unit length.156/// * `self` is empty and `new_s` is not empty.157pub fn _to_new_from_backing(&self, new_s: Series) -> Self {158match self {159Column::Series(s) => {160assert_eq!(new_s.len(), s.len());161Column::Series(SeriesColumn::new(new_s))162},163Column::Partitioned(s) => {164assert_eq!(new_s.len(), s.partitions().len());165unsafe {166Column::Partitioned(PartitionedColumn::new_unchecked(167new_s.name().clone(),168new_s,169s.partition_ends_ref().clone(),170))171}172},173Column::Scalar(s) => {174assert_eq!(new_s.len(), s.as_single_value_series().len());175Column::Scalar(ScalarColumn::from_single_value_series(new_s, self.len()))176},177}178}179180/// Turn [`Column`] into a [`Column::Series`].181///182/// This may need to materialize the [`Series`] on the first invocation for a specific column.183#[inline]184pub fn into_materialized_series(&mut self) -> &mut Series {185match self {186Column::Series(s) => s,187Column::Partitioned(s) => {188let series = std::mem::replace(189s,190PartitionedColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),191)192.take_materialized_series();193*self = Column::Series(series.into());194let Column::Series(s) = self else {195unreachable!();196};197s198},199Column::Scalar(s) => {200let series = std::mem::replace(201s,202ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),203)204.take_materialized_series();205*self = Column::Series(series.into());206let Column::Series(s) = self else {207unreachable!();208};209s210},211}212}213/// Take [`Series`] from a [`Column`]214///215/// This may need to materialize the [`Series`] on the first invocation for a specific column.216#[inline]217pub fn take_materialized_series(self) -> Series {218match self {219Column::Series(s) => s.take(),220Column::Partitioned(s) => s.take_materialized_series(),221Column::Scalar(s) => s.take_materialized_series(),222}223}224225#[inline]226pub fn dtype(&self) -> &DataType {227match self {228Column::Series(s) => s.dtype(),229Column::Partitioned(s) => s.dtype(),230Column::Scalar(s) => s.dtype(),231}232}233234#[inline]235pub fn field(&self) -> Cow<'_, Field> {236match self {237Column::Series(s) => s.field(),238Column::Partitioned(s) => s.field(),239Column::Scalar(s) => match s.lazy_as_materialized_series() {240None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),241Some(s) => s.field(),242},243}244}245246#[inline]247pub fn name(&self) -> &PlSmallStr {248match self {249Column::Series(s) => s.name(),250Column::Partitioned(s) => s.name(),251Column::Scalar(s) => s.name(),252}253}254255#[inline]256pub fn len(&self) -> usize {257match self {258Column::Series(s) => s.len(),259Column::Partitioned(s) => s.len(),260Column::Scalar(s) => s.len(),261}262}263264#[inline]265pub fn with_name(mut self, name: PlSmallStr) -> Column {266self.rename(name);267self268}269270#[inline]271pub fn rename(&mut self, name: PlSmallStr) {272match self {273Column::Series(s) => _ = s.rename(name),274Column::Partitioned(s) => _ = s.rename(name),275Column::Scalar(s) => _ = s.rename(name),276}277}278279// # Downcasting280#[inline]281pub fn as_series(&self) -> Option<&Series> {282match self {283Column::Series(s) => Some(s),284_ => None,285}286}287#[inline]288pub fn as_partitioned_column(&self) -> Option<&PartitionedColumn> {289match self {290Column::Partitioned(s) => Some(s),291_ => None,292}293}294#[inline]295pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {296match self {297Column::Scalar(s) => Some(s),298_ => None,299}300}301#[inline]302pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {303match self {304Column::Scalar(s) => Some(s),305_ => None,306}307}308309// # Try to Chunked Arrays310pub fn try_bool(&self) -> Option<&BooleanChunked> {311self.as_materialized_series().try_bool()312}313pub fn try_i8(&self) -> Option<&Int8Chunked> {314self.as_materialized_series().try_i8()315}316pub fn try_i16(&self) -> Option<&Int16Chunked> {317self.as_materialized_series().try_i16()318}319pub fn try_i32(&self) -> Option<&Int32Chunked> {320self.as_materialized_series().try_i32()321}322pub fn try_i64(&self) -> Option<&Int64Chunked> {323self.as_materialized_series().try_i64()324}325pub fn try_u8(&self) -> Option<&UInt8Chunked> {326self.as_materialized_series().try_u8()327}328pub fn try_u16(&self) -> Option<&UInt16Chunked> {329self.as_materialized_series().try_u16()330}331pub fn try_u32(&self) -> Option<&UInt32Chunked> {332self.as_materialized_series().try_u32()333}334pub fn try_u64(&self) -> Option<&UInt64Chunked> {335self.as_materialized_series().try_u64()336}337pub fn try_f32(&self) -> Option<&Float32Chunked> {338self.as_materialized_series().try_f32()339}340pub fn try_f64(&self) -> Option<&Float64Chunked> {341self.as_materialized_series().try_f64()342}343pub fn try_str(&self) -> Option<&StringChunked> {344self.as_materialized_series().try_str()345}346pub fn try_list(&self) -> Option<&ListChunked> {347self.as_materialized_series().try_list()348}349pub fn try_binary(&self) -> Option<&BinaryChunked> {350self.as_materialized_series().try_binary()351}352pub fn try_idx(&self) -> Option<&IdxCa> {353self.as_materialized_series().try_idx()354}355pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {356self.as_materialized_series().try_binary_offset()357}358#[cfg(feature = "dtype-datetime")]359pub fn try_datetime(&self) -> Option<&DatetimeChunked> {360self.as_materialized_series().try_datetime()361}362#[cfg(feature = "dtype-struct")]363pub fn try_struct(&self) -> Option<&StructChunked> {364self.as_materialized_series().try_struct()365}366#[cfg(feature = "dtype-decimal")]367pub fn try_decimal(&self) -> Option<&DecimalChunked> {368self.as_materialized_series().try_decimal()369}370#[cfg(feature = "dtype-array")]371pub fn try_array(&self) -> Option<&ArrayChunked> {372self.as_materialized_series().try_array()373}374#[cfg(feature = "dtype-categorical")]375pub fn try_cat<T: PolarsCategoricalType>(&self) -> Option<&CategoricalChunked<T>> {376self.as_materialized_series().try_cat::<T>()377}378#[cfg(feature = "dtype-categorical")]379pub fn try_cat8(&self) -> Option<&Categorical8Chunked> {380self.as_materialized_series().try_cat8()381}382#[cfg(feature = "dtype-categorical")]383pub fn try_cat16(&self) -> Option<&Categorical16Chunked> {384self.as_materialized_series().try_cat16()385}386#[cfg(feature = "dtype-categorical")]387pub fn try_cat32(&self) -> Option<&Categorical32Chunked> {388self.as_materialized_series().try_cat32()389}390#[cfg(feature = "dtype-date")]391pub fn try_date(&self) -> Option<&DateChunked> {392self.as_materialized_series().try_date()393}394#[cfg(feature = "dtype-duration")]395pub fn try_duration(&self) -> Option<&DurationChunked> {396self.as_materialized_series().try_duration()397}398399// # To Chunked Arrays400pub fn bool(&self) -> PolarsResult<&BooleanChunked> {401self.as_materialized_series().bool()402}403pub fn i8(&self) -> PolarsResult<&Int8Chunked> {404self.as_materialized_series().i8()405}406pub fn i16(&self) -> PolarsResult<&Int16Chunked> {407self.as_materialized_series().i16()408}409pub fn i32(&self) -> PolarsResult<&Int32Chunked> {410self.as_materialized_series().i32()411}412pub fn i64(&self) -> PolarsResult<&Int64Chunked> {413self.as_materialized_series().i64()414}415#[cfg(feature = "dtype-i128")]416pub fn i128(&self) -> PolarsResult<&Int128Chunked> {417self.as_materialized_series().i128()418}419pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {420self.as_materialized_series().u8()421}422pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {423self.as_materialized_series().u16()424}425pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {426self.as_materialized_series().u32()427}428pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {429self.as_materialized_series().u64()430}431pub fn f32(&self) -> PolarsResult<&Float32Chunked> {432self.as_materialized_series().f32()433}434pub fn f64(&self) -> PolarsResult<&Float64Chunked> {435self.as_materialized_series().f64()436}437pub fn str(&self) -> PolarsResult<&StringChunked> {438self.as_materialized_series().str()439}440pub fn list(&self) -> PolarsResult<&ListChunked> {441self.as_materialized_series().list()442}443pub fn binary(&self) -> PolarsResult<&BinaryChunked> {444self.as_materialized_series().binary()445}446pub fn idx(&self) -> PolarsResult<&IdxCa> {447self.as_materialized_series().idx()448}449pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {450self.as_materialized_series().binary_offset()451}452#[cfg(feature = "dtype-datetime")]453pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {454self.as_materialized_series().datetime()455}456#[cfg(feature = "dtype-struct")]457pub fn struct_(&self) -> PolarsResult<&StructChunked> {458self.as_materialized_series().struct_()459}460#[cfg(feature = "dtype-decimal")]461pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {462self.as_materialized_series().decimal()463}464#[cfg(feature = "dtype-array")]465pub fn array(&self) -> PolarsResult<&ArrayChunked> {466self.as_materialized_series().array()467}468#[cfg(feature = "dtype-categorical")]469pub fn cat<T: PolarsCategoricalType>(&self) -> PolarsResult<&CategoricalChunked<T>> {470self.as_materialized_series().cat::<T>()471}472#[cfg(feature = "dtype-categorical")]473pub fn cat8(&self) -> PolarsResult<&Categorical8Chunked> {474self.as_materialized_series().cat8()475}476#[cfg(feature = "dtype-categorical")]477pub fn cat16(&self) -> PolarsResult<&Categorical16Chunked> {478self.as_materialized_series().cat16()479}480#[cfg(feature = "dtype-categorical")]481pub fn cat32(&self) -> PolarsResult<&Categorical32Chunked> {482self.as_materialized_series().cat32()483}484#[cfg(feature = "dtype-date")]485pub fn date(&self) -> PolarsResult<&DateChunked> {486self.as_materialized_series().date()487}488#[cfg(feature = "dtype-duration")]489pub fn duration(&self) -> PolarsResult<&DurationChunked> {490self.as_materialized_series().duration()491}492493// # Casting494pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {495match self {496Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),497Column::Partitioned(s) => s.cast_with_options(dtype, options).map(Column::from),498Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),499}500}501pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {502match self {503Column::Series(s) => s.strict_cast(dtype).map(Column::from),504Column::Partitioned(s) => s.strict_cast(dtype).map(Column::from),505Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),506}507}508pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {509match self {510Column::Series(s) => s.cast(dtype).map(Column::from),511Column::Partitioned(s) => s.cast(dtype).map(Column::from),512Column::Scalar(s) => s.cast(dtype).map(Column::from),513}514}515/// # Safety516///517/// This can lead to invalid memory access in downstream code.518pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {519match self {520Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),521Column::Partitioned(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),522Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),523}524}525526pub fn clear(&self) -> Self {527match self {528Column::Series(s) => s.clear().into(),529Column::Partitioned(s) => s.clear().into(),530Column::Scalar(s) => s.resize(0).into(),531}532}533534#[inline]535pub fn shrink_to_fit(&mut self) {536match self {537Column::Series(s) => s.shrink_to_fit(),538// @partition-opt539Column::Partitioned(_) => {},540Column::Scalar(_) => {},541}542}543544#[inline]545pub fn new_from_index(&self, index: usize, length: usize) -> Self {546if index >= self.len() {547return Self::full_null(self.name().clone(), length, self.dtype());548}549550match self {551Column::Series(s) => {552// SAFETY: Bounds check done before.553let av = unsafe { s.get_unchecked(index) };554let scalar = Scalar::new(self.dtype().clone(), av.into_static());555Self::new_scalar(self.name().clone(), scalar, length)556},557Column::Partitioned(s) => {558// SAFETY: Bounds check done before.559let av = unsafe { s.get_unchecked(index) };560let scalar = Scalar::new(self.dtype().clone(), av.into_static());561Self::new_scalar(self.name().clone(), scalar, length)562},563Column::Scalar(s) => s.resize(length).into(),564}565}566567#[inline]568pub fn has_nulls(&self) -> bool {569match self {570Self::Series(s) => s.has_nulls(),571// @partition-opt572Self::Partitioned(s) => s.as_materialized_series().has_nulls(),573Self::Scalar(s) => s.has_nulls(),574}575}576577#[inline]578pub fn is_null(&self) -> BooleanChunked {579match self {580Self::Series(s) => s.is_null(),581// @partition-opt582Self::Partitioned(s) => s.as_materialized_series().is_null(),583Self::Scalar(s) => {584BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())585},586}587}588#[inline]589pub fn is_not_null(&self) -> BooleanChunked {590match self {591Self::Series(s) => s.is_not_null(),592// @partition-opt593Self::Partitioned(s) => s.as_materialized_series().is_not_null(),594Self::Scalar(s) => {595BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())596},597}598}599600pub fn to_physical_repr(&self) -> Column {601// @scalar-opt602self.as_materialized_series()603.to_physical_repr()604.into_owned()605.into()606}607/// # Safety608///609/// This can lead to invalid memory access in downstream code.610pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {611// @scalar-opt612self.as_materialized_series()613.from_physical_unchecked(dtype)614.map(Column::from)615}616617pub fn head(&self, length: Option<usize>) -> Column {618let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);619let len = usize::min(len, self.len());620self.slice(0, len)621}622pub fn tail(&self, length: Option<usize>) -> Column {623let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);624let len = usize::min(len, self.len());625debug_assert!(len <= i64::MAX as usize);626self.slice(-(len as i64), len)627}628pub fn slice(&self, offset: i64, length: usize) -> Column {629match self {630Column::Series(s) => s.slice(offset, length).into(),631// @partition-opt632Column::Partitioned(s) => s.as_materialized_series().slice(offset, length).into(),633Column::Scalar(s) => {634let (_, length) = slice_offsets(offset, length, s.len());635s.resize(length).into()636},637}638}639640pub fn split_at(&self, offset: i64) -> (Column, Column) {641// @scalar-opt642let (l, r) = self.as_materialized_series().split_at(offset);643(l.into(), r.into())644}645646#[inline]647pub fn null_count(&self) -> usize {648match self {649Self::Series(s) => s.null_count(),650Self::Partitioned(s) => s.null_count(),651Self::Scalar(s) if s.scalar().is_null() => s.len(),652Self::Scalar(_) => 0,653}654}655656pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {657check_bounds_ca(indices, self.len() as IdxSize)?;658Ok(unsafe { self.take_unchecked(indices) })659}660pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {661check_bounds(indices, self.len() as IdxSize)?;662Ok(unsafe { self.take_slice_unchecked(indices) })663}664/// # Safety665///666/// No bounds on the indexes are performed.667pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {668debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());669670match self {671Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),672Self::Partitioned(s) => {673let s = s.as_materialized_series();674unsafe { s.take_unchecked(indices) }.into()675},676Self::Scalar(s) => {677let idxs_length = indices.len();678let idxs_null_count = indices.null_count();679680let scalar = ScalarColumn::from_single_value_series(681s.as_single_value_series().take_unchecked(&IdxCa::new(682indices.name().clone(),683&[0][..s.len().min(1)],684)),685idxs_length,686);687688// We need to make sure that null values in `idx` become null values in the result689if idxs_null_count == 0 || scalar.has_nulls() {690scalar.into_column()691} else if idxs_null_count == idxs_length {692scalar.into_nulls().into_column()693} else {694let validity = indices.rechunk_validity();695let series = scalar.take_materialized_series();696let name = series.name().clone();697let dtype = series.dtype().clone();698let mut chunks = series.into_chunks();699assert_eq!(chunks.len(), 1);700chunks[0] = chunks[0].with_validity(validity);701unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }702.into_column()703}704},705}706}707/// # Safety708///709/// No bounds on the indexes are performed.710pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {711debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());712713match self {714Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),715Self::Partitioned(s) => {716let s = s.as_materialized_series();717unsafe { s.take_slice_unchecked(indices) }.into()718},719Self::Scalar(s) => ScalarColumn::from_single_value_series(720s.as_single_value_series()721.take_slice_unchecked(&[0][..s.len().min(1)]),722indices.len(),723)724.into(),725}726}727728/// General implementation for aggregation where a non-missing scalar would map to itself.729#[inline(always)]730#[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]731fn agg_with_unit_scalar(732&self,733groups: &GroupsType,734series_agg: impl Fn(&Series, &GroupsType) -> Series,735) -> Column {736match self {737Column::Series(s) => series_agg(s, groups).into_column(),738// @partition-opt739Column::Partitioned(s) => series_agg(s.as_materialized_series(), groups).into_column(),740Column::Scalar(s) => {741if s.is_empty() {742return series_agg(s.as_materialized_series(), groups).into_column();743}744745// We utilize the aggregation on Series to see:746// 1. the output datatype of the aggregation747// 2. whether this aggregation is even defined748let series_aggregation = series_agg(749&s.as_single_value_series(),750&GroupsType::Slice {751// @NOTE: this group is always valid since s is non-empty.752groups: vec![[0, 1]],753rolling: false,754},755);756757// If the aggregation is not defined, just return all nulls.758if series_aggregation.has_nulls() {759return Self::new_scalar(760series_aggregation.name().clone(),761Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),762groups.len(),763);764}765766let mut scalar_col = s.resize(groups.len());767// The aggregation might change the type (e.g. mean changes int -> float), so we do768// a cast here to the output type.769if series_aggregation.dtype() != s.dtype() {770scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();771}772773let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {774// Fast path: no empty groups. keep the scalar intact.775return scalar_col.into_column();776};777778// All empty groups produce a *missing* or `null` value.779let mut validity = BitmapBuilder::with_capacity(groups.len());780validity.extend_constant(first_empty_idx, true);781// SAFETY: We trust the length of this iterator.782let iter = unsafe {783TrustMyLength::new(784groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),785groups.len() - first_empty_idx,786)787};788validity.extend_trusted_len_iter(iter);789790let mut s = scalar_col.take_materialized_series().rechunk();791// SAFETY: We perform a compute_len afterwards.792let chunks = unsafe { s.chunks_mut() };793let arr = &mut chunks[0];794*arr = arr.with_validity(validity.into_opt_validity());795s.compute_len();796797s.into_column()798},799}800}801802/// # Safety803///804/// Does no bounds checks, groups must be correct.805#[cfg(feature = "algorithm_group_by")]806pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {807self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_min(g) })808}809810/// # Safety811///812/// Does no bounds checks, groups must be correct.813#[cfg(feature = "algorithm_group_by")]814pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {815self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_max(g) })816}817818/// # Safety819///820/// Does no bounds checks, groups must be correct.821#[cfg(feature = "algorithm_group_by")]822pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {823self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_mean(g) })824}825826/// # Safety827///828/// Does no bounds checks, groups must be correct.829#[cfg(feature = "algorithm_group_by")]830pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {831// @scalar-opt832unsafe { self.as_materialized_series().agg_sum(groups) }.into()833}834835/// # Safety836///837/// Does no bounds checks, groups must be correct.838#[cfg(feature = "algorithm_group_by")]839pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {840self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_first(g) })841}842843/// # Safety844///845/// Does no bounds checks, groups must be correct.846#[cfg(feature = "algorithm_group_by")]847pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {848self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_last(g) })849}850851/// # Safety852///853/// Does no bounds checks, groups must be correct.854#[cfg(feature = "algorithm_group_by")]855pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {856// @scalar-opt857unsafe { self.as_materialized_series().agg_n_unique(groups) }.into()858}859860/// # Safety861///862/// Does no bounds checks, groups must be correct.863#[cfg(feature = "algorithm_group_by")]864pub unsafe fn agg_quantile(865&self,866groups: &GroupsType,867quantile: f64,868method: QuantileMethod,869) -> Self {870// @scalar-opt871872unsafe {873self.as_materialized_series()874.agg_quantile(groups, quantile, method)875}876.into()877}878879/// # Safety880///881/// Does no bounds checks, groups must be correct.882#[cfg(feature = "algorithm_group_by")]883pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {884self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_median(g) })885}886887/// # Safety888///889/// Does no bounds checks, groups must be correct.890#[cfg(feature = "algorithm_group_by")]891pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {892// @scalar-opt893unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into()894}895896/// # Safety897///898/// Does no bounds checks, groups must be correct.899#[cfg(feature = "algorithm_group_by")]900pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {901// @scalar-opt902unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()903}904905/// # Safety906///907/// Does no bounds checks, groups must be correct.908#[cfg(feature = "algorithm_group_by")]909pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {910// @scalar-opt911unsafe { self.as_materialized_series().agg_list(groups) }.into()912}913914/// # Safety915///916/// Does no bounds checks, groups must be correct.917#[cfg(feature = "algorithm_group_by")]918pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {919// @partition-opt920// @scalar-opt921unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()922}923924/// # Safety925///926/// Does no bounds checks, groups must be correct.927#[cfg(feature = "bitwise")]928pub fn agg_and(&self, groups: &GroupsType) -> Self {929self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_and(g) })930}931/// # Safety932///933/// Does no bounds checks, groups must be correct.934#[cfg(feature = "bitwise")]935pub fn agg_or(&self, groups: &GroupsType) -> Self {936self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_or(g) })937}938/// # Safety939///940/// Does no bounds checks, groups must be correct.941#[cfg(feature = "bitwise")]942pub fn agg_xor(&self, groups: &GroupsType) -> Self {943// @partition-opt944// @scalar-opt945unsafe { self.as_materialized_series().agg_xor(groups) }.into()946}947948pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {949Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)950}951952pub fn is_empty(&self) -> bool {953self.len() == 0954}955956pub fn reverse(&self) -> Column {957match self {958Column::Series(s) => s.reverse().into(),959Column::Partitioned(s) => s.reverse().into(),960Column::Scalar(_) => self.clone(),961}962}963964pub fn equals(&self, other: &Column) -> bool {965// @scalar-opt966self.as_materialized_series()967.equals(other.as_materialized_series())968}969970pub fn equals_missing(&self, other: &Column) -> bool {971// @scalar-opt972self.as_materialized_series()973.equals_missing(other.as_materialized_series())974}975976pub fn set_sorted_flag(&mut self, sorted: IsSorted) {977// @scalar-opt978match self {979Column::Series(s) => s.set_sorted_flag(sorted),980Column::Partitioned(s) => s.set_sorted_flag(sorted),981Column::Scalar(_) => {},982}983}984985pub fn get_flags(&self) -> StatisticsFlags {986match self {987Column::Series(s) => s.get_flags(),988// @partition-opt989Column::Partitioned(_) => StatisticsFlags::empty(),990Column::Scalar(_) => {991StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST992},993}994}995996/// Returns whether the flags were set997pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {998match self {999Column::Series(s) => {1000s.set_flags(flags);1001true1002},1003// @partition-opt1004Column::Partitioned(_) => false,1005Column::Scalar(_) => false,1006}1007}10081009pub fn vec_hash(1010&self,1011build_hasher: PlSeedableRandomStateQuality,1012buf: &mut Vec<u64>,1013) -> PolarsResult<()> {1014// @scalar-opt?1015self.as_materialized_series().vec_hash(build_hasher, buf)1016}10171018pub fn vec_hash_combine(1019&self,1020build_hasher: PlSeedableRandomStateQuality,1021hashes: &mut [u64],1022) -> PolarsResult<()> {1023// @scalar-opt?1024self.as_materialized_series()1025.vec_hash_combine(build_hasher, hashes)1026}10271028pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {1029// @scalar-opt1030self.into_materialized_series()1031.append(other.as_materialized_series())?;1032Ok(self)1033}1034pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {1035self.into_materialized_series()1036.append_owned(other.take_materialized_series())?;1037Ok(self)1038}10391040pub fn arg_sort(&self, options: SortOptions) -> IdxCa {1041if self.is_empty() {1042return IdxCa::from_vec(self.name().clone(), Vec::new());1043}10441045if self.null_count() == self.len() {1046// We might need to maintain order so just respect the descending parameter.1047let values = if options.descending {1048(0..self.len() as IdxSize).rev().collect()1049} else {1050(0..self.len() as IdxSize).collect()1051};10521053return IdxCa::from_vec(self.name().clone(), values);1054}10551056let is_sorted = Some(self.is_sorted_flag());1057let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {1058return self.as_materialized_series().arg_sort(options);1059};10601061// Fast path: the data is sorted.1062let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);1063let invert = options.descending != is_sorted_dsc;10641065let mut values = Vec::with_capacity(self.len());10661067#[inline(never)]1068fn extend(1069start: IdxSize,1070end: IdxSize,1071slf: &Column,1072values: &mut Vec<IdxSize>,1073is_only_nulls: bool,1074invert: bool,1075maintain_order: bool,1076) {1077debug_assert!(start <= end);1078debug_assert!(start as usize <= slf.len());1079debug_assert!(end as usize <= slf.len());10801081if !invert || is_only_nulls {1082values.extend(start..end);1083return;1084}10851086// If we don't have to maintain order but we have to invert. Just flip it around.1087if !maintain_order {1088values.extend((start..end).rev());1089return;1090}10911092// If we want to maintain order but we also needs to invert, we need to invert1093// per group of items.1094//1095// @NOTE: Since the column is sorted, arg_unique can also take a fast path and1096// just do a single traversal.1097let arg_unique = slf1098.slice(start as i64, (end - start) as usize)1099.arg_unique()1100.unwrap();11011102assert!(!arg_unique.has_nulls());11031104let num_unique = arg_unique.len();11051106// Fast path: all items are unique.1107if num_unique == (end - start) as usize {1108values.extend((start..end).rev());1109return;1110}11111112if num_unique == 1 {1113values.extend(start..end);1114return;1115}11161117let mut prev_idx = end - start;1118for chunk in arg_unique.downcast_iter() {1119for &idx in chunk.values().as_slice().iter().rev() {1120values.extend(start + idx..start + prev_idx);1121prev_idx = idx;1122}1123}1124}1125macro_rules! extend {1126($start:expr, $end:expr) => {1127extend!($start, $end, is_only_nulls = false);1128};1129($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {1130extend(1131$start,1132$end,1133self,1134&mut values,1135$is_only_nulls,1136invert,1137options.maintain_order,1138);1139};1140}11411142let length = self.len() as IdxSize;1143let null_count = self.null_count() as IdxSize;11441145if null_count == 0 {1146extend!(0, length);1147} else {1148let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();1149match (options.nulls_last, has_nulls_last) {1150(true, true) => {1151// Current: Nulls last, Wanted: Nulls last1152extend!(0, length - null_count);1153extend!(length - null_count, length, is_only_nulls = true);1154},1155(true, false) => {1156// Current: Nulls first, Wanted: Nulls last1157extend!(null_count, length);1158extend!(0, null_count, is_only_nulls = true);1159},1160(false, true) => {1161// Current: Nulls last, Wanted: Nulls first1162extend!(length - null_count, length, is_only_nulls = true);1163extend!(0, length - null_count);1164},1165(false, false) => {1166// Current: Nulls first, Wanted: Nulls first1167extend!(0, null_count, is_only_nulls = true);1168extend!(null_count, length);1169},1170}1171}11721173// @NOTE: This can theoretically be pushed into the previous operation but it is really1174// worth it... probably not...1175if let Some(limit) = options.limit {1176let limit = limit.min(length);1177values.truncate(limit as usize);1178}11791180IdxCa::from_vec(self.name().clone(), values)1181}11821183pub fn arg_sort_multiple(1184&self,1185by: &[Column],1186options: &SortMultipleOptions,1187) -> PolarsResult<IdxCa> {1188// @scalar-opt1189self.as_materialized_series().arg_sort_multiple(by, options)1190}11911192pub fn arg_unique(&self) -> PolarsResult<IdxCa> {1193match self {1194Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),1195_ => self.as_materialized_series().arg_unique(),1196}1197}11981199pub fn bit_repr(&self) -> Option<BitRepr> {1200// @scalar-opt1201self.as_materialized_series().bit_repr()1202}12031204pub fn into_frame(self) -> DataFrame {1205// SAFETY: A single-column dataframe cannot have length mismatches or duplicate names1206unsafe { DataFrame::new_no_checks(self.len(), vec![self]) }1207}12081209pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {1210// @scalar-opt1211self.into_materialized_series()1212.extend(other.as_materialized_series())?;1213Ok(self)1214}12151216pub fn rechunk(&self) -> Column {1217match self {1218Column::Series(s) => s.rechunk().into(),1219Column::Partitioned(s) => {1220if let Some(s) = s.lazy_as_materialized_series() {1221// This should always hold for partitioned.1222debug_assert_eq!(s.n_chunks(), 1)1223}1224self.clone()1225},1226Column::Scalar(s) => {1227if s.lazy_as_materialized_series()1228.filter(|x| x.n_chunks() > 1)1229.is_some()1230{1231Column::Scalar(ScalarColumn::new(1232s.name().clone(),1233s.scalar().clone(),1234s.len(),1235))1236} else {1237self.clone()1238}1239},1240}1241}12421243pub fn explode(&self, skip_empty: bool) -> PolarsResult<Column> {1244self.as_materialized_series()1245.explode(skip_empty)1246.map(Column::from)1247}1248pub fn implode(&self) -> PolarsResult<ListChunked> {1249self.as_materialized_series().implode()1250}12511252pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {1253// @scalar-opt1254self.as_materialized_series()1255.fill_null(strategy)1256.map(Column::from)1257}12581259pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {1260// @scalar-opt1261self.as_materialized_series()1262.divide(rhs.as_materialized_series())1263.map(Column::from)1264}12651266pub fn shift(&self, periods: i64) -> Column {1267// @scalar-opt1268self.as_materialized_series().shift(periods).into()1269}12701271#[cfg(feature = "zip_with")]1272pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {1273// @scalar-opt1274self.as_materialized_series()1275.zip_with(mask, other.as_materialized_series())1276.map(Self::from)1277}12781279#[cfg(feature = "zip_with")]1280pub fn zip_with_same_type(1281&self,1282mask: &ChunkedArray<BooleanType>,1283other: &Column,1284) -> PolarsResult<Column> {1285// @scalar-opt1286self.as_materialized_series()1287.zip_with_same_type(mask, other.as_materialized_series())1288.map(Column::from)1289}12901291pub fn drop_nulls(&self) -> Column {1292match self {1293Column::Series(s) => s.drop_nulls().into_column(),1294// @partition-opt1295Column::Partitioned(s) => s.as_materialized_series().drop_nulls().into_column(),1296Column::Scalar(s) => s.drop_nulls().into_column(),1297}1298}12991300/// Packs every element into a list.1301pub fn as_list(&self) -> ListChunked {1302// @scalar-opt1303// @partition-opt1304self.as_materialized_series().as_list()1305}13061307pub fn is_sorted_flag(&self) -> IsSorted {1308match self {1309Column::Series(s) => s.is_sorted_flag(),1310Column::Partitioned(s) => s.partitions().is_sorted_flag(),1311Column::Scalar(_) => IsSorted::Ascending,1312}1313}13141315pub fn unique(&self) -> PolarsResult<Column> {1316match self {1317Column::Series(s) => s.unique().map(Column::from),1318// @partition-opt1319Column::Partitioned(s) => s.as_materialized_series().unique().map(Column::from),1320Column::Scalar(s) => {1321_ = s.as_single_value_series().unique()?;1322if s.is_empty() {1323return Ok(s.clone().into_column());1324}13251326Ok(s.resize(1).into_column())1327},1328}1329}1330pub fn unique_stable(&self) -> PolarsResult<Column> {1331match self {1332Column::Series(s) => s.unique_stable().map(Column::from),1333// @partition-opt1334Column::Partitioned(s) => s.as_materialized_series().unique_stable().map(Column::from),1335Column::Scalar(s) => {1336_ = s.as_single_value_series().unique_stable()?;1337if s.is_empty() {1338return Ok(s.clone().into_column());1339}13401341Ok(s.resize(1).into_column())1342},1343}1344}13451346pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {1347// @scalar-opt1348self.as_materialized_series()1349.reshape_list(dimensions)1350.map(Self::from)1351}13521353#[cfg(feature = "dtype-array")]1354pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {1355// @scalar-opt1356self.as_materialized_series()1357.reshape_array(dimensions)1358.map(Self::from)1359}13601361pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {1362// @scalar-opt1363self.as_materialized_series()1364.sort(sort_options)1365.map(Self::from)1366}13671368pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {1369match self {1370Column::Series(s) => s.filter(filter).map(Column::from),1371Column::Partitioned(s) => s.as_materialized_series().filter(filter).map(Column::from),1372Column::Scalar(s) => {1373if s.is_empty() {1374return Ok(s.clone().into_column());1375}13761377// Broadcasting1378if filter.len() == 1 {1379return match filter.get(0) {1380Some(true) => Ok(s.clone().into_column()),1381_ => Ok(s.resize(0).into_column()),1382};1383}13841385Ok(s.resize(filter.sum().unwrap() as usize).into_column())1386},1387}1388}13891390#[cfg(feature = "random")]1391pub fn shuffle(&self, seed: Option<u64>) -> Self {1392// @scalar-opt1393self.as_materialized_series().shuffle(seed).into()1394}13951396#[cfg(feature = "random")]1397pub fn sample_frac(1398&self,1399frac: f64,1400with_replacement: bool,1401shuffle: bool,1402seed: Option<u64>,1403) -> PolarsResult<Self> {1404self.as_materialized_series()1405.sample_frac(frac, with_replacement, shuffle, seed)1406.map(Self::from)1407}14081409#[cfg(feature = "random")]1410pub fn sample_n(1411&self,1412n: usize,1413with_replacement: bool,1414shuffle: bool,1415seed: Option<u64>,1416) -> PolarsResult<Self> {1417self.as_materialized_series()1418.sample_n(n, with_replacement, shuffle, seed)1419.map(Self::from)1420}14211422pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Column> {1423polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive");1424if self.len().saturating_sub(offset) == 0 {1425return Ok(self.clear());1426}14271428match self {1429Column::Series(s) => Ok(s.gather_every(n, offset)?.into()),1430Column::Partitioned(s) => {1431Ok(s.as_materialized_series().gather_every(n, offset)?.into())1432},1433Column::Scalar(s) => {1434let total = s.len() - offset;1435Ok(s.resize(1 + (total - 1) / n).into())1436},1437}1438}14391440pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {1441if self.is_empty() {1442return Ok(Self::new_scalar(1443self.name().clone(),1444Scalar::new(self.dtype().clone(), value.into_static()),1445n,1446));1447}14481449match self {1450Column::Series(s) => s.extend_constant(value, n).map(Column::from),1451Column::Partitioned(s) => s.extend_constant(value, n).map(Column::from),1452Column::Scalar(s) => {1453if s.scalar().as_any_value() == value {1454Ok(s.resize(s.len() + n).into())1455} else {1456s.as_materialized_series()1457.extend_constant(value, n)1458.map(Column::from)1459}1460},1461}1462}14631464pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {1465self.try_map_unary_elementwise_to_bool(|s| s.is_finite())1466}1467pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {1468self.try_map_unary_elementwise_to_bool(|s| s.is_infinite())1469}1470pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {1471self.try_map_unary_elementwise_to_bool(|s| s.is_nan())1472}1473pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {1474self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan())1475}14761477pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self1478where1479T: Num + NumCast,1480{1481// @scalar-opt1482self.as_materialized_series()1483.wrapping_trunc_div_scalar(rhs)1484.into()1485}14861487pub fn product(&self) -> PolarsResult<Scalar> {1488// @scalar-opt1489self.as_materialized_series().product()1490}14911492pub fn phys_iter(&self) -> SeriesPhysIter<'_> {1493// @scalar-opt1494self.as_materialized_series().phys_iter()1495}14961497#[inline]1498pub fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {1499polars_ensure!(index < self.len(), oob = index, self.len());15001501// SAFETY: Bounds check done just before.1502Ok(unsafe { self.get_unchecked(index) })1503}1504/// # Safety1505///1506/// Does not perform bounds check on `index`1507#[inline(always)]1508pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {1509debug_assert!(index < self.len());15101511match self {1512Column::Series(s) => unsafe { s.get_unchecked(index) },1513Column::Partitioned(s) => unsafe { s.get_unchecked(index) },1514Column::Scalar(s) => s.scalar().as_any_value(),1515}1516}15171518#[cfg(feature = "object")]1519pub fn get_object(1520&self,1521index: usize,1522) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {1523self.as_materialized_series().get_object(index)1524}15251526pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {1527self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r)1528}1529pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {1530self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r)1531}1532pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {1533self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r)1534}15351536pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {1537match (self, other) {1538(Column::Series(lhs), Column::Series(rhs)) => {1539lhs.take().try_add_owned(rhs.take()).map(Column::from)1540},1541(lhs, rhs) => lhs + rhs,1542}1543}1544pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {1545match (self, other) {1546(Column::Series(lhs), Column::Series(rhs)) => {1547lhs.take().try_sub_owned(rhs.take()).map(Column::from)1548},1549(lhs, rhs) => lhs - rhs,1550}1551}1552pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {1553match (self, other) {1554(Column::Series(lhs), Column::Series(rhs)) => {1555lhs.take().try_mul_owned(rhs.take()).map(Column::from)1556},1557(lhs, rhs) => lhs * rhs,1558}1559}15601561pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {1562Ok(self.get(index)?.str_value())1563}15641565pub fn min_reduce(&self) -> PolarsResult<Scalar> {1566match self {1567Column::Series(s) => s.min_reduce(),1568Column::Partitioned(s) => s.min_reduce(),1569Column::Scalar(s) => {1570// We don't really want to deal with handling the full semantics here so we just1571// cast to a single value series. This is a tiny bit wasteful, but probably fine.1572s.as_single_value_series().min_reduce()1573},1574}1575}1576pub fn max_reduce(&self) -> PolarsResult<Scalar> {1577match self {1578Column::Series(s) => s.max_reduce(),1579Column::Partitioned(s) => s.max_reduce(),1580Column::Scalar(s) => {1581// We don't really want to deal with handling the full semantics here so we just1582// cast to a single value series. This is a tiny bit wasteful, but probably fine.1583s.as_single_value_series().max_reduce()1584},1585}1586}1587pub fn median_reduce(&self) -> PolarsResult<Scalar> {1588match self {1589Column::Series(s) => s.median_reduce(),1590Column::Partitioned(s) => s.as_materialized_series().median_reduce(),1591Column::Scalar(s) => {1592// We don't really want to deal with handling the full semantics here so we just1593// cast to a single value series. This is a tiny bit wasteful, but probably fine.1594s.as_single_value_series().median_reduce()1595},1596}1597}1598pub fn mean_reduce(&self) -> Scalar {1599match self {1600Column::Series(s) => s.mean_reduce(),1601Column::Partitioned(s) => s.as_materialized_series().mean_reduce(),1602Column::Scalar(s) => {1603// We don't really want to deal with handling the full semantics here so we just1604// cast to a single value series. This is a tiny bit wasteful, but probably fine.1605s.as_single_value_series().mean_reduce()1606},1607}1608}1609pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {1610match self {1611Column::Series(s) => s.std_reduce(ddof),1612Column::Partitioned(s) => s.as_materialized_series().std_reduce(ddof),1613Column::Scalar(s) => {1614// We don't really want to deal with handling the full semantics here so we just1615// cast to a small series. This is a tiny bit wasteful, but probably fine.1616let n = s.len().min(ddof as usize + 1);1617s.as_n_values_series(n).std_reduce(ddof)1618},1619}1620}1621pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {1622match self {1623Column::Series(s) => s.var_reduce(ddof),1624Column::Partitioned(s) => s.as_materialized_series().var_reduce(ddof),1625Column::Scalar(s) => {1626// We don't really want to deal with handling the full semantics here so we just1627// cast to a small series. This is a tiny bit wasteful, but probably fine.1628let n = s.len().min(ddof as usize + 1);1629s.as_n_values_series(n).var_reduce(ddof)1630},1631}1632}1633pub fn sum_reduce(&self) -> PolarsResult<Scalar> {1634// @partition-opt1635// @scalar-opt1636self.as_materialized_series().sum_reduce()1637}1638pub fn and_reduce(&self) -> PolarsResult<Scalar> {1639match self {1640Column::Series(s) => s.and_reduce(),1641Column::Partitioned(s) => s.and_reduce(),1642Column::Scalar(s) => {1643// We don't really want to deal with handling the full semantics here so we just1644// cast to a single value series. This is a tiny bit wasteful, but probably fine.1645s.as_single_value_series().and_reduce()1646},1647}1648}1649pub fn or_reduce(&self) -> PolarsResult<Scalar> {1650match self {1651Column::Series(s) => s.or_reduce(),1652Column::Partitioned(s) => s.or_reduce(),1653Column::Scalar(s) => {1654// We don't really want to deal with handling the full semantics here so we just1655// cast to a single value series. This is a tiny bit wasteful, but probably fine.1656s.as_single_value_series().or_reduce()1657},1658}1659}1660pub fn xor_reduce(&self) -> PolarsResult<Scalar> {1661match self {1662Column::Series(s) => s.xor_reduce(),1663// @partition-opt1664Column::Partitioned(s) => s.as_materialized_series().xor_reduce(),1665Column::Scalar(s) => {1666// We don't really want to deal with handling the full semantics here so we just1667// cast to a single value series. This is a tiny bit wasteful, but probably fine.1668//1669// We have to deal with the fact that xor is 0 if there is an even number of1670// elements and the value if there is an odd number of elements. If there are zero1671// elements the result should be `null`.1672s.as_n_values_series(2 - s.len() % 2).xor_reduce()1673},1674}1675}1676pub fn n_unique(&self) -> PolarsResult<usize> {1677match self {1678Column::Series(s) => s.n_unique(),1679Column::Partitioned(s) => s.partitions().n_unique(),1680Column::Scalar(s) => s.as_single_value_series().n_unique(),1681}1682}1683pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {1684self.as_materialized_series()1685.quantile_reduce(quantile, method)1686}16871688pub(crate) fn estimated_size(&self) -> usize {1689// @scalar-opt1690self.as_materialized_series().estimated_size()1691}16921693pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {1694match self {1695Column::Series(s) => s.sort_with(options).map(Self::from),1696// @partition-opt1697Column::Partitioned(s) => s1698.as_materialized_series()1699.sort_with(options)1700.map(Self::from),1701Column::Scalar(s) => {1702// This makes this function throw the same errors as Series::sort_with1703_ = s.as_single_value_series().sort_with(options)?;17041705Ok(self.clone())1706},1707}1708}17091710pub fn map_unary_elementwise_to_bool(1711&self,1712f: impl Fn(&Series) -> BooleanChunked,1713) -> BooleanChunked {1714self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))1715.unwrap()1716}1717pub fn try_map_unary_elementwise_to_bool(1718&self,1719f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,1720) -> PolarsResult<BooleanChunked> {1721match self {1722Column::Series(s) => f(s),1723Column::Partitioned(s) => f(s.as_materialized_series()),1724Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),1725}1726}17271728pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {1729self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()1730}1731pub fn try_apply_unary_elementwise(1732&self,1733f: impl Fn(&Series) -> PolarsResult<Series>,1734) -> PolarsResult<Column> {1735match self {1736Column::Series(s) => f(s).map(Column::from),1737Column::Partitioned(s) => s.try_apply_unary_elementwise(f).map(Self::from),1738Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(1739f(&s.as_single_value_series())?,1740s.len(),1741)1742.into()),1743}1744}17451746pub fn apply_broadcasting_binary_elementwise(1747&self,1748other: &Self,1749op: impl Fn(&Series, &Series) -> Series,1750) -> PolarsResult<Column> {1751self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))1752}1753pub fn try_apply_broadcasting_binary_elementwise(1754&self,1755other: &Self,1756op: impl Fn(&Series, &Series) -> PolarsResult<Series>,1757) -> PolarsResult<Column> {1758fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {1759match (a.len(), b.len()) {1760// broadcasting1761(1, o) | (o, 1) => Ok(o),1762// equal1763(a, b) if a == b => Ok(a),1764// unequal1765(a, b) => {1766polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)1767},1768}1769}17701771// Here we rely on the underlying broadcast operations.1772let length = output_length(self, other)?;1773match (self, other) {1774(Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),1775(Column::Series(lhs), Column::Scalar(rhs)) => {1776op(lhs, &rhs.as_single_value_series()).map(Column::from)1777},1778(Column::Scalar(lhs), Column::Series(rhs)) => {1779op(&lhs.as_single_value_series(), rhs).map(Column::from)1780},1781(Column::Scalar(lhs), Column::Scalar(rhs)) => {1782let lhs = lhs.as_single_value_series();1783let rhs = rhs.as_single_value_series();17841785Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())1786},1787// @partition-opt1788(lhs, rhs) => {1789op(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)1790},1791}1792}17931794pub fn apply_binary_elementwise(1795&self,1796other: &Self,1797f: impl Fn(&Series, &Series) -> Series,1798f_lb: impl Fn(&Scalar, &Series) -> Series,1799f_rb: impl Fn(&Series, &Scalar) -> Series,1800) -> Column {1801self.try_apply_binary_elementwise(1802other,1803|lhs, rhs| Ok(f(lhs, rhs)),1804|lhs, rhs| Ok(f_lb(lhs, rhs)),1805|lhs, rhs| Ok(f_rb(lhs, rhs)),1806)1807.unwrap()1808}1809pub fn try_apply_binary_elementwise(1810&self,1811other: &Self,1812f: impl Fn(&Series, &Series) -> PolarsResult<Series>,1813f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,1814f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,1815) -> PolarsResult<Column> {1816debug_assert_eq!(self.len(), other.len());18171818match (self, other) {1819(Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),1820(Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),1821(Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),1822(Column::Scalar(lhs), Column::Scalar(rhs)) => {1823let lhs = lhs.as_single_value_series();1824let rhs = rhs.as_single_value_series();18251826Ok(1827ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())1828.into_column(),1829)1830},1831// @partition-opt1832(lhs, rhs) => {1833f(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)1834},1835}1836}18371838#[cfg(feature = "approx_unique")]1839pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {1840match self {1841Column::Series(s) => s.approx_n_unique(),1842// @partition-opt1843Column::Partitioned(s) => s.as_materialized_series().approx_n_unique(),1844Column::Scalar(s) => {1845// @NOTE: We do this for the error handling.1846s.as_single_value_series().approx_n_unique()?;1847Ok(1)1848},1849}1850}18511852pub fn n_chunks(&self) -> usize {1853match self {1854Column::Series(s) => s.n_chunks(),1855Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),1856Column::Partitioned(s) => {1857if let Some(s) = s.lazy_as_materialized_series() {1858// This should always hold for partitioned.1859debug_assert_eq!(s.n_chunks(), 1)1860}186111862},1863}1864}18651866#[expect(clippy::wrong_self_convention)]1867pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {1868// @scalar-opt1869self.as_materialized_series().into_total_ord_inner()1870}1871#[expect(unused, clippy::wrong_self_convention)]1872pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {1873// @scalar-opt1874self.as_materialized_series().into_total_eq_inner()1875}18761877pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {1878// Rechunk to one chunk if necessary1879let mut series = self.take_materialized_series();1880if series.n_chunks() > 1 {1881series = series.rechunk();1882}1883series.to_arrow(0, compat_level)1884}18851886pub fn trim_lists_to_normalized_offsets(&self) -> Option<Column> {1887self.as_materialized_series()1888.trim_lists_to_normalized_offsets()1889.map(Column::from)1890}18911892pub fn propagate_nulls(&self) -> Option<Column> {1893self.as_materialized_series()1894.propagate_nulls()1895.map(Column::from)1896}1897}18981899impl Default for Column {1900fn default() -> Self {1901Self::new_scalar(1902PlSmallStr::EMPTY,1903Scalar::new(DataType::Int64, AnyValue::Null),19040,1905)1906}1907}19081909impl PartialEq for Column {1910fn eq(&self, other: &Self) -> bool {1911// @scalar-opt1912self.as_materialized_series()1913.eq(other.as_materialized_series())1914}1915}19161917impl From<Series> for Column {1918#[inline]1919fn from(series: Series) -> Self {1920// We instantiate a Scalar Column if the Series is length is 1. This makes it possible for1921// future operations to be faster.1922if series.len() == 1 {1923return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));1924}19251926Self::Series(SeriesColumn::new(series))1927}1928}19291930impl<T: IntoSeries> IntoColumn for T {1931#[inline]1932fn into_column(self) -> Column {1933self.into_series().into()1934}1935}19361937impl IntoColumn for Column {1938#[inline(always)]1939fn into_column(self) -> Column {1940self1941}1942}19431944/// We don't want to serialize the scalar columns. So this helps pretend that columns are always1945/// initialized without implementing From<Column> for Series.1946///1947/// Those casts should be explicit.1948#[derive(Clone)]1949#[cfg_attr(feature = "serde", derive(serde::Serialize))]1950#[cfg_attr(feature = "serde", serde(into = "Series"))]1951struct _SerdeSeries(Series);19521953impl From<Column> for _SerdeSeries {1954#[inline]1955fn from(value: Column) -> Self {1956Self(value.take_materialized_series())1957}1958}19591960impl From<_SerdeSeries> for Series {1961#[inline]1962fn from(value: _SerdeSeries) -> Self {1963value.01964}1965}196619671968