Path: blob/main/crates/polars-core/src/frame/column/scalar.rs
6940 views
use std::sync::OnceLock;12use polars_error::PolarsResult;3use polars_utils::pl_str::PlSmallStr;45use super::{AnyValue, Column, DataType, IntoColumn, Scalar, Series};6use crate::chunked_array::cast::CastOptions;78/// A [`Column`] that consists of a repeated [`Scalar`]9///10/// This is lazily materialized into a [`Series`].11#[derive(Debug, Clone)]12pub struct ScalarColumn {13name: PlSmallStr,14// The value of this scalar may be incoherent when `length == 0`.15scalar: Scalar,16length: usize,1718// invariants:19// materialized.name() == name20// materialized.len() == length21// materialized.dtype() == value.dtype22// materialized[i] == value, for all 0 <= i < length23/// A lazily materialized [`Series`] variant of this [`ScalarColumn`]24materialized: OnceLock<Series>,25}2627impl ScalarColumn {28#[inline]29pub fn new(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {30Self {31name,32scalar,33length,3435materialized: OnceLock::new(),36}37}3839#[inline]40pub fn new_empty(name: PlSmallStr, dtype: DataType) -> Self {41Self {42name,43scalar: Scalar::new(dtype, AnyValue::Null),44length: 0,4546materialized: OnceLock::new(),47}48}4950pub fn full_null(name: PlSmallStr, length: usize, dtype: DataType) -> Self {51Self::new(name, Scalar::null(dtype), length)52}5354pub fn name(&self) -> &PlSmallStr {55&self.name56}5758pub fn scalar(&self) -> &Scalar {59&self.scalar60}6162pub fn dtype(&self) -> &DataType {63self.scalar.dtype()64}6566pub fn len(&self) -> usize {67self.length68}6970pub fn is_empty(&self) -> bool {71self.length == 072}7374fn _to_series(name: PlSmallStr, value: Scalar, length: usize) -> Series {75let series = if length == 0 {76Series::new_empty(name, value.dtype())77} else {78value.into_series(name).new_from_index(0, length)79};8081debug_assert_eq!(series.len(), length);8283series84}8586/// Materialize the [`ScalarColumn`] into a [`Series`].87pub fn to_series(&self) -> Series {88Self::_to_series(self.name.clone(), self.scalar.clone(), self.length)89}9091/// Get the [`ScalarColumn`] as [`Series`] if it was already materialized.92pub fn lazy_as_materialized_series(&self) -> Option<&Series> {93self.materialized.get()94}9596/// Get the [`ScalarColumn`] as [`Series`]97///98/// This needs to materialize upon the first call. Afterwards, this is cached.99pub fn as_materialized_series(&self) -> &Series {100self.materialized.get_or_init(|| self.to_series())101}102103/// Take the [`ScalarColumn`] and materialize as a [`Series`] if not already done.104pub fn take_materialized_series(self) -> Series {105self.materialized106.into_inner()107.unwrap_or_else(|| Self::_to_series(self.name, self.scalar, self.length))108}109110/// Take the [`ScalarColumn`] as a series with a single value.111///112/// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`.113pub fn as_single_value_series(&self) -> Series {114self.as_n_values_series(1)115}116117/// Take the [`ScalarColumn`] as a series with a `n` values.118///119/// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`.120pub fn as_n_values_series(&self, n: usize) -> Series {121let length = usize::min(n, self.length);122123match self.materialized.get() {124// Don't take a refcount if we only want length-1 (or empty) - the materialized series125// could be extremely large.126Some(s) if length == self.length || length > 1 => s.head(Some(length)),127_ => Self::_to_series(self.name.clone(), self.scalar.clone(), length),128}129}130131/// Create a new [`ScalarColumn`] from a `length=1` Series and expand it `length`.132///133/// This will panic if the value cannot be made static or if the series has length `0`.134#[inline]135pub fn unit_scalar_from_series(series: Series) -> Self {136assert_eq!(series.len(), 1);137// SAFETY: We just did the bounds check138let value = unsafe { series.get_unchecked(0) };139let value = value.into_static();140let value = Scalar::new(series.dtype().clone(), value);141let mut sc = ScalarColumn::new(series.name().clone(), value, 1);142sc.materialized = OnceLock::from(series);143sc144}145146/// Create a new [`ScalarColumn`] from a `length<=1` Series and expand it `length`.147///148/// If `series` is empty and `length` is non-zero, a full-NULL column of `length` will be returned.149///150/// This will panic if the value cannot be made static.151pub fn from_single_value_series(series: Series, length: usize) -> Self {152debug_assert!(series.len() <= 1);153154let value = if series.is_empty() {155AnyValue::Null156} else {157unsafe { series.get_unchecked(0) }.into_static()158};159let value = Scalar::new(series.dtype().clone(), value);160ScalarColumn::new(series.name().clone(), value, length)161}162163/// Resize the [`ScalarColumn`] to new `length`.164///165/// This reuses the materialized [`Series`], if `length <= self.length`.166pub fn resize(&self, length: usize) -> ScalarColumn {167if self.length == length {168return self.clone();169}170171// This is violates an invariant if this triggers, the scalar value is undefined if the172// self.length == 0 so therefore we should never resize using that value.173debug_assert!(length == 0 || self.length > 0);174175let mut resized = Self {176name: self.name.clone(),177scalar: self.scalar.clone(),178length,179materialized: OnceLock::new(),180};181182if length == self.length || (length < self.length && length > 1) {183if let Some(materialized) = self.materialized.get() {184resized.materialized = OnceLock::from(materialized.head(Some(length)));185debug_assert_eq!(resized.materialized.get().unwrap().len(), length);186}187}188189resized190}191192pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {193// @NOTE: We expect that when casting the materialized series mostly does not need change194// the physical array. Therefore, we try to cast the entire materialized array if it is195// available.196197match self.materialized.get() {198Some(s) => {199let materialized = s.cast_with_options(dtype, options)?;200assert_eq!(self.length, materialized.len());201202let mut casted = if materialized.is_empty() {203Self::new_empty(materialized.name().clone(), materialized.dtype().clone())204} else {205// SAFETY: Just did bounds check206let scalar = unsafe { materialized.get_unchecked(0) }.into_static();207Self::new(208materialized.name().clone(),209Scalar::new(materialized.dtype().clone(), scalar),210self.length,211)212};213casted.materialized = OnceLock::from(materialized);214Ok(casted)215},216None => {217let s = self218.as_single_value_series()219.cast_with_options(dtype, options)?;220221if self.length == 0 {222Ok(Self::new_empty(s.name().clone(), s.dtype().clone()))223} else {224assert_eq!(1, s.len());225Ok(Self::from_single_value_series(s, self.length))226}227},228}229}230231pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {232self.cast_with_options(dtype, CastOptions::Strict)233}234pub fn cast(&self, dtype: &DataType) -> PolarsResult<Self> {235self.cast_with_options(dtype, CastOptions::NonStrict)236}237/// # Safety238///239/// This can lead to invalid memory access in downstream code.240pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {241// @NOTE: We expect that when casting the materialized series mostly does not need change242// the physical array. Therefore, we try to cast the entire materialized array if it is243// available.244245match self.materialized.get() {246Some(s) => {247let materialized = s.cast_unchecked(dtype)?;248assert_eq!(self.length, materialized.len());249250let mut casted = if materialized.is_empty() {251Self::new_empty(materialized.name().clone(), materialized.dtype().clone())252} else {253// SAFETY: Just did bounds check254let scalar = unsafe { materialized.get_unchecked(0) }.into_static();255Self::new(256materialized.name().clone(),257Scalar::new(materialized.dtype().clone(), scalar),258self.length,259)260};261casted.materialized = OnceLock::from(materialized);262Ok(casted)263},264None => {265let s = self.as_single_value_series().cast_unchecked(dtype)?;266assert_eq!(1, s.len());267268if self.length == 0 {269Ok(Self::new_empty(s.name().clone(), s.dtype().clone()))270} else {271Ok(Self::from_single_value_series(s, self.length))272}273},274}275}276277pub fn rename(&mut self, name: PlSmallStr) -> &mut Self {278if let Some(series) = self.materialized.get_mut() {279series.rename(name.clone());280}281282self.name = name;283self284}285286pub fn has_nulls(&self) -> bool {287self.length != 0 && self.scalar.is_null()288}289290pub fn drop_nulls(&self) -> Self {291if self.scalar.is_null() {292self.resize(0)293} else {294self.clone()295}296}297298pub fn into_nulls(mut self) -> Self {299self.scalar.update(AnyValue::Null);300self301}302303pub fn map_scalar(&mut self, map_scalar: impl Fn(Scalar) -> Scalar) {304self.scalar = map_scalar(std::mem::take(&mut self.scalar));305self.materialized.take();306}307pub fn with_value(&mut self, value: AnyValue<'static>) -> &mut Self {308self.scalar.update(value);309self.materialized.take();310self311}312}313314impl IntoColumn for ScalarColumn {315#[inline(always)]316fn into_column(self) -> Column {317self.into()318}319}320321impl From<ScalarColumn> for Column {322#[inline]323fn from(value: ScalarColumn) -> Self {324Self::Scalar(value)325}326}327328#[cfg(feature = "dsl-schema")]329impl schemars::JsonSchema for ScalarColumn {330fn schema_name() -> String {331"ScalarColumn".to_owned()332}333334fn schema_id() -> std::borrow::Cow<'static, str> {335std::borrow::Cow::Borrowed(concat!(module_path!(), "::", "ScalarColumn"))336}337338fn json_schema(generator: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {339serde_impl::SerializeWrap::json_schema(generator)340}341}342343#[cfg(feature = "serde")]344mod serde_impl {345use std::sync::OnceLock;346347use polars_error::PolarsError;348use polars_utils::pl_str::PlSmallStr;349350use super::ScalarColumn;351use crate::frame::{Scalar, Series};352353#[derive(serde::Serialize, serde::Deserialize)]354#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]355pub struct SerializeWrap {356name: PlSmallStr,357/// Unit-length series for dispatching to IPC serialize358unit_series: Series,359length: usize,360}361362impl From<&ScalarColumn> for SerializeWrap {363fn from(value: &ScalarColumn) -> Self {364Self {365name: value.name.clone(),366unit_series: value.scalar.clone().into_series(PlSmallStr::EMPTY),367length: value.length,368}369}370}371372impl TryFrom<SerializeWrap> for ScalarColumn {373type Error = PolarsError;374375fn try_from(value: SerializeWrap) -> Result<Self, Self::Error> {376let slf = Self {377name: value.name,378scalar: Scalar::new(379value.unit_series.dtype().clone(),380value.unit_series.get(0)?.into_static(),381),382length: value.length,383materialized: OnceLock::new(),384};385386Ok(slf)387}388}389390impl serde::ser::Serialize for ScalarColumn {391fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>392where393S: serde::Serializer,394{395SerializeWrap::from(self).serialize(serializer)396}397}398399impl<'de> serde::de::Deserialize<'de> for ScalarColumn {400fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>401where402D: serde::Deserializer<'de>,403{404use serde::de::Error;405406SerializeWrap::deserialize(deserializer)407.and_then(|x| ScalarColumn::try_from(x).map_err(D::Error::custom))408}409}410}411412413