Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/frame/column/scalar.rs
6940 views
1
use std::sync::OnceLock;
2
3
use polars_error::PolarsResult;
4
use polars_utils::pl_str::PlSmallStr;
5
6
use super::{AnyValue, Column, DataType, IntoColumn, Scalar, Series};
7
use crate::chunked_array::cast::CastOptions;
8
9
/// A [`Column`] that consists of a repeated [`Scalar`]
10
///
11
/// This is lazily materialized into a [`Series`].
12
#[derive(Debug, Clone)]
13
pub struct ScalarColumn {
14
name: PlSmallStr,
15
// The value of this scalar may be incoherent when `length == 0`.
16
scalar: Scalar,
17
length: usize,
18
19
// invariants:
20
// materialized.name() == name
21
// materialized.len() == length
22
// materialized.dtype() == value.dtype
23
// materialized[i] == value, for all 0 <= i < length
24
/// A lazily materialized [`Series`] variant of this [`ScalarColumn`]
25
materialized: OnceLock<Series>,
26
}
27
28
impl ScalarColumn {
29
#[inline]
30
pub fn new(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
31
Self {
32
name,
33
scalar,
34
length,
35
36
materialized: OnceLock::new(),
37
}
38
}
39
40
#[inline]
41
pub fn new_empty(name: PlSmallStr, dtype: DataType) -> Self {
42
Self {
43
name,
44
scalar: Scalar::new(dtype, AnyValue::Null),
45
length: 0,
46
47
materialized: OnceLock::new(),
48
}
49
}
50
51
pub fn full_null(name: PlSmallStr, length: usize, dtype: DataType) -> Self {
52
Self::new(name, Scalar::null(dtype), length)
53
}
54
55
pub fn name(&self) -> &PlSmallStr {
56
&self.name
57
}
58
59
pub fn scalar(&self) -> &Scalar {
60
&self.scalar
61
}
62
63
/// The data type of this column, which is the data type of its scalar.
pub fn dtype(&self) -> &DataType {
    let scalar = &self.scalar;
    scalar.dtype()
}
66
67
pub fn len(&self) -> usize {
68
self.length
69
}
70
71
pub fn is_empty(&self) -> bool {
72
self.length == 0
73
}
74
75
fn _to_series(name: PlSmallStr, value: Scalar, length: usize) -> Series {
76
let series = if length == 0 {
77
Series::new_empty(name, value.dtype())
78
} else {
79
value.into_series(name).new_from_index(0, length)
80
};
81
82
debug_assert_eq!(series.len(), length);
83
84
series
85
}
86
87
/// Materialize the [`ScalarColumn`] into a [`Series`].
88
pub fn to_series(&self) -> Series {
89
Self::_to_series(self.name.clone(), self.scalar.clone(), self.length)
90
}
91
92
/// Get the [`ScalarColumn`] as [`Series`] if it was already materialized.
93
pub fn lazy_as_materialized_series(&self) -> Option<&Series> {
94
self.materialized.get()
95
}
96
97
/// Get the [`ScalarColumn`] as [`Series`]
98
///
99
/// This needs to materialize upon the first call. Afterwards, this is cached.
100
pub fn as_materialized_series(&self) -> &Series {
101
self.materialized.get_or_init(|| self.to_series())
102
}
103
104
/// Take the [`ScalarColumn`] and materialize as a [`Series`] if not already done.
105
pub fn take_materialized_series(self) -> Series {
106
self.materialized
107
.into_inner()
108
.unwrap_or_else(|| Self::_to_series(self.name, self.scalar, self.length))
109
}
110
111
/// Take the [`ScalarColumn`] as a series with a single value.
112
///
113
/// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`.
114
pub fn as_single_value_series(&self) -> Series {
115
self.as_n_values_series(1)
116
}
117
118
/// Take the [`ScalarColumn`] as a series with a `n` values.
119
///
120
/// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`.
121
pub fn as_n_values_series(&self, n: usize) -> Series {
122
let length = usize::min(n, self.length);
123
124
match self.materialized.get() {
125
// Don't take a refcount if we only want length-1 (or empty) - the materialized series
126
// could be extremely large.
127
Some(s) if length == self.length || length > 1 => s.head(Some(length)),
128
_ => Self::_to_series(self.name.clone(), self.scalar.clone(), length),
129
}
130
}
131
132
/// Create a new [`ScalarColumn`] from a `length=1` Series and expand it `length`.
133
///
134
/// This will panic if the value cannot be made static or if the series has length `0`.
135
#[inline]
136
pub fn unit_scalar_from_series(series: Series) -> Self {
137
assert_eq!(series.len(), 1);
138
// SAFETY: We just did the bounds check
139
let value = unsafe { series.get_unchecked(0) };
140
let value = value.into_static();
141
let value = Scalar::new(series.dtype().clone(), value);
142
let mut sc = ScalarColumn::new(series.name().clone(), value, 1);
143
sc.materialized = OnceLock::from(series);
144
sc
145
}
146
147
/// Create a new [`ScalarColumn`] from a `length<=1` Series and expand it `length`.
148
///
149
/// If `series` is empty and `length` is non-zero, a full-NULL column of `length` will be returned.
150
///
151
/// This will panic if the value cannot be made static.
152
pub fn from_single_value_series(series: Series, length: usize) -> Self {
153
debug_assert!(series.len() <= 1);
154
155
let value = if series.is_empty() {
156
AnyValue::Null
157
} else {
158
unsafe { series.get_unchecked(0) }.into_static()
159
};
160
let value = Scalar::new(series.dtype().clone(), value);
161
ScalarColumn::new(series.name().clone(), value, length)
162
}
163
164
/// Resize the [`ScalarColumn`] to new `length`.
165
///
166
/// This reuses the materialized [`Series`], if `length <= self.length`.
167
pub fn resize(&self, length: usize) -> ScalarColumn {
168
if self.length == length {
169
return self.clone();
170
}
171
172
// This is violates an invariant if this triggers, the scalar value is undefined if the
173
// self.length == 0 so therefore we should never resize using that value.
174
debug_assert!(length == 0 || self.length > 0);
175
176
let mut resized = Self {
177
name: self.name.clone(),
178
scalar: self.scalar.clone(),
179
length,
180
materialized: OnceLock::new(),
181
};
182
183
if length == self.length || (length < self.length && length > 1) {
184
if let Some(materialized) = self.materialized.get() {
185
resized.materialized = OnceLock::from(materialized.head(Some(length)));
186
debug_assert_eq!(resized.materialized.get().unwrap().len(), length);
187
}
188
}
189
190
resized
191
}
192
193
pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
194
// @NOTE: We expect that when casting the materialized series mostly does not need change
195
// the physical array. Therefore, we try to cast the entire materialized array if it is
196
// available.
197
198
match self.materialized.get() {
199
Some(s) => {
200
let materialized = s.cast_with_options(dtype, options)?;
201
assert_eq!(self.length, materialized.len());
202
203
let mut casted = if materialized.is_empty() {
204
Self::new_empty(materialized.name().clone(), materialized.dtype().clone())
205
} else {
206
// SAFETY: Just did bounds check
207
let scalar = unsafe { materialized.get_unchecked(0) }.into_static();
208
Self::new(
209
materialized.name().clone(),
210
Scalar::new(materialized.dtype().clone(), scalar),
211
self.length,
212
)
213
};
214
casted.materialized = OnceLock::from(materialized);
215
Ok(casted)
216
},
217
None => {
218
let s = self
219
.as_single_value_series()
220
.cast_with_options(dtype, options)?;
221
222
if self.length == 0 {
223
Ok(Self::new_empty(s.name().clone(), s.dtype().clone()))
224
} else {
225
assert_eq!(1, s.len());
226
Ok(Self::from_single_value_series(s, self.length))
227
}
228
},
229
}
230
}
231
232
pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
233
self.cast_with_options(dtype, CastOptions::Strict)
234
}
235
pub fn cast(&self, dtype: &DataType) -> PolarsResult<Self> {
236
self.cast_with_options(dtype, CastOptions::NonStrict)
237
}
238
/// # Safety
239
///
240
/// This can lead to invalid memory access in downstream code.
241
pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
242
// @NOTE: We expect that when casting the materialized series mostly does not need change
243
// the physical array. Therefore, we try to cast the entire materialized array if it is
244
// available.
245
246
match self.materialized.get() {
247
Some(s) => {
248
let materialized = s.cast_unchecked(dtype)?;
249
assert_eq!(self.length, materialized.len());
250
251
let mut casted = if materialized.is_empty() {
252
Self::new_empty(materialized.name().clone(), materialized.dtype().clone())
253
} else {
254
// SAFETY: Just did bounds check
255
let scalar = unsafe { materialized.get_unchecked(0) }.into_static();
256
Self::new(
257
materialized.name().clone(),
258
Scalar::new(materialized.dtype().clone(), scalar),
259
self.length,
260
)
261
};
262
casted.materialized = OnceLock::from(materialized);
263
Ok(casted)
264
},
265
None => {
266
let s = self.as_single_value_series().cast_unchecked(dtype)?;
267
assert_eq!(1, s.len());
268
269
if self.length == 0 {
270
Ok(Self::new_empty(s.name().clone(), s.dtype().clone()))
271
} else {
272
Ok(Self::from_single_value_series(s, self.length))
273
}
274
},
275
}
276
}
277
278
pub fn rename(&mut self, name: PlSmallStr) -> &mut Self {
279
if let Some(series) = self.materialized.get_mut() {
280
series.rename(name.clone());
281
}
282
283
self.name = name;
284
self
285
}
286
287
pub fn has_nulls(&self) -> bool {
288
self.length != 0 && self.scalar.is_null()
289
}
290
291
pub fn drop_nulls(&self) -> Self {
292
if self.scalar.is_null() {
293
self.resize(0)
294
} else {
295
self.clone()
296
}
297
}
298
299
/// Consume the column and return it with its scalar value replaced by null.
///
/// Name and length are kept. Any previously materialized [`Series`] is discarded because it
/// still holds the old value; it is re-materialized lazily on demand.
pub fn into_nulls(mut self) -> Self {
    self.scalar.update(AnyValue::Null);
    // Drop the stale cache so that `materialized[i] == scalar` keeps holding.
    self.materialized.take();
    self
}
303
304
pub fn map_scalar(&mut self, map_scalar: impl Fn(Scalar) -> Scalar) {
305
self.scalar = map_scalar(std::mem::take(&mut self.scalar));
306
self.materialized.take();
307
}
308
pub fn with_value(&mut self, value: AnyValue<'static>) -> &mut Self {
309
self.scalar.update(value);
310
self.materialized.take();
311
self
312
}
313
}
314
315
impl IntoColumn for ScalarColumn {
316
#[inline(always)]
317
fn into_column(self) -> Column {
318
self.into()
319
}
320
}
321
322
impl From<ScalarColumn> for Column {
323
#[inline]
324
fn from(value: ScalarColumn) -> Self {
325
Self::Scalar(value)
326
}
327
}
328
329
// The JSON schema of a `ScalarColumn` is the schema of its serialization wrapper; only the
// schema name and id are specific to `ScalarColumn`.
#[cfg(feature = "dsl-schema")]
impl schemars::JsonSchema for ScalarColumn {
    fn schema_name() -> String {
        String::from("ScalarColumn")
    }

    fn schema_id() -> std::borrow::Cow<'static, str> {
        const ID: &str = concat!(module_path!(), "::", "ScalarColumn");
        std::borrow::Cow::Borrowed(ID)
    }

    fn json_schema(generator: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
        <serde_impl::SerializeWrap as schemars::JsonSchema>::json_schema(generator)
    }
}
343
344
// Serialization support: a `ScalarColumn` is (de)serialized through `SerializeWrap`, which
// stores the name, the length and the scalar packed into a series.
#[cfg(feature = "serde")]
mod serde_impl {
    use std::sync::OnceLock;

    use polars_error::PolarsError;
    use polars_utils::pl_str::PlSmallStr;

    use super::ScalarColumn;
    use crate::frame::{Scalar, Series};

    // Serialized representation of a `ScalarColumn`; the materialization cache is not part
    // of it.
    #[derive(serde::Serialize, serde::Deserialize)]
    #[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
    pub struct SerializeWrap {
        name: PlSmallStr,
        /// Unit-length series for dispatching to IPC serialize
        unit_series: Series,
        length: usize,
    }

    impl From<&ScalarColumn> for SerializeWrap {
        // Pack the scalar into an unnamed series; the column name is stored separately.
        fn from(value: &ScalarColumn) -> Self {
            let scalar = value.scalar.clone();
            let unit_series = scalar.into_series(PlSmallStr::EMPTY);

            Self {
                name: value.name.clone(),
                unit_series,
                length: value.length,
            }
        }
    }

    impl TryFrom<SerializeWrap> for ScalarColumn {
        type Error = PolarsError;

        // Rebuild the column from the first value of the stored series. Fails if that value
        // cannot be read. The result is never materialized.
        fn try_from(value: SerializeWrap) -> Result<Self, Self::Error> {
            let SerializeWrap {
                name,
                unit_series,
                length,
            } = value;

            let dtype = unit_series.dtype().clone();
            let first = unit_series.get(0)?.into_static();

            Ok(Self {
                name,
                scalar: Scalar::new(dtype, first),
                length,
                materialized: OnceLock::new(),
            })
        }
    }

    impl serde::ser::Serialize for ScalarColumn {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            let wrap = SerializeWrap::from(self);
            serde::Serialize::serialize(&wrap, serializer)
        }
    }

    impl<'de> serde::de::Deserialize<'de> for ScalarColumn {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            use serde::de::Error;

            let wrap = <SerializeWrap as serde::Deserialize>::deserialize(deserializer)?;
            // Surface conversion failures as custom deserialization errors.
            ScalarColumn::try_from(wrap).map_err(D::Error::custom)
        }
    }
}
412
413