Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/plans/lit.rs
6940 views
1
use std::hash::{Hash, Hasher};
2
3
#[cfg(feature = "temporal")]
4
use chrono::{Duration as ChronoDuration, NaiveDate, NaiveDateTime};
5
use polars_core::chunked_array::cast::CastOptions;
6
use polars_core::prelude::*;
7
use polars_core::utils::materialize_dyn_int;
8
use polars_utils::hashing::hash_to_partition;
9
#[cfg(feature = "serde")]
10
use serde::{Deserialize, Serialize};
11
12
use crate::constants::get_literal_name;
13
use crate::prelude::*;
14
15
/// A literal whose concrete data type has not been fixed yet.
///
/// It is carried by [`LiteralValue::Dyn`] and is turned into a typed [`Scalar`] by
/// [`DynLiteralValue::try_materialize_to_dtype`].
#[derive(Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum DynLiteralValue {
    /// A string literal.
    Str(PlSmallStr),
    /// An integer literal, stored as `i128`.
    Int(i128),
    /// A floating point literal, stored as `f64`.
    Float(f64),
    /// A list literal whose elements are dynamically typed themselves.
    List(DynListLiteralValue),
}
24
/// The payload of a dynamically typed list literal ([`DynLiteralValue::List`]).
///
/// Every variant stores its elements as a boxed slice of `Option`s.
// NOTE(review): `None` presumably stands for a null element — confirm with the producers.
#[derive(Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum DynListLiteralValue {
    /// A list of optional strings.
    Str(Box<[Option<PlSmallStr>]>),
    /// A list of optional integers, stored as `i128`.
    Int(Box<[Option<i128>]>),
    /// A list of optional floats, stored as `f64`.
    Float(Box<[Option<f64>]>),
    /// A list of optional nested dynamic lists.
    List(Box<[Option<DynListLiteralValue>]>),
}
33
34
impl Hash for DynLiteralValue {
35
fn hash<H: Hasher>(&self, state: &mut H) {
36
std::mem::discriminant(self).hash(state);
37
match self {
38
Self::Str(i) => i.hash(state),
39
Self::Int(i) => i.hash(state),
40
Self::Float(i) => i.to_ne_bytes().hash(state),
41
Self::List(i) => i.hash(state),
42
}
43
}
44
}
45
46
impl Hash for DynListLiteralValue {
47
fn hash<H: Hasher>(&self, state: &mut H) {
48
std::mem::discriminant(self).hash(state);
49
match self {
50
Self::Str(i) => i.hash(state),
51
Self::Int(i) => i.hash(state),
52
Self::Float(i) => i
53
.iter()
54
.for_each(|i| i.map(|i| i.to_ne_bytes()).hash(state)),
55
Self::List(i) => i.hash(state),
56
}
57
}
58
}
59
60
/// An integer range literal described by its two bounds and a data type.
///
/// The values are only produced when the range is materialized, see
/// [`RangeLiteralValue::try_materialize_to_series`].
// NOTE(review): whether `high` is inclusive is decided by `new_int_range`, which is not
// visible in this file — confirm before relying on it.
#[derive(Clone, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct RangeLiteralValue {
    /// Lower bound of the range.
    pub low: i128,
    /// Upper bound of the range.
    pub high: i128,
    /// Data type reported for this literal by [`LiteralValue::get_datatype`].
    pub dtype: DataType,
}
68
/// The value carried by a literal expression ([`Expr::Literal`]).
#[derive(Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum LiteralValue {
    /// A dynamically inferred literal value. This needs to be materialized into a specific type.
    Dyn(DynLiteralValue),
    /// A single value with a known data type.
    Scalar(Scalar),
    /// A whole [`Series`] used as a literal.
    Series(SpecialEq<Series>),
    /// An integer range that is materialized into a [`Series`] on demand.
    Range(RangeLiteralValue),
}
78
79
/// A literal in a concrete, typed form: either a single [`Scalar`] or a [`Series`].
// NOTE(review): no constructor is visible in this file; presumably produced when a
// `LiteralValue` is materialized elsewhere — confirm against the users of this type.
pub enum MaterializedLiteralValue {
    /// A single typed value.
    Scalar(Scalar),
    /// A column of values.
    Series(Series),
}
83
84
impl DynListLiteralValue {
85
pub fn try_materialize_to_dtype(
86
self,
87
dtype: &DataType,
88
options: CastOptions,
89
) -> PolarsResult<Scalar> {
90
let Some(inner_dtype) = dtype.inner_dtype() else {
91
polars_bail!(InvalidOperation: "conversion from list literal to `{dtype}` failed.");
92
};
93
94
let s = match self {
95
DynListLiteralValue::Str(vs) => {
96
StringChunked::from_iter_options(PlSmallStr::from_static("literal"), vs.into_iter())
97
.into_series()
98
},
99
DynListLiteralValue::Int(vs) => {
100
#[cfg(feature = "dtype-i128")]
101
{
102
Int128Chunked::from_iter_options(
103
PlSmallStr::from_static("literal"),
104
vs.into_iter(),
105
)
106
.into_series()
107
}
108
109
#[cfg(not(feature = "dtype-i128"))]
110
{
111
Int64Chunked::from_iter_options(
112
PlSmallStr::from_static("literal"),
113
vs.into_iter().map(|v| v.map(|v| v as i64)),
114
)
115
.into_series()
116
}
117
},
118
DynListLiteralValue::Float(vs) => Float64Chunked::from_iter_options(
119
PlSmallStr::from_static("literal"),
120
vs.into_iter(),
121
)
122
.into_series(),
123
DynListLiteralValue::List(_) => todo!("nested lists"),
124
};
125
126
let s = s.cast_with_options(inner_dtype, options)?;
127
let value = match dtype {
128
DataType::List(_) => AnyValue::List(s),
129
#[cfg(feature = "dtype-array")]
130
DataType::Array(_, size) => AnyValue::Array(s, *size),
131
_ => unreachable!(),
132
};
133
134
Ok(Scalar::new(dtype.clone(), value))
135
}
136
}
137
138
impl DynLiteralValue {
139
pub fn try_materialize_to_dtype(
140
self,
141
dtype: &DataType,
142
options: CastOptions,
143
) -> PolarsResult<Scalar> {
144
match self {
145
DynLiteralValue::Str(s) => Ok(Scalar::from(s).cast_with_options(dtype, options)?),
146
DynLiteralValue::Int(i) => {
147
#[cfg(not(feature = "dtype-i128"))]
148
let i: i64 = i.try_into().expect("activate dtype-i128 feature");
149
150
Ok(Scalar::from(i).cast_with_options(dtype, options)?)
151
},
152
DynLiteralValue::Float(f) => Ok(Scalar::from(f).cast_with_options(dtype, options)?),
153
DynLiteralValue::List(dyn_list_value) => {
154
dyn_list_value.try_materialize_to_dtype(dtype, options)
155
},
156
}
157
}
158
}
159
160
impl RangeLiteralValue {
161
pub fn try_materialize_to_series(self, dtype: &DataType) -> PolarsResult<Series> {
162
fn handle_range_oob(range: &RangeLiteralValue, to_dtype: &DataType) -> PolarsResult<()> {
163
polars_bail!(
164
InvalidOperation:
165
"conversion from `{}` to `{to_dtype}` failed for range({}, {})",
166
range.dtype, range.low, range.high,
167
)
168
}
169
170
let s = match dtype {
171
DataType::Int32 => {
172
if self.low < i32::MIN as i128 || self.high > i32::MAX as i128 {
173
handle_range_oob(&self, dtype)?;
174
}
175
176
new_int_range::<Int32Type>(
177
self.low as i32,
178
self.high as i32,
179
1,
180
PlSmallStr::from_static("range"),
181
)
182
.unwrap()
183
},
184
DataType::Int64 => {
185
if self.low < i64::MIN as i128 || self.high > i64::MAX as i128 {
186
handle_range_oob(&self, dtype)?;
187
}
188
189
new_int_range::<Int64Type>(
190
self.low as i64,
191
self.high as i64,
192
1,
193
PlSmallStr::from_static("range"),
194
)
195
.unwrap()
196
},
197
DataType::UInt32 => {
198
if self.low < u32::MIN as i128 || self.high > u32::MAX as i128 {
199
handle_range_oob(&self, dtype)?;
200
}
201
new_int_range::<UInt32Type>(
202
self.low as u32,
203
self.high as u32,
204
1,
205
PlSmallStr::from_static("range"),
206
)
207
.unwrap()
208
},
209
_ => polars_bail!(InvalidOperation: "unsupported range datatype `{dtype}`"),
210
};
211
212
Ok(s)
213
}
214
}
215
216
impl LiteralValue {
217
/// Get the output name as [`PlSmallStr`].
218
pub(crate) fn output_column_name(&self) -> &PlSmallStr {
219
match self {
220
LiteralValue::Series(s) => s.name(),
221
_ => get_literal_name(),
222
}
223
}
224
225
pub fn extract_usize(&self) -> PolarsResult<usize> {
226
macro_rules! cast_usize {
227
($v:expr) => {
228
usize::try_from($v).map_err(
229
|_| polars_err!(InvalidOperation: "cannot convert value {} to usize", $v)
230
)
231
}
232
}
233
match &self {
234
Self::Dyn(DynLiteralValue::Int(v)) => cast_usize!(*v),
235
Self::Scalar(sc) => match sc.as_any_value() {
236
AnyValue::UInt8(v) => Ok(v as usize),
237
AnyValue::UInt16(v) => Ok(v as usize),
238
AnyValue::UInt32(v) => cast_usize!(v),
239
AnyValue::UInt64(v) => cast_usize!(v),
240
AnyValue::Int8(v) => cast_usize!(v),
241
AnyValue::Int16(v) => cast_usize!(v),
242
AnyValue::Int32(v) => cast_usize!(v),
243
AnyValue::Int64(v) => cast_usize!(v),
244
AnyValue::Int128(v) => cast_usize!(v),
245
_ => {
246
polars_bail!(InvalidOperation: "expression must be constant literal to extract integer")
247
},
248
},
249
_ => {
250
polars_bail!(InvalidOperation: "expression must be constant literal to extract integer")
251
},
252
}
253
}
254
255
pub fn extract_i64(&self) -> PolarsResult<i64> {
256
macro_rules! cast_i64 {
257
($v:expr) => {
258
i64::try_from($v).map_err(
259
|_| polars_err!(InvalidOperation: "cannot convert value {} to i64", $v)
260
)
261
}
262
}
263
match &self {
264
Self::Dyn(DynLiteralValue::Int(v)) => cast_i64!(*v),
265
Self::Scalar(sc) => match sc.as_any_value() {
266
AnyValue::UInt8(v) => Ok(v as i64),
267
AnyValue::UInt16(v) => Ok(v as i64),
268
AnyValue::UInt32(v) => cast_i64!(v),
269
AnyValue::UInt64(v) => cast_i64!(v),
270
AnyValue::Int8(v) => cast_i64!(v),
271
AnyValue::Int16(v) => cast_i64!(v),
272
AnyValue::Int32(v) => cast_i64!(v),
273
AnyValue::Int64(v) => Ok(v),
274
AnyValue::Int128(v) => cast_i64!(v),
275
_ => {
276
polars_bail!(InvalidOperation: "expression must be constant literal to extract integer")
277
},
278
},
279
_ => {
280
polars_bail!(InvalidOperation: "expression must be constant literal to extract integer")
281
},
282
}
283
}
284
285
pub fn materialize(self) -> Self {
286
match self {
287
LiteralValue::Dyn(_) => {
288
let av = self.to_any_value().unwrap();
289
av.into()
290
},
291
lv => lv,
292
}
293
}
294
295
pub fn is_scalar(&self) -> bool {
296
!matches!(self, LiteralValue::Series(_) | LiteralValue::Range { .. })
297
}
298
299
pub fn to_any_value(&self) -> Option<AnyValue<'_>> {
300
let av = match self {
301
Self::Scalar(sc) => sc.value().clone(),
302
Self::Range(range) => {
303
let s = range.clone().try_materialize_to_series(&range.dtype).ok()?;
304
AnyValue::List(s)
305
},
306
Self::Series(_) => return None,
307
Self::Dyn(d) => match d {
308
DynLiteralValue::Int(v) => materialize_dyn_int(*v),
309
DynLiteralValue::Float(v) => AnyValue::Float64(*v),
310
DynLiteralValue::Str(v) => AnyValue::String(v),
311
DynLiteralValue::List(_) => todo!(),
312
},
313
};
314
Some(av)
315
}
316
317
/// Getter for the `DataType` of the value
318
pub fn get_datatype(&self) -> DataType {
319
match self {
320
Self::Dyn(d) => match d {
321
DynLiteralValue::Int(v) => DataType::Unknown(UnknownKind::Int(*v)),
322
DynLiteralValue::Float(_) => DataType::Unknown(UnknownKind::Float),
323
DynLiteralValue::Str(_) => DataType::Unknown(UnknownKind::Str),
324
DynLiteralValue::List(_) => todo!(),
325
},
326
Self::Scalar(sc) => sc.dtype().clone(),
327
Self::Series(s) => s.dtype().clone(),
328
Self::Range(s) => s.dtype.clone(),
329
}
330
}
331
332
pub fn new_idxsize(value: IdxSize) -> Self {
333
LiteralValue::Scalar(value.into())
334
}
335
336
pub fn extract_str(&self) -> Option<&str> {
337
match self {
338
LiteralValue::Dyn(DynLiteralValue::Str(s)) => Some(s.as_str()),
339
LiteralValue::Scalar(sc) => match sc.value() {
340
AnyValue::String(s) => Some(s),
341
AnyValue::StringOwned(s) => Some(s),
342
_ => None,
343
},
344
_ => None,
345
}
346
}
347
348
pub fn extract_binary(&self) -> Option<&[u8]> {
349
match self {
350
LiteralValue::Scalar(sc) => match sc.value() {
351
AnyValue::Binary(s) => Some(s),
352
AnyValue::BinaryOwned(s) => Some(s),
353
_ => None,
354
},
355
_ => None,
356
}
357
}
358
359
pub fn is_null(&self) -> bool {
360
match self {
361
Self::Scalar(sc) => sc.is_null(),
362
Self::Series(s) => s.len() == 1 && s.null_count() == 1,
363
Self::Dyn(_) => false,
364
Self::Range(_) => false,
365
}
366
}
367
368
pub fn bool(&self) -> Option<bool> {
369
match self {
370
LiteralValue::Scalar(s) => match s.as_any_value() {
371
AnyValue::Boolean(b) => Some(b),
372
_ => None,
373
},
374
_ => None,
375
}
376
}
377
378
pub const fn untyped_null() -> Self {
379
Self::Scalar(Scalar::null(DataType::Null))
380
}
381
382
pub fn implode(self) -> PolarsResult<Self> {
383
let series = match self.materialize() {
384
LiteralValue::Dyn(_) => unreachable!(),
385
LiteralValue::Scalar(scalar) => scalar.into_series(PlSmallStr::EMPTY),
386
LiteralValue::Series(series) => series.into_inner(),
387
LiteralValue::Range(range) => {
388
let dtype = range.dtype.clone();
389
range.try_materialize_to_series(&dtype)?
390
},
391
};
392
393
let dtype = DataType::List(Box::new(series.dtype().clone()));
394
Ok(LiteralValue::Scalar(Scalar::new(
395
dtype,
396
AnyValue::List(series),
397
)))
398
}
399
}
400
401
impl From<Scalar> for LiteralValue {
402
fn from(value: Scalar) -> Self {
403
Self::Scalar(value)
404
}
405
}
406
407
/// Types that can be converted into a literal expression.
pub trait Literal {
    /// [Literal](Expr::Literal) expression.
    fn lit(self) -> Expr;
}
411
412
pub trait TypedLiteral: Literal {
413
/// [Literal](Expr::Literal) expression.
414
fn typed_lit(self) -> Expr
415
where
416
Self: Sized,
417
{
418
self.lit()
419
}
420
}
421
422
// Strings rely on the provided `typed_lit`, which forwards to `lit`.
impl TypedLiteral for String {}
impl TypedLiteral for &str {}
424
425
impl Literal for PlSmallStr {
426
fn lit(self) -> Expr {
427
Expr::Literal(Scalar::from(self).into())
428
}
429
}
430
431
impl Literal for String {
432
fn lit(self) -> Expr {
433
Expr::Literal(Scalar::from(PlSmallStr::from_string(self)).into())
434
}
435
}
436
437
impl Literal for &str {
438
fn lit(self) -> Expr {
439
Expr::Literal(Scalar::from(PlSmallStr::from_str(self)).into())
440
}
441
}
442
443
impl Literal for Vec<u8> {
444
fn lit(self) -> Expr {
445
Expr::Literal(Scalar::from(self).into())
446
}
447
}
448
449
impl Literal for &[u8] {
450
fn lit(self) -> Expr {
451
Expr::Literal(Scalar::from(self.to_vec()).into())
452
}
453
}
454
455
impl From<AnyValue<'_>> for LiteralValue {
456
fn from(value: AnyValue<'_>) -> Self {
457
Self::Scalar(Scalar::new(value.dtype(), value.into_static()))
458
}
459
}
460
461
// Implements `Literal` for `$ty` by wrapping the value in a typed `Scalar`.
// The second argument is accepted but not used by the expansion.
macro_rules! make_literal {
    ($ty:ty, $scalar:ident) => {
        impl Literal for $ty {
            fn lit(self) -> Expr {
                let scalar = Scalar::from(self);
                Expr::Literal(scalar.into())
            }
        }
    };
}
470
471
// Implements `TypedLiteral` for `$ty`: `typed_lit` wraps the value in a typed `Scalar`.
// The second argument is accepted but not used by the expansion.
macro_rules! make_literal_typed {
    ($ty:ty, $scalar:ident) => {
        impl TypedLiteral for $ty {
            fn typed_lit(self) -> Expr {
                let scalar = Scalar::from(self);
                Expr::Literal(scalar.into())
            }
        }
    };
}
480
481
// Implements `Literal` for `$ty` as a dynamic literal: the value is converted into the
// payload type of the `DynLiteralValue::$variant` variant.
macro_rules! make_dyn_lit {
    ($ty:ty, $variant:ident) => {
        impl Literal for $ty {
            fn lit(self) -> Expr {
                let payload = self.try_into().unwrap();
                Expr::Literal(LiteralValue::Dyn(DynLiteralValue::$variant(payload)))
            }
        }
    };
}
492
493
// `bool` only gets a `Literal` impl, producing a typed scalar.
make_literal!(bool, Boolean);
// `typed_lit` on numeric primitives produces a scalar literal built from the exact Rust type.
make_literal_typed!(f32, Float32);
make_literal_typed!(f64, Float64);
make_literal_typed!(i8, Int8);
make_literal_typed!(i16, Int16);
make_literal_typed!(i32, Int32);
make_literal_typed!(i64, Int64);
make_literal_typed!(i128, Int128);
make_literal_typed!(u8, UInt8);
make_literal_typed!(u16, UInt16);
make_literal_typed!(u32, UInt32);
make_literal_typed!(u64, UInt64);

// `lit` on numeric primitives produces a dynamic literal (`DynLiteralValue::Float` or
// `DynLiteralValue::Int`).
make_dyn_lit!(f32, Float);
make_dyn_lit!(f64, Float);
make_dyn_lit!(i8, Int);
make_dyn_lit!(i16, Int);
make_dyn_lit!(i32, Int);
make_dyn_lit!(i64, Int);
make_dyn_lit!(u8, Int);
make_dyn_lit!(u16, Int);
make_dyn_lit!(u32, Int);
make_dyn_lit!(u64, Int);
make_dyn_lit!(i128, Int);
517
518
/// The literal Null
pub struct Null {}
/// A ready-made [`Null`] value; `NULL.lit()` yields a null literal of data type `Null`.
pub const NULL: Null = Null {};
521
522
impl Literal for Null {
523
fn lit(self) -> Expr {
524
Expr::Literal(LiteralValue::Scalar(Scalar::null(DataType::Null)))
525
}
526
}
527
528
#[cfg(feature = "dtype-datetime")]
impl Literal for NaiveDateTime {
    /// Datetime literal without time zone.
    ///
    /// Nanosecond precision is used when `in_nanoseconds_window` accepts the value,
    /// microsecond precision otherwise.
    fn lit(self) -> Expr {
        let (timestamp, time_unit) = if in_nanoseconds_window(&self) {
            (
                self.and_utc().timestamp_nanos_opt().unwrap(),
                TimeUnit::Nanoseconds,
            )
        } else {
            (self.and_utc().timestamp_micros(), TimeUnit::Microseconds)
        };

        let scalar = Scalar::new_datetime(timestamp, time_unit, None);
        Expr::Literal(scalar.into())
    }
}
552
553
#[cfg(feature = "dtype-duration")]
impl Literal for ChronoDuration {
    /// Duration literal using the finest time unit that can represent the value.
    ///
    /// Nanoseconds are preferred, then microseconds. Durations whose microsecond count does
    /// not fit in an `i64` are stored with millisecond precision instead of panicking.
    fn lit(self) -> Expr {
        let (value, time_unit) = if let Some(ns) = self.num_nanoseconds() {
            (ns, TimeUnit::Nanoseconds)
        } else if let Some(us) = self.num_microseconds() {
            (us, TimeUnit::Microseconds)
        } else {
            // `num_milliseconds` is infallible, so every chrono duration ends up here at worst.
            (self.num_milliseconds(), TimeUnit::Milliseconds)
        };

        Expr::Literal(Scalar::new_duration(value, time_unit).into())
    }
}
566
567
#[cfg(feature = "dtype-duration")]
impl Literal for Duration {
    /// Duration literal in nanoseconds, negated for negative durations.
    ///
    /// # Panics
    ///
    /// Panics if the duration has a months component.
    fn lit(self) -> Expr {
        assert!(
            self.months() == 0,
            "Cannot create literal duration that is not of fixed length; found {self}"
        );

        let magnitude = self.duration_ns();
        let signed_ns = if self.negative() {
            -magnitude
        } else {
            magnitude
        };

        let scalar = Scalar::new_duration(signed_ns, TimeUnit::Nanoseconds);
        Expr::Literal(scalar.into())
    }
}
584
585
#[cfg(feature = "dtype-datetime")]
impl Literal for NaiveDate {
    /// Datetime literal for midnight (00:00:00) of this date.
    fn lit(self) -> Expr {
        let midnight = self.and_hms_opt(0, 0, 0).unwrap();
        midnight.lit()
    }
}
591
592
impl Literal for Series {
593
fn lit(self) -> Expr {
594
Expr::Literal(LiteralValue::Series(SpecialEq::new(self)))
595
}
596
}
597
598
impl Literal for LiteralValue {
599
fn lit(self) -> Expr {
600
Expr::Literal(self)
601
}
602
}
603
604
impl Literal for Scalar {
605
fn lit(self) -> Expr {
606
Expr::Literal(self.into())
607
}
608
}
609
610
/// Create a Literal Expression from `L`. A literal expression behaves like a column that contains a single distinct
611
/// value.
612
///
613
/// The column is automatically of the "correct" length to make the operations work. Often this is determined by the
614
/// length of the `LazyFrame` it is being used with. For instance, `lazy_df.with_column(lit(5).alias("five"))` creates a
615
/// new column named "five" that is the length of the Dataframe (at the time `collect` is called), where every value in
616
/// the column is `5`.
617
pub fn lit<L: Literal>(t: L) -> Expr {
618
t.lit()
619
}
620
621
pub fn typed_lit<L: TypedLiteral>(t: L) -> Expr {
622
t.typed_lit()
623
}
624
625
impl Hash for LiteralValue {
626
fn hash<H: Hasher>(&self, state: &mut H) {
627
std::mem::discriminant(self).hash(state);
628
match self {
629
LiteralValue::Series(s) => {
630
// Free stats
631
s.dtype().hash(state);
632
let len = s.len();
633
len.hash(state);
634
s.null_count().hash(state);
635
const RANDOM: u64 = 0x2c194fa5df32a367;
636
let mut rng = (len as u64) ^ RANDOM;
637
for _ in 0..std::cmp::min(5, len) {
638
let idx = hash_to_partition(rng, len);
639
s.get(idx).unwrap().hash(state);
640
rng = rng.rotate_right(17).wrapping_add(RANDOM);
641
}
642
},
643
LiteralValue::Range(range) => range.hash(state),
644
LiteralValue::Scalar(sc) => sc.hash(state),
645
LiteralValue::Dyn(d) => d.hash(state),
646
}
647
}
648
}
649
650