Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/plans/lit.rs
8446 views
1
use std::hash::{Hash, Hasher};
2
3
#[cfg(feature = "temporal")]
4
use chrono::{Duration as ChronoDuration, NaiveDate, NaiveDateTime};
5
use polars_core::chunked_array::cast::CastOptions;
6
use polars_core::prelude::*;
7
use polars_core::utils::materialize_dyn_int;
8
use polars_utils::float16::pf16;
9
use polars_utils::hashing::hash_to_partition;
10
#[cfg(feature = "serde")]
11
use serde::{Deserialize, Serialize};
12
13
use crate::constants::get_literal_name;
14
use crate::prelude::*;
15
16
#[derive(Clone, PartialEq)]
17
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
18
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
19
pub enum DynLiteralValue {
20
Str(PlSmallStr),
21
Int(i128),
22
Float(f64),
23
List(DynListLiteralValue),
24
}
25
#[derive(Clone, PartialEq)]
26
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
27
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
28
pub enum DynListLiteralValue {
29
Str(Box<[Option<PlSmallStr>]>),
30
Int(Box<[Option<i128>]>),
31
Float(Box<[Option<f64>]>),
32
List(Box<[Option<DynListLiteralValue>]>),
33
}
34
35
impl Hash for DynLiteralValue {
36
fn hash<H: Hasher>(&self, state: &mut H) {
37
std::mem::discriminant(self).hash(state);
38
match self {
39
Self::Str(i) => i.hash(state),
40
Self::Int(i) => i.hash(state),
41
Self::Float(i) => i.to_ne_bytes().hash(state),
42
Self::List(i) => i.hash(state),
43
}
44
}
45
}
46
47
impl Hash for DynListLiteralValue {
48
fn hash<H: Hasher>(&self, state: &mut H) {
49
std::mem::discriminant(self).hash(state);
50
match self {
51
Self::Str(i) => i.hash(state),
52
Self::Int(i) => i.hash(state),
53
Self::Float(i) => i
54
.iter()
55
.for_each(|i| i.map(|i| i.to_ne_bytes()).hash(state)),
56
Self::List(i) => i.hash(state),
57
}
58
}
59
}
60
61
#[derive(Clone, PartialEq, Hash)]
62
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
63
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
64
pub struct RangeLiteralValue {
65
pub low: i128,
66
pub high: i128,
67
pub dtype: DataType,
68
}
69
#[derive(Clone, PartialEq)]
70
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
71
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
72
pub enum LiteralValue {
73
/// A dynamically inferred literal value. This needs to be materialized into a specific type.
74
Dyn(DynLiteralValue),
75
Scalar(Scalar),
76
Series(SpecialEq<Series>),
77
Range(RangeLiteralValue),
78
}
79
80
pub enum MaterializedLiteralValue {
81
Scalar(Scalar),
82
Series(Series),
83
}
84
85
impl DynListLiteralValue {
86
pub fn try_materialize_to_dtype(
87
self,
88
dtype: &DataType,
89
options: CastOptions,
90
) -> PolarsResult<Scalar> {
91
let Some(inner_dtype) = dtype.inner_dtype() else {
92
polars_bail!(InvalidOperation: "conversion from list literal to `{dtype}` failed.");
93
};
94
95
let s = match self {
96
DynListLiteralValue::Str(vs) => {
97
StringChunked::from_iter_options(get_literal_name(), vs.into_iter()).into_series()
98
},
99
DynListLiteralValue::Int(vs) => {
100
#[cfg(feature = "dtype-i128")]
101
{
102
Int128Chunked::from_iter_options(get_literal_name(), vs.into_iter())
103
.into_series()
104
}
105
106
#[cfg(not(feature = "dtype-i128"))]
107
{
108
Int64Chunked::from_iter_options(
109
get_literal_name(),
110
vs.into_iter().map(|v| v.map(|v| v as i64)),
111
)
112
.into_series()
113
}
114
},
115
DynListLiteralValue::Float(vs) => {
116
Float64Chunked::from_iter_options(get_literal_name(), vs.into_iter()).into_series()
117
},
118
DynListLiteralValue::List(_) => todo!("nested lists"),
119
};
120
121
let s = s.cast_with_options(inner_dtype, options)?;
122
let value = match dtype {
123
DataType::List(_) => AnyValue::List(s),
124
#[cfg(feature = "dtype-array")]
125
DataType::Array(_, size) => AnyValue::Array(s, *size),
126
_ => unreachable!(),
127
};
128
129
Ok(Scalar::new(dtype.clone(), value))
130
}
131
}
132
133
impl DynLiteralValue {
134
pub fn try_materialize_to_dtype(
135
self,
136
dtype: &DataType,
137
options: CastOptions,
138
) -> PolarsResult<Scalar> {
139
match self {
140
DynLiteralValue::Str(s) => Ok(Scalar::from(s).cast_with_options(dtype, options)?),
141
DynLiteralValue::Int(i) => {
142
#[cfg(not(feature = "dtype-i128"))]
143
let i: i64 = i.try_into().expect("activate dtype-i128 feature");
144
145
Ok(Scalar::from(i).cast_with_options(dtype, options)?)
146
},
147
DynLiteralValue::Float(f) => Ok(Scalar::from(f).cast_with_options(dtype, options)?),
148
DynLiteralValue::List(dyn_list_value) => {
149
dyn_list_value.try_materialize_to_dtype(dtype, options)
150
},
151
}
152
}
153
}
154
155
impl RangeLiteralValue {
156
pub fn try_materialize_to_series(self, dtype: &DataType) -> PolarsResult<Series> {
157
fn handle_range_oob(range: &RangeLiteralValue, to_dtype: &DataType) -> PolarsResult<()> {
158
polars_bail!(
159
InvalidOperation:
160
"conversion from `{}` to `{to_dtype}` failed for range({}, {})",
161
range.dtype, range.low, range.high,
162
)
163
}
164
165
let s = match dtype {
166
DataType::Int32 => {
167
if self.low < i32::MIN as i128 || self.high > i32::MAX as i128 {
168
handle_range_oob(&self, dtype)?;
169
}
170
171
new_int_range::<Int32Type>(
172
self.low as i32,
173
self.high as i32,
174
1,
175
PlSmallStr::from_static("range"),
176
)
177
.unwrap()
178
},
179
DataType::Int64 => {
180
if self.low < i64::MIN as i128 || self.high > i64::MAX as i128 {
181
handle_range_oob(&self, dtype)?;
182
}
183
184
new_int_range::<Int64Type>(
185
self.low as i64,
186
self.high as i64,
187
1,
188
PlSmallStr::from_static("range"),
189
)
190
.unwrap()
191
},
192
DataType::UInt32 => {
193
if self.low < u32::MIN as i128 || self.high > u32::MAX as i128 {
194
handle_range_oob(&self, dtype)?;
195
}
196
new_int_range::<UInt32Type>(
197
self.low as u32,
198
self.high as u32,
199
1,
200
PlSmallStr::from_static("range"),
201
)
202
.unwrap()
203
},
204
_ => polars_bail!(InvalidOperation: "unsupported range datatype `{dtype}`"),
205
};
206
207
Ok(s)
208
}
209
}
210
211
impl LiteralValue {
212
/// Get the output name as [`PlSmallStr`].
213
pub(crate) fn output_column_name(&self) -> PlSmallStr {
214
match self {
215
LiteralValue::Series(s) => s.name().clone(),
216
_ => get_literal_name(),
217
}
218
}
219
220
pub fn extract_usize(&self) -> PolarsResult<usize> {
221
macro_rules! cast_usize {
222
($v:expr) => {
223
usize::try_from($v).map_err(
224
|_| polars_err!(InvalidOperation: "cannot convert value {} to usize", $v)
225
)
226
}
227
}
228
match &self {
229
Self::Dyn(DynLiteralValue::Int(v)) => cast_usize!(*v),
230
Self::Scalar(sc) => match sc.as_any_value() {
231
AnyValue::UInt8(v) => Ok(v as usize),
232
AnyValue::UInt16(v) => Ok(v as usize),
233
AnyValue::UInt32(v) => cast_usize!(v),
234
AnyValue::UInt64(v) => cast_usize!(v),
235
AnyValue::Int8(v) => cast_usize!(v),
236
AnyValue::Int16(v) => cast_usize!(v),
237
AnyValue::Int32(v) => cast_usize!(v),
238
AnyValue::Int64(v) => cast_usize!(v),
239
AnyValue::Int128(v) => cast_usize!(v),
240
_ => {
241
polars_bail!(InvalidOperation: "expression must be constant literal to extract integer")
242
},
243
},
244
_ => {
245
polars_bail!(InvalidOperation: "expression must be constant literal to extract integer")
246
},
247
}
248
}
249
250
pub fn extract_i64(&self) -> PolarsResult<i64> {
251
macro_rules! cast_i64 {
252
($v:expr) => {
253
i64::try_from($v).map_err(
254
|_| polars_err!(InvalidOperation: "cannot convert value {} to i64", $v)
255
)
256
}
257
}
258
match &self {
259
Self::Dyn(DynLiteralValue::Int(v)) => cast_i64!(*v),
260
Self::Scalar(sc) => match sc.as_any_value() {
261
AnyValue::UInt8(v) => Ok(v as i64),
262
AnyValue::UInt16(v) => Ok(v as i64),
263
AnyValue::UInt32(v) => cast_i64!(v),
264
AnyValue::UInt64(v) => cast_i64!(v),
265
AnyValue::Int8(v) => cast_i64!(v),
266
AnyValue::Int16(v) => cast_i64!(v),
267
AnyValue::Int32(v) => cast_i64!(v),
268
AnyValue::Int64(v) => Ok(v),
269
AnyValue::Int128(v) => cast_i64!(v),
270
_ => {
271
polars_bail!(InvalidOperation: "expression must be constant literal to extract integer")
272
},
273
},
274
_ => {
275
polars_bail!(InvalidOperation: "expression must be constant literal to extract integer")
276
},
277
}
278
}
279
280
pub fn materialize(self) -> Self {
281
match self {
282
LiteralValue::Dyn(_) => {
283
let av = self.to_any_value().unwrap();
284
av.into()
285
},
286
lv => lv,
287
}
288
}
289
290
pub fn is_scalar(&self) -> bool {
291
!matches!(self, LiteralValue::Series(_) | LiteralValue::Range { .. })
292
}
293
294
pub fn to_any_value(&self) -> Option<AnyValue<'_>> {
295
let av = match self {
296
Self::Scalar(sc) => sc.value().clone(),
297
Self::Range(range) => {
298
let s = range.clone().try_materialize_to_series(&range.dtype).ok()?;
299
AnyValue::List(s)
300
},
301
Self::Series(_) => return None,
302
Self::Dyn(d) => match d {
303
DynLiteralValue::Int(v) => materialize_dyn_int(*v),
304
DynLiteralValue::Float(v) => AnyValue::Float64(*v),
305
DynLiteralValue::Str(v) => AnyValue::String(v),
306
DynLiteralValue::List(_) => todo!(),
307
},
308
};
309
Some(av)
310
}
311
312
/// Getter for the `DataType` of the value
313
pub fn get_datatype(&self) -> DataType {
314
match self {
315
Self::Dyn(d) => match d {
316
DynLiteralValue::Int(v) => DataType::Unknown(UnknownKind::Int(*v)),
317
DynLiteralValue::Float(_) => DataType::Unknown(UnknownKind::Float),
318
DynLiteralValue::Str(_) => DataType::Unknown(UnknownKind::Str),
319
DynLiteralValue::List(_) => todo!(),
320
},
321
Self::Scalar(sc) => sc.dtype().clone(),
322
Self::Series(s) => s.dtype().clone(),
323
Self::Range(s) => s.dtype.clone(),
324
}
325
}
326
327
pub fn new_idxsize(value: IdxSize) -> Self {
328
LiteralValue::Scalar(value.into())
329
}
330
331
pub fn extract_str(&self) -> Option<&str> {
332
match self {
333
LiteralValue::Dyn(DynLiteralValue::Str(s)) => Some(s.as_str()),
334
LiteralValue::Scalar(sc) => match sc.value() {
335
AnyValue::String(s) => Some(s),
336
AnyValue::StringOwned(s) => Some(s),
337
_ => None,
338
},
339
_ => None,
340
}
341
}
342
343
pub fn extract_binary(&self) -> Option<&[u8]> {
344
match self {
345
LiteralValue::Scalar(sc) => match sc.value() {
346
AnyValue::Binary(s) => Some(s),
347
AnyValue::BinaryOwned(s) => Some(s),
348
_ => None,
349
},
350
_ => None,
351
}
352
}
353
354
pub fn is_null(&self) -> bool {
355
match self {
356
Self::Scalar(sc) => sc.is_null(),
357
Self::Series(s) => s.len() == 1 && s.null_count() == 1,
358
Self::Dyn(_) => false,
359
Self::Range(_) => false,
360
}
361
}
362
363
pub fn bool(&self) -> Option<bool> {
364
match self {
365
LiteralValue::Scalar(s) => match s.as_any_value() {
366
AnyValue::Boolean(b) => Some(b),
367
_ => None,
368
},
369
_ => None,
370
}
371
}
372
373
pub const fn untyped_null() -> Self {
374
Self::Scalar(Scalar::null(DataType::Null))
375
}
376
377
pub fn implode(self) -> PolarsResult<Self> {
378
let series = match self.materialize() {
379
LiteralValue::Dyn(_) => unreachable!(),
380
LiteralValue::Scalar(scalar) => scalar.into_series(PlSmallStr::EMPTY),
381
LiteralValue::Series(series) => series.into_inner(),
382
LiteralValue::Range(range) => {
383
let dtype = range.dtype.clone();
384
range.try_materialize_to_series(&dtype)?
385
},
386
};
387
388
let dtype = DataType::List(Box::new(series.dtype().clone()));
389
Ok(LiteralValue::Scalar(Scalar::new(
390
dtype,
391
AnyValue::List(series),
392
)))
393
}
394
}
395
396
impl From<Scalar> for LiteralValue {
397
fn from(value: Scalar) -> Self {
398
Self::Scalar(value)
399
}
400
}
401
402
pub trait Literal {
403
/// [Literal](Expr::Literal) expression.
404
fn lit(self) -> Expr;
405
}
406
407
pub trait TypedLiteral: Literal {
408
/// [Literal](Expr::Literal) expression.
409
fn typed_lit(self) -> Expr
410
where
411
Self: Sized,
412
{
413
self.lit()
414
}
415
}
416
417
impl TypedLiteral for String {}
418
impl TypedLiteral for &str {}
419
420
impl Literal for PlSmallStr {
421
fn lit(self) -> Expr {
422
Expr::Literal(Scalar::from(self).into())
423
}
424
}
425
426
impl Literal for String {
427
fn lit(self) -> Expr {
428
Expr::Literal(Scalar::from(PlSmallStr::from_string(self)).into())
429
}
430
}
431
432
impl Literal for &str {
433
fn lit(self) -> Expr {
434
Expr::Literal(Scalar::from(PlSmallStr::from_str(self)).into())
435
}
436
}
437
438
impl Literal for Vec<u8> {
439
fn lit(self) -> Expr {
440
Expr::Literal(Scalar::from(self).into())
441
}
442
}
443
444
impl Literal for &[u8] {
445
fn lit(self) -> Expr {
446
Expr::Literal(Scalar::from(self.to_vec()).into())
447
}
448
}
449
450
impl From<AnyValue<'_>> for LiteralValue {
451
fn from(value: AnyValue<'_>) -> Self {
452
Self::Scalar(Scalar::new(value.dtype(), value.into_static()))
453
}
454
}
455
456
// Implements `Literal` for a type by wrapping `Scalar::from(value)`.
// The second argument is accepted but not used by the expansion.
macro_rules! make_literal {
    ($ty:ty, $variant:ident) => {
        impl Literal for $ty {
            fn lit(self) -> Expr {
                let scalar = Scalar::from(self);
                Expr::Literal(LiteralValue::from(scalar))
            }
        }
    };
}
465
466
// Implements `TypedLiteral` for a type by wrapping `Scalar::from(value)`.
// The second argument is accepted but not used by the expansion.
macro_rules! make_literal_typed {
    ($ty:ty, $variant:ident) => {
        impl TypedLiteral for $ty {
            fn typed_lit(self) -> Expr {
                let scalar = Scalar::from(self);
                Expr::Literal(LiteralValue::from(scalar))
            }
        }
    };
}
475
476
// Implements `Literal` for a type as a dynamic literal of the given
// `DynLiteralValue` variant. The value is converted with `try_into` and the
// expansion panics if that conversion fails.
macro_rules! make_dyn_lit {
    ($ty:ty, $variant:ident) => {
        impl Literal for $ty {
            fn lit(self) -> Expr {
                let dynamic = DynLiteralValue::$variant(self.try_into().unwrap());
                Expr::Literal(LiteralValue::Dyn(dynamic))
            }
        }
    };
}
487
488
make_literal!(bool, Boolean);
489
490
make_literal_typed!(pf16, Float16);
491
make_literal_typed!(f32, Float32);
492
make_literal_typed!(f64, Float64);
493
make_literal_typed!(i8, Int8);
494
make_literal_typed!(i16, Int16);
495
make_literal_typed!(i32, Int32);
496
make_literal_typed!(i64, Int64);
497
make_literal_typed!(i128, Int128);
498
make_literal_typed!(u8, UInt8);
499
make_literal_typed!(u16, UInt16);
500
make_literal_typed!(u32, UInt32);
501
make_literal_typed!(u64, UInt64);
502
make_literal_typed!(u128, UInt128);
503
504
make_dyn_lit!(pf16, Float);
505
make_dyn_lit!(f32, Float);
506
make_dyn_lit!(f64, Float);
507
make_dyn_lit!(i8, Int);
508
make_dyn_lit!(i16, Int);
509
make_dyn_lit!(i32, Int);
510
make_dyn_lit!(i64, Int);
511
make_dyn_lit!(u8, Int);
512
make_dyn_lit!(u16, Int);
513
make_dyn_lit!(u32, Int);
514
make_dyn_lit!(u64, Int);
515
make_dyn_lit!(i128, Int);
516
make_dyn_lit!(u128, Int);
517
518
/// The literal Null
pub struct Null {}
/// Ready-made [`Null`] value, usable as `NULL.lit()`.
pub const NULL: Null = Null {};
521
522
impl Literal for Null {
523
fn lit(self) -> Expr {
524
Expr::Literal(LiteralValue::Scalar(Scalar::null(DataType::Null)))
525
}
526
}
527
528
#[cfg(feature = "dtype-datetime")]
impl Literal for NaiveDateTime {
    /// Create a datetime literal without a time zone.
    ///
    /// Nanosecond precision is used when the value passes `in_nanoseconds_window`
    /// and its nanosecond timestamp fits in an `i64`; otherwise the literal falls
    /// back to microsecond precision.
    fn lit(self) -> Expr {
        let utc = self.and_utc();

        // `timestamp_nanos_opt` returns `None` when the value does not fit in an
        // `i64`. Treat that like "outside the window" instead of unwrapping, so an
        // over-permissive window check cannot cause a panic.
        let nanos = if in_nanoseconds_window(&self) {
            utc.timestamp_nanos_opt()
        } else {
            None
        };

        let scalar = match nanos {
            Some(ns) => Scalar::new_datetime(ns, TimeUnit::Nanoseconds, None),
            None => Scalar::new_datetime(utc.timestamp_micros(), TimeUnit::Microseconds, None),
        };
        Expr::Literal(scalar.into())
    }
}
552
553
#[cfg(feature = "dtype-duration")]
impl Literal for ChronoDuration {
    /// Create a duration literal using the finest time unit that can hold the value.
    ///
    /// Nanoseconds are preferred, then microseconds. Durations too large for
    /// either fall back to milliseconds instead of panicking.
    fn lit(self) -> Expr {
        let (value, unit) = if let Some(ns) = self.num_nanoseconds() {
            (ns, TimeUnit::Nanoseconds)
        } else if let Some(us) = self.num_microseconds() {
            (us, TimeUnit::Microseconds)
        } else {
            // `num_microseconds` returns `None` on overflow; `num_milliseconds`
            // always returns a value.
            (self.num_milliseconds(), TimeUnit::Milliseconds)
        };
        Expr::Literal(Scalar::new_duration(value, unit).into())
    }
}
566
567
#[cfg(feature = "dtype-duration")]
impl Literal for Duration {
    /// Create a nanosecond duration literal.
    ///
    /// # Panics
    ///
    /// Panics when the duration has a non-zero month component.
    fn lit(self) -> Expr {
        assert!(
            self.months() == 0,
            "Cannot create literal duration that is not of fixed length; found {self}"
        );

        let magnitude = self.duration_ns();
        let signed = if self.negative() { -magnitude } else { magnitude };
        let scalar = Scalar::new_duration(signed, TimeUnit::Nanoseconds);
        Expr::Literal(LiteralValue::from(scalar))
    }
}
584
585
#[cfg(feature = "dtype-datetime")]
impl Literal for NaiveDate {
    /// Create a datetime literal at midnight (00:00:00) of this date.
    fn lit(self) -> Expr {
        let midnight = self.and_hms_opt(0, 0, 0).unwrap();
        midnight.lit()
    }
}
591
592
impl Literal for Series {
593
fn lit(self) -> Expr {
594
Expr::Literal(LiteralValue::Series(SpecialEq::new(self)))
595
}
596
}
597
598
impl Literal for LiteralValue {
599
fn lit(self) -> Expr {
600
Expr::Literal(self)
601
}
602
}
603
604
impl Literal for Scalar {
605
fn lit(self) -> Expr {
606
Expr::Literal(self.into())
607
}
608
}
609
610
/// Create a literal expression through [`TypedLiteral::typed_lit`].
pub fn typed_lit<L: TypedLiteral>(t: L) -> Expr {
    L::typed_lit(t)
}
613
614
impl Hash for LiteralValue {
615
fn hash<H: Hasher>(&self, state: &mut H) {
616
std::mem::discriminant(self).hash(state);
617
match self {
618
LiteralValue::Series(s) => {
619
// Free stats
620
s.dtype().hash(state);
621
let len = s.len();
622
len.hash(state);
623
s.null_count().hash(state);
624
const RANDOM: u64 = 0x2c194fa5df32a367;
625
let mut rng = (len as u64) ^ RANDOM;
626
for _ in 0..std::cmp::min(5, len) {
627
let idx = hash_to_partition(rng, len);
628
s.get(idx).unwrap().hash(state);
629
rng = rng.rotate_right(17).wrapping_add(RANDOM);
630
}
631
},
632
LiteralValue::Range(range) => range.hash(state),
633
LiteralValue::Scalar(sc) => sc.hash(state),
634
LiteralValue::Dyn(d) => d.hash(state),
635
}
636
}
637
}
638
639