Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/series/any_value.rs
8431 views
1
use std::fmt::Write;
2
3
use arrow::bitmap::MutableBitmap;
4
use num_traits::AsPrimitive;
5
use polars_compute::cast::SerPrimitive;
6
7
#[cfg(feature = "dtype-categorical")]
8
use crate::chunked_array::builder::CategoricalChunkedBuilder;
9
use crate::chunked_array::builder::{AnonymousOwnedListBuilder, get_list_builder};
10
use crate::prelude::*;
11
use crate::utils::any_values_to_supertype;
12
13
impl<'a, T: AsRef<[AnyValue<'a>]>> NamedFrom<T, [AnyValue<'a>]> for Series {
14
/// Construct a new [`Series`] from a collection of [`AnyValue`].
15
///
16
/// # Panics
17
///
18
/// Panics if the values do not all share the same data type (with the exception
19
/// of [`DataType::Null`], which is always allowed).
20
///
21
/// [`AnyValue`]: crate::datatypes::AnyValue
22
fn new(name: PlSmallStr, values: T) -> Self {
23
let values = values.as_ref();
24
Series::from_any_values(name, values, true).expect("data types of values should match")
25
}
26
}
27
28
impl Series {
29
/// Construct a new [`Series`] from a slice of AnyValues.
30
///
31
/// The data type of the resulting Series is determined by the `values`
32
/// and the `strict` parameter:
33
/// - If `strict` is `true`, the data type is equal to the data type of the
34
/// first non-null value. If any other non-null values do not match this
35
/// data type, an error is raised. If the first non-null value is a
36
/// decimal the slice is scanned for the maximum precision and scale possible.
37
/// - If `strict` is `false`, the data type is the supertype of the `values`.
38
/// An error is returned if no supertype can be determined.
39
/// **WARNING**: A full pass over the values is required to determine the supertype.
40
/// - If no values were passed, the resulting data type is `Null`.
41
pub fn from_any_values(
42
name: PlSmallStr,
43
values: &[AnyValue],
44
strict: bool,
45
) -> PolarsResult<Self> {
46
fn get_first_non_null_dtype(values: &[AnyValue]) -> DataType {
47
let mut all_flat_null = true;
48
let first_non_null = values.iter().find(|av| {
49
if !av.is_null() {
50
all_flat_null = false
51
};
52
!av.is_nested_null()
53
});
54
match first_non_null {
55
Some(av) => av.dtype(),
56
None => {
57
if all_flat_null {
58
DataType::Null
59
} else {
60
// Second pass to check for the nested null value that
61
// toggled `all_flat_null` to false, e.g. a List(Null).
62
let first_nested_null = values.iter().find(|av| !av.is_null()).unwrap();
63
first_nested_null.dtype()
64
}
65
},
66
}
67
}
68
let dtype = if strict {
69
match get_first_non_null_dtype(values) {
70
#[cfg(feature = "dtype-decimal")]
71
DataType::Decimal(mut prec, mut scale) => {
72
for v in values {
73
if let DataType::Decimal(p, s) = v.dtype() {
74
prec = prec.max(p);
75
scale = scale.max(s);
76
}
77
}
78
DataType::Decimal(prec, scale)
79
},
80
dt => dt,
81
}
82
} else {
83
any_values_to_supertype(values)?
84
};
85
86
Self::from_any_values_and_dtype(name, values, &dtype, strict)
87
}
88
89
/// Construct a new [`Series`] with the given `dtype` from a slice of AnyValues.
90
///
91
/// If `strict` is `true`, an error is returned if the values do not match the given
92
/// data type. If `strict` is `false`, values that do not match the given data type
93
/// are cast. If casting is not possible, the values are set to null instead.
94
pub fn from_any_values_and_dtype(
95
name: PlSmallStr,
96
values: &[AnyValue],
97
dtype: &DataType,
98
strict: bool,
99
) -> PolarsResult<Self> {
100
if values.is_empty() {
101
return Ok(Self::new_empty(name, dtype));
102
}
103
104
let mut s = match dtype {
105
#[cfg(feature = "dtype-i8")]
106
DataType::Int8 => any_values_to_integer::<Int8Type>(values, strict)?.into_series(),
107
#[cfg(feature = "dtype-i16")]
108
DataType::Int16 => any_values_to_integer::<Int16Type>(values, strict)?.into_series(),
109
DataType::Int32 => any_values_to_integer::<Int32Type>(values, strict)?.into_series(),
110
DataType::Int64 => any_values_to_integer::<Int64Type>(values, strict)?.into_series(),
111
#[cfg(feature = "dtype-i128")]
112
DataType::Int128 => any_values_to_integer::<Int128Type>(values, strict)?.into_series(),
113
#[cfg(feature = "dtype-u8")]
114
DataType::UInt8 => any_values_to_integer::<UInt8Type>(values, strict)?.into_series(),
115
#[cfg(feature = "dtype-u16")]
116
DataType::UInt16 => any_values_to_integer::<UInt16Type>(values, strict)?.into_series(),
117
DataType::UInt32 => any_values_to_integer::<UInt32Type>(values, strict)?.into_series(),
118
DataType::UInt64 => any_values_to_integer::<UInt64Type>(values, strict)?.into_series(),
119
#[cfg(feature = "dtype-u128")]
120
DataType::UInt128 => {
121
any_values_to_integer::<UInt128Type>(values, strict)?.into_series()
122
},
123
#[cfg(feature = "dtype-f16")]
124
DataType::Float16 => any_values_to_f16(values, strict)?.into_series(),
125
DataType::Float32 => any_values_to_f32(values, strict)?.into_series(),
126
DataType::Float64 => any_values_to_f64(values, strict)?.into_series(),
127
DataType::Boolean => any_values_to_bool(values, strict)?.into_series(),
128
DataType::String => any_values_to_string(values, strict)?.into_series(),
129
DataType::Binary => any_values_to_binary(values, strict)?.into_series(),
130
DataType::BinaryOffset => any_values_to_binary_offset(values, strict)?.into_series(),
131
#[cfg(feature = "dtype-date")]
132
DataType::Date => any_values_to_date(values, strict)?.into_series(),
133
#[cfg(feature = "dtype-time")]
134
DataType::Time => any_values_to_time(values, strict)?.into_series(),
135
#[cfg(feature = "dtype-datetime")]
136
DataType::Datetime(tu, tz) => {
137
any_values_to_datetime(values, *tu, (*tz).clone(), strict)?.into_series()
138
},
139
#[cfg(feature = "dtype-duration")]
140
DataType::Duration(tu) => any_values_to_duration(values, *tu, strict)?.into_series(),
141
#[cfg(feature = "dtype-categorical")]
142
dt @ (DataType::Categorical(_, _) | DataType::Enum(_, _)) => {
143
any_values_to_categorical(values, dt, strict)?
144
},
145
#[cfg(feature = "dtype-decimal")]
146
DataType::Decimal(precision, scale) => {
147
any_values_to_decimal(values, *precision, *scale, strict)?.into_series()
148
},
149
#[cfg(feature = "dtype-extension")]
150
DataType::Extension(typ, storage) => {
151
Series::from_any_values_and_dtype(name.clone(), values, storage, strict)?
152
.into_extension(typ.clone())
153
},
154
DataType::List(inner) => any_values_to_list(values, inner, strict)?.into_series(),
155
#[cfg(feature = "dtype-array")]
156
DataType::Array(inner, size) => any_values_to_array(values, inner, strict, *size)?
157
.into_series()
158
.cast(&DataType::Array(inner.clone(), *size))?,
159
#[cfg(feature = "dtype-struct")]
160
DataType::Struct(fields) => any_values_to_struct(values, fields, strict)?,
161
#[cfg(feature = "object")]
162
DataType::Object(_) => any_values_to_object(values)?,
163
DataType::Null => Series::new_null(PlSmallStr::EMPTY, values.len()),
164
dt => {
165
polars_bail!(
166
InvalidOperation:
167
"constructing a Series with data type {dt:?} from AnyValues is not supported"
168
)
169
},
170
};
171
s.rename(name);
172
Ok(s)
173
}
174
}
175
176
/// Non-strict conversion of AnyValues into a numeric [`ChunkedArray`].
///
/// Each value is passed through `AnyValue::extract` for the native type of `T`
/// and the resulting options are collected into the array (a `None` presumably
/// ends up as a null entry).
fn any_values_to_primitive_nonstrict<T: PolarsNumericType>(values: &[AnyValue]) -> ChunkedArray<T> {
    let extracted = values.iter().map(|value| value.extract::<T::Native>());
    extracted.collect_trusted()
}
182
183
fn any_values_to_integer<T: PolarsIntegerType>(
184
values: &[AnyValue],
185
strict: bool,
186
) -> PolarsResult<ChunkedArray<T>> {
187
fn any_values_to_integer_strict<T: PolarsIntegerType>(
188
values: &[AnyValue],
189
) -> PolarsResult<ChunkedArray<T>> {
190
let mut builder = PrimitiveChunkedBuilder::<T>::new(PlSmallStr::EMPTY, values.len());
191
for av in values {
192
match &av {
193
av if av.is_integer() => {
194
let opt_val = av.extract::<T::Native>();
195
let val = match opt_val {
196
Some(v) => v,
197
None => return Err(invalid_value_error(&T::get_static_dtype(), av)),
198
};
199
builder.append_value(val)
200
},
201
AnyValue::Null => builder.append_null(),
202
av => return Err(invalid_value_error(&T::get_static_dtype(), av)),
203
}
204
}
205
Ok(builder.finish())
206
}
207
208
if strict {
209
any_values_to_integer_strict::<T>(values)
210
} else {
211
Ok(any_values_to_primitive_nonstrict::<T>(values))
212
}
213
}
214
215
/// Convert AnyValues into a [`Float16Chunked`].
///
/// In strict mode only `Float16` values and nulls are accepted; anything else
/// is an error. In non-strict mode the values go through
/// [`any_values_to_primitive_nonstrict`].
#[cfg(feature = "dtype-f16")]
fn any_values_to_f16(values: &[AnyValue], strict: bool) -> PolarsResult<Float16Chunked> {
    /// Strict variant: errors on the first value that is neither `Float16` nor null.
    fn any_values_to_f16_strict(values: &[AnyValue]) -> PolarsResult<Float16Chunked> {
        let mut builder =
            PrimitiveChunkedBuilder::<Float16Type>::new(PlSmallStr::EMPTY, values.len());
        for value in values {
            match value {
                AnyValue::Null => builder.append_null(),
                AnyValue::Float16(x) => builder.append_value(*x),
                other => return Err(invalid_value_error(&DataType::Float16, other)),
            }
        }
        Ok(builder.finish())
    }

    if !strict {
        return Ok(any_values_to_primitive_nonstrict::<Float16Type>(values));
    }
    any_values_to_f16_strict(values)
}
235
236
fn any_values_to_f32(values: &[AnyValue], strict: bool) -> PolarsResult<Float32Chunked> {
237
fn any_values_to_f32_strict(values: &[AnyValue]) -> PolarsResult<Float32Chunked> {
238
let mut builder =
239
PrimitiveChunkedBuilder::<Float32Type>::new(PlSmallStr::EMPTY, values.len());
240
for av in values {
241
match av {
242
AnyValue::Float32(i) => builder.append_value(*i),
243
AnyValue::Float16(i) => builder.append_value(i.as_()),
244
AnyValue::Null => builder.append_null(),
245
av => return Err(invalid_value_error(&DataType::Float32, av)),
246
}
247
}
248
Ok(builder.finish())
249
}
250
if strict {
251
any_values_to_f32_strict(values)
252
} else {
253
Ok(any_values_to_primitive_nonstrict::<Float32Type>(values))
254
}
255
}
256
fn any_values_to_f64(values: &[AnyValue], strict: bool) -> PolarsResult<Float64Chunked> {
257
fn any_values_to_f64_strict(values: &[AnyValue]) -> PolarsResult<Float64Chunked> {
258
let mut builder =
259
PrimitiveChunkedBuilder::<Float64Type>::new(PlSmallStr::EMPTY, values.len());
260
for av in values {
261
match av {
262
AnyValue::Float64(i) => builder.append_value(*i),
263
AnyValue::Float32(i) => builder.append_value(*i as f64),
264
AnyValue::Float16(i) => builder.append_value(i.as_()),
265
AnyValue::Null => builder.append_null(),
266
av => return Err(invalid_value_error(&DataType::Float64, av)),
267
}
268
}
269
Ok(builder.finish())
270
}
271
if strict {
272
any_values_to_f64_strict(values)
273
} else {
274
Ok(any_values_to_primitive_nonstrict::<Float64Type>(values))
275
}
276
}
277
278
fn any_values_to_bool(values: &[AnyValue], strict: bool) -> PolarsResult<BooleanChunked> {
279
let mut builder = BooleanChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
280
for av in values {
281
match av {
282
AnyValue::Boolean(b) => builder.append_value(*b),
283
AnyValue::Null => builder.append_null(),
284
av => {
285
if strict {
286
return Err(invalid_value_error(&DataType::Boolean, av));
287
}
288
match av.cast(&DataType::Boolean) {
289
AnyValue::Boolean(b) => builder.append_value(b),
290
_ => builder.append_null(),
291
}
292
},
293
}
294
}
295
Ok(builder.finish())
296
}
297
298
fn any_values_to_string(values: &[AnyValue], strict: bool) -> PolarsResult<StringChunked> {
299
fn any_values_to_string_strict(values: &[AnyValue]) -> PolarsResult<StringChunked> {
300
let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
301
for av in values {
302
match av {
303
AnyValue::String(s) => builder.append_value(s),
304
AnyValue::StringOwned(s) => builder.append_value(s),
305
AnyValue::Null => builder.append_null(),
306
av => return Err(invalid_value_error(&DataType::String, av)),
307
}
308
}
309
Ok(builder.finish())
310
}
311
fn any_values_to_string_nonstrict(values: &[AnyValue]) -> StringChunked {
312
fn _write_any_value(av: &AnyValue<'_>, buffer: &mut String) {
313
match av {
314
AnyValue::String(s) => buffer.push_str(s),
315
AnyValue::Float64(f) => {
316
SerPrimitive::write(unsafe { buffer.as_mut_vec() }, *f);
317
},
318
AnyValue::Float32(f) => {
319
SerPrimitive::write(unsafe { buffer.as_mut_vec() }, *f);
320
},
321
#[cfg(feature = "dtype-f16")]
322
AnyValue::Float16(f) => {
323
SerPrimitive::write(unsafe { buffer.as_mut_vec() }, *f);
324
},
325
#[cfg(feature = "dtype-struct")]
326
AnyValue::StructOwned(payload) => {
327
buffer.push('{');
328
let mut iter = payload.0.iter().peekable();
329
while let Some(child) = iter.next() {
330
_write_any_value(child, buffer);
331
if iter.peek().is_some() {
332
buffer.push(',')
333
}
334
}
335
buffer.push('}');
336
},
337
#[cfg(feature = "dtype-struct")]
338
AnyValue::Struct(_, _, flds) => {
339
let mut vals = Vec::with_capacity(flds.len());
340
av._materialize_struct_av(&mut vals);
341
342
buffer.push('{');
343
let mut iter = vals.iter().peekable();
344
while let Some(child) = iter.next() {
345
_write_any_value(child, buffer);
346
if iter.peek().is_some() {
347
buffer.push(',')
348
}
349
}
350
buffer.push('}');
351
},
352
#[cfg(feature = "dtype-array")]
353
AnyValue::Array(vals, _) => {
354
buffer.push('[');
355
let mut iter = vals.iter().peekable();
356
while let Some(child) = iter.next() {
357
_write_any_value(&child, buffer);
358
if iter.peek().is_some() {
359
buffer.push(',');
360
}
361
}
362
buffer.push(']');
363
},
364
AnyValue::List(vals) => {
365
buffer.push('[');
366
let mut iter = vals.iter().peekable();
367
while let Some(child) = iter.next() {
368
_write_any_value(&child, buffer);
369
if iter.peek().is_some() {
370
buffer.push(',');
371
}
372
}
373
buffer.push(']');
374
},
375
av => {
376
write!(buffer, "{av}").unwrap();
377
},
378
}
379
}
380
381
let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
382
let mut owned = String::new(); // Amortize allocations.
383
for av in values {
384
owned.clear();
385
386
match av {
387
AnyValue::String(s) => builder.append_value(s),
388
AnyValue::StringOwned(s) => builder.append_value(s),
389
AnyValue::Null => builder.append_null(),
390
AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => builder.append_null(),
391
392
// Explicitly convert and dump floating-point values to strings
393
// to preserve as much precision as possible.
394
// Using write!(..., "{av}") steps through Display formatting
395
// which rounds to an arbitrary precision thus losing information.
396
av => {
397
_write_any_value(av, &mut owned);
398
builder.append_value(&owned);
399
},
400
}
401
}
402
builder.finish()
403
}
404
if strict {
405
any_values_to_string_strict(values)
406
} else {
407
Ok(any_values_to_string_nonstrict(values))
408
}
409
}
410
411
fn any_values_to_binary(values: &[AnyValue], strict: bool) -> PolarsResult<BinaryChunked> {
412
fn any_values_to_binary_strict(values: &[AnyValue]) -> PolarsResult<BinaryChunked> {
413
let mut builder = BinaryChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
414
for av in values {
415
match av {
416
AnyValue::Binary(s) => builder.append_value(*s),
417
AnyValue::BinaryOwned(s) => builder.append_value(&**s),
418
AnyValue::Null => builder.append_null(),
419
av => return Err(invalid_value_error(&DataType::Binary, av)),
420
}
421
}
422
Ok(builder.finish())
423
}
424
fn any_values_to_binary_nonstrict(values: &[AnyValue]) -> BinaryChunked {
425
values
426
.iter()
427
.map(|av| match av {
428
AnyValue::Binary(b) => Some(*b),
429
AnyValue::BinaryOwned(b) => Some(&**b),
430
AnyValue::String(s) => Some(s.as_bytes()),
431
AnyValue::StringOwned(s) => Some(s.as_bytes()),
432
_ => None,
433
})
434
.collect_trusted()
435
}
436
if strict {
437
any_values_to_binary_strict(values)
438
} else {
439
Ok(any_values_to_binary_nonstrict(values))
440
}
441
}
442
443
fn any_values_to_binary_offset(
444
values: &[AnyValue],
445
strict: bool,
446
) -> PolarsResult<BinaryOffsetChunked> {
447
let mut builder = MutableBinaryArray::<i64>::new();
448
for av in values {
449
match av {
450
AnyValue::Binary(s) => builder.push(Some(*s)),
451
AnyValue::BinaryOwned(s) => builder.push(Some(&**s)),
452
AnyValue::Null => builder.push_null(),
453
av => {
454
if strict {
455
return Err(invalid_value_error(&DataType::Binary, av));
456
} else {
457
builder.push_null();
458
};
459
},
460
}
461
}
462
Ok(BinaryOffsetChunked::with_chunk(
463
Default::default(),
464
builder.into(),
465
))
466
}
467
468
/// Convert AnyValues into a [`DateChunked`].
///
/// `Date` values and nulls are taken as they are. Any other value is an error
/// in strict mode; in non-strict mode it is cast to `Date` and becomes null
/// when the cast does not produce a date.
#[cfg(feature = "dtype-date")]
fn any_values_to_date(values: &[AnyValue], strict: bool) -> PolarsResult<DateChunked> {
    let mut builder = PrimitiveChunkedBuilder::<Int32Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Date(raw) => builder.append_value(*raw),
            other if strict => return Err(invalid_value_error(&DataType::Date, other)),
            other => match other.cast(&DataType::Date) {
                AnyValue::Date(raw) => builder.append_value(raw),
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into_date())
}
488
489
/// Convert AnyValues into a [`TimeChunked`].
///
/// `Time` values and nulls are taken as they are. Any other value is an error
/// in strict mode; in non-strict mode it is cast to `Time` and becomes null
/// when the cast does not produce a time.
#[cfg(feature = "dtype-time")]
fn any_values_to_time(values: &[AnyValue], strict: bool) -> PolarsResult<TimeChunked> {
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Time(raw) => builder.append_value(*raw),
            other if strict => return Err(invalid_value_error(&DataType::Time, other)),
            other => match other.cast(&DataType::Time) {
                AnyValue::Time(raw) => builder.append_value(raw),
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into_time())
}
509
510
/// Convert AnyValues into a [`DatetimeChunked`] with the given unit and zone.
///
/// Datetime values whose time unit equals `time_unit` are taken as they are
/// (the time zone carried by the value is not inspected), as are nulls. Any
/// other value is an error in strict mode; in non-strict mode it is cast to
/// the target data type and becomes null when the cast does not produce a
/// datetime.
#[cfg(feature = "dtype-datetime")]
fn any_values_to_datetime(
    values: &[AnyValue],
    time_unit: TimeUnit,
    time_zone: Option<TimeZone>,
    strict: bool,
) -> PolarsResult<DatetimeChunked> {
    let target_dtype = DataType::Datetime(time_unit, time_zone.clone());
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Datetime(raw, unit, _) | AnyValue::DatetimeOwned(raw, unit, _)
                if *unit == time_unit =>
            {
                builder.append_value(*raw)
            },
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            other => match other.cast(&target_dtype) {
                AnyValue::Datetime(raw, _, _) | AnyValue::DatetimeOwned(raw, _, _) => {
                    builder.append_value(raw)
                },
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into_datetime(time_unit, time_zone))
}
538
539
/// Convert AnyValues into a [`DurationChunked`] with the given time unit.
///
/// Duration values whose time unit equals `time_unit` and nulls are taken as
/// they are. Any other value is an error in strict mode; in non-strict mode it
/// is cast to the target data type and becomes null when the cast does not
/// produce a duration.
#[cfg(feature = "dtype-duration")]
fn any_values_to_duration(
    values: &[AnyValue],
    time_unit: TimeUnit,
    strict: bool,
) -> PolarsResult<DurationChunked> {
    let target_dtype = DataType::Duration(time_unit);
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Duration(raw, unit) if *unit == time_unit => builder.append_value(*raw),
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            other => match other.cast(&target_dtype) {
                AnyValue::Duration(raw, _) => builder.append_value(raw),
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into_duration(time_unit))
}
564
565
/// Convert AnyValues into a categorical or enum [`Series`] of type `dtype`.
///
/// Strings are appended as categories, categorical/enum values are appended
/// through their category mapping, and nulls stay null. In non-strict mode
/// binary values become null and every other value is rendered with `Display`
/// and appended as a string. In strict mode such values are an error, and the
/// error names `dtype`, the data type of the Series being built.
///
/// A failed append is propagated in strict mode and replaced by a null in
/// non-strict mode.
///
/// # Panics
///
/// Panics if `dtype.cat_physical()` fails, which is expected only when `dtype`
/// is not a categorical or enum data type.
#[cfg(feature = "dtype-categorical")]
fn any_values_to_categorical(
    values: &[AnyValue],
    dtype: &DataType,
    strict: bool,
) -> PolarsResult<Series> {
    with_match_categorical_physical_type!(dtype.cat_physical().unwrap(), |$C| {
        let mut builder = CategoricalChunkedBuilder::<$C>::new(PlSmallStr::EMPTY, dtype.clone());

        let mut owned = String::new(); // Amortize allocations.
        for av in values {
            let ret = match av {
                AnyValue::String(s) => builder.append_str(s),
                AnyValue::StringOwned(s) => builder.append_str(s),

                &AnyValue::Enum(cat, &ref map) |
                &AnyValue::EnumOwned(cat, ref map) |
                &AnyValue::Categorical(cat, &ref map) |
                &AnyValue::CategoricalOwned(cat, ref map) => builder.append_cat(cat, map),

                AnyValue::Binary(_) | AnyValue::BinaryOwned(_) if !strict => {
                    builder.append_null();
                    Ok(())
                },
                AnyValue::Null => {
                    builder.append_null();
                    Ok(())
                }

                av => {
                    if strict {
                        // Report the data type that is actually being built.
                        return Err(invalid_value_error(dtype, av));
                    }

                    owned.clear();
                    write!(owned, "{av}").unwrap();
                    builder.append_str(&owned)
                },
            };

            // A rejected append is fatal only in strict mode.
            if let Err(e) = ret {
                if strict {
                    return Err(e);
                } else {
                    builder.append_null();
                }
            }
        }

        let ca = builder.finish();
        Ok(ca.into_series())
    })
}
618
619
/// Convert AnyValues into a [`DecimalChunked`] with the given precision and scale.
///
/// Decimal values with a scale of at most `scale` are accepted in both modes:
/// they are appended directly when the scale matches and the precision fits,
/// and otherwise go through `strict_cast` (becoming null when that fails).
/// Any other non-null value is an error in strict mode; in non-strict mode it
/// goes through `strict_cast` as well and becomes null when that fails.
#[cfg(feature = "dtype-decimal")]
fn any_values_to_decimal(
    values: &[AnyValue],
    precision: usize,
    scale: usize,
    strict: bool,
) -> PolarsResult<DecimalChunked> {
    let target_dtype = DataType::Decimal(precision, scale);

    let mut builder = PrimitiveChunkedBuilder::<Int128Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            // Equal or smaller scales are allowed; different scales are meant
            // to be supported even in 'strict' mode.
            AnyValue::Decimal(raw, p, s) if *s <= scale => {
                let usable_as_is = *p <= precision && *s == scale;
                if usable_as_is {
                    builder.append_value(*raw)
                } else {
                    match value.strict_cast(&target_dtype) {
                        Some(AnyValue::Decimal(cast, _, _)) => builder.append_value(cast),
                        _ => builder.append_null(),
                    }
                }
            },
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            other => match other.strict_cast(&target_dtype) {
                Some(AnyValue::Decimal(cast, _, _)) => builder.append_value(cast),
                _ => builder.append_null(),
            },
        };
    }

    // Build the array and do a precision check if needed.
    let physical = builder.finish();
    physical.into_decimal(precision, scale)
}
658
659
fn any_values_to_list(
660
avs: &[AnyValue],
661
inner_type: &DataType,
662
strict: bool,
663
) -> PolarsResult<ListChunked> {
664
// GB:
665
// Lord forgive for the sins I have committed in this function. The amount of strange
666
// exceptions that need to happen for this to work are insane and I feel like I am going crazy.
667
//
668
// This function is essentially a copy of the `<ListChunked as FromIterator>` where it does not
669
// sample the datatype from the first element and instead we give it explicitly. This allows
670
// this function to properly assign a datatype if `avs` starts with a `null` value. Previously,
671
// this was solved by assigning the `dtype` again afterwards, but why? We should not link the
672
// implementation of these functions. We still need to assign the dtype of the ListArray and
673
// such, anyways.
674
//
675
// Then, `collect_ca_with_dtype` does not possess the necessary exceptions shown in this
676
// function to use that. I have tried adding the exceptions there and it broke other things. I
677
// really do feel like this is the simplest solution.
678
679
let mut valid = true;
680
let capacity = avs.len();
681
682
let ca = match inner_type {
683
// AnyValues with empty lists in python can create
684
// Series of an unknown dtype.
685
// We use the anonymousbuilder without a dtype
686
// the empty arrays is then not added (we add an extra offset instead)
687
// the next non-empty series then must have the correct dtype.
688
DataType::Null => {
689
let mut builder = AnonymousOwnedListBuilder::new(PlSmallStr::EMPTY, capacity, None);
690
for av in avs {
691
match av {
692
AnyValue::List(b) => builder.append_series(b)?,
693
AnyValue::Null => builder.append_null(),
694
_ => {
695
valid = false;
696
builder.append_null();
697
},
698
}
699
}
700
builder.finish()
701
},
702
703
#[cfg(feature = "object")]
704
DataType::Object(_) => polars_bail!(nyi = "Nested object types"),
705
706
_ => {
707
let mut builder =
708
get_list_builder(inner_type, capacity * 5, capacity, PlSmallStr::EMPTY);
709
for av in avs {
710
match av {
711
AnyValue::List(b) => match b.cast(inner_type) {
712
Ok(casted) => {
713
if casted.null_count() != b.null_count() {
714
valid = !strict;
715
}
716
builder.append_series(&casted)?;
717
},
718
Err(_) => {
719
valid = false;
720
for _ in 0..b.len() {
721
builder.append_null();
722
}
723
},
724
},
725
AnyValue::Null => builder.append_null(),
726
_ => {
727
valid = false;
728
builder.append_null()
729
},
730
}
731
}
732
733
builder.finish()
734
},
735
};
736
737
if strict && !valid {
738
polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", DataType::List(Box::new(inner_type.clone())));
739
}
740
741
Ok(ca)
742
}
743
744
/// Convert AnyValues into an [`ArrayChunked`] of the given inner type and width.
///
/// List and array values supply the rows (cast to `inner_type` when their
/// data type differs; a failed cast yields an all-null row of the same
/// length). Nulls become null rows. Any other value becomes a null row and,
/// in strict mode, results in an error. An error is also returned when the
/// collected array does not have the expected `width`.
#[cfg(feature = "dtype-array")]
fn any_values_to_array(
    avs: &[AnyValue],
    inner_type: &DataType,
    strict: bool,
    width: usize,
) -> PolarsResult<ArrayChunked> {
    /// First chunk of `s`, rechunking first when `s` has more than one chunk.
    fn to_arr(s: &Series) -> Option<ArrayRef> {
        let chunk = if s.chunks().len() > 1 {
            s.rechunk().chunks()[0].clone()
        } else {
            s.chunks()[0].clone()
        };
        Some(chunk)
    }

    let target_dtype = DataType::Array(Box::new(inner_type.clone()), width);

    // A `Null` inner type is handled downstream: the builder will choose the
    // first non null type, so the rows are passed through untouched.
    let inner_is_null = inner_type == &DataType::Null;

    let mut valid = true;
    #[allow(unused_mut)]
    let mut out: ArrayChunked = avs
        .iter()
        .map(|av| match av {
            AnyValue::List(b) | AnyValue::Array(b, _) => {
                if inner_is_null || b.dtype() == inner_type {
                    to_arr(b)
                } else {
                    // Make sure that wrongly inferred AnyValues don't deviate
                    // from the datatype.
                    let casted = b.cast(inner_type).unwrap_or_else(|_| {
                        Series::full_null(b.name().clone(), b.len(), inner_type)
                    });
                    to_arr(&casted)
                }
            },
            AnyValue::Null => None,
            _ => {
                valid = false;
                None
            },
        })
        .collect_ca_with_dtype(PlSmallStr::EMPTY, target_dtype.clone());

    if strict && !valid {
        polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", target_dtype);
    }
    polars_ensure!(
        out.width() == width,
        SchemaMismatch: "got mixed size array widths where width {} was expected", width
    );

    // Ensure the logical type is correct for nested types.
    #[cfg(feature = "dtype-struct")]
    if !matches!(inner_type, DataType::Null) && out.inner_dtype().is_nested() {
        // SAFETY: NOTE(review) presumably sound because `out` was collected
        // with `target_dtype` — confirm against `set_dtype`'s contract.
        unsafe {
            out.set_dtype(target_dtype);
        };
    }

    Ok(out)
}
819
820
/// Push onto `field_avs` the value that one struct AnyValue holds for `field`.
///
/// `av_fields` / `av_values` describe the struct value at hand, `fields` is
/// the full target schema and `field_index` is the position of `field` in it.
///
/// When the value's fields are identical to the target schema, the value is
/// taken by index (null if the index is out of range). Otherwise the field is
/// looked up by name, and null is pushed when no field of that name exists.
#[cfg(feature = "dtype-struct")]
fn _any_values_to_struct<'a>(
    av_fields: &[Field],
    av_values: &[AnyValue<'a>],
    field_index: usize,
    field: &Field,
    fields: &[Field],
    field_avs: &mut Vec<AnyValue<'a>>,
) {
    // TODO: Optimize.

    // All fields are available in this single value, in the expected order.
    let same_layout = fields.len() == av_fields.len()
        && fields.iter().zip(av_fields.iter()).all(|(l, r)| l == r);

    let picked = if same_layout {
        // We can use the index to get the value.
        av_values
            .get(field_index)
            .cloned()
            .unwrap_or(AnyValue::Null)
    } else {
        // Not all fields are available (or they differ): search for the name.
        match av_fields
            .iter()
            .position(|av_fld| av_fld.name == field.name)
        {
            Some(pos) => av_values[pos].clone(),
            None => AnyValue::Null,
        }
    };
    field_avs.push(picked)
}
862
863
/// Convert AnyValues into a struct [`Series`] with the given `fields`.
///
/// The struct is assembled column by column: for every field, the matching
/// value is pulled out of each input value and the collected values are turned
/// into a Series (with inferred data type when the field's data type is `Null`).
///
/// Struct values contribute their fields; list and array values whose length
/// equals the number of fields contribute their elements by position; nulls
/// contribute nulls. Any other value is an error in strict mode and
/// contributes nulls in non-strict mode.
#[cfg(feature = "dtype-struct")]
fn any_values_to_struct(
    values: &[AnyValue],
    fields: &[Field],
    strict: bool,
) -> PolarsResult<Series> {
    // Fast path for structs with no fields.
    if fields.is_empty() {
        return Ok(
            StructChunked::from_series(PlSmallStr::EMPTY, values.len(), [].iter())?.into_series(),
        );
    }

    // The physical series fields of the struct.
    let mut series_fields = Vec::with_capacity(fields.len());
    let mut has_outer_validity = false;
    // Scratch buffer with the values of the field currently being built.
    let mut field_avs = Vec::with_capacity(values.len());
    for (i, field) in fields.iter().enumerate() {
        field_avs.clear();

        for av in values.iter() {
            match av {
                AnyValue::StructOwned(payload) => {
                    let (av_values, av_fields) = (&payload.0, &payload.1);
                    _any_values_to_struct(av_fields, av_values, i, field, fields, &mut field_avs);
                },
                AnyValue::Struct(_, _, av_fields) => {
                    let av_values: Vec<_> = av._iter_struct_av().collect();
                    _any_values_to_struct(av_fields, &av_values, i, field, fields, &mut field_avs);
                },
                AnyValue::List(s) if s.len() == fields.len() => {
                    // SAFETY: `i` indexes `fields`, and the guard ensures
                    // `s.len() == fields.len()`, so `i` is in bounds for `s`.
                    let elem = unsafe { s.get_unchecked(i) };
                    field_avs.push(elem);
                },
                #[cfg(feature = "dtype-array")]
                AnyValue::Array(s, _) if s.len() == fields.len() => {
                    // SAFETY: `i` indexes `fields`, and the guard ensures
                    // `s.len() == fields.len()`, so `i` is in bounds for `s`.
                    let elem = unsafe { s.get_unchecked(i) };
                    field_avs.push(elem);
                },
                AnyValue::Null => {
                    has_outer_validity = true;
                    field_avs.push(AnyValue::Null)
                },
                _ if strict => {
                    return Err(invalid_value_error(&DataType::Struct(fields.to_vec()), av));
                },
                _ => {
                    has_outer_validity = true;
                    field_avs.push(AnyValue::Null)
                },
            }
        }

        // If the inferred dtype is null, we let auto inference work.
        let column = if matches!(field.dtype, DataType::Null) {
            Series::from_any_values(field.name().clone(), &field_avs, strict)?
        } else {
            Series::from_any_values_and_dtype(
                field.name().clone(),
                &field_avs,
                &field.dtype,
                strict,
            )?
        };
        series_fields.push(column)
    }

    let mut out =
        StructChunked::from_series(PlSmallStr::EMPTY, values.len(), series_fields.iter())?;
    if has_outer_validity {
        // Start from an all-valid mask and clear the bit of every null row.
        // NOTE(review): values rejected in non-strict mode also set
        // `has_outer_validity`, but only `AnyValue::Null` rows are cleared
        // here — confirm whether that is intended.
        let mut validity = MutableBitmap::new();
        validity.extend_constant(values.len(), true);
        let null_rows = values
            .iter()
            .enumerate()
            .filter(|(_, v)| matches!(v, AnyValue::Null));
        for (row, _) in null_rows {
            // SAFETY: `row < values.len()`, which is the length of `validity`.
            unsafe { validity.set_unchecked(row, false) }
        }
        out.set_outer_validity(Some(validity.freeze()))
    }
    Ok(out.into_series())
}
945
946
/// Convert AnyValues into an object [`Series`].
///
/// Object values are appended as they are and nulls stay null. Every other
/// value is first passed through the converter obtained from the object
/// registry and the converted value is appended.
#[cfg(feature = "object")]
fn any_values_to_object(values: &[AnyValue]) -> PolarsResult<Series> {
    use crate::chunked_array::object::registry;

    let converter = registry::get_object_converter();
    let mut builder = registry::get_object_builder(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Object(obj) => builder.append_value(obj.as_any()),
            other => {
                // This is needed because in Python users can send mixed types.
                // This only works if you set a global converter.
                let converted = converter(other.as_borrowed());
                builder.append_value(&*converted)
            },
        }
    }

    Ok(builder.to_series())
}
966
967
fn invalid_value_error(dtype: &DataType, value: &AnyValue) -> PolarsError {
968
polars_err!(
969
SchemaMismatch:
970
"unexpected value while building Series of type {:?}; found value of type {:?}: {}",
971
dtype,
972
value.dtype(),
973
value
974
)
975
}
976
977