Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/series/any_value.rs
6940 views
1
use std::fmt::Write;
2
3
use arrow::bitmap::MutableBitmap;
4
5
#[cfg(feature = "dtype-categorical")]
6
use crate::chunked_array::builder::CategoricalChunkedBuilder;
7
use crate::chunked_array::builder::{AnonymousOwnedListBuilder, get_list_builder};
8
use crate::prelude::*;
9
use crate::utils::any_values_to_supertype;
10
11
impl<'a, T: AsRef<[AnyValue<'a>]>> NamedFrom<T, [AnyValue<'a>]> for Series {
12
/// Construct a new [`Series`] from a collection of [`AnyValue`].
13
///
14
/// # Panics
15
///
16
/// Panics if the values do not all share the same data type (with the exception
17
/// of [`DataType::Null`], which is always allowed).
18
///
19
/// [`AnyValue`]: crate::datatypes::AnyValue
20
fn new(name: PlSmallStr, values: T) -> Self {
21
let values = values.as_ref();
22
Series::from_any_values(name, values, true).expect("data types of values should match")
23
}
24
}
25
26
impl Series {
27
/// Construct a new [`Series`] from a slice of AnyValues.
28
///
29
/// The data type of the resulting Series is determined by the `values`
30
/// and the `strict` parameter:
31
/// - If `strict` is `true`, the data type is equal to the data type of the
32
/// first non-null value. If any other non-null values do not match this
33
/// data type, an error is raised.
34
/// - If `strict` is `false`, the data type is the supertype of the `values`.
35
/// An error is returned if no supertype can be determined.
36
/// **WARNING**: A full pass over the values is required to determine the supertype.
37
/// - If no values were passed, the resulting data type is `Null`.
38
pub fn from_any_values(
39
name: PlSmallStr,
40
values: &[AnyValue],
41
strict: bool,
42
) -> PolarsResult<Self> {
43
fn get_first_non_null_dtype(values: &[AnyValue]) -> DataType {
44
let mut all_flat_null = true;
45
let first_non_null = values.iter().find(|av| {
46
if !av.is_null() {
47
all_flat_null = false
48
};
49
!av.is_nested_null()
50
});
51
match first_non_null {
52
Some(av) => av.dtype(),
53
None => {
54
if all_flat_null {
55
DataType::Null
56
} else {
57
// Second pass to check for the nested null value that
58
// toggled `all_flat_null` to false, e.g. a List(Null).
59
let first_nested_null = values.iter().find(|av| !av.is_null()).unwrap();
60
first_nested_null.dtype()
61
}
62
},
63
}
64
}
65
let dtype = if strict {
66
get_first_non_null_dtype(values)
67
} else {
68
// Currently does not work correctly for Decimal because equality is not implemented.
69
any_values_to_supertype(values)?
70
};
71
72
// TODO: Remove this when Decimal data type equality is implemented.
73
#[cfg(feature = "dtype-decimal")]
74
if dtype.is_decimal() {
75
let dtype = DataType::Decimal(None, None);
76
return Self::from_any_values_and_dtype(name, values, &dtype, strict);
77
}
78
79
Self::from_any_values_and_dtype(name, values, &dtype, strict)
80
}
81
82
/// Construct a new [`Series`] with the given `dtype` from a slice of AnyValues.
83
///
84
/// If `strict` is `true`, an error is returned if the values do not match the given
85
/// data type. If `strict` is `false`, values that do not match the given data type
86
/// are cast. If casting is not possible, the values are set to null instead.
87
pub fn from_any_values_and_dtype(
88
name: PlSmallStr,
89
values: &[AnyValue],
90
dtype: &DataType,
91
strict: bool,
92
) -> PolarsResult<Self> {
93
if values.is_empty() {
94
return Ok(Self::new_empty(name, dtype));
95
}
96
97
let mut s = match dtype {
98
#[cfg(feature = "dtype-i8")]
99
DataType::Int8 => any_values_to_integer::<Int8Type>(values, strict)?.into_series(),
100
#[cfg(feature = "dtype-i16")]
101
DataType::Int16 => any_values_to_integer::<Int16Type>(values, strict)?.into_series(),
102
DataType::Int32 => any_values_to_integer::<Int32Type>(values, strict)?.into_series(),
103
DataType::Int64 => any_values_to_integer::<Int64Type>(values, strict)?.into_series(),
104
#[cfg(feature = "dtype-i128")]
105
DataType::Int128 => any_values_to_integer::<Int128Type>(values, strict)?.into_series(),
106
#[cfg(feature = "dtype-u8")]
107
DataType::UInt8 => any_values_to_integer::<UInt8Type>(values, strict)?.into_series(),
108
#[cfg(feature = "dtype-u16")]
109
DataType::UInt16 => any_values_to_integer::<UInt16Type>(values, strict)?.into_series(),
110
DataType::UInt32 => any_values_to_integer::<UInt32Type>(values, strict)?.into_series(),
111
DataType::UInt64 => any_values_to_integer::<UInt64Type>(values, strict)?.into_series(),
112
DataType::Float32 => any_values_to_f32(values, strict)?.into_series(),
113
DataType::Float64 => any_values_to_f64(values, strict)?.into_series(),
114
DataType::Boolean => any_values_to_bool(values, strict)?.into_series(),
115
DataType::String => any_values_to_string(values, strict)?.into_series(),
116
DataType::Binary => any_values_to_binary(values, strict)?.into_series(),
117
DataType::BinaryOffset => any_values_to_binary_offset(values, strict)?.into_series(),
118
#[cfg(feature = "dtype-date")]
119
DataType::Date => any_values_to_date(values, strict)?.into_series(),
120
#[cfg(feature = "dtype-time")]
121
DataType::Time => any_values_to_time(values, strict)?.into_series(),
122
#[cfg(feature = "dtype-datetime")]
123
DataType::Datetime(tu, tz) => {
124
any_values_to_datetime(values, *tu, (*tz).clone(), strict)?.into_series()
125
},
126
#[cfg(feature = "dtype-duration")]
127
DataType::Duration(tu) => any_values_to_duration(values, *tu, strict)?.into_series(),
128
#[cfg(feature = "dtype-categorical")]
129
dt @ (DataType::Categorical(_, _) | DataType::Enum(_, _)) => {
130
any_values_to_categorical(values, dt, strict)?
131
},
132
#[cfg(feature = "dtype-decimal")]
133
DataType::Decimal(precision, scale) => {
134
any_values_to_decimal(values, *precision, *scale, strict)?.into_series()
135
},
136
DataType::List(inner) => any_values_to_list(values, inner, strict)?.into_series(),
137
#[cfg(feature = "dtype-array")]
138
DataType::Array(inner, size) => any_values_to_array(values, inner, strict, *size)?
139
.into_series()
140
.cast(&DataType::Array(inner.clone(), *size))?,
141
#[cfg(feature = "dtype-struct")]
142
DataType::Struct(fields) => any_values_to_struct(values, fields, strict)?,
143
#[cfg(feature = "object")]
144
DataType::Object(_) => any_values_to_object(values)?,
145
DataType::Null => Series::new_null(PlSmallStr::EMPTY, values.len()),
146
dt => {
147
polars_bail!(
148
InvalidOperation:
149
"constructing a Series with data type {dt:?} from AnyValues is not supported"
150
)
151
},
152
};
153
s.rename(name);
154
Ok(s)
155
}
156
}
157
158
/// Non-strict numeric conversion: every value is passed through
/// `AnyValue::extract::<T::Native>()` and the resulting options are collected
/// into a `ChunkedArray<T>`.
fn any_values_to_primitive_nonstrict<T: PolarsNumericType>(values: &[AnyValue]) -> ChunkedArray<T> {
    let extracted = values.iter().map(|value| value.extract::<T::Native>());
    extracted.collect_trusted()
}
164
165
fn any_values_to_integer<T: PolarsIntegerType>(
166
values: &[AnyValue],
167
strict: bool,
168
) -> PolarsResult<ChunkedArray<T>> {
169
fn any_values_to_integer_strict<T: PolarsIntegerType>(
170
values: &[AnyValue],
171
) -> PolarsResult<ChunkedArray<T>> {
172
let mut builder = PrimitiveChunkedBuilder::<T>::new(PlSmallStr::EMPTY, values.len());
173
for av in values {
174
match &av {
175
av if av.is_integer() => {
176
let opt_val = av.extract::<T::Native>();
177
let val = match opt_val {
178
Some(v) => v,
179
None => return Err(invalid_value_error(&T::get_static_dtype(), av)),
180
};
181
builder.append_value(val)
182
},
183
AnyValue::Null => builder.append_null(),
184
av => return Err(invalid_value_error(&T::get_static_dtype(), av)),
185
}
186
}
187
Ok(builder.finish())
188
}
189
190
if strict {
191
any_values_to_integer_strict::<T>(values)
192
} else {
193
Ok(any_values_to_primitive_nonstrict::<T>(values))
194
}
195
}
196
197
fn any_values_to_f32(values: &[AnyValue], strict: bool) -> PolarsResult<Float32Chunked> {
198
fn any_values_to_f32_strict(values: &[AnyValue]) -> PolarsResult<Float32Chunked> {
199
let mut builder =
200
PrimitiveChunkedBuilder::<Float32Type>::new(PlSmallStr::EMPTY, values.len());
201
for av in values {
202
match av {
203
AnyValue::Float32(i) => builder.append_value(*i),
204
AnyValue::Null => builder.append_null(),
205
av => return Err(invalid_value_error(&DataType::Float32, av)),
206
}
207
}
208
Ok(builder.finish())
209
}
210
if strict {
211
any_values_to_f32_strict(values)
212
} else {
213
Ok(any_values_to_primitive_nonstrict::<Float32Type>(values))
214
}
215
}
216
fn any_values_to_f64(values: &[AnyValue], strict: bool) -> PolarsResult<Float64Chunked> {
217
fn any_values_to_f64_strict(values: &[AnyValue]) -> PolarsResult<Float64Chunked> {
218
let mut builder =
219
PrimitiveChunkedBuilder::<Float64Type>::new(PlSmallStr::EMPTY, values.len());
220
for av in values {
221
match av {
222
AnyValue::Float64(i) => builder.append_value(*i),
223
AnyValue::Float32(i) => builder.append_value(*i as f64),
224
AnyValue::Null => builder.append_null(),
225
av => return Err(invalid_value_error(&DataType::Float64, av)),
226
}
227
}
228
Ok(builder.finish())
229
}
230
if strict {
231
any_values_to_f64_strict(values)
232
} else {
233
Ok(any_values_to_primitive_nonstrict::<Float64Type>(values))
234
}
235
}
236
237
fn any_values_to_bool(values: &[AnyValue], strict: bool) -> PolarsResult<BooleanChunked> {
238
let mut builder = BooleanChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
239
for av in values {
240
match av {
241
AnyValue::Boolean(b) => builder.append_value(*b),
242
AnyValue::Null => builder.append_null(),
243
av => {
244
if strict {
245
return Err(invalid_value_error(&DataType::Boolean, av));
246
}
247
match av.cast(&DataType::Boolean) {
248
AnyValue::Boolean(b) => builder.append_value(b),
249
_ => builder.append_null(),
250
}
251
},
252
}
253
}
254
Ok(builder.finish())
255
}
256
257
fn any_values_to_string(values: &[AnyValue], strict: bool) -> PolarsResult<StringChunked> {
258
fn any_values_to_string_strict(values: &[AnyValue]) -> PolarsResult<StringChunked> {
259
let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
260
for av in values {
261
match av {
262
AnyValue::String(s) => builder.append_value(s),
263
AnyValue::StringOwned(s) => builder.append_value(s),
264
AnyValue::Null => builder.append_null(),
265
av => return Err(invalid_value_error(&DataType::String, av)),
266
}
267
}
268
Ok(builder.finish())
269
}
270
fn any_values_to_string_nonstrict(values: &[AnyValue]) -> StringChunked {
271
let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
272
let mut owned = String::new(); // Amortize allocations.
273
for av in values {
274
match av {
275
AnyValue::String(s) => builder.append_value(s),
276
AnyValue::StringOwned(s) => builder.append_value(s),
277
AnyValue::Null => builder.append_null(),
278
AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => builder.append_null(),
279
av => {
280
owned.clear();
281
write!(owned, "{av}").unwrap();
282
builder.append_value(&owned);
283
},
284
}
285
}
286
builder.finish()
287
}
288
if strict {
289
any_values_to_string_strict(values)
290
} else {
291
Ok(any_values_to_string_nonstrict(values))
292
}
293
}
294
295
fn any_values_to_binary(values: &[AnyValue], strict: bool) -> PolarsResult<BinaryChunked> {
296
fn any_values_to_binary_strict(values: &[AnyValue]) -> PolarsResult<BinaryChunked> {
297
let mut builder = BinaryChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
298
for av in values {
299
match av {
300
AnyValue::Binary(s) => builder.append_value(*s),
301
AnyValue::BinaryOwned(s) => builder.append_value(&**s),
302
AnyValue::Null => builder.append_null(),
303
av => return Err(invalid_value_error(&DataType::Binary, av)),
304
}
305
}
306
Ok(builder.finish())
307
}
308
fn any_values_to_binary_nonstrict(values: &[AnyValue]) -> BinaryChunked {
309
values
310
.iter()
311
.map(|av| match av {
312
AnyValue::Binary(b) => Some(*b),
313
AnyValue::BinaryOwned(b) => Some(&**b),
314
AnyValue::String(s) => Some(s.as_bytes()),
315
AnyValue::StringOwned(s) => Some(s.as_str().as_bytes()),
316
_ => None,
317
})
318
.collect_trusted()
319
}
320
if strict {
321
any_values_to_binary_strict(values)
322
} else {
323
Ok(any_values_to_binary_nonstrict(values))
324
}
325
}
326
327
fn any_values_to_binary_offset(
328
values: &[AnyValue],
329
strict: bool,
330
) -> PolarsResult<BinaryOffsetChunked> {
331
let mut builder = MutableBinaryArray::<i64>::new();
332
for av in values {
333
match av {
334
AnyValue::Binary(s) => builder.push(Some(*s)),
335
AnyValue::BinaryOwned(s) => builder.push(Some(&**s)),
336
AnyValue::Null => builder.push_null(),
337
av => {
338
if strict {
339
return Err(invalid_value_error(&DataType::Binary, av));
340
} else {
341
builder.push_null();
342
};
343
},
344
}
345
}
346
Ok(BinaryOffsetChunked::with_chunk(
347
Default::default(),
348
builder.into(),
349
))
350
}
351
352
/// Convert AnyValues into a `DateChunked`.
///
/// Dates and nulls are appended as-is. Any other value is an error in strict
/// mode; in non-strict mode it is cast to `Date` and becomes null when the
/// cast does not yield a date.
#[cfg(feature = "dtype-date")]
fn any_values_to_date(values: &[AnyValue], strict: bool) -> PolarsResult<DateChunked> {
    let mut builder = PrimitiveChunkedBuilder::<Int32Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Date(days) => builder.append_value(*days),
            other if strict => return Err(invalid_value_error(&DataType::Date, other)),
            other => match other.cast(&DataType::Date) {
                AnyValue::Date(days) => builder.append_value(days),
                _ => builder.append_null(),
            },
        }
    }
    Ok(builder.finish().into_date())
}
372
373
/// Convert AnyValues into a `TimeChunked`.
///
/// Times and nulls are appended as-is. Any other value is an error in strict
/// mode; in non-strict mode it is cast to `Time` and becomes null when the
/// cast does not yield a time.
#[cfg(feature = "dtype-time")]
fn any_values_to_time(values: &[AnyValue], strict: bool) -> PolarsResult<TimeChunked> {
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Time(time) => builder.append_value(*time),
            other if strict => return Err(invalid_value_error(&DataType::Time, other)),
            other => match other.cast(&DataType::Time) {
                AnyValue::Time(time) => builder.append_value(time),
                _ => builder.append_null(),
            },
        }
    }
    Ok(builder.finish().into_time())
}
393
394
/// Convert AnyValues into a `DatetimeChunked` with the given time unit and time zone.
///
/// Datetimes whose time unit equals `time_unit` and nulls are appended as-is
/// (the time zone of the value is not inspected here). Any other value is an
/// error in strict mode; in non-strict mode it is cast to the target data type
/// and becomes null when the cast does not yield a datetime.
#[cfg(feature = "dtype-datetime")]
fn any_values_to_datetime(
    values: &[AnyValue],
    time_unit: TimeUnit,
    time_zone: Option<TimeZone>,
    strict: bool,
) -> PolarsResult<DatetimeChunked> {
    let target_dtype = DataType::Datetime(time_unit, time_zone.clone());
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());

    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Datetime(ts, unit, _) | AnyValue::DatetimeOwned(ts, unit, _)
                if *unit == time_unit =>
            {
                builder.append_value(*ts)
            },
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            other => match other.cast(&target_dtype) {
                AnyValue::Datetime(ts, _, _) | AnyValue::DatetimeOwned(ts, _, _) => {
                    builder.append_value(ts)
                },
                _ => builder.append_null(),
            },
        }
    }
    Ok(builder.finish().into_datetime(time_unit, time_zone))
}
422
423
/// Convert AnyValues into a `DurationChunked` with the given time unit.
///
/// Durations whose time unit equals `time_unit` and nulls are appended as-is.
/// Any other value is an error in strict mode; in non-strict mode it is cast
/// to the target data type and becomes null when the cast does not yield a
/// duration.
#[cfg(feature = "dtype-duration")]
fn any_values_to_duration(
    values: &[AnyValue],
    time_unit: TimeUnit,
    strict: bool,
) -> PolarsResult<DurationChunked> {
    let target_dtype = DataType::Duration(time_unit);
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());

    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Duration(amount, unit) if *unit == time_unit => {
                builder.append_value(*amount)
            },
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            other => match other.cast(&target_dtype) {
                AnyValue::Duration(amount, _) => builder.append_value(amount),
                _ => builder.append_null(),
            },
        }
    }
    Ok(builder.finish().into_duration(time_unit))
}
448
449
/// Convert AnyValues into a Categorical or Enum [`Series`] of the given `dtype`.
///
/// Strings are appended by value and categorical/enum values by their category
/// and mapping. Nulls are appended as null.
///
/// In strict mode any other value is an error, and so is a value the builder
/// refuses to append. In non-strict mode binary values become null, other
/// values are appended using their `Display` representation, and values the
/// builder refuses become null.
///
/// # Errors
///
/// In strict mode, returns a `SchemaMismatch` error naming `dtype` (the type
/// actually being built) for the first unsupported value, or the builder's own
/// error when appending fails.
///
/// # Panics
///
/// Panics if `dtype` has no categorical physical type (`cat_physical()` is unwrapped).
#[cfg(feature = "dtype-categorical")]
fn any_values_to_categorical(
    values: &[AnyValue],
    dtype: &DataType,
    strict: bool,
) -> PolarsResult<Series> {
    with_match_categorical_physical_type!(dtype.cat_physical().unwrap(), |$C| {
        let mut builder = CategoricalChunkedBuilder::<$C>::new(PlSmallStr::EMPTY, dtype.clone());

        let mut owned = String::new(); // Amortize allocations.
        for av in values {
            let ret = match av {
                AnyValue::String(s) => builder.append_str(s),
                AnyValue::StringOwned(s) => builder.append_str(s),

                &AnyValue::Enum(cat, &ref map) |
                &AnyValue::EnumOwned(cat, ref map) |
                &AnyValue::Categorical(cat, &ref map) |
                &AnyValue::CategoricalOwned(cat, ref map) => builder.append_cat(cat, map),

                AnyValue::Binary(_) | AnyValue::BinaryOwned(_) if !strict => {
                    builder.append_null();
                    Ok(())
                },
                AnyValue::Null => {
                    builder.append_null();
                    Ok(())
                }

                av => {
                    if strict {
                        // Report the real target type; the previous message claimed `String`.
                        return Err(invalid_value_error(dtype, av));
                    }

                    owned.clear();
                    write!(owned, "{av}").unwrap();
                    builder.append_str(&owned)
                },
            };

            // A failed append is fatal in strict mode and becomes a null otherwise.
            if let Err(e) = ret {
                if strict {
                    return Err(e);
                } else {
                    builder.append_null();
                }
            }
        }

        let ca = builder.finish();
        Ok(ca.into_series())
    })
}
502
503
/// Convert AnyValues into a `DecimalChunked` with the given precision and scale.
///
/// When `scale` is `None`, the scale is inferred as the maximum scale among
/// the decimal values. Decimals with exactly the target scale are appended
/// as-is; decimals with a smaller scale are rescaled via `strict_cast`, even in
/// strict mode. Any other non-null value is an error in strict mode and is
/// run through `strict_cast` (null on failure) in non-strict mode.
#[cfg(feature = "dtype-decimal")]
fn any_values_to_decimal(
    values: &[AnyValue],
    precision: Option<usize>,
    scale: Option<usize>, // If None, we're inferring the scale.
    strict: bool,
) -> PolarsResult<DecimalChunked> {
    /// Get the maximum scale among AnyValues.
    ///
    /// Nulls are ignored. Non-decimal values are an error in strict mode and
    /// ignored otherwise.
    fn infer_scale(
        values: &[AnyValue],
        precision: Option<usize>,
        strict: bool,
    ) -> PolarsResult<usize> {
        let mut max_scale: usize = 0;
        for value in values {
            match value {
                AnyValue::Decimal(_, scale) => max_scale = max_scale.max(*scale),
                AnyValue::Null => {},
                other if strict => {
                    let target_dtype = DataType::Decimal(precision, None);
                    return Err(invalid_value_error(&target_dtype, other));
                },
                _ => {},
            }
        }
        Ok(max_scale)
    }

    let scale = match scale {
        Some(explicit) => explicit,
        None => infer_scale(values, precision, strict)?,
    };
    let target_dtype = DataType::Decimal(precision, Some(scale));

    let mut builder = PrimitiveChunkedBuilder::<Int128Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Decimal(v, s) if *s == scale => builder.append_value(*v),
            AnyValue::Null => builder.append_null(),
            other => {
                // Allow equal or less scale. We do want to support different
                // scales even in 'strict' mode.
                let has_smaller_scale = matches!(other, AnyValue::Decimal(_, s) if *s < scale);
                if strict && !has_smaller_scale {
                    return Err(invalid_value_error(&target_dtype, other));
                }
                // TODO: Precision check, else set to null
                match other.strict_cast(&target_dtype) {
                    Some(AnyValue::Decimal(rescaled, _)) => builder.append_value(rescaled),
                    _ => builder.append_null(),
                }
            },
        }
    }

    // Build the array and do a precision check if needed.
    builder.finish().into_decimal(precision, scale)
}
570
571
fn any_values_to_list(
572
avs: &[AnyValue],
573
inner_type: &DataType,
574
strict: bool,
575
) -> PolarsResult<ListChunked> {
576
// GB:
577
// Lord forgive for the sins I have committed in this function. The amount of strange
578
// exceptions that need to happen for this to work are insane and I feel like I am going crazy.
579
//
580
// This function is essentially a copy of the `<ListChunked as FromIterator>` where it does not
581
// sample the datatype from the first element and instead we give it explicitly. This allows
582
// this function to properly assign a datatype if `avs` starts with a `null` value. Previously,
583
// this was solved by assigning the `dtype` again afterwards, but why? We should not link the
584
// implementation of these functions. We still need to assign the dtype of the ListArray and
585
// such, anyways.
586
//
587
// Then, `collect_ca_with_dtype` does not possess the necessary exceptions shown in this
588
// function to use that. I have tried adding the exceptions there and it broke other things. I
589
// really do feel like this is the simplest solution.
590
591
let mut valid = true;
592
let capacity = avs.len();
593
594
let ca = match inner_type {
595
// AnyValues with empty lists in python can create
596
// Series of an unknown dtype.
597
// We use the anonymousbuilder without a dtype
598
// the empty arrays is then not added (we add an extra offset instead)
599
// the next non-empty series then must have the correct dtype.
600
DataType::Null => {
601
let mut builder = AnonymousOwnedListBuilder::new(PlSmallStr::EMPTY, capacity, None);
602
for av in avs {
603
match av {
604
AnyValue::List(b) => builder.append_series(b)?,
605
AnyValue::Null => builder.append_null(),
606
_ => {
607
valid = false;
608
builder.append_null();
609
},
610
}
611
}
612
builder.finish()
613
},
614
615
#[cfg(feature = "object")]
616
DataType::Object(_) => polars_bail!(nyi = "Nested object types"),
617
618
_ => {
619
let mut builder =
620
get_list_builder(inner_type, capacity * 5, capacity, PlSmallStr::EMPTY);
621
for av in avs {
622
match av {
623
AnyValue::List(b) => match b.cast(inner_type) {
624
Ok(casted) => {
625
if casted.null_count() != b.null_count() {
626
valid = !strict;
627
}
628
builder.append_series(&casted)?;
629
},
630
Err(_) => {
631
valid = false;
632
for _ in 0..b.len() {
633
builder.append_null();
634
}
635
},
636
},
637
AnyValue::Null => builder.append_null(),
638
_ => {
639
valid = false;
640
builder.append_null()
641
},
642
}
643
}
644
645
builder.finish()
646
},
647
};
648
649
if strict && !valid {
650
polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", DataType::List(Box::new(inner_type.clone())));
651
}
652
653
Ok(ca)
654
}
655
656
/// Convert AnyValues into an `ArrayChunked` of `Array(inner_type, width)`.
///
/// List and array values contribute their (single, rechunked if needed) chunk.
/// When `inner_type` is not `Null` and the value has a different data type, it
/// is cast to `inner_type` first, falling back to an all-null series of the
/// same length if the cast fails. Nulls and all other values become null.
///
/// Errors with `SchemaMismatch` in strict mode if a value was neither a
/// list/array nor null, and in any mode if the resulting width differs from
/// `width`.
#[cfg(feature = "dtype-array")]
fn any_values_to_array(
    avs: &[AnyValue],
    inner_type: &DataType,
    strict: bool,
    width: usize,
) -> PolarsResult<ArrayChunked> {
    /// First chunk of `s`, rechunking first when there is more than one chunk.
    fn to_arr(s: &Series) -> Option<ArrayRef> {
        let chunk = if s.chunks().len() > 1 {
            s.rechunk().chunks()[0].clone()
        } else {
            s.chunks()[0].clone()
        };
        Some(chunk)
    }

    let target_dtype = DataType::Array(Box::new(inner_type.clone()), width);
    // A `Null` inner type is handled downstream. The builder will choose the
    // first non null type, so values are passed through without casting.
    let passthrough = inner_type == &DataType::Null;

    let mut valid = true;
    #[allow(unused_mut)]
    let mut out: ArrayChunked = avs
        .iter()
        .map(|av| match av {
            AnyValue::List(b) | AnyValue::Array(b, _) => {
                if passthrough || b.dtype() == inner_type {
                    to_arr(b)
                } else {
                    // Make sure that wrongly inferred AnyValues don't deviate from the datatype.
                    let coerced = match b.cast(inner_type) {
                        Ok(casted) => casted,
                        Err(_) => Series::full_null(b.name().clone(), b.len(), inner_type),
                    };
                    to_arr(&coerced)
                }
            },
            AnyValue::Null => None,
            _ => {
                valid = false;
                None
            },
        })
        .collect_ca_with_dtype(PlSmallStr::EMPTY, target_dtype.clone());

    if strict && !valid {
        polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", target_dtype);
    }
    polars_ensure!(
        out.width() == width,
        SchemaMismatch: "got mixed size array widths where width {} was expected", width
    );

    // Ensure the logical type is correct for nested types.
    #[cfg(feature = "dtype-struct")]
    if !matches!(inner_type, DataType::Null) && out.inner_dtype().is_nested() {
        // NOTE(review): `target_dtype` is the dtype `out` was collected with above;
        // presumably that is what makes overriding the dtype sound here - confirm
        // against the contract of `set_dtype`.
        unsafe {
            out.set_dtype(target_dtype);
        };
    }

    Ok(out)
}
731
732
/// Push the value of `field` found in one struct AnyValue onto `field_avs`.
///
/// `av_fields` / `av_values` are the fields and values of the struct value,
/// `fields` are the fields of the target struct and `field_index` is the
/// position of `field` within `fields`.
///
/// If the struct value has exactly the target fields (same count, pairwise
/// equal), the value is taken by index. Otherwise the field is looked up by
/// name. In both cases a missing value is pushed as null.
#[cfg(feature = "dtype-struct")]
fn _any_values_to_struct<'a>(
    av_fields: &[Field],
    av_values: &[AnyValue<'a>],
    field_index: usize,
    field: &Field,
    fields: &[Field],
    field_avs: &mut Vec<AnyValue<'a>>,
) {
    // TODO: Optimize.

    // All fields are available in this single value, in the expected order.
    let same_layout = fields.len() == av_fields.len()
        && !fields.iter().zip(av_fields.iter()).any(|(l, r)| l != r);

    let picked = if same_layout {
        // We can use the index to get value.
        av_values
            .get(field_index)
            .cloned()
            .unwrap_or(AnyValue::Null)
    } else {
        // Not all fields are available (or they differ), we search the proper field by name.
        match av_fields
            .iter()
            .position(|av_fld| av_fld.name == field.name)
        {
            Some(pos) => av_values[pos].clone(),
            None => AnyValue::Null,
        }
    };
    field_avs.push(picked)
}
774
775
/// Convert AnyValues into a struct [`Series`] with the given `fields`.
///
/// For every field, the matching value is gathered from each struct value
/// (null for values that are not structs) and the gathered values are turned
/// into a field Series. Fields whose data type is `Null` go through data type
/// inference. If any value was not a struct, an outer validity mask is set in
/// which exactly the `AnyValue::Null` rows are marked invalid.
#[cfg(feature = "dtype-struct")]
fn any_values_to_struct(
    values: &[AnyValue],
    fields: &[Field],
    strict: bool,
) -> PolarsResult<Series> {
    // Fast path for structs with no fields.
    if fields.is_empty() {
        return Ok(
            StructChunked::from_series(PlSmallStr::EMPTY, values.len(), [].iter())?.into_series(),
        );
    }

    // Any value that is not a struct requires an outer validity mask. This only
    // depends on `values`, so it is determined once instead of per field.
    let has_outer_validity = values
        .iter()
        .any(|av| !matches!(av, AnyValue::StructOwned(..) | AnyValue::Struct(..)));

    // The physical series fields of the struct.
    let mut series_fields = Vec::with_capacity(fields.len());
    // Buffer with the values of the current field, reused across fields.
    let mut field_avs = Vec::with_capacity(values.len());
    for (i, field) in fields.iter().enumerate() {
        field_avs.clear();

        for av in values {
            match av {
                AnyValue::StructOwned(payload) => {
                    _any_values_to_struct(&payload.1, &payload.0, i, field, fields, &mut field_avs);
                },
                AnyValue::Struct(_, _, av_fields) => {
                    let av_values: Vec<_> = av._iter_struct_av().collect();
                    _any_values_to_struct(av_fields, &av_values, i, field, fields, &mut field_avs);
                },
                _ => field_avs.push(AnyValue::Null),
            }
        }

        // If the inferred dtype is null, we let auto inference work.
        let field_series = if matches!(field.dtype, DataType::Null) {
            Series::from_any_values(field.name().clone(), &field_avs, strict)?
        } else {
            Series::from_any_values_and_dtype(
                field.name().clone(),
                &field_avs,
                &field.dtype,
                strict,
            )?
        };
        series_fields.push(field_series)
    }

    let mut out =
        StructChunked::from_series(PlSmallStr::EMPTY, values.len(), series_fields.iter())?;
    if has_outer_validity {
        let mut validity = MutableBitmap::new();
        validity.extend_constant(values.len(), true);
        for (row, value) in values.iter().enumerate() {
            if matches!(value, AnyValue::Null) {
                // SAFETY: `row < values.len()`, and the bitmap was extended by
                // `values.len()` bits just above.
                unsafe { validity.set_unchecked(row, false) }
            }
        }
        out.set_outer_validity(Some(validity.freeze()))
    }
    Ok(out.into_series())
}
840
841
/// Convert AnyValues into an object [`Series`] using the globally registered
/// object builder and converter.
///
/// Object values and nulls are appended directly; every other value is first
/// passed through the registered converter.
#[cfg(feature = "object")]
fn any_values_to_object(values: &[AnyValue]) -> PolarsResult<Series> {
    use crate::chunked_array::object::registry;

    let converter = registry::get_object_converter();
    let mut builder = registry::get_object_builder(PlSmallStr::EMPTY, values.len());

    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Object(object) => builder.append_value(object.as_any()),
            other => {
                // This is needed because in Python users can send mixed types.
                // This only works if you set a global converter.
                let converted = converter(other.as_borrowed());
                builder.append_value(&*converted)
            },
        }
    }

    Ok(builder.to_series())
}
861
862
fn invalid_value_error(dtype: &DataType, value: &AnyValue) -> PolarsError {
863
polars_err!(
864
SchemaMismatch:
865
"unexpected value while building Series of type {:?}; found value of type {:?}: {}",
866
dtype,
867
value.dtype(),
868
value
869
)
870
}
871
872