Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/chunked_array/cast.rs
6940 views
1
//! Implementations of the ChunkCast Trait.
2
3
use std::borrow::Cow;
4
5
use polars_compute::cast::CastOptionsImpl;
6
#[cfg(feature = "serde-lazy")]
7
use serde::{Deserialize, Serialize};
8
9
use super::flags::StatisticsFlags;
10
#[cfg(feature = "dtype-datetime")]
11
use crate::prelude::DataType::Datetime;
12
use crate::prelude::*;
13
use crate::utils::handle_casting_failures;
14
15
/// Selects how a cast treats values that do not fit into the target dtype.
///
/// The default is [`CastOptions::Strict`].
#[derive(Copy, Clone, Debug, Default, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
#[repr(u8)]
pub enum CastOptions {
    /// An overflowing value makes the cast return an error.
    #[default]
    Strict,
    /// An overflowing value becomes null instead of raising.
    NonStrict,
    /// An overflowing value is allowed to wrap around.
    Overflowing,
}
28
29
impl CastOptions {
30
pub fn is_strict(&self) -> bool {
31
matches!(self, CastOptions::Strict)
32
}
33
}
34
35
impl From<CastOptions> for CastOptionsImpl {
36
fn from(value: CastOptions) -> Self {
37
let wrapped = match value {
38
CastOptions::Strict | CastOptions::NonStrict => false,
39
CastOptions::Overflowing => true,
40
};
41
CastOptionsImpl {
42
wrapped,
43
partial: false,
44
}
45
}
46
}
47
48
pub(crate) fn cast_chunks(
49
chunks: &[ArrayRef],
50
dtype: &DataType,
51
options: CastOptions,
52
) -> PolarsResult<Vec<ArrayRef>> {
53
let check_nulls = matches!(options, CastOptions::Strict);
54
let options = options.into();
55
56
let arrow_dtype = dtype.try_to_arrow(CompatLevel::newest())?;
57
chunks
58
.iter()
59
.map(|arr| {
60
let out = polars_compute::cast::cast(arr.as_ref(), &arrow_dtype, options);
61
if check_nulls {
62
out.and_then(|new| {
63
polars_ensure!(arr.null_count() == new.null_count(), ComputeError: "strict cast failed");
64
Ok(new)
65
})
66
67
} else {
68
out
69
}
70
})
71
.collect::<PolarsResult<Vec<_>>>()
72
}
73
74
fn cast_impl_inner(
75
name: PlSmallStr,
76
chunks: &[ArrayRef],
77
dtype: &DataType,
78
options: CastOptions,
79
) -> PolarsResult<Series> {
80
let chunks = match dtype {
81
#[cfg(feature = "dtype-decimal")]
82
DataType::Decimal(_, _) => {
83
let mut chunks = cast_chunks(chunks, dtype, options)?;
84
// @NOTE: We cannot cast here as that will lower the scale.
85
for chunk in chunks.iter_mut() {
86
*chunk = std::mem::take(
87
chunk
88
.as_any_mut()
89
.downcast_mut::<PrimitiveArray<i128>>()
90
.unwrap(),
91
)
92
.to(ArrowDataType::Int128)
93
.to_boxed();
94
}
95
chunks
96
},
97
_ => cast_chunks(chunks, &dtype.to_physical(), options)?,
98
};
99
100
let out = Series::try_from((name, chunks))?;
101
use DataType::*;
102
let out = match dtype {
103
Date => out.into_date(),
104
Datetime(tu, tz) => match tz {
105
#[cfg(feature = "timezones")]
106
Some(tz) => {
107
TimeZone::validate_time_zone(tz)?;
108
out.into_datetime(*tu, Some(tz.clone()))
109
},
110
_ => out.into_datetime(*tu, None),
111
},
112
Duration(tu) => out.into_duration(*tu),
113
#[cfg(feature = "dtype-time")]
114
Time => out.into_time(),
115
#[cfg(feature = "dtype-decimal")]
116
Decimal(precision, scale) => out.into_decimal(*precision, scale.unwrap_or(0))?,
117
_ => out,
118
};
119
120
Ok(out)
121
}
122
123
fn cast_impl(
124
name: PlSmallStr,
125
chunks: &[ArrayRef],
126
dtype: &DataType,
127
options: CastOptions,
128
) -> PolarsResult<Series> {
129
cast_impl_inner(name, chunks, dtype, options)
130
}
131
132
/// Casts `chunks` into a struct series with exactly one field.
///
/// The data is cast to the dtype of that single field, and the resulting series
/// (named after the field) becomes the only member of a struct called `name`.
///
/// # Errors
///
/// Returns `InvalidOperation` when `fields` does not contain exactly one field, and
/// propagates any error raised while casting to the field dtype or building the struct.
#[cfg(feature = "dtype-struct")]
fn cast_single_to_struct(
    name: PlSmallStr,
    chunks: &[ArrayRef],
    fields: &[Field],
    options: CastOptions,
) -> PolarsResult<Series> {
    // Exactly one field is accepted, so there are never further fields to fill with nulls.
    let [field] = fields else {
        polars_bail!(InvalidOperation: "must specify one field in the struct");
    };

    let casted = cast_impl_inner(field.name.clone(), chunks, &field.dtype, options)?;
    let length = casted.len();

    StructChunked::from_series(name, length, [casted].iter()).map(|ca| ca.into_series())
}
154
155
impl<T> ChunkedArray<T>
156
where
157
T: PolarsNumericType,
158
{
159
fn cast_impl(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
160
if self.dtype() == dtype {
161
// SAFETY: chunks are correct dtype
162
let mut out = unsafe {
163
Series::from_chunks_and_dtype_unchecked(
164
self.name().clone(),
165
self.chunks.clone(),
166
dtype,
167
)
168
};
169
out.set_sorted_flag(self.is_sorted_flag());
170
return Ok(out);
171
}
172
match dtype {
173
// LEGACY
174
// TODO @ cat-rework: remove after exposing to/from physical functions.
175
#[cfg(feature = "dtype-categorical")]
176
DataType::Categorical(cats, _mapping) => {
177
let s = self.cast_with_options(&cats.physical().dtype(), options)?;
178
with_match_categorical_physical_type!(cats.physical(), |$C| {
179
// SAFETY: we are guarded by the type system.
180
type PhysCa = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
181
let ca: &PhysCa = s.as_ref().as_ref();
182
Ok(CategoricalChunked::<$C>::from_cats_and_dtype(ca.clone(), dtype.clone())
183
.into_series())
184
})
185
},
186
187
// LEGACY
188
// TODO @ cat-rework: remove after exposing to/from physical functions.
189
#[cfg(feature = "dtype-categorical")]
190
DataType::Enum(fcats, _mapping) => {
191
let s = self.cast_with_options(&fcats.physical().dtype(), options)?;
192
with_match_categorical_physical_type!(fcats.physical(), |$C| {
193
// SAFETY: we are guarded by the type system.
194
type PhysCa = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
195
let ca: &PhysCa = s.as_ref().as_ref();
196
Ok(CategoricalChunked::<$C>::from_cats_and_dtype(ca.clone(), dtype.clone()).into_series())
197
})
198
},
199
200
#[cfg(feature = "dtype-struct")]
201
DataType::Struct(fields) => {
202
cast_single_to_struct(self.name().clone(), &self.chunks, fields, options)
203
},
204
_ => cast_impl_inner(self.name().clone(), &self.chunks, dtype, options).map(|mut s| {
205
// maintain sorted if data types
206
// - remain signed
207
// - unsigned -> signed
208
// this may still fail with overflow?
209
let to_signed = dtype.is_signed_integer();
210
let unsigned2unsigned =
211
self.dtype().is_unsigned_integer() && dtype.is_unsigned_integer();
212
let allowed = to_signed || unsigned2unsigned;
213
214
if (allowed)
215
&& (s.null_count() == self.null_count())
216
// physical to logicals
217
|| (self.dtype().to_physical() == dtype.to_physical())
218
{
219
let is_sorted = self.is_sorted_flag();
220
s.set_sorted_flag(is_sorted)
221
}
222
s
223
}),
224
}
225
}
226
}
227
228
impl<T> ChunkCast for ChunkedArray<T>
229
where
230
T: PolarsNumericType,
231
{
232
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
233
self.cast_impl(dtype, options)
234
}
235
236
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
237
match dtype {
238
// LEGACY
239
// TODO @ cat-rework: remove after exposing to/from physical functions.
240
#[cfg(feature = "dtype-categorical")]
241
DataType::Categorical(cats, _mapping) => {
242
polars_ensure!(self.dtype() == &cats.physical().dtype(), ComputeError: "cannot cast numeric types to 'Categorical'");
243
with_match_categorical_physical_type!(cats.physical(), |$C| {
244
// SAFETY: we are guarded by the type system.
245
type PhysCa = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
246
let ca = unsafe { &*(self as *const ChunkedArray<T> as *const PhysCa) };
247
Ok(CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(ca.clone(), dtype.clone())
248
.into_series())
249
})
250
},
251
252
// LEGACY
253
// TODO @ cat-rework: remove after exposing to/from physical functions.
254
#[cfg(feature = "dtype-categorical")]
255
DataType::Enum(fcats, _mapping) => {
256
polars_ensure!(self.dtype() == &fcats.physical().dtype(), ComputeError: "cannot cast numeric types to 'Enum'");
257
with_match_categorical_physical_type!(fcats.physical(), |$C| {
258
// SAFETY: we are guarded by the type system.
259
type PhysCa = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
260
let ca = unsafe { &*(self as *const ChunkedArray<T> as *const PhysCa) };
261
Ok(CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(ca.clone(), dtype.clone()).into_series())
262
})
263
},
264
265
_ => self.cast_impl(dtype, CastOptions::Overflowing),
266
}
267
}
268
}
269
270
impl ChunkCast for StringChunked {
271
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
272
match dtype {
273
#[cfg(feature = "dtype-categorical")]
274
DataType::Categorical(cats, _mapping) => {
275
with_match_categorical_physical_type!(cats.physical(), |$C| {
276
Ok(CategoricalChunked::<$C>::from_str_iter(self.name().clone(), dtype.clone(), self.iter())?
277
.into_series())
278
})
279
},
280
#[cfg(feature = "dtype-categorical")]
281
DataType::Enum(fcats, _mapping) => {
282
let ret = with_match_categorical_physical_type!(fcats.physical(), |$C| {
283
CategoricalChunked::<$C>::from_str_iter(self.name().clone(), dtype.clone(), self.iter())?
284
.into_series()
285
});
286
287
if options.is_strict() && self.null_count() != ret.null_count() {
288
handle_casting_failures(&self.clone().into_series(), &ret)?;
289
}
290
291
Ok(ret)
292
},
293
#[cfg(feature = "dtype-struct")]
294
DataType::Struct(fields) => {
295
cast_single_to_struct(self.name().clone(), &self.chunks, fields, options)
296
},
297
#[cfg(feature = "dtype-decimal")]
298
DataType::Decimal(precision, scale) => match (precision, scale) {
299
(precision, Some(scale)) => {
300
let chunks = self.downcast_iter().map(|arr| {
301
polars_compute::cast::binview_to_decimal(
302
&arr.to_binview(),
303
*precision,
304
*scale,
305
)
306
.to(ArrowDataType::Int128)
307
});
308
Ok(Int128Chunked::from_chunk_iter(self.name().clone(), chunks)
309
.into_decimal_unchecked(*precision, *scale)
310
.into_series())
311
},
312
(None, None) => self.to_decimal_infer(100),
313
_ => {
314
polars_bail!(ComputeError: "expected 'precision' or 'scale' when casting to Decimal")
315
},
316
},
317
#[cfg(feature = "dtype-date")]
318
DataType::Date => {
319
let result = cast_chunks(&self.chunks, dtype, options)?;
320
let out = Series::try_from((self.name().clone(), result))?;
321
Ok(out)
322
},
323
#[cfg(feature = "dtype-datetime")]
324
DataType::Datetime(time_unit, time_zone) => match time_zone {
325
#[cfg(feature = "timezones")]
326
Some(time_zone) => {
327
TimeZone::validate_time_zone(time_zone)?;
328
let result = cast_chunks(
329
&self.chunks,
330
&Datetime(time_unit.to_owned(), Some(time_zone.clone())),
331
options,
332
)?;
333
Series::try_from((self.name().clone(), result))
334
},
335
_ => {
336
let result =
337
cast_chunks(&self.chunks, &Datetime(time_unit.to_owned(), None), options)?;
338
Series::try_from((self.name().clone(), result))
339
},
340
},
341
_ => cast_impl(self.name().clone(), &self.chunks, dtype, options),
342
}
343
}
344
345
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
346
self.cast_with_options(dtype, CastOptions::Overflowing)
347
}
348
}
349
350
impl BinaryChunked {
351
/// # Safety
352
/// String is not validated
353
pub unsafe fn to_string_unchecked(&self) -> StringChunked {
354
let chunks = self
355
.downcast_iter()
356
.map(|arr| unsafe { arr.to_utf8view_unchecked() }.boxed())
357
.collect();
358
let field = Arc::new(Field::new(self.name().clone(), DataType::String));
359
360
let mut ca = StringChunked::new_with_compute_len(field, chunks);
361
362
use StatisticsFlags as F;
363
ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
364
ca
365
}
366
}
367
368
impl StringChunked {
369
pub fn as_binary(&self) -> BinaryChunked {
370
let chunks = self
371
.downcast_iter()
372
.map(|arr| arr.to_binview().boxed())
373
.collect();
374
let field = Arc::new(Field::new(self.name().clone(), DataType::Binary));
375
376
let mut ca = BinaryChunked::new_with_compute_len(field, chunks);
377
378
use StatisticsFlags as F;
379
ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
380
ca
381
}
382
}
383
384
impl ChunkCast for BinaryChunked {
385
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
386
match dtype {
387
#[cfg(feature = "dtype-struct")]
388
DataType::Struct(fields) => {
389
cast_single_to_struct(self.name().clone(), &self.chunks, fields, options)
390
},
391
_ => cast_impl(self.name().clone(), &self.chunks, dtype, options),
392
}
393
}
394
395
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
396
match dtype {
397
DataType::String => unsafe { Ok(self.to_string_unchecked().into_series()) },
398
_ => self.cast_with_options(dtype, CastOptions::Overflowing),
399
}
400
}
401
}
402
403
impl ChunkCast for BinaryOffsetChunked {
404
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
405
match dtype {
406
#[cfg(feature = "dtype-struct")]
407
DataType::Struct(fields) => {
408
cast_single_to_struct(self.name().clone(), &self.chunks, fields, options)
409
},
410
_ => cast_impl(self.name().clone(), &self.chunks, dtype, options),
411
}
412
}
413
414
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
415
self.cast_with_options(dtype, CastOptions::Overflowing)
416
}
417
}
418
419
impl ChunkCast for BooleanChunked {
420
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
421
match dtype {
422
#[cfg(feature = "dtype-struct")]
423
DataType::Struct(fields) => {
424
cast_single_to_struct(self.name().clone(), &self.chunks, fields, options)
425
},
426
#[cfg(feature = "dtype-categorical")]
427
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
428
polars_bail!(InvalidOperation: "cannot cast Boolean to Categorical");
429
},
430
_ => cast_impl(self.name().clone(), &self.chunks, dtype, options),
431
}
432
}
433
434
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
435
self.cast_with_options(dtype, CastOptions::Overflowing)
436
}
437
}
438
439
/// We cannot cast anything to or from List/LargeList
440
/// So this implementation casts the inner type
441
impl ChunkCast for ListChunked {
442
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
443
let ca = self
444
.trim_lists_to_normalized_offsets()
445
.map_or(Cow::Borrowed(self), Cow::Owned);
446
let ca = ca.propagate_nulls().map_or(ca, Cow::Owned);
447
448
use DataType::*;
449
match dtype {
450
List(child_type) => {
451
match (ca.inner_dtype(), &**child_type) {
452
(old, new) if old == new => Ok(ca.into_owned().into_series()),
453
// TODO @ cat-rework: can we implement this now?
454
#[cfg(feature = "dtype-categorical")]
455
(dt, Categorical(_, _) | Enum(_, _))
456
if !matches!(dt, Categorical(_, _) | Enum(_, _) | String | Null) =>
457
{
458
polars_bail!(InvalidOperation: "cannot cast List inner type: '{:?}' to Categorical", dt)
459
},
460
_ => {
461
// ensure the inner logical type bubbles up
462
let (arr, child_type) = cast_list(ca.as_ref(), child_type, options)?;
463
// SAFETY: we just cast so the dtype matches.
464
// we must take this path to correct for physical types.
465
unsafe {
466
Ok(Series::from_chunks_and_dtype_unchecked(
467
ca.name().clone(),
468
vec![arr],
469
&List(Box::new(child_type)),
470
))
471
}
472
},
473
}
474
},
475
#[cfg(feature = "dtype-array")]
476
Array(child_type, width) => {
477
let physical_type = dtype.to_physical();
478
479
// TODO @ cat-rework: can we implement this now?
480
// TODO!: properly implement this recursively.
481
#[cfg(feature = "dtype-categorical")]
482
polars_ensure!(!matches!(&**child_type, Categorical(_, _)), InvalidOperation: "array of categorical is not yet supported");
483
484
// cast to the physical type to avoid logical chunks.
485
let chunks = cast_chunks(ca.chunks(), &physical_type, options)?;
486
// SAFETY: we just cast so the dtype matches.
487
// we must take this path to correct for physical types.
488
unsafe {
489
Ok(Series::from_chunks_and_dtype_unchecked(
490
ca.name().clone(),
491
chunks,
492
&Array(child_type.clone(), *width),
493
))
494
}
495
},
496
#[cfg(feature = "dtype-u8")]
497
Binary => {
498
polars_ensure!(
499
matches!(self.inner_dtype(), UInt8),
500
InvalidOperation: "cannot cast List type (inner: '{:?}', to: '{:?}')",
501
self.inner_dtype(),
502
dtype,
503
);
504
let chunks = cast_chunks(self.chunks(), &DataType::Binary, options)?;
505
506
// SAFETY: we just cast so the dtype matches.
507
unsafe {
508
Ok(Series::from_chunks_and_dtype_unchecked(
509
self.name().clone(),
510
chunks,
511
&DataType::Binary,
512
))
513
}
514
},
515
_ => {
516
polars_bail!(
517
InvalidOperation: "cannot cast List type (inner: '{:?}', to: '{:?}')",
518
ca.inner_dtype(),
519
dtype,
520
)
521
},
522
}
523
}
524
525
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
526
use DataType::*;
527
match dtype {
528
List(child_type) => cast_list_unchecked(self, child_type),
529
_ => self.cast_with_options(dtype, CastOptions::Overflowing),
530
}
531
}
532
}
533
534
/// We cannot cast anything to or from List/LargeList
/// So this implementation casts the inner type
///
/// Supported targets are another Array of the same width (inner values are cast) and
/// List (chunks are cast to the physical list type).
#[cfg(feature = "dtype-array")]
impl ChunkCast for ArrayChunked {
    fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
        // Work on a normalized copy when trimming / null propagation produce one,
        // otherwise keep borrowing `self`.
        let trimmed = self
            .trim_lists_to_normalized_offsets()
            .map_or(Cow::Borrowed(self), Cow::Owned);
        let ca = trimmed.propagate_nulls().map_or(trimmed, Cow::Owned);

        use DataType::*;
        match dtype {
            Array(child_type, width) => {
                polars_ensure!(
                    *width == ca.width(),
                    InvalidOperation: "cannot cast Array to a different width"
                );

                match (ca.inner_dtype(), &**child_type) {
                    // Inner dtype already matches: nothing to cast.
                    (old, new) if old == new => Ok(ca.into_owned().into_series()),
                    // TODO @ cat-rework: can we implement this now?
                    #[cfg(feature = "dtype-categorical")]
                    (dt, Categorical(_, _) | Enum(_, _)) if !matches!(dt, String) => {
                        polars_bail!(InvalidOperation: "cannot cast Array inner type: '{:?}' to dtype: {:?}", dt, child_type)
                    },
                    _ => {
                        // ensure the inner logical type bubbles up
                        let (fixed_arr, inner_dtype) =
                            cast_fixed_size_list(ca.as_ref(), child_type, options)?;
                        let out_dtype = Array(Box::new(inner_dtype), *width);
                        // SAFETY: we just cast so the dtype matches.
                        // we must take this path to correct for physical types.
                        unsafe {
                            Ok(Series::from_chunks_and_dtype_unchecked(
                                ca.name().clone(),
                                vec![fixed_arr],
                                &out_dtype,
                            ))
                        }
                    },
                }
            },
            List(child_type) => {
                // cast to the physical type to avoid logical chunks.
                let physical_type = dtype.to_physical();
                let list_chunks = cast_chunks(ca.chunks(), &physical_type, options)?;
                let out_dtype = List(child_type.clone());
                // SAFETY: we just cast so the dtype matches.
                // we must take this path to correct for physical types.
                unsafe {
                    Ok(Series::from_chunks_and_dtype_unchecked(
                        ca.name().clone(),
                        list_chunks,
                        &out_dtype,
                    ))
                }
            },
            _ => {
                polars_bail!(
                    InvalidOperation: "cannot cast Array type (inner: '{:?}', to: '{:?}')",
                    ca.inner_dtype(),
                    dtype,
                )
            },
        }
    }

    /// Same as [`ChunkCast::cast_with_options`] called with [`CastOptions::Overflowing`].
    unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
        self.cast_with_options(dtype, CastOptions::Overflowing)
    }
}
603
604
// Returns inner data type. This is needed because a cast can instantiate the dtype inner
605
// values for instance with categoricals
606
fn cast_list(
607
ca: &ListChunked,
608
child_type: &DataType,
609
options: CastOptions,
610
) -> PolarsResult<(ArrayRef, DataType)> {
611
// We still rechunk because we must bubble up a single data-type
612
// TODO!: consider a version that works on chunks and merges the data-types and arrays.
613
let ca = ca.rechunk();
614
let arr = ca.downcast_as_array();
615
// SAFETY: inner dtype is passed correctly
616
let s = unsafe {
617
Series::from_chunks_and_dtype_unchecked(
618
PlSmallStr::EMPTY,
619
vec![arr.values().clone()],
620
ca.inner_dtype(),
621
)
622
};
623
let new_inner = s.cast_with_options(child_type, options)?;
624
625
let inner_dtype = new_inner.dtype().clone();
626
debug_assert_eq!(&inner_dtype, child_type);
627
628
let new_values = new_inner.array_ref(0).clone();
629
630
let dtype = ListArray::<i64>::default_datatype(new_values.dtype().clone());
631
let new_arr = ListArray::<i64>::new(
632
dtype,
633
arr.offsets().clone(),
634
new_values,
635
arr.validity().cloned(),
636
);
637
Ok((new_arr.boxed(), inner_dtype))
638
}
639
640
unsafe fn cast_list_unchecked(ca: &ListChunked, child_type: &DataType) -> PolarsResult<Series> {
641
// TODO! add chunked, but this must correct for list offsets.
642
let ca = ca.rechunk();
643
let arr = ca.downcast_as_array();
644
// SAFETY: inner dtype is passed correctly
645
let s = unsafe {
646
Series::from_chunks_and_dtype_unchecked(
647
PlSmallStr::EMPTY,
648
vec![arr.values().clone()],
649
ca.inner_dtype(),
650
)
651
};
652
let new_inner = s.cast_unchecked(child_type)?;
653
let new_values = new_inner.array_ref(0).clone();
654
655
let dtype = ListArray::<i64>::default_datatype(new_values.dtype().clone());
656
let new_arr = ListArray::<i64>::new(
657
dtype,
658
arr.offsets().clone(),
659
new_values,
660
arr.validity().cloned(),
661
);
662
Ok(ListChunked::from_chunks_and_dtype_unchecked(
663
ca.name().clone(),
664
vec![Box::new(new_arr)],
665
DataType::List(Box::new(child_type.clone())),
666
)
667
.into_series())
668
}
669
670
// Casts the values of a fixed-size list array to `child_type` and rebuilds the array
// around them with the same width, length and validity.
//
// Returns the new array together with the inner data type. The dtype is returned
// because a cast can instantiate the dtype of the inner values, for instance with
// categoricals.
#[cfg(feature = "dtype-array")]
fn cast_fixed_size_list(
    ca: &ArrayChunked,
    child_type: &DataType,
    options: CastOptions,
) -> PolarsResult<(ArrayRef, DataType)> {
    let rechunked = ca.rechunk();
    let fixed_arr = rechunked.downcast_as_array();

    // SAFETY: inner dtype is passed correctly
    let inner = unsafe {
        Series::from_chunks_and_dtype_unchecked(
            PlSmallStr::EMPTY,
            vec![fixed_arr.values().clone()],
            rechunked.inner_dtype(),
        )
    };
    let casted_inner = inner.cast_with_options(child_type, options)?;

    let inner_dtype = casted_inner.dtype().clone();
    debug_assert_eq!(&inner_dtype, child_type);

    let values = casted_inner.array_ref(0).clone();
    let arrow_dtype =
        FixedSizeListArray::default_datatype(values.dtype().clone(), rechunked.width());
    let rebuilt = FixedSizeListArray::new(
        arrow_dtype,
        rechunked.len(),
        values,
        fixed_arr.validity().cloned(),
    );
    Ok((Box::new(rebuilt), inner_dtype))
}
699
700
#[cfg(test)]
mod test {
    use crate::chunked_array::cast::CastOptions;
    use crate::prelude::*;

    /// Casting `List(Int32)` to `List(Float64)` must yield the requested list dtype.
    #[test]
    fn test_cast_list() -> PolarsResult<()> {
        let mut builder = ListPrimitiveChunkedBuilder::<Int32Type>::new(
            PlSmallStr::from_static("a"),
            10,
            10,
            DataType::Int32,
        );
        for _ in 0..2 {
            builder.append_opt_slice(Some(&[1i32, 2, 3]));
        }
        let ca = builder.finish();

        let target = DataType::List(DataType::Float64.into());
        let casted = ca.cast_with_options(&target, CastOptions::Strict)?;

        assert_eq!(casted.dtype(), &target);
        Ok(())
    }

    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_cast_noop() {
        // check if we can cast categorical twice without panic
        let cats = Categories::global();
        let ca = StringChunked::new(PlSmallStr::from_static("foo"), &["bar", "ham"]);

        let first = ca
            .cast_with_options(
                &DataType::from_categories(cats.clone()),
                CastOptions::Strict,
            )
            .unwrap();
        let second = first.cast(&DataType::from_categories(cats)).unwrap();

        assert!(matches!(second.dtype(), &DataType::Categorical(_, _)))
    }
}
742
743