// GitHub Repository: pola-rs/polars
// Path: blob/main/crates/polars-core/src/chunked_array/cast.rs
1
//! Implementations of the ChunkCast Trait.
2
3
use std::borrow::Cow;
4
5
use polars_compute::cast::CastOptionsImpl;
6
#[cfg(feature = "serde-lazy")]
7
use serde::{Deserialize, Serialize};
8
9
use super::flags::StatisticsFlags;
10
#[cfg(feature = "dtype-datetime")]
11
use crate::prelude::DataType::Datetime;
12
use crate::prelude::*;
13
use crate::utils::{handle_array_casting_failures, handle_casting_failures};
14
15
/// Selects how a cast handles values that do not fit the target type.
///
/// The default is [`CastOptions::Strict`].
#[derive(Copy, Clone, Debug, Default, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
#[repr(u8)]
pub enum CastOptions {
    /// Raises on overflow
    #[default]
    Strict,
    /// Overflow is replaced with null
    NonStrict,
    /// Allows wrapping overflow
    Overflowing,
}
28
29
impl CastOptions {
30
pub fn is_strict(&self) -> bool {
31
matches!(self, CastOptions::Strict)
32
}
33
}
34
35
impl From<CastOptions> for CastOptionsImpl {
36
fn from(value: CastOptions) -> Self {
37
let wrapped = match value {
38
CastOptions::Strict | CastOptions::NonStrict => false,
39
CastOptions::Overflowing => true,
40
};
41
CastOptionsImpl {
42
wrapped,
43
partial: false,
44
}
45
}
46
}
47
48
pub(crate) fn cast_chunks(
49
chunks: &[ArrayRef],
50
dtype: &DataType,
51
options: CastOptions,
52
) -> PolarsResult<Vec<ArrayRef>> {
53
let check_nulls = matches!(options, CastOptions::Strict);
54
let options = options.into();
55
56
let arrow_dtype = dtype.try_to_arrow(CompatLevel::newest())?;
57
chunks
58
.iter()
59
.map(|arr| {
60
let out = polars_compute::cast::cast(arr.as_ref(), &arrow_dtype, options);
61
if check_nulls {
62
out.and_then(|new| {
63
if arr.null_count() != new.null_count() {
64
handle_array_casting_failures(&**arr, &*new)?;
65
}
66
Ok(new)
67
})
68
} else {
69
out
70
}
71
})
72
.collect::<PolarsResult<Vec<_>>>()
73
}
74
75
fn cast_impl_inner(
76
name: PlSmallStr,
77
chunks: &[ArrayRef],
78
dtype: &DataType,
79
options: CastOptions,
80
) -> PolarsResult<Series> {
81
let chunks = match dtype {
82
#[cfg(feature = "dtype-decimal")]
83
DataType::Decimal(_, _) => {
84
let mut chunks = cast_chunks(chunks, dtype, options)?;
85
// @NOTE: We cannot cast here as that will lower the scale.
86
for chunk in chunks.iter_mut() {
87
*chunk = std::mem::take(
88
chunk
89
.as_any_mut()
90
.downcast_mut::<PrimitiveArray<i128>>()
91
.unwrap(),
92
)
93
.to(ArrowDataType::Int128)
94
.to_boxed();
95
}
96
chunks
97
},
98
_ => cast_chunks(chunks, &dtype.to_physical(), options)?,
99
};
100
101
let out = Series::try_from((name, chunks))?;
102
use DataType::*;
103
let out = match dtype {
104
Date => out.into_date(),
105
Datetime(tu, tz) => match tz {
106
#[cfg(feature = "timezones")]
107
Some(tz) => {
108
TimeZone::validate_time_zone(tz)?;
109
out.into_datetime(*tu, Some(tz.clone()))
110
},
111
_ => out.into_datetime(*tu, None),
112
},
113
Duration(tu) => out.into_duration(*tu),
114
#[cfg(feature = "dtype-time")]
115
Time => out.into_time(),
116
#[cfg(feature = "dtype-decimal")]
117
Decimal(precision, scale) => out.into_decimal(*precision, *scale)?,
118
_ => out,
119
};
120
121
Ok(out)
122
}
123
124
fn cast_impl(
125
name: PlSmallStr,
126
chunks: &[ArrayRef],
127
dtype: &DataType,
128
options: CastOptions,
129
) -> PolarsResult<Series> {
130
cast_impl_inner(name, chunks, dtype, options)
131
}
132
133
/// Casts `chunks` to the dtype of the single struct field and wraps the
/// result in a one-field struct series named `name`.
///
/// # Errors
/// Returns `InvalidOperation` unless `fields` holds exactly one field, and
/// propagates errors from the inner cast and from building the struct.
#[cfg(feature = "dtype-struct")]
fn cast_single_to_struct(
    name: PlSmallStr,
    chunks: &[ArrayRef],
    fields: &[Field],
    options: CastOptions,
) -> PolarsResult<Series> {
    // Exactly one field is accepted, so there are never further fields to
    // fill with nulls.
    let [fld] = fields else {
        polars_bail!(InvalidOperation: "must specify one field in the struct");
    };

    // cast to first field dtype
    let s = cast_impl_inner(fld.name.clone(), chunks, &fld.dtype, options)?;
    let length = s.len();
    let new_fields = [s];

    StructChunked::from_series(name, length, new_fields.iter()).map(|ca| ca.into_series())
}
155
156
impl<T> ChunkedArray<T>
157
where
158
T: PolarsNumericType,
159
{
160
fn cast_impl(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
161
if self.dtype() == dtype {
162
// SAFETY: chunks are correct dtype
163
let mut out = unsafe {
164
Series::from_chunks_and_dtype_unchecked(
165
self.name().clone(),
166
self.chunks.clone(),
167
dtype,
168
)
169
};
170
out.set_sorted_flag(self.is_sorted_flag());
171
return Ok(out);
172
}
173
match dtype {
174
// LEGACY
175
// TODO @ cat-rework: remove after exposing to/from physical functions.
176
#[cfg(feature = "dtype-categorical")]
177
DataType::Categorical(cats, _mapping) => {
178
let s = self.cast_with_options(&cats.physical().dtype(), options)?;
179
with_match_categorical_physical_type!(cats.physical(), |$C| {
180
// SAFETY: we are guarded by the type system.
181
type PhysCa = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
182
let ca: &PhysCa = s.as_ref().as_ref();
183
Ok(CategoricalChunked::<$C>::from_cats_and_dtype(ca.clone(), dtype.clone())
184
.into_series())
185
})
186
},
187
188
// LEGACY
189
// TODO @ cat-rework: remove after exposing to/from physical functions.
190
#[cfg(feature = "dtype-categorical")]
191
DataType::Enum(fcats, _mapping) => {
192
let s = self.cast_with_options(&fcats.physical().dtype(), options)?;
193
with_match_categorical_physical_type!(fcats.physical(), |$C| {
194
// SAFETY: we are guarded by the type system.
195
type PhysCa = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
196
let ca: &PhysCa = s.as_ref().as_ref();
197
Ok(CategoricalChunked::<$C>::from_cats_and_dtype(ca.clone(), dtype.clone()).into_series())
198
})
199
},
200
201
#[cfg(feature = "dtype-struct")]
202
DataType::Struct(fields) => {
203
cast_single_to_struct(self.name().clone(), &self.chunks, fields, options)
204
},
205
_ => cast_impl_inner(self.name().clone(), &self.chunks, dtype, options).map(|mut s| {
206
// maintain sorted if data types
207
// - remain signed
208
// - unsigned -> signed
209
// this may still fail with overflow?
210
let to_signed = dtype.is_signed_integer();
211
let unsigned2unsigned =
212
self.dtype().is_unsigned_integer() && dtype.is_unsigned_integer();
213
let allowed = to_signed || unsigned2unsigned;
214
215
if (allowed)
216
&& (s.null_count() == self.null_count())
217
// physical to logicals
218
|| (self.dtype().to_physical() == dtype.to_physical())
219
{
220
let is_sorted = self.is_sorted_flag();
221
s.set_sorted_flag(is_sorted)
222
}
223
s
224
}),
225
}
226
}
227
}
228
229
impl<T> ChunkCast for ChunkedArray<T>
230
where
231
T: PolarsNumericType,
232
{
233
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
234
self.cast_impl(dtype, options)
235
}
236
237
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
238
match dtype {
239
// LEGACY
240
// TODO @ cat-rework: remove after exposing to/from physical functions.
241
#[cfg(feature = "dtype-categorical")]
242
DataType::Categorical(cats, _mapping) => {
243
polars_ensure!(self.dtype() == &cats.physical().dtype(), ComputeError: "cannot cast numeric types to 'Categorical'");
244
with_match_categorical_physical_type!(cats.physical(), |$C| {
245
// SAFETY: we are guarded by the type system.
246
type PhysCa = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
247
let ca = unsafe { &*(self as *const ChunkedArray<T> as *const PhysCa) };
248
Ok(CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(ca.clone(), dtype.clone())
249
.into_series())
250
})
251
},
252
253
// LEGACY
254
// TODO @ cat-rework: remove after exposing to/from physical functions.
255
#[cfg(feature = "dtype-categorical")]
256
DataType::Enum(fcats, _mapping) => {
257
polars_ensure!(self.dtype() == &fcats.physical().dtype(), ComputeError: "cannot cast numeric types to 'Enum'");
258
with_match_categorical_physical_type!(fcats.physical(), |$C| {
259
// SAFETY: we are guarded by the type system.
260
type PhysCa = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
261
let ca = unsafe { &*(self as *const ChunkedArray<T> as *const PhysCa) };
262
Ok(CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(ca.clone(), dtype.clone()).into_series())
263
})
264
},
265
266
_ => self.cast_impl(dtype, CastOptions::Overflowing),
267
}
268
}
269
}
270
271
impl ChunkCast for StringChunked {
272
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
273
match dtype {
274
#[cfg(feature = "dtype-categorical")]
275
DataType::Categorical(cats, _mapping) => {
276
with_match_categorical_physical_type!(cats.physical(), |$C| {
277
Ok(CategoricalChunked::<$C>::from_str_iter(self.name().clone(), dtype.clone(), self.iter())?
278
.into_series())
279
})
280
},
281
#[cfg(feature = "dtype-categorical")]
282
DataType::Enum(fcats, _mapping) => {
283
let ret = with_match_categorical_physical_type!(fcats.physical(), |$C| {
284
CategoricalChunked::<$C>::from_str_iter(self.name().clone(), dtype.clone(), self.iter())?
285
.into_series()
286
});
287
288
if options.is_strict() && self.null_count() != ret.null_count() {
289
handle_casting_failures(&self.clone().into_series(), &ret)?;
290
}
291
292
Ok(ret)
293
},
294
#[cfg(feature = "dtype-struct")]
295
DataType::Struct(fields) => {
296
cast_single_to_struct(self.name().clone(), &self.chunks, fields, options)
297
},
298
#[cfg(feature = "dtype-decimal")]
299
DataType::Decimal(precision, scale) => {
300
let chunks = self.downcast_iter().map(|arr| {
301
polars_compute::cast::binview_to_decimal(&arr.to_binview(), *precision, *scale)
302
.to(ArrowDataType::Int128)
303
});
304
let ca = Int128Chunked::from_chunk_iter(self.name().clone(), chunks);
305
Ok(ca.into_decimal_unchecked(*precision, *scale).into_series())
306
},
307
#[cfg(feature = "dtype-date")]
308
DataType::Date => {
309
let result = cast_chunks(&self.chunks, dtype, options)?;
310
let out = Series::try_from((self.name().clone(), result))?;
311
Ok(out)
312
},
313
#[cfg(feature = "dtype-datetime")]
314
DataType::Datetime(time_unit, time_zone) => match time_zone {
315
#[cfg(feature = "timezones")]
316
Some(time_zone) => {
317
TimeZone::validate_time_zone(time_zone)?;
318
let result = cast_chunks(
319
&self.chunks,
320
&Datetime(time_unit.to_owned(), Some(time_zone.clone())),
321
options,
322
)?;
323
Series::try_from((self.name().clone(), result))
324
},
325
_ => {
326
let result =
327
cast_chunks(&self.chunks, &Datetime(time_unit.to_owned(), None), options)?;
328
Series::try_from((self.name().clone(), result))
329
},
330
},
331
_ => cast_impl(self.name().clone(), &self.chunks, dtype, options),
332
}
333
}
334
335
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
336
self.cast_with_options(dtype, CastOptions::Overflowing)
337
}
338
}
339
340
impl BinaryChunked {
341
/// # Safety
342
/// String is not validated
343
pub unsafe fn to_string_unchecked(&self) -> StringChunked {
344
let chunks = self
345
.downcast_iter()
346
.map(|arr| unsafe { arr.to_utf8view_unchecked() }.boxed())
347
.collect();
348
let field = Arc::new(Field::new(self.name().clone(), DataType::String));
349
350
let mut ca = StringChunked::new_with_compute_len(field, chunks);
351
352
use StatisticsFlags as F;
353
ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
354
ca
355
}
356
}
357
358
impl StringChunked {
359
pub fn as_binary(&self) -> BinaryChunked {
360
let chunks = self
361
.downcast_iter()
362
.map(|arr| arr.to_binview().boxed())
363
.collect();
364
let field = Arc::new(Field::new(self.name().clone(), DataType::Binary));
365
366
let mut ca = BinaryChunked::new_with_compute_len(field, chunks);
367
368
use StatisticsFlags as F;
369
ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
370
ca
371
}
372
}
373
374
impl ChunkCast for BinaryChunked {
375
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
376
match dtype {
377
#[cfg(feature = "dtype-struct")]
378
DataType::Struct(fields) => {
379
cast_single_to_struct(self.name().clone(), &self.chunks, fields, options)
380
},
381
_ => cast_impl(self.name().clone(), &self.chunks, dtype, options),
382
}
383
}
384
385
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
386
match dtype {
387
DataType::String => unsafe { Ok(self.to_string_unchecked().into_series()) },
388
_ => self.cast_with_options(dtype, CastOptions::Overflowing),
389
}
390
}
391
}
392
393
impl ChunkCast for BinaryOffsetChunked {
394
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
395
match dtype {
396
#[cfg(feature = "dtype-struct")]
397
DataType::Struct(fields) => {
398
cast_single_to_struct(self.name().clone(), &self.chunks, fields, options)
399
},
400
_ => cast_impl(self.name().clone(), &self.chunks, dtype, options),
401
}
402
}
403
404
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
405
self.cast_with_options(dtype, CastOptions::Overflowing)
406
}
407
}
408
409
impl ChunkCast for BooleanChunked {
410
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
411
match dtype {
412
#[cfg(feature = "dtype-struct")]
413
DataType::Struct(fields) => {
414
cast_single_to_struct(self.name().clone(), &self.chunks, fields, options)
415
},
416
#[cfg(feature = "dtype-categorical")]
417
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
418
polars_bail!(InvalidOperation: "cannot cast Boolean to Categorical");
419
},
420
_ => cast_impl(self.name().clone(), &self.chunks, dtype, options),
421
}
422
}
423
424
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
425
self.cast_with_options(dtype, CastOptions::Overflowing)
426
}
427
}
428
429
/// We cannot cast anything to or from List/LargeList
430
/// So this implementation casts the inner type
431
impl ChunkCast for ListChunked {
432
fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
433
let ca = self
434
.trim_lists_to_normalized_offsets()
435
.map_or(Cow::Borrowed(self), Cow::Owned);
436
let ca = ca.propagate_nulls().map_or(ca, Cow::Owned);
437
438
use DataType::*;
439
match dtype {
440
List(child_type) => {
441
match (ca.inner_dtype(), &**child_type) {
442
(old, new) if old == new => Ok(ca.into_owned().into_series()),
443
// TODO @ cat-rework: can we implement this now?
444
#[cfg(feature = "dtype-categorical")]
445
(dt, Categorical(_, _) | Enum(_, _))
446
if !matches!(dt, Categorical(_, _) | Enum(_, _) | String | Null) =>
447
{
448
polars_bail!(InvalidOperation: "cannot cast List inner type: '{:?}' to Categorical", dt)
449
},
450
_ => {
451
// ensure the inner logical type bubbles up
452
let (arr, child_type) = cast_list(ca.as_ref(), child_type, options)?;
453
// SAFETY: we just cast so the dtype matches.
454
// we must take this path to correct for physical types.
455
unsafe {
456
Ok(Series::from_chunks_and_dtype_unchecked(
457
ca.name().clone(),
458
vec![arr],
459
&List(Box::new(child_type)),
460
))
461
}
462
},
463
}
464
},
465
#[cfg(feature = "dtype-array")]
466
Array(child_type, width) => {
467
let physical_type = dtype.to_physical();
468
469
// cast to the physical type to avoid logical chunks.
470
let chunks = cast_chunks(ca.chunks(), &physical_type, options)?;
471
// SAFETY: we just cast so the dtype matches.
472
// we must take this path to correct for physical types.
473
unsafe {
474
Ok(Series::from_chunks_and_dtype_unchecked(
475
ca.name().clone(),
476
chunks,
477
&Array(child_type.clone(), *width),
478
))
479
}
480
},
481
#[cfg(feature = "dtype-u8")]
482
Binary => {
483
polars_ensure!(
484
matches!(self.inner_dtype(), UInt8),
485
InvalidOperation: "cannot cast List type (inner: '{:?}', to: '{:?}')",
486
self.inner_dtype(),
487
dtype,
488
);
489
let chunks = cast_chunks(self.chunks(), &DataType::Binary, options)?;
490
491
// SAFETY: we just cast so the dtype matches.
492
unsafe {
493
Ok(Series::from_chunks_and_dtype_unchecked(
494
self.name().clone(),
495
chunks,
496
&DataType::Binary,
497
))
498
}
499
},
500
_ => {
501
polars_bail!(
502
InvalidOperation: "cannot cast List type (inner: '{:?}', to: '{:?}')",
503
ca.inner_dtype(),
504
dtype,
505
)
506
},
507
}
508
}
509
510
unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
511
use DataType::*;
512
match dtype {
513
List(child_type) => cast_list_unchecked(self, child_type),
514
_ => self.cast_with_options(dtype, CastOptions::Overflowing),
515
}
516
}
517
}
518
519
/// Casting for fixed-size `Array` columns.
///
/// `Array -> Array` casts the inner values and requires an identical width;
/// `Array -> List` casts the chunks to the physical list dtype. Every other
/// target dtype is rejected.
#[cfg(feature = "dtype-array")]
impl ChunkCast for ArrayChunked {
    /// Casts a fixed-size array column to `dtype`.
    ///
    /// # Errors
    /// Returns `InvalidOperation` when the target width differs, when the
    /// inner dtype cannot be cast to a categorical target, or when `dtype` is
    /// neither `Array` nor `List`; propagates errors from the inner casts.
    fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series> {
        // Work on a normalised copy when one is produced; otherwise borrow `self`.
        let ca = self
            .trim_lists_to_normalized_offsets()
            .map_or(Cow::Borrowed(self), Cow::Owned);
        let ca = ca.propagate_nulls().map_or(ca, Cow::Owned);

        use DataType::*;
        match dtype {
            Array(child_type, width) => {
                polars_ensure!(
                    *width == ca.width(),
                    InvalidOperation: "cannot cast Array to a different width"
                );

                match (ca.inner_dtype(), &**child_type) {
                    (old, new) if old == new => Ok(ca.into_owned().into_series()),
                    // TODO @ cat-rework: can we implement this now?
                    #[cfg(feature = "dtype-categorical")]
                    (dt, Categorical(_, _) | Enum(_, _)) if !matches!(dt, String) => {
                        polars_bail!(InvalidOperation: "cannot cast Array inner type: '{:?}' to dtype: {:?}", dt, child_type)
                    },
                    _ => {
                        // ensure the inner logical type bubbles up
                        let (arr, child_type) =
                            cast_fixed_size_list(ca.as_ref(), child_type, options)?;
                        // SAFETY: we just cast so the dtype matches.
                        // we must take this path to correct for physical types.
                        unsafe {
                            Ok(Series::from_chunks_and_dtype_unchecked(
                                ca.name().clone(),
                                vec![arr],
                                &Array(Box::new(child_type), *width),
                            ))
                        }
                    },
                }
            },
            List(child_type) => {
                let physical_type = dtype.to_physical();
                // cast to the physical type to avoid logical chunks.
                let chunks = cast_chunks(ca.chunks(), &physical_type, options)?;
                // SAFETY: we just cast so the dtype matches.
                // we must take this path to correct for physical types.
                unsafe {
                    Ok(Series::from_chunks_and_dtype_unchecked(
                        ca.name().clone(),
                        chunks,
                        &List(child_type.clone()),
                    ))
                }
            },
            _ => {
                polars_bail!(
                    InvalidOperation: "cannot cast Array type (inner: '{:?}', to: '{:?}')",
                    ca.inner_dtype(),
                    dtype,
                )
            },
        }
    }

    /// Forwards to [`Self::cast_with_options`] with [`CastOptions::Overflowing`].
    unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series> {
        self.cast_with_options(dtype, CastOptions::Overflowing)
    }
}
588
589
// Returns inner data type. This is needed because a cast can instantiate the dtype inner
590
// values for instance with categoricals
591
fn cast_list(
592
ca: &ListChunked,
593
child_type: &DataType,
594
options: CastOptions,
595
) -> PolarsResult<(ArrayRef, DataType)> {
596
// We still rechunk because we must bubble up a single data-type
597
// TODO!: consider a version that works on chunks and merges the data-types and arrays.
598
let ca = ca.rechunk();
599
let arr = ca.downcast_as_array();
600
// SAFETY: inner dtype is passed correctly
601
let s = unsafe {
602
Series::from_chunks_and_dtype_unchecked(
603
PlSmallStr::EMPTY,
604
vec![arr.values().clone()],
605
ca.inner_dtype(),
606
)
607
};
608
let new_inner = s.cast_with_options(child_type, options)?;
609
610
let inner_dtype = new_inner.dtype().clone();
611
debug_assert_eq!(&inner_dtype, child_type);
612
613
let new_values = new_inner.array_ref(0).clone();
614
615
let dtype = ListArray::<i64>::default_datatype(new_values.dtype().clone());
616
let new_arr = ListArray::<i64>::new(
617
dtype,
618
arr.offsets().clone(),
619
new_values,
620
arr.validity().cloned(),
621
);
622
Ok((new_arr.boxed(), inner_dtype))
623
}
624
625
unsafe fn cast_list_unchecked(ca: &ListChunked, child_type: &DataType) -> PolarsResult<Series> {
626
// TODO! add chunked, but this must correct for list offsets.
627
let ca = ca.rechunk();
628
let arr = ca.downcast_as_array();
629
// SAFETY: inner dtype is passed correctly
630
let s = unsafe {
631
Series::from_chunks_and_dtype_unchecked(
632
PlSmallStr::EMPTY,
633
vec![arr.values().clone()],
634
ca.inner_dtype(),
635
)
636
};
637
let new_inner = s.cast_unchecked(child_type)?;
638
let new_values = new_inner.array_ref(0).clone();
639
640
let dtype = ListArray::<i64>::default_datatype(new_values.dtype().clone());
641
let new_arr = ListArray::<i64>::new(
642
dtype,
643
arr.offsets().clone(),
644
new_values,
645
arr.validity().cloned(),
646
);
647
Ok(ListChunked::from_chunks_and_dtype_unchecked(
648
ca.name().clone(),
649
vec![Box::new(new_arr)],
650
DataType::List(Box::new(child_type.clone())),
651
)
652
.into_series())
653
}
654
655
/// Casts the inner values of a fixed-size array column to `child_type`.
///
/// Returns the rebuilt fixed-size list array together with the inner dtype of
/// the cast result. The dtype is returned because a cast can instantiate the
/// inner dtype's values, for instance with categoricals.
///
/// The width, length and validity of the (rechunked) input are reused.
#[cfg(feature = "dtype-array")]
fn cast_fixed_size_list(
    ca: &ArrayChunked,
    child_type: &DataType,
    options: CastOptions,
) -> PolarsResult<(ArrayRef, DataType)> {
    let ca = ca.rechunk();
    let arr = ca.downcast_as_array();
    // SAFETY: inner dtype is passed correctly
    let s = unsafe {
        Series::from_chunks_and_dtype_unchecked(
            PlSmallStr::EMPTY,
            vec![arr.values().clone()],
            ca.inner_dtype(),
        )
    };
    let new_inner = s.cast_with_options(child_type, options)?;

    let inner_dtype = new_inner.dtype().clone();
    debug_assert_eq!(&inner_dtype, child_type);

    // Only the first chunk of the cast result is used.
    let new_values = new_inner.array_ref(0).clone();

    let dtype = FixedSizeListArray::default_datatype(new_values.dtype().clone(), ca.width());
    let new_arr = FixedSizeListArray::new(dtype, ca.len(), new_values, arr.validity().cloned());
    Ok((Box::new(new_arr), inner_dtype))
}
684
685
#[cfg(test)]
mod test {
    use crate::chunked_array::cast::CastOptions;
    use crate::prelude::*;

    /// Casting `List(Int32)` to `List(Float64)` yields the requested dtype.
    #[test]
    fn test_cast_list() -> PolarsResult<()> {
        let mut builder = ListPrimitiveChunkedBuilder::<Int32Type>::new(
            PlSmallStr::from_static("a"),
            10,
            10,
            DataType::Int32,
        );
        builder.append_opt_slice(Some(&[1i32, 2, 3]));
        builder.append_opt_slice(Some(&[1i32, 2, 3]));
        let ca = builder.finish();

        let new = ca.cast_with_options(
            &DataType::List(DataType::Float64.into()),
            CastOptions::Strict,
        )?;

        assert_eq!(new.dtype(), &DataType::List(DataType::Float64.into()));
        Ok(())
    }

    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_cast_noop() {
        // check if we can cast categorical twice without panic
        let ca = StringChunked::new(PlSmallStr::from_static("foo"), &["bar", "ham"]);
        let cats = Categories::global();
        let out = ca
            .cast_with_options(
                &DataType::from_categories(cats.clone()),
                CastOptions::Strict,
            )
            .unwrap();
        let out = out.cast(&DataType::from_categories(cats)).unwrap();
        assert!(matches!(out.dtype(), &DataType::Categorical(_, _)))
    }
}
727
728