CoCalc -- mod.rs

GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-compute/src/cast/mod.rs
⁶⁹³⁹ views
1
//! Defines different casting operators such as [`cast`] or [`primitive_to_binary`].
2

3
mod binary_to;
4
mod binview_to;
5
mod boolean_to;
6
mod decimal_to;
7
mod dictionary_to;
8
mod primitive_to;
9
mod utf8_to;
10

11
use arrow::bitmap::MutableBitmap;
12
pub use binary_to::*;
13
#[cfg(feature = "dtype-decimal")]
14
pub use binview_to::binview_to_decimal;
15
use binview_to::utf8view_to_primitive_dyn;
16
pub use binview_to::utf8view_to_utf8;
17
pub use boolean_to::*;
18
pub use decimal_to::*;
19
pub mod temporal;
20
use arrow::array::*;
21
use arrow::datatypes::*;
22
use arrow::match_integer_type;
23
use arrow::offset::{Offset, Offsets};
24
use binview_to::{
25
    binview_to_dictionary, utf8view_to_date32_dyn, utf8view_to_dictionary,
26
    utf8view_to_naive_timestamp_dyn, view_to_binary,
27
};
28
pub use binview_to::{binview_to_fixed_size_list_dyn, binview_to_primitive_dyn};
29
use dictionary_to::*;
30
use polars_error::{PolarsResult, polars_bail, polars_ensure, polars_err};
31
use polars_utils::IdxSize;
32
pub use primitive_to::*;
33
use temporal::utf8view_to_timestamp;
34
pub use utf8_to::*;
35

36
/// Options that control how the cast kernels behave.
///
/// The [`Default`] value has every flag set to `false`.
#[derive(Debug, Default, Clone, Copy)]
pub struct CastOptionsImpl {
    /// Overflow policy for numeric casts. Defaults to `false`.
    ///
    /// * `false`: a value that overflows the target type is converted to `None`.
    /// * `true`: the value wraps instead, like a vectorized `as` cast
    ///   (i.e. `256i16 as u8 = 0`).
    ///
    /// Setting this to `true` is 5-6x faster for numeric types.
    pub wrapped: bool,
    /// Whether casts to an integer are performed on a best-effort basis.
    /// Defaults to `false`.
    pub partial: bool,
}
47

48
impl CastOptionsImpl {
49
    pub fn unchecked() -> Self {
50
        Self {
51
            wrapped: true,
52
            partial: false,
53
        }
54
    }
55
}
56

57
impl CastOptionsImpl {
58
    fn with_wrapped(&self, v: bool) -> Self {
59
        let mut option = *self;
60
        option.wrapped = v;
61
        option
62
    }
63
}
64

65
/// Downcasts a dynamically typed array and applies a kernel to it.
///
/// `primitive_dyn!(from, kernel, args...)` expands to an expression that:
///
/// 1. calls `from.as_any().downcast_ref().unwrap()`; the concrete target type
///    is inferred from the first parameter of `kernel`, and the expansion
///    panics if `from` is not of that type;
/// 2. calls `kernel(downcasted, args...)`;
/// 3. boxes the result and wraps it in `Ok`.
///
/// Any number of extra arguments (including none) may follow the kernel; they
/// are forwarded unchanged and in order.
macro_rules! primitive_dyn {
    ($from:expr, $expr:tt $(, $arg:expr)*) => {{
        let from = $from.as_any().downcast_ref().unwrap();
        Ok(Box::new($expr(from $(, $arg)*)))
    }};
}
83

84
fn cast_struct(
85
    array: &StructArray,
86
    to_type: &ArrowDataType,
87
    options: CastOptionsImpl,
88
) -> PolarsResult<StructArray> {
89
    let values = array.values();
90
    let fields = StructArray::get_fields(to_type);
91
    let new_values = values
92
        .iter()
93
        .zip(fields)
94
        .map(|(arr, field)| cast(arr.as_ref(), field.dtype(), options))
95
        .collect::<PolarsResult<Vec<_>>>()?;
96

97
    Ok(StructArray::new(
98
        to_type.clone(),
99
        array.len(),
100
        new_values,
101
        array.validity().cloned(),
102
    ))
103
}
104

105
fn cast_list<O: Offset>(
106
    array: &ListArray<O>,
107
    to_type: &ArrowDataType,
108
    options: CastOptionsImpl,
109
) -> PolarsResult<ListArray<O>> {
110
    let values = array.values();
111
    let new_values = cast(
112
        values.as_ref(),
113
        ListArray::<O>::get_child_type(to_type),
114
        options,
115
    )?;
116

117
    Ok(ListArray::<O>::new(
118
        to_type.clone(),
119
        array.offsets().clone(),
120
        new_values,
121
        array.validity().cloned(),
122
    ))
123
}
124

125
fn cast_list_to_large_list(array: &ListArray<i32>, to_type: &ArrowDataType) -> ListArray<i64> {
126
    let offsets = array.offsets().into();
127

128
    ListArray::<i64>::new(
129
        to_type.clone(),
130
        offsets,
131
        array.values().clone(),
132
        array.validity().cloned(),
133
    )
134
}
135

136
fn cast_large_to_list(array: &ListArray<i64>, to_type: &ArrowDataType) -> ListArray<i32> {
137
    let offsets = array.offsets().try_into().expect("Convertme to error");
138

139
    ListArray::<i32>::new(
140
        to_type.clone(),
141
        offsets,
142
        array.values().clone(),
143
        array.validity().cloned(),
144
    )
145
}
146

147
fn cast_fixed_size_list_to_list<O: Offset>(
148
    fixed: &FixedSizeListArray,
149
    to_type: &ArrowDataType,
150
    options: CastOptionsImpl,
151
) -> PolarsResult<ListArray<O>> {
152
    let new_values = cast(
153
        fixed.values().as_ref(),
154
        ListArray::<O>::get_child_type(to_type),
155
        options,
156
    )?;
157

158
    let offsets = (0..=fixed.len())
159
        .map(|ix| O::from_as_usize(ix * fixed.size()))
160
        .collect::<Vec<_>>();
161
    // SAFETY: offsets _are_ monotonically increasing
162
    let offsets = unsafe { Offsets::new_unchecked(offsets) };
163

164
    Ok(ListArray::<O>::new(
165
        to_type.clone(),
166
        offsets.into(),
167
        new_values,
168
        fixed.validity().cloned(),
169
    ))
170
}
171

172
pub(super) fn cast_list_to_fixed_size_list<O: Offset>(
173
    list: &ListArray<O>,
174
    inner: &Field,
175
    size: usize,
176
    options: CastOptionsImpl,
177
) -> PolarsResult<FixedSizeListArray> {
178
    let null_cnt = list.null_count();
179
    let new_values = if null_cnt == 0 {
180
        let start_offset = list.offsets().first().to_usize();
181
        let offsets = list.offsets().buffer();
182

183
        let mut is_valid = true;
184
        for (i, offset) in offsets.iter().enumerate() {
185
            is_valid &= offset.to_usize() == start_offset + i * size;
186
        }
187

188
        polars_ensure!(is_valid, ComputeError: "not all elements have the specified width {size}");
189

190
        let sliced_values = list
191
            .values()
192
            .sliced(start_offset, list.offsets().range().to_usize());
193
        cast(sliced_values.as_ref(), inner.dtype(), options)?
194
    } else {
195
        let offsets = list.offsets().as_slice();
196
        // Check the lengths of each list are equal to the fixed size.
197
        // SAFETY: we know the index is in bound.
198
        let mut expected_offset = unsafe { *offsets.get_unchecked(0) } + O::from_as_usize(size);
199
        for i in 1..=list.len() {
200
            // SAFETY: we know the index is in bound.
201
            let current_offset = unsafe { *offsets.get_unchecked(i) };
202
            if list.is_null(i - 1) {
203
                expected_offset = current_offset + O::from_as_usize(size);
204
            } else {
205
                polars_ensure!(current_offset == expected_offset, ComputeError:
206
            "not all elements have the specified width {size}");
207
                expected_offset += O::from_as_usize(size);
208
            }
209
        }
210

211
        // Build take indices for the values. This is used to fill in the null slots.
212
        let mut indices =
213
            MutablePrimitiveArray::<IdxSize>::with_capacity(list.values().len() + null_cnt * size);
214
        for i in 0..list.len() {
215
            if list.is_null(i) {
216
                indices.extend_constant(size, None)
217
            } else {
218
                // SAFETY: we know the index is in bound.
219
                let current_offset = unsafe { *offsets.get_unchecked(i) };
220
                for j in 0..size {
221
                    indices.push(Some(
222
                        (current_offset + O::from_as_usize(j)).to_usize() as IdxSize
223
                    ));
224
                }
225
            }
226
        }
227
        let take_values =
228
            unsafe { crate::gather::take_unchecked(list.values().as_ref(), &indices.freeze()) };
229

230
        cast(take_values.as_ref(), inner.dtype(), options)?
231
    };
232

233
    FixedSizeListArray::try_new(
234
        ArrowDataType::FixedSizeList(Box::new(inner.clone()), size),
235
        list.len(),
236
        new_values,
237
        list.validity().cloned(),
238
    )
239
    .map_err(|_| polars_err!(ComputeError: "not all elements have the specified width {size}"))
240
}
241

242
fn cast_list_uint8_to_binary<O: Offset>(list: &ListArray<O>) -> PolarsResult<BinaryViewArray> {
243
    let mut views = Vec::with_capacity(list.len());
244
    let mut result_validity = MutableBitmap::from_len_set(list.len());
245

246
    let u8array: &PrimitiveArray<u8> = list.values().as_any().downcast_ref().unwrap();
247
    let slice = u8array.values().as_slice();
248
    let mut cloned_buffers = vec![u8array.values().clone()];
249
    let mut buf_index = 0;
250
    let mut previous_buf_lengths = 0;
251
    let validity = list.validity();
252
    let internal_validity = list.values().validity();
253
    let offsets = list.offsets();
254

255
    let mut all_views_inline = true;
256

257
    // In a View for BinaryViewArray, both length and offset are u32.
258
    #[cfg(not(test))]
259
    const MAX_BUF_SIZE: usize = u32::MAX as usize;
260

261
    // This allows us to test some invariants without using 4GB of RAM; see mod
262
    // tests below.
263
    #[cfg(test)]
264
    const MAX_BUF_SIZE: usize = 15;
265

266
    for index in 0..list.len() {
267
        // Check if there's a null instead of a list:
268
        if let Some(validity) = validity {
269
            // SAFETY: We are generating indexes limited to < list.len().
270
            debug_assert!(index < validity.len());
271
            if unsafe { !validity.get_bit_unchecked(index) } {
272
                debug_assert!(index < result_validity.len());
273
                unsafe {
274
                    result_validity.set_unchecked(index, false);
275
                }
276
                views.push(View::default());
277
                continue;
278
            }
279
        }
280

281
        // SAFETY: We are generating indexes limited to < list.len().
282
        debug_assert!(index < offsets.len());
283
        let (start, end) = unsafe { offsets.start_end_unchecked(index) };
284
        let length = end - start;
285
        polars_ensure!(
286
            length <= MAX_BUF_SIZE,
287
            InvalidOperation: format!("when casting to BinaryView, list lengths must be <= {MAX_BUF_SIZE}")
288
        );
289

290
        // Check if the list contains nulls:
291
        if let Some(internal_validity) = internal_validity {
292
            if internal_validity.null_count_range(start, length) > 0 {
293
                debug_assert!(index < result_validity.len());
294
                unsafe {
295
                    result_validity.set_unchecked(index, false);
296
                }
297
                views.push(View::default());
298
                continue;
299
            }
300
        }
301

302
        if end - previous_buf_lengths > MAX_BUF_SIZE {
303
            // View offsets must fit in u32 (or smaller value when running Rust
304
            // tests), and we've determined the end of the next view will be
305
            // past that.
306
            buf_index += 1;
307
            let (previous, next) = cloned_buffers
308
                .last()
309
                .unwrap()
310
                .split_at(start - previous_buf_lengths);
311
            debug_assert!(previous.len() <= MAX_BUF_SIZE);
312
            previous_buf_lengths += previous.len();
313
            *(cloned_buffers.last_mut().unwrap()) = previous;
314
            cloned_buffers.push(next);
315
        }
316
        let view = View::new_from_bytes(
317
            &slice[start..end],
318
            buf_index,
319
            (start - previous_buf_lengths) as u32,
320
        );
321
        if !view.is_inline() {
322
            all_views_inline = false;
323
        }
324
        debug_assert_eq!(
325
            unsafe { view.get_slice_unchecked(&cloned_buffers) },
326
            &slice[start..end]
327
        );
328
        views.push(view);
329
    }
330

331
    // Optimization: don't actually need buffers if Views are all inline.
332
    if all_views_inline {
333
        cloned_buffers.clear();
334
    }
335

336
    let result_buffers = cloned_buffers.into_boxed_slice().into();
337
    let result = if cfg!(debug_assertions) {
338
        // A safer wrapper around new_unchecked_unknown_md; it shouldn't ever
339
        // fail in practice.
340
        BinaryViewArrayGeneric::try_new(
341
            ArrowDataType::BinaryView,
342
            views.into(),
343
            result_buffers,
344
            result_validity.into(),
345
        )?
346
    } else {
347
        unsafe {
348
            BinaryViewArrayGeneric::new_unchecked_unknown_md(
349
                ArrowDataType::BinaryView,
350
                views.into(),
351
                result_buffers,
352
                result_validity.into(),
353
                // We could compute this ourselves, but we want to make this code
354
                // match debug_assertions path as much as possible.
355
                None,
356
            )
357
        }
358
    };
359

360
    Ok(result)
361
}
362

363
pub fn cast_default(array: &dyn Array, to_type: &ArrowDataType) -> PolarsResult<Box<dyn Array>> {
364
    cast(array, to_type, Default::default())
365
}
366

367
pub fn cast_unchecked(array: &dyn Array, to_type: &ArrowDataType) -> PolarsResult<Box<dyn Array>> {
368
    cast(array, to_type, CastOptionsImpl::unchecked())
369
}
370

371
/// Cast `array` to the provided data type and return a new [`Array`] with
372
/// type `to_type`, if possible.
373
///
374
/// Behavior:
375
/// * PrimitiveArray to PrimitiveArray: overflowing cast will be None
376
/// * Boolean to Utf8: `true` => `1`, `false` => `0`
377
/// * Utf8 to numeric: strings that can't be parsed to numbers return null, float strings
378
///   in integer casts return null
379
/// * Numeric to boolean: 0 returns `false`, any other value returns `true`
380
/// * List to List: the underlying data type is cast
381
/// * Fixed Size List to List: the underlying data type is cast
382
/// * List to Fixed Size List: the offsets are checked for valid order, then the
383
///   underlying type is cast.
384
/// * List of UInt8 to Binary: the list of integers becomes binary data, nulls in the list means it becomes a null
385
/// * Struct to Struct: the underlying fields are cast.
386
/// * PrimitiveArray to List: a list array with 1 value per slot is created
387
/// * Date32 and Date64: precision lost when going to higher interval
388
/// * Time32 and Time64: precision lost when going to higher interval
389
/// * Timestamp and Date{32|64}: precision lost when going to higher interval
390
/// * Temporal to/from backing primitive: zero-copy with data type change
391
///
392
/// Unsupported Casts
393
/// * non-`StructArray` to `StructArray` or `StructArray` to non-`StructArray`
394
/// * List to primitive (other than UInt8)
395
/// * Utf8 to boolean
396
/// * Interval and duration
397
pub fn cast(
398
    array: &dyn Array,
399
    to_type: &ArrowDataType,
400
    options: CastOptionsImpl,
401
) -> PolarsResult<Box<dyn Array>> {
402
    use ArrowDataType::*;
403
    let from_type = array.dtype();
404

405
    // clone array if types are the same
406
    if from_type == to_type {
407
        return Ok(clone(array));
408
    }
409

410
    let as_options = options.with_wrapped(true);
411
    match (from_type, to_type) {
412
        (Null, _) | (_, Null) => Ok(new_null_array(to_type.clone(), array.len())),
413
        (Struct(from_fd), Struct(to_fd)) => {
414
            polars_ensure!(from_fd.len() == to_fd.len(), InvalidOperation: "Cannot cast struct with different number of fields.");
415
            cast_struct(array.as_any().downcast_ref().unwrap(), to_type, options).map(|x| x.boxed())
416
        },
417
        (Struct(_), _) | (_, Struct(_)) => polars_bail!(InvalidOperation:
418
            "Cannot cast from struct to other types"
419
        ),
420
        (Dictionary(index_type, ..), _) => match_integer_type!(index_type, |$T| {
421
            dictionary_cast_dyn::<$T>(array, to_type, options)
422
        }),
423
        (_, Dictionary(index_type, value_type, _)) => match_integer_type!(index_type, |$T| {
424
            cast_to_dictionary::<$T>(array, value_type, options)
425
        }),
426
        // not supported by polars
427
        // (List(_), FixedSizeList(inner, size)) => cast_list_to_fixed_size_list::<i32>(
428
        //     array.as_any().downcast_ref().unwrap(),
429
        //     inner.as_ref(),
430
        //     *size,
431
        //     options,
432
        // )
433
        // .map(|x| x.boxed()),
434
        (LargeList(_), FixedSizeList(inner, size)) => cast_list_to_fixed_size_list::<i64>(
435
            array.as_any().downcast_ref().unwrap(),
436
            inner.as_ref(),
437
            *size,
438
            options,
439
        )
440
        .map(|x| x.boxed()),
441
        (FixedSizeList(_, _), List(_)) => cast_fixed_size_list_to_list::<i32>(
442
            array.as_any().downcast_ref().unwrap(),
443
            to_type,
444
            options,
445
        )
446
        .map(|x| x.boxed()),
447
        (FixedSizeList(_, _), LargeList(_)) => cast_fixed_size_list_to_list::<i64>(
448
            array.as_any().downcast_ref().unwrap(),
449
            to_type,
450
            options,
451
        )
452
        .map(|x| x.boxed()),
453
        (List(field), BinaryView) if matches!(field.dtype(), UInt8) => {
454
            cast_list_uint8_to_binary::<i32>(array.as_any().downcast_ref().unwrap())
455
                .map(|arr| arr.boxed())
456
        },
457
        (LargeList(field), BinaryView) if matches!(field.dtype(), UInt8) => {
458
            cast_list_uint8_to_binary::<i64>(array.as_any().downcast_ref().unwrap())
459
                .map(|arr| arr.boxed())
460
        },
461
        (BinaryView, _) => match to_type {
462
            Utf8View => array
463
                .as_any()
464
                .downcast_ref::<BinaryViewArray>()
465
                .unwrap()
466
                .to_utf8view()
467
                .map(|arr| arr.boxed()),
468
            LargeBinary => Ok(binview_to::view_to_binary::<i64>(
469
                array.as_any().downcast_ref().unwrap(),
470
            )
471
            .boxed()),
472
            LargeList(inner) if matches!(inner.dtype, ArrowDataType::UInt8) => {
473
                let bin_array = view_to_binary::<i64>(array.as_any().downcast_ref().unwrap());
474
                Ok(binary_to_list(&bin_array, to_type.clone()).boxed())
475
            },
476
            _ => polars_bail!(InvalidOperation:
477
                "casting from {from_type:?} to {to_type:?} not supported",
478
            ),
479
        },
480
        (LargeList(_), LargeList(_)) => {
481
            cast_list::<i64>(array.as_any().downcast_ref().unwrap(), to_type, options)
482
                .map(|x| x.boxed())
483
        },
484
        (List(lhs), LargeList(rhs)) if lhs == rhs => {
485
            Ok(cast_list_to_large_list(array.as_any().downcast_ref().unwrap(), to_type).boxed())
486
        },
487
        (LargeList(lhs), List(rhs)) if lhs == rhs => {
488
            Ok(cast_large_to_list(array.as_any().downcast_ref().unwrap(), to_type).boxed())
489
        },
490

491
        (_, List(to)) => {
492
            // cast primitive to list's primitive
493
            let values = cast(array, &to.dtype, options)?;
494
            // create offsets, where if array.len() = 2, we have [0,1,2]
495
            let offsets = (0..=array.len() as i32).collect::<Vec<_>>();
496
            // SAFETY: offsets _are_ monotonically increasing
497
            let offsets = unsafe { Offsets::new_unchecked(offsets) };
498

499
            let list_array = ListArray::<i32>::new(to_type.clone(), offsets.into(), values, None);
500

501
            Ok(Box::new(list_array))
502
        },
503

504
        (_, LargeList(to)) if from_type != &LargeBinary => {
505
            // cast primitive to list's primitive
506
            let values = cast(array, &to.dtype, options)?;
507
            // create offsets, where if array.len() = 2, we have [0,1,2]
508
            let offsets = (0..=array.len() as i64).collect::<Vec<_>>();
509
            // SAFETY: offsets _are_ monotonically increasing
510
            let offsets = unsafe { Offsets::new_unchecked(offsets) };
511

512
            let list_array = ListArray::<i64>::new(
513
                to_type.clone(),
514
                offsets.into(),
515
                values,
516
                array.validity().cloned(),
517
            );
518

519
            Ok(Box::new(list_array))
520
        },
521

522
        (Utf8View, _) => {
523
            let arr = array.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
524

525
            match to_type {
526
                BinaryView => Ok(arr.to_binview().boxed()),
527
                LargeUtf8 => Ok(binview_to::utf8view_to_utf8::<i64>(arr).boxed()),
528
                UInt8 => utf8view_to_primitive_dyn::<u8>(arr, to_type, options),
529
                UInt16 => utf8view_to_primitive_dyn::<u16>(arr, to_type, options),
530
                UInt32 => utf8view_to_primitive_dyn::<u32>(arr, to_type, options),
531
                UInt64 => utf8view_to_primitive_dyn::<u64>(arr, to_type, options),
532
                Int8 => utf8view_to_primitive_dyn::<i8>(arr, to_type, options),
533
                Int16 => utf8view_to_primitive_dyn::<i16>(arr, to_type, options),
534
                Int32 => utf8view_to_primitive_dyn::<i32>(arr, to_type, options),
535
                Int64 => utf8view_to_primitive_dyn::<i64>(arr, to_type, options),
536
                #[cfg(feature = "dtype-i128")]
537
                Int128 => utf8view_to_primitive_dyn::<i128>(arr, to_type, options),
538
                Float32 => utf8view_to_primitive_dyn::<f32>(arr, to_type, options),
539
                Float64 => utf8view_to_primitive_dyn::<f64>(arr, to_type, options),
540
                Timestamp(time_unit, None) => {
541
                    utf8view_to_naive_timestamp_dyn(array, time_unit.to_owned())
542
                },
543
                Timestamp(time_unit, Some(time_zone)) => utf8view_to_timestamp(
544
                    array.as_any().downcast_ref().unwrap(),
545
                    RFC3339,
546
                    time_zone.clone(),
547
                    time_unit.to_owned(),
548
                )
549
                .map(|arr| arr.boxed()),
550
                Date32 => utf8view_to_date32_dyn(array),
551
                #[cfg(feature = "dtype-decimal")]
552
                Decimal(precision, scale) => {
553
                    Ok(binview_to_decimal(&arr.to_binview(), Some(*precision), *scale).to_boxed())
554
                },
555
                _ => polars_bail!(InvalidOperation:
556
                    "casting from {from_type:?} to {to_type:?} not supported",
557
                ),
558
            }
559
        },
560

561
        (_, Boolean) => match from_type {
562
            UInt8 => primitive_to_boolean_dyn::<u8>(array, to_type.clone()),
563
            UInt16 => primitive_to_boolean_dyn::<u16>(array, to_type.clone()),
564
            UInt32 => primitive_to_boolean_dyn::<u32>(array, to_type.clone()),
565
            UInt64 => primitive_to_boolean_dyn::<u64>(array, to_type.clone()),
566
            Int8 => primitive_to_boolean_dyn::<i8>(array, to_type.clone()),
567
            Int16 => primitive_to_boolean_dyn::<i16>(array, to_type.clone()),
568
            Int32 => primitive_to_boolean_dyn::<i32>(array, to_type.clone()),
569
            Int64 => primitive_to_boolean_dyn::<i64>(array, to_type.clone()),
570
            #[cfg(feature = "dtype-i128")]
571
            Int128 => primitive_to_boolean_dyn::<i128>(array, to_type.clone()),
572
            Float32 => primitive_to_boolean_dyn::<f32>(array, to_type.clone()),
573
            Float64 => primitive_to_boolean_dyn::<f64>(array, to_type.clone()),
574
            Decimal(_, _) => primitive_to_boolean_dyn::<i128>(array, to_type.clone()),
575
            _ => polars_bail!(InvalidOperation:
576
                "casting from {from_type:?} to {to_type:?} not supported",
577
            ),
578
        },
579
        (Boolean, _) => match to_type {
580
            UInt8 => boolean_to_primitive_dyn::<u8>(array),
581
            UInt16 => boolean_to_primitive_dyn::<u16>(array),
582
            UInt32 => boolean_to_primitive_dyn::<u32>(array),
583
            UInt64 => boolean_to_primitive_dyn::<u64>(array),
584
            Int8 => boolean_to_primitive_dyn::<i8>(array),
585
            Int16 => boolean_to_primitive_dyn::<i16>(array),
586
            Int32 => boolean_to_primitive_dyn::<i32>(array),
587
            Int64 => boolean_to_primitive_dyn::<i64>(array),
588
            #[cfg(feature = "dtype-i128")]
589
            Int128 => boolean_to_primitive_dyn::<i128>(array),
590
            Float32 => boolean_to_primitive_dyn::<f32>(array),
591
            Float64 => boolean_to_primitive_dyn::<f64>(array),
592
            Utf8View => boolean_to_utf8view_dyn(array),
593
            BinaryView => boolean_to_binaryview_dyn(array),
594
            _ => polars_bail!(InvalidOperation:
595
                "casting from {from_type:?} to {to_type:?} not supported",
596
            ),
597
        },
598
        (_, BinaryView) => from_to_binview(array, from_type, to_type).map(|arr| arr.boxed()),
599
        (_, Utf8View) => match from_type {
600
            LargeUtf8 => Ok(utf8_to_utf8view(
601
                array.as_any().downcast_ref::<Utf8Array<i64>>().unwrap(),
602
            )
603
            .boxed()),
604
            Utf8 => Ok(
605
                utf8_to_utf8view(array.as_any().downcast_ref::<Utf8Array<i32>>().unwrap()).boxed(),
606
            ),
607
            #[cfg(feature = "dtype-decimal")]
608
            Decimal(_, _) => Ok(decimal_to_utf8view_dyn(array).boxed()),
609
            _ => from_to_binview(array, from_type, to_type)
610
                .map(|arr| unsafe { arr.to_utf8view_unchecked() }.boxed()),
611
        },
612
        (Utf8, _) => match to_type {
613
            LargeUtf8 => Ok(Box::new(utf8_to_large_utf8(
614
                array.as_any().downcast_ref().unwrap(),
615
            ))),
616
            _ => polars_bail!(InvalidOperation:
617
                "casting from {from_type:?} to {to_type:?} not supported",
618
            ),
619
        },
620
        (LargeUtf8, _) => match to_type {
621
            LargeBinary => Ok(utf8_to_binary::<i64>(
622
                array.as_any().downcast_ref().unwrap(),
623
                to_type.clone(),
624
            )
625
            .boxed()),
626
            _ => polars_bail!(InvalidOperation:
627
                "casting from {from_type:?} to {to_type:?} not supported",
628
            ),
629
        },
630
        (_, LargeUtf8) => match from_type {
631
            UInt8 => primitive_to_utf8_dyn::<u8, i64>(array),
632
            LargeBinary => {
633
                binary_to_utf8::<i64>(array.as_any().downcast_ref().unwrap(), to_type.clone())
634
                    .map(|x| x.boxed())
635
            },
636
            _ => polars_bail!(InvalidOperation:
637
                "casting from {from_type:?} to {to_type:?} not supported",
638
            ),
639
        },
640

641
        (Binary, _) => match to_type {
642
            LargeBinary => Ok(Box::new(binary_to_large_binary(
643
                array.as_any().downcast_ref().unwrap(),
644
                to_type.clone(),
645
            ))),
646
            _ => polars_bail!(InvalidOperation:
647
                "casting from {from_type:?} to {to_type:?} not supported",
648
            ),
649
        },
650

651
        (LargeBinary, _) => match to_type {
652
            UInt8 => binary_to_primitive_dyn::<i64, u8>(array, to_type, options),
653
            UInt16 => binary_to_primitive_dyn::<i64, u16>(array, to_type, options),
654
            UInt32 => binary_to_primitive_dyn::<i64, u32>(array, to_type, options),
655
            UInt64 => binary_to_primitive_dyn::<i64, u64>(array, to_type, options),
656
            Int8 => binary_to_primitive_dyn::<i64, i8>(array, to_type, options),
657
            Int16 => binary_to_primitive_dyn::<i64, i16>(array, to_type, options),
658
            Int32 => binary_to_primitive_dyn::<i64, i32>(array, to_type, options),
659
            Int64 => binary_to_primitive_dyn::<i64, i64>(array, to_type, options),
660
            #[cfg(feature = "dtype-i128")]
661
            Int128 => binary_to_primitive_dyn::<i64, i128>(array, to_type, options),
662
            Float32 => binary_to_primitive_dyn::<i64, f32>(array, to_type, options),
663
            Float64 => binary_to_primitive_dyn::<i64, f64>(array, to_type, options),
664
            Binary => {
665
                binary_large_to_binary(array.as_any().downcast_ref().unwrap(), to_type.clone())
666
                    .map(|x| x.boxed())
667
            },
668
            LargeUtf8 => {
669
                binary_to_utf8::<i64>(array.as_any().downcast_ref().unwrap(), to_type.clone())
670
                    .map(|x| x.boxed())
671
            },
672
            _ => polars_bail!(InvalidOperation:
673
                "casting from {from_type:?} to {to_type:?} not supported",
674
            ),
675
        },
676
        (FixedSizeBinary(_), _) => match to_type {
677
            Binary => Ok(fixed_size_binary_binary::<i32>(
678
                array.as_any().downcast_ref().unwrap(),
679
                to_type.clone(),
680
            )
681
            .boxed()),
682
            LargeBinary => Ok(fixed_size_binary_binary::<i64>(
683
                array.as_any().downcast_ref().unwrap(),
684
                to_type.clone(),
685
            )
686
            .boxed()),
687
            _ => polars_bail!(InvalidOperation:
688
                "casting from {from_type:?} to {to_type:?} not supported",
689
            ),
690
        },
691
        // start numeric casts
692
        (UInt8, UInt16) => primitive_to_primitive_dyn::<u8, u16>(array, to_type, as_options),
693
        (UInt8, UInt32) => primitive_to_primitive_dyn::<u8, u32>(array, to_type, as_options),
694
        (UInt8, UInt64) => primitive_to_primitive_dyn::<u8, u64>(array, to_type, as_options),
695
        (UInt8, Int8) => primitive_to_primitive_dyn::<u8, i8>(array, to_type, options),
696
        (UInt8, Int16) => primitive_to_primitive_dyn::<u8, i16>(array, to_type, options),
697
        (UInt8, Int32) => primitive_to_primitive_dyn::<u8, i32>(array, to_type, options),
698
        (UInt8, Int64) => primitive_to_primitive_dyn::<u8, i64>(array, to_type, options),
699
        #[cfg(feature = "dtype-i128")]
700
        (UInt8, Int128) => primitive_to_primitive_dyn::<u8, i128>(array, to_type, options),
701
        (UInt8, Float32) => primitive_to_primitive_dyn::<u8, f32>(array, to_type, as_options),
702
        (UInt8, Float64) => primitive_to_primitive_dyn::<u8, f64>(array, to_type, as_options),
703
        (UInt8, Decimal(p, s)) => integer_to_decimal_dyn::<u8>(array, *p, *s),
704

705
        (UInt16, UInt8) => primitive_to_primitive_dyn::<u16, u8>(array, to_type, options),
706
        (UInt16, UInt32) => primitive_to_primitive_dyn::<u16, u32>(array, to_type, as_options),
707
        (UInt16, UInt64) => primitive_to_primitive_dyn::<u16, u64>(array, to_type, as_options),
708
        (UInt16, Int8) => primitive_to_primitive_dyn::<u16, i8>(array, to_type, options),
709
        (UInt16, Int16) => primitive_to_primitive_dyn::<u16, i16>(array, to_type, options),
710
        (UInt16, Int32) => primitive_to_primitive_dyn::<u16, i32>(array, to_type, options),
711
        (UInt16, Int64) => primitive_to_primitive_dyn::<u16, i64>(array, to_type, options),
712
        #[cfg(feature = "dtype-i128")]
713
        (UInt16, Int128) => primitive_to_primitive_dyn::<u16, i128>(array, to_type, options),
714
        (UInt16, Float32) => primitive_to_primitive_dyn::<u16, f32>(array, to_type, as_options),
715
        (UInt16, Float64) => primitive_to_primitive_dyn::<u16, f64>(array, to_type, as_options),
716
        (UInt16, Decimal(p, s)) => integer_to_decimal_dyn::<u16>(array, *p, *s),
717

718
        (UInt32, UInt8) => primitive_to_primitive_dyn::<u32, u8>(array, to_type, options),
719
        (UInt32, UInt16) => primitive_to_primitive_dyn::<u32, u16>(array, to_type, options),
720
        (UInt32, UInt64) => primitive_to_primitive_dyn::<u32, u64>(array, to_type, as_options),
721
        (UInt32, Int8) => primitive_to_primitive_dyn::<u32, i8>(array, to_type, options),
722
        (UInt32, Int16) => primitive_to_primitive_dyn::<u32, i16>(array, to_type, options),
723
        (UInt32, Int32) => primitive_to_primitive_dyn::<u32, i32>(array, to_type, options),
724
        (UInt32, Int64) => primitive_to_primitive_dyn::<u32, i64>(array, to_type, options),
725
        #[cfg(feature = "dtype-i128")]
726
        (UInt32, Int128) => primitive_to_primitive_dyn::<u32, i128>(array, to_type, options),
727
        (UInt32, Float32) => primitive_to_primitive_dyn::<u32, f32>(array, to_type, as_options),
728
        (UInt32, Float64) => primitive_to_primitive_dyn::<u32, f64>(array, to_type, as_options),
729
        (UInt32, Decimal(p, s)) => integer_to_decimal_dyn::<u32>(array, *p, *s),
730

731
        (UInt64, UInt8) => primitive_to_primitive_dyn::<u64, u8>(array, to_type, options),
732
        (UInt64, UInt16) => primitive_to_primitive_dyn::<u64, u16>(array, to_type, options),
733
        (UInt64, UInt32) => primitive_to_primitive_dyn::<u64, u32>(array, to_type, options),
734
        (UInt64, Int8) => primitive_to_primitive_dyn::<u64, i8>(array, to_type, options),
735
        (UInt64, Int16) => primitive_to_primitive_dyn::<u64, i16>(array, to_type, options),
736
        (UInt64, Int32) => primitive_to_primitive_dyn::<u64, i32>(array, to_type, options),
737
        (UInt64, Int64) => primitive_to_primitive_dyn::<u64, i64>(array, to_type, options),
738
        #[cfg(feature = "dtype-i128")]
739
        (UInt64, Int128) => primitive_to_primitive_dyn::<u64, i128>(array, to_type, options),
740
        (UInt64, Float32) => primitive_to_primitive_dyn::<u64, f32>(array, to_type, as_options),
741
        (UInt64, Float64) => primitive_to_primitive_dyn::<u64, f64>(array, to_type, as_options),
742
        (UInt64, Decimal(p, s)) => integer_to_decimal_dyn::<u64>(array, *p, *s),
743

744
        (Int8, UInt8) => primitive_to_primitive_dyn::<i8, u8>(array, to_type, options),
745
        (Int8, UInt16) => primitive_to_primitive_dyn::<i8, u16>(array, to_type, options),
746
        (Int8, UInt32) => primitive_to_primitive_dyn::<i8, u32>(array, to_type, options),
747
        (Int8, UInt64) => primitive_to_primitive_dyn::<i8, u64>(array, to_type, options),
748
        (Int8, Int16) => primitive_to_primitive_dyn::<i8, i16>(array, to_type, as_options),
749
        (Int8, Int32) => primitive_to_primitive_dyn::<i8, i32>(array, to_type, as_options),
750
        (Int8, Int64) => primitive_to_primitive_dyn::<i8, i64>(array, to_type, as_options),
751
        #[cfg(feature = "dtype-i128")]
752
        (Int8, Int128) => primitive_to_primitive_dyn::<i8, i128>(array, to_type, as_options),
753
        (Int8, Float32) => primitive_to_primitive_dyn::<i8, f32>(array, to_type, as_options),
754
        (Int8, Float64) => primitive_to_primitive_dyn::<i8, f64>(array, to_type, as_options),
755
        (Int8, Decimal(p, s)) => integer_to_decimal_dyn::<i8>(array, *p, *s),
756

757
        (Int16, UInt8) => primitive_to_primitive_dyn::<i16, u8>(array, to_type, options),
758
        (Int16, UInt16) => primitive_to_primitive_dyn::<i16, u16>(array, to_type, options),
759
        (Int16, UInt32) => primitive_to_primitive_dyn::<i16, u32>(array, to_type, options),
760
        (Int16, UInt64) => primitive_to_primitive_dyn::<i16, u64>(array, to_type, options),
761
        (Int16, Int8) => primitive_to_primitive_dyn::<i16, i8>(array, to_type, options),
762
        (Int16, Int32) => primitive_to_primitive_dyn::<i16, i32>(array, to_type, as_options),
763
        (Int16, Int64) => primitive_to_primitive_dyn::<i16, i64>(array, to_type, as_options),
764
        #[cfg(feature = "dtype-i128")]
765
        (Int16, Int128) => primitive_to_primitive_dyn::<i16, i128>(array, to_type, as_options),
766
        (Int16, Float32) => primitive_to_primitive_dyn::<i16, f32>(array, to_type, as_options),
767
        (Int16, Float64) => primitive_to_primitive_dyn::<i16, f64>(array, to_type, as_options),
768
        (Int16, Decimal(p, s)) => integer_to_decimal_dyn::<i16>(array, *p, *s),
769

770
        (Int32, UInt8) => primitive_to_primitive_dyn::<i32, u8>(array, to_type, options),
771
        (Int32, UInt16) => primitive_to_primitive_dyn::<i32, u16>(array, to_type, options),
772
        (Int32, UInt32) => primitive_to_primitive_dyn::<i32, u32>(array, to_type, options),
773
        (Int32, UInt64) => primitive_to_primitive_dyn::<i32, u64>(array, to_type, options),
774
        (Int32, Int8) => primitive_to_primitive_dyn::<i32, i8>(array, to_type, options),
775
        (Int32, Int16) => primitive_to_primitive_dyn::<i32, i16>(array, to_type, options),
776
        (Int32, Int64) => primitive_to_primitive_dyn::<i32, i64>(array, to_type, as_options),
777
        #[cfg(feature = "dtype-i128")]
778
        (Int32, Int128) => primitive_to_primitive_dyn::<i32, i128>(array, to_type, as_options),
779
        (Int32, Float32) => primitive_to_primitive_dyn::<i32, f32>(array, to_type, as_options),
780
        (Int32, Float64) => primitive_to_primitive_dyn::<i32, f64>(array, to_type, as_options),
781
        (Int32, Decimal(p, s)) => integer_to_decimal_dyn::<i32>(array, *p, *s),
782

783
        (Int64, UInt8) => primitive_to_primitive_dyn::<i64, u8>(array, to_type, options),
784
        (Int64, UInt16) => primitive_to_primitive_dyn::<i64, u16>(array, to_type, options),
785
        (Int64, UInt32) => primitive_to_primitive_dyn::<i64, u32>(array, to_type, options),
786
        (Int64, UInt64) => primitive_to_primitive_dyn::<i64, u64>(array, to_type, options),
787
        (Int64, Int8) => primitive_to_primitive_dyn::<i64, i8>(array, to_type, options),
788
        (Int64, Int16) => primitive_to_primitive_dyn::<i64, i16>(array, to_type, options),
789
        (Int64, Int32) => primitive_to_primitive_dyn::<i64, i32>(array, to_type, options),
790
        #[cfg(feature = "dtype-i128")]
791
        (Int64, Int128) => primitive_to_primitive_dyn::<i64, i128>(array, to_type, options),
792
        (Int64, Float32) => primitive_to_primitive_dyn::<i64, f32>(array, to_type, options),
793
        (Int64, Float64) => primitive_to_primitive_dyn::<i64, f64>(array, to_type, as_options),
794
        (Int64, Decimal(p, s)) => integer_to_decimal_dyn::<i64>(array, *p, *s),
795

796
        #[cfg(feature = "dtype-i128")]
797
        (Int128, UInt8) => primitive_to_primitive_dyn::<i128, u8>(array, to_type, options),
798
        #[cfg(feature = "dtype-i128")]
799
        (Int128, UInt16) => primitive_to_primitive_dyn::<i128, u16>(array, to_type, options),
800
        #[cfg(feature = "dtype-i128")]
801
        (Int128, UInt32) => primitive_to_primitive_dyn::<i128, u32>(array, to_type, options),
802
        #[cfg(feature = "dtype-i128")]
803
        (Int128, UInt64) => primitive_to_primitive_dyn::<i128, u64>(array, to_type, options),
804
        #[cfg(feature = "dtype-i128")]
805
        (Int128, Int8) => primitive_to_primitive_dyn::<i128, i8>(array, to_type, options),
806
        #[cfg(feature = "dtype-i128")]
807
        (Int128, Int16) => primitive_to_primitive_dyn::<i128, i16>(array, to_type, options),
808
        #[cfg(feature = "dtype-i128")]
809
        (Int128, Int32) => primitive_to_primitive_dyn::<i128, i32>(array, to_type, options),
810
        #[cfg(feature = "dtype-i128")]
811
        (Int128, Int64) => primitive_to_primitive_dyn::<i128, i64>(array, to_type, options),
812
        #[cfg(feature = "dtype-i128")]
813
        (Int128, Float32) => primitive_to_primitive_dyn::<i128, f32>(array, to_type, options),
814
        #[cfg(feature = "dtype-i128")]
815
        (Int128, Float64) => primitive_to_primitive_dyn::<i128, f64>(array, to_type, as_options),
816
        #[cfg(feature = "dtype-i128")]
817
        (Int128, Decimal(p, s)) => integer_to_decimal_dyn::<i128>(array, *p, *s),
818

819
        (Float16, Float32) => {
820
            let from = array.as_any().downcast_ref().unwrap();
821
            Ok(f16_to_f32(from).boxed())
822
        },
823

824
        (Float32, UInt8) => primitive_to_primitive_dyn::<f32, u8>(array, to_type, options),
825
        (Float32, UInt16) => primitive_to_primitive_dyn::<f32, u16>(array, to_type, options),
826
        (Float32, UInt32) => primitive_to_primitive_dyn::<f32, u32>(array, to_type, options),
827
        (Float32, UInt64) => primitive_to_primitive_dyn::<f32, u64>(array, to_type, options),
828
        (Float32, Int8) => primitive_to_primitive_dyn::<f32, i8>(array, to_type, options),
829
        (Float32, Int16) => primitive_to_primitive_dyn::<f32, i16>(array, to_type, options),
830
        (Float32, Int32) => primitive_to_primitive_dyn::<f32, i32>(array, to_type, options),
831
        (Float32, Int64) => primitive_to_primitive_dyn::<f32, i64>(array, to_type, options),
832
        (Float32, Int128) => primitive_to_primitive_dyn::<f32, i128>(array, to_type, options),
833
        (Float32, Float64) => primitive_to_primitive_dyn::<f32, f64>(array, to_type, as_options),
834
        (Float32, Decimal(p, s)) => float_to_decimal_dyn::<f32>(array, *p, *s),
835

836
        (Float64, UInt8) => primitive_to_primitive_dyn::<f64, u8>(array, to_type, options),
837
        (Float64, UInt16) => primitive_to_primitive_dyn::<f64, u16>(array, to_type, options),
838
        (Float64, UInt32) => primitive_to_primitive_dyn::<f64, u32>(array, to_type, options),
839
        (Float64, UInt64) => primitive_to_primitive_dyn::<f64, u64>(array, to_type, options),
840
        (Float64, Int8) => primitive_to_primitive_dyn::<f64, i8>(array, to_type, options),
841
        (Float64, Int16) => primitive_to_primitive_dyn::<f64, i16>(array, to_type, options),
842
        (Float64, Int32) => primitive_to_primitive_dyn::<f64, i32>(array, to_type, options),
843
        (Float64, Int64) => primitive_to_primitive_dyn::<f64, i64>(array, to_type, options),
844
        (Float64, Int128) => primitive_to_primitive_dyn::<f64, i128>(array, to_type, options),
845
        (Float64, Float32) => primitive_to_primitive_dyn::<f64, f32>(array, to_type, options),
846
        (Float64, Decimal(p, s)) => float_to_decimal_dyn::<f64>(array, *p, *s),
847

848
        (Decimal(_, _), UInt8) => decimal_to_integer_dyn::<u8>(array),
849
        (Decimal(_, _), UInt16) => decimal_to_integer_dyn::<u16>(array),
850
        (Decimal(_, _), UInt32) => decimal_to_integer_dyn::<u32>(array),
851
        (Decimal(_, _), UInt64) => decimal_to_integer_dyn::<u64>(array),
852
        (Decimal(_, _), Int8) => decimal_to_integer_dyn::<i8>(array),
853
        (Decimal(_, _), Int16) => decimal_to_integer_dyn::<i16>(array),
854
        (Decimal(_, _), Int32) => decimal_to_integer_dyn::<i32>(array),
855
        (Decimal(_, _), Int64) => decimal_to_integer_dyn::<i64>(array),
856
        (Decimal(_, _), Int128) => decimal_to_integer_dyn::<i128>(array),
857
        (Decimal(_, _), Float32) => decimal_to_float_dyn::<f32>(array),
858
        (Decimal(_, _), Float64) => decimal_to_float_dyn::<f64>(array),
859
        (Decimal(_, _), Decimal(to_p, to_s)) => decimal_to_decimal_dyn(array, *to_p, *to_s),
860
        // end numeric casts
861

862
        // temporal casts
863
        (Int32, Date32) => primitive_to_same_primitive_dyn::<i32>(array, to_type),
864
        (Int32, Time32(TimeUnit::Second)) => primitive_dyn!(array, int32_to_time32s),
865
        (Int32, Time32(TimeUnit::Millisecond)) => primitive_dyn!(array, int32_to_time32ms),
866
        // No support for microsecond/nanosecond with i32
867
        (Date32, Int32) => primitive_to_same_primitive_dyn::<i32>(array, to_type),
868
        (Date32, Int64) => primitive_to_primitive_dyn::<i32, i64>(array, to_type, options),
869
        (Time32(_), Int32) => primitive_to_same_primitive_dyn::<i32>(array, to_type),
870
        (Int64, Date64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
871
        // No support for second/milliseconds with i64
872
        (Int64, Time64(TimeUnit::Microsecond)) => primitive_dyn!(array, int64_to_time64us),
873
        (Int64, Time64(TimeUnit::Nanosecond)) => primitive_dyn!(array, int64_to_time64ns),
874

875
        (Date64, Int32) => primitive_to_primitive_dyn::<i64, i32>(array, to_type, options),
876
        (Date64, Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
877
        (Time64(_), Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
878
        (Date32, Date64) => primitive_dyn!(array, date32_to_date64),
879
        (Date64, Date32) => primitive_dyn!(array, date64_to_date32),
880
        (Time32(TimeUnit::Second), Time32(TimeUnit::Millisecond)) => {
881
            primitive_dyn!(array, time32s_to_time32ms)
882
        },
883
        (Time32(TimeUnit::Millisecond), Time32(TimeUnit::Second)) => {
884
            primitive_dyn!(array, time32ms_to_time32s)
885
        },
886
        (Time32(from_unit), Time64(to_unit)) => {
887
            primitive_dyn!(array, time32_to_time64, *from_unit, *to_unit)
888
        },
889
        (Time64(TimeUnit::Microsecond), Time64(TimeUnit::Nanosecond)) => {
890
            primitive_dyn!(array, time64us_to_time64ns)
891
        },
892
        (Time64(TimeUnit::Nanosecond), Time64(TimeUnit::Microsecond)) => {
893
            primitive_dyn!(array, time64ns_to_time64us)
894
        },
895
        (Time64(from_unit), Time32(to_unit)) => {
896
            primitive_dyn!(array, time64_to_time32, *from_unit, *to_unit)
897
        },
898
        (Timestamp(_, _), Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
899
        (Int64, Timestamp(_, _)) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
900
        (Timestamp(from_unit, _), Timestamp(to_unit, tz)) => {
901
            primitive_dyn!(array, timestamp_to_timestamp, *from_unit, *to_unit, tz)
902
        },
903
        (Timestamp(from_unit, _), Date32) => primitive_dyn!(array, timestamp_to_date32, *from_unit),
904
        (Timestamp(from_unit, _), Date64) => primitive_dyn!(array, timestamp_to_date64, *from_unit),
905

906
        (Int64, Duration(_)) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
907
        (Duration(_), Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
908

909
        // Not supported by Polars.
910
        // (Interval(IntervalUnit::DayTime), Interval(IntervalUnit::MonthDayNano)) => {
911
        //     primitive_dyn!(array, days_ms_to_months_days_ns)
912
        // },
913
        // (Interval(IntervalUnit::YearMonth), Interval(IntervalUnit::MonthDayNano)) => {
914
        //     primitive_dyn!(array, months_to_months_days_ns)
915
        // },
916
        _ => polars_bail!(InvalidOperation:
917
            "casting from {from_type:?} to {to_type:?} not supported",
918
        ),
919
    }
920
}
921

922
/// Attempts to encode an array into an `ArrayDictionary` with index
923
/// type K and value (dictionary) type value_type
924
///
925
/// K is the key type
926
fn cast_to_dictionary<K: DictionaryKey>(
927
    array: &dyn Array,
928
    dict_value_type: &ArrowDataType,
929
    options: CastOptionsImpl,
930
) -> PolarsResult<Box<dyn Array>> {
931
    let array = cast(array, dict_value_type, options)?;
932
    let array = array.as_ref();
933
    match *dict_value_type {
934
        ArrowDataType::Int8 => primitive_to_dictionary_dyn::<i8, K>(array),
935
        ArrowDataType::Int16 => primitive_to_dictionary_dyn::<i16, K>(array),
936
        ArrowDataType::Int32 => primitive_to_dictionary_dyn::<i32, K>(array),
937
        ArrowDataType::Int64 => primitive_to_dictionary_dyn::<i64, K>(array),
938
        ArrowDataType::UInt8 => primitive_to_dictionary_dyn::<u8, K>(array),
939
        ArrowDataType::UInt16 => primitive_to_dictionary_dyn::<u16, K>(array),
940
        ArrowDataType::UInt32 => primitive_to_dictionary_dyn::<u32, K>(array),
941
        ArrowDataType::UInt64 => primitive_to_dictionary_dyn::<u64, K>(array),
942
        ArrowDataType::BinaryView => {
943
            binview_to_dictionary::<K>(array.as_any().downcast_ref().unwrap())
944
                .map(|arr| arr.boxed())
945
        },
946
        ArrowDataType::Utf8View => {
947
            utf8view_to_dictionary::<K>(array.as_any().downcast_ref().unwrap())
948
                .map(|arr| arr.boxed())
949
        },
950
        ArrowDataType::LargeUtf8 => utf8_to_dictionary_dyn::<i64, K>(array),
951
        ArrowDataType::LargeBinary => binary_to_dictionary_dyn::<i64, K>(array),
952
        ArrowDataType::Time64(_) => primitive_to_dictionary_dyn::<i64, K>(array),
953
        ArrowDataType::Timestamp(_, _) => primitive_to_dictionary_dyn::<i64, K>(array),
954
        ArrowDataType::Date32 => primitive_to_dictionary_dyn::<i32, K>(array),
955
        _ => polars_bail!(ComputeError:
956
            "unsupported output type for dictionary packing: {dict_value_type:?}"
957
        ),
958
    }
959
}
960

961
fn from_to_binview(
962
    array: &dyn Array,
963
    from_type: &ArrowDataType,
964
    to_type: &ArrowDataType,
965
) -> PolarsResult<BinaryViewArray> {
966
    use ArrowDataType::*;
967
    let binview = match from_type {
968
        UInt8 => primitive_to_binview_dyn::<u8>(array),
969
        UInt16 => primitive_to_binview_dyn::<u16>(array),
970
        UInt32 => primitive_to_binview_dyn::<u32>(array),
971
        UInt64 => primitive_to_binview_dyn::<u64>(array),
972
        Int8 => primitive_to_binview_dyn::<i8>(array),
973
        Int16 => primitive_to_binview_dyn::<i16>(array),
974
        Int32 => primitive_to_binview_dyn::<i32>(array),
975
        Int64 => primitive_to_binview_dyn::<i64>(array),
976
        Int128 => primitive_to_binview_dyn::<i128>(array),
977
        Float32 => primitive_to_binview_dyn::<f32>(array),
978
        Float64 => primitive_to_binview_dyn::<f64>(array),
979
        Binary => binary_to_binview::<i32>(array.as_any().downcast_ref().unwrap()),
980
        FixedSizeBinary(_) => fixed_size_binary_to_binview(array.as_any().downcast_ref().unwrap()),
981
        LargeBinary => binary_to_binview::<i64>(array.as_any().downcast_ref().unwrap()),
982
        _ => polars_bail!(InvalidOperation:
983
            "casting from {from_type:?} to {to_type:?} not supported",
984
        ),
985
    };
986
    Ok(binview)
987
}
988

989
#[cfg(test)]
mod tests {
    use arrow::offset::OffsetsBuffer;
    use polars_error::PolarsError;

    use super::*;

    /// The `List<UInt8>` dtype (unnamed, nullable inner field) shared by all tests.
    fn u8_list_dtype() -> ArrowDataType {
        let inner = Field::new("".into(), ArrowDataType::UInt8, true);
        ArrowDataType::List(Box::new(inner))
    }

    /// Runs [`cast`] to `BinaryView` with default options.
    fn cast_to_binview(list: &dyn Array) -> PolarsResult<Box<dyn Array>> {
        cast(list, &ArrowDataType::BinaryView, CastOptionsImpl::default())
    }

    /// Copies every value of `views` into an owned byte vector.
    fn collect_values(views: &BinaryViewArray) -> Vec<Vec<u8>> {
        views.values_iter().map(|bytes| bytes.to_vec()).collect()
    }

    /// The tests in this module expect the data buffers produced when casting
    /// `List<UInt8>` to `BinaryView` to hold at most 15 bytes under `cfg(test)`
    /// (see `cast_list_uint8_to_binary_errors_too_large_list`), so 20 bytes of
    /// input must be split across more than one buffer.
    #[test]
    fn cast_list_uint8_to_binary_across_buffer_max_size() {
        let bytes: Vec<u8> = (0u8..20).collect();
        // SAFETY: the offsets are non-empty, start at 0 and never decrease, which
        // is presumably what `new_unchecked` skips validating. `try_new` below
        // still validates them against the values.
        let offsets = unsafe { OffsetsBuffer::new_unchecked(vec![0, 13, 18, 20].into()) };
        let list_u8 = ListArray::try_new(
            u8_list_dtype(),
            offsets,
            PrimitiveArray::from_slice(bytes).boxed(),
            None,
        )
        .unwrap();

        let binary = cast_to_binview(&list_u8).unwrap();
        let views = binary
            .as_ref()
            .as_any()
            .downcast_ref::<BinaryViewArray>()
            .unwrap();

        let expected_values: Vec<Vec<u8>> = vec![
            vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
            vec![13, 14, 15, 16, 17],
            vec![18, 19],
        ];
        assert_eq!(collect_values(views), expected_values);

        // With a 15-byte cap, the 13-byte list fills the first buffer and the
        // remaining 5 + 2 bytes go into a second one.
        let buffer_lens: Vec<_> = views.data_buffers().iter().map(|buf| buf.len()).collect();
        assert_eq!(buffer_lens, vec![13, 7]);
    }

    /// Arrow spec requires views to fit in a single buffer. With buffers limited
    /// to 15 bytes under `cfg(test)`, a single list of 16 bytes cannot be
    /// represented and the cast must fail.
    #[test]
    fn cast_list_uint8_to_binary_errors_too_large_list() {
        let sixteen_zeros = PrimitiveArray::from_slice(vec![0u8; 16]);
        let list_u8 = ListArray::new(
            u8_list_dtype(),
            OffsetsBuffer::one_with_length(16),
            sixteen_zeros.boxed(),
            None,
        );

        let err = cast_to_binview(&list_u8).unwrap_err();
        let is_expected_error = matches!(
            err,
            PolarsError::InvalidOperation(msg)
                if msg.as_ref() == "when casting to BinaryView, list lengths must be <= 15"
        );
        assert!(is_expected_error);
    }

    /// When every view is at most 12 bytes long, all views are inline and the
    /// result is expected to carry no data buffers at all.
    #[test]
    fn cast_list_uint8_to_binary_drops_small_buffers() {
        let twelve_tens = PrimitiveArray::from_slice(vec![10u8; 12]);
        let list_u8 = ListArray::new(
            u8_list_dtype(),
            OffsetsBuffer::one_with_length(12),
            twelve_tens.boxed(),
            None,
        );

        let binary = cast_to_binview(&list_u8).unwrap();
        let views = binary
            .as_ref()
            .as_any()
            .downcast_ref::<BinaryViewArray>()
            .unwrap();

        assert!(views.data_buffers().is_empty());
        assert_eq!(collect_values(views), vec![vec![10u8; 12]]);
    }
}
1099

1100
Product

Resources

Company