Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-compute/src/cast/primitive_to.rs
6939 views
1
use std::hash::Hash;
2
3
use arrow::array::*;
4
use arrow::bitmap::{Bitmap, BitmapBuilder};
5
use arrow::compute::arity::unary;
6
use arrow::datatypes::{ArrowDataType, TimeUnit};
7
use arrow::offset::{Offset, Offsets};
8
use arrow::types::{NativeType, f16};
9
use num_traits::{AsPrimitive, Float, ToPrimitive};
10
use polars_error::PolarsResult;
11
use polars_utils::pl_str::PlSmallStr;
12
use polars_utils::vec::PushUnchecked;
13
14
use super::CastOptionsImpl;
15
use super::temporal::*;
16
17
/// Serialization of a primitive value into a byte buffer.
///
/// The implementations in this file append the textual representation of `val` to `f`
/// and return the number of bytes that were appended.
pub trait SerPrimitive {
    /// Writes `val` into `f`, returning the number of bytes written.
    fn write(f: &mut Vec<u8>, val: Self) -> usize
    where
        Self: Sized;
}
22
23
macro_rules! impl_ser_primitive {
24
($ptype:ident) => {
25
impl SerPrimitive for $ptype {
26
fn write(f: &mut Vec<u8>, val: Self) -> usize
27
where
28
Self: Sized,
29
{
30
let mut buffer = itoa::Buffer::new();
31
let value = buffer.format(val);
32
f.extend_from_slice(value.as_bytes());
33
value.len()
34
}
35
}
36
};
37
}
38
39
impl_ser_primitive!(i8);
40
impl_ser_primitive!(i16);
41
impl_ser_primitive!(i32);
42
impl_ser_primitive!(i64);
43
impl_ser_primitive!(i128);
44
impl_ser_primitive!(u8);
45
impl_ser_primitive!(u16);
46
impl_ser_primitive!(u32);
47
impl_ser_primitive!(u64);
48
49
impl SerPrimitive for f32 {
50
fn write(f: &mut Vec<u8>, val: Self) -> usize
51
where
52
Self: Sized,
53
{
54
let mut buffer = ryu::Buffer::new();
55
let value = buffer.format(val);
56
f.extend_from_slice(value.as_bytes());
57
value.len()
58
}
59
}
60
61
impl SerPrimitive for f64 {
62
fn write(f: &mut Vec<u8>, val: Self) -> usize
63
where
64
Self: Sized,
65
{
66
let mut buffer = ryu::Buffer::new();
67
let value = buffer.format(val);
68
f.extend_from_slice(value.as_bytes());
69
value.len()
70
}
71
}
72
73
fn fallible_unary<I, F, G, O>(
74
array: &PrimitiveArray<I>,
75
op: F,
76
fail: G,
77
dtype: ArrowDataType,
78
) -> PrimitiveArray<O>
79
where
80
I: NativeType,
81
O: NativeType,
82
F: Fn(I) -> O,
83
G: Fn(I) -> bool,
84
{
85
let values = array.values();
86
let mut out = Vec::with_capacity(array.len());
87
let mut i = 0;
88
89
while i < array.len() && !fail(values[i]) {
90
// SAFETY: We allocated enough before.
91
unsafe { out.push_unchecked(op(values[i])) };
92
i += 1;
93
}
94
95
if out.len() == array.len() {
96
return PrimitiveArray::<O>::new(dtype, out.into(), array.validity().cloned());
97
}
98
99
let mut validity = BitmapBuilder::with_capacity(array.len());
100
validity.extend_constant(out.len(), true);
101
102
for &value in &values[out.len()..] {
103
// SAFETY: We allocated enough before.
104
unsafe {
105
out.push_unchecked(op(value));
106
validity.push_unchecked(!fail(value));
107
}
108
}
109
110
debug_assert_eq!(out.len(), array.len());
111
debug_assert_eq!(validity.len(), array.len());
112
113
let validity = validity.freeze();
114
let validity = match array.validity() {
115
None => validity,
116
Some(arr_validity) => arrow::bitmap::and(&validity, arr_validity),
117
};
118
119
PrimitiveArray::<O>::new(dtype, out.into(), Some(validity))
120
}
121
122
fn primitive_to_values_and_offsets<T: NativeType + SerPrimitive, O: Offset>(
123
from: &PrimitiveArray<T>,
124
) -> (Vec<u8>, Offsets<O>) {
125
let mut values: Vec<u8> = Vec::with_capacity(from.len());
126
let mut offsets: Vec<O> = Vec::with_capacity(from.len() + 1);
127
offsets.push(O::default());
128
129
let mut offset: usize = 0;
130
131
unsafe {
132
for &x in from.values().iter() {
133
let len = T::write(&mut values, x);
134
135
offset += len;
136
offsets.push(O::from_as_usize(offset));
137
}
138
values.set_len(offset);
139
values.shrink_to_fit();
140
// SAFETY: offsets _are_ monotonically increasing
141
let offsets = Offsets::new_unchecked(offsets);
142
143
(values, offsets)
144
}
145
}
146
147
/// Returns a [`BooleanArray`] where every element is different from zero.
148
/// Validity is preserved.
149
pub fn primitive_to_boolean<T: NativeType>(
150
from: &PrimitiveArray<T>,
151
to_type: ArrowDataType,
152
) -> BooleanArray {
153
let iter = from.values().iter().map(|v| *v != T::default());
154
let values = Bitmap::from_trusted_len_iter(iter);
155
156
BooleanArray::new(to_type, values, from.validity().cloned())
157
}
158
159
pub(super) fn primitive_to_boolean_dyn<T>(
160
from: &dyn Array,
161
to_type: ArrowDataType,
162
) -> PolarsResult<Box<dyn Array>>
163
where
164
T: NativeType,
165
{
166
let from = from.as_any().downcast_ref().unwrap();
167
Ok(Box::new(primitive_to_boolean::<T>(from, to_type)))
168
}
169
170
/// Returns a [`Utf8Array`] where every element is the utf8 representation of the number.
171
pub(super) fn primitive_to_utf8<T: NativeType + SerPrimitive, O: Offset>(
172
from: &PrimitiveArray<T>,
173
) -> Utf8Array<O> {
174
let (values, offsets) = primitive_to_values_and_offsets(from);
175
unsafe {
176
Utf8Array::<O>::new_unchecked(
177
Utf8Array::<O>::default_dtype(),
178
offsets.into(),
179
values.into(),
180
from.validity().cloned(),
181
)
182
}
183
}
184
185
pub(super) fn primitive_to_utf8_dyn<T, O>(from: &dyn Array) -> PolarsResult<Box<dyn Array>>
186
where
187
O: Offset,
188
T: NativeType + SerPrimitive,
189
{
190
let from = from.as_any().downcast_ref().unwrap();
191
Ok(Box::new(primitive_to_utf8::<T, O>(from)))
192
}
193
194
pub(super) fn primitive_to_primitive_dyn<I, O>(
195
from: &dyn Array,
196
to_type: &ArrowDataType,
197
options: CastOptionsImpl,
198
) -> PolarsResult<Box<dyn Array>>
199
where
200
I: NativeType + num_traits::NumCast + num_traits::AsPrimitive<O>,
201
O: NativeType + num_traits::NumCast,
202
{
203
let from = from.as_any().downcast_ref::<PrimitiveArray<I>>().unwrap();
204
if options.wrapped {
205
Ok(Box::new(primitive_as_primitive::<I, O>(from, to_type)))
206
} else {
207
Ok(Box::new(primitive_to_primitive::<I, O>(from, to_type)))
208
}
209
}
210
211
/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of another physical type via numeric conversion.
212
pub fn primitive_to_primitive<I, O>(
213
from: &PrimitiveArray<I>,
214
to_type: &ArrowDataType,
215
) -> PrimitiveArray<O>
216
where
217
I: NativeType + num_traits::NumCast,
218
O: NativeType + num_traits::NumCast,
219
{
220
let iter = from
221
.iter()
222
.map(|v| v.and_then(|x| num_traits::cast::cast::<I, O>(*x)));
223
PrimitiveArray::<O>::from_trusted_len_iter(iter).to(to_type.clone())
224
}
225
226
/// Returns a [`PrimitiveArray<i128>`] with the cast values. Values are `None` on overflow
227
pub fn integer_to_decimal<T: NativeType + AsPrimitive<i128>>(
228
from: &PrimitiveArray<T>,
229
to_precision: usize,
230
to_scale: usize,
231
) -> PrimitiveArray<i128> {
232
assert!(to_precision <= 38);
233
assert!(to_scale <= 38);
234
235
let multiplier = 10_i128.pow(to_scale as u32);
236
let max_for_precision = 10_i128.pow(to_precision as u32) - 1;
237
let min_for_precision = -max_for_precision;
238
239
let values = from.iter().map(|x| {
240
x.and_then(|x| {
241
x.as_().checked_mul(multiplier).and_then(|x| {
242
if x > max_for_precision || x < min_for_precision {
243
None
244
} else {
245
Some(x)
246
}
247
})
248
})
249
});
250
251
PrimitiveArray::<i128>::from_trusted_len_iter(values)
252
.to(ArrowDataType::Decimal(to_precision, to_scale))
253
}
254
255
pub(super) fn integer_to_decimal_dyn<T>(
256
from: &dyn Array,
257
precision: usize,
258
scale: usize,
259
) -> PolarsResult<Box<dyn Array>>
260
where
261
T: NativeType + AsPrimitive<i128>,
262
{
263
let from = from.as_any().downcast_ref().unwrap();
264
Ok(Box::new(integer_to_decimal::<T>(from, precision, scale)))
265
}
266
267
/// Returns a [`PrimitiveArray<i128>`] with the cast values. Values are `None` on overflow
268
pub fn float_to_decimal<T>(
269
from: &PrimitiveArray<T>,
270
to_precision: usize,
271
to_scale: usize,
272
) -> PrimitiveArray<i128>
273
where
274
T: NativeType + Float + ToPrimitive,
275
f64: AsPrimitive<T>,
276
{
277
assert!(to_precision <= 38);
278
assert!(to_scale <= 38);
279
280
// 1.2 => 12
281
let multiplier: T = (10_f64).powi(to_scale as i32).as_();
282
let max_for_precision = 10_i128.pow(to_precision as u32) - 1;
283
let min_for_precision = -max_for_precision;
284
285
let values = from.iter().map(|x| {
286
x.and_then(|x| {
287
let x = (*x * multiplier).to_i128()?;
288
if x > max_for_precision || x < min_for_precision {
289
None
290
} else {
291
Some(x)
292
}
293
})
294
});
295
296
PrimitiveArray::<i128>::from_trusted_len_iter(values)
297
.to(ArrowDataType::Decimal(to_precision, to_scale))
298
}
299
300
pub(super) fn float_to_decimal_dyn<T>(
301
from: &dyn Array,
302
precision: usize,
303
scale: usize,
304
) -> PolarsResult<Box<dyn Array>>
305
where
306
T: NativeType + Float + ToPrimitive,
307
f64: AsPrimitive<T>,
308
{
309
let from = from.as_any().downcast_ref().unwrap();
310
Ok(Box::new(float_to_decimal::<T>(from, precision, scale)))
311
}
312
313
/// Cast [`PrimitiveArray`] as a [`PrimitiveArray`]
314
/// Same as `number as to_number_type` in rust
315
pub fn primitive_as_primitive<I, O>(
316
from: &PrimitiveArray<I>,
317
to_type: &ArrowDataType,
318
) -> PrimitiveArray<O>
319
where
320
I: NativeType + num_traits::AsPrimitive<O>,
321
O: NativeType,
322
{
323
unary(from, num_traits::AsPrimitive::<O>::as_, to_type.clone())
324
}
325
326
/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of the same physical type.
327
/// This is O(1).
328
pub fn primitive_to_same_primitive<T>(
329
from: &PrimitiveArray<T>,
330
to_type: &ArrowDataType,
331
) -> PrimitiveArray<T>
332
where
333
T: NativeType,
334
{
335
PrimitiveArray::<T>::new(
336
to_type.clone(),
337
from.values().clone(),
338
from.validity().cloned(),
339
)
340
}
341
342
/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of the same physical type.
343
/// This is O(1).
344
pub(super) fn primitive_to_same_primitive_dyn<T>(
345
from: &dyn Array,
346
to_type: &ArrowDataType,
347
) -> PolarsResult<Box<dyn Array>>
348
where
349
T: NativeType,
350
{
351
let from = from.as_any().downcast_ref().unwrap();
352
Ok(Box::new(primitive_to_same_primitive::<T>(from, to_type)))
353
}
354
355
pub(super) fn primitive_to_dictionary_dyn<T: NativeType + Eq + Hash, K: DictionaryKey>(
356
from: &dyn Array,
357
) -> PolarsResult<Box<dyn Array>> {
358
let from = from.as_any().downcast_ref().unwrap();
359
primitive_to_dictionary::<T, K>(from).map(|x| Box::new(x) as Box<dyn Array>)
360
}
361
362
/// Cast [`PrimitiveArray`] to [`DictionaryArray`]. Also known as packing.
363
/// # Errors
364
/// This function errors if the maximum key is smaller than the number of distinct elements
365
/// in the array.
366
pub fn primitive_to_dictionary<T: NativeType + Eq + Hash, K: DictionaryKey>(
367
from: &PrimitiveArray<T>,
368
) -> PolarsResult<DictionaryArray<K>> {
369
let iter = from.iter().map(|x| x.copied());
370
let mut array = MutableDictionaryArray::<K, _>::try_empty(MutablePrimitiveArray::<T>::from(
371
from.dtype().clone(),
372
))?;
373
array.reserve(from.len());
374
array.try_extend(iter)?;
375
376
Ok(array.into())
377
}
378
379
/// # Safety
380
///
381
/// `dtype` should be valid for primitive.
382
pub unsafe fn primitive_map_is_valid<T: NativeType>(
383
from: &PrimitiveArray<T>,
384
f: impl Fn(T) -> bool,
385
dtype: ArrowDataType,
386
) -> PrimitiveArray<T> {
387
let values = from.values().clone();
388
389
let validity: Bitmap = values.iter().map(|&v| f(v)).collect();
390
391
let validity = if validity.unset_bits() > 0 {
392
let new_validity = match from.validity() {
393
None => validity,
394
Some(v) => v & &validity,
395
};
396
397
Some(new_validity)
398
} else {
399
from.validity().cloned()
400
};
401
402
// SAFETY:
403
// - Validity did not change length
404
// - dtype should be valid
405
unsafe { PrimitiveArray::new_unchecked(dtype, values, validity) }
406
}
407
408
/// Conversion of `Int32` to `Time32(TimeUnit::Second)`
409
pub fn int32_to_time32s(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
410
// SAFETY: Time32(TimeUnit::Second) is valid for Int32
411
unsafe {
412
primitive_map_is_valid(
413
from,
414
|v| (0..SECONDS_IN_DAY as i32).contains(&v),
415
ArrowDataType::Time32(TimeUnit::Second),
416
)
417
}
418
}
419
420
/// Conversion of `Int32` to `Time32(TimeUnit::Millisecond)`
421
pub fn int32_to_time32ms(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
422
// SAFETY: Time32(TimeUnit::Millisecond) is valid for Int32
423
unsafe {
424
primitive_map_is_valid(
425
from,
426
|v| (0..MILLISECONDS_IN_DAY as i32).contains(&v),
427
ArrowDataType::Time32(TimeUnit::Millisecond),
428
)
429
}
430
}
431
432
/// Conversion of `Int64` to `Time32(TimeUnit::Microsecond)`
433
pub fn int64_to_time64us(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
434
// SAFETY: Time64(TimeUnit::Microsecond) is valid for Int64
435
unsafe {
436
primitive_map_is_valid(
437
from,
438
|v| (0..MICROSECONDS_IN_DAY).contains(&v),
439
ArrowDataType::Time32(TimeUnit::Microsecond),
440
)
441
}
442
}
443
444
/// Conversion of `Int64` to `Time32(TimeUnit::Nanosecond)`
445
pub fn int64_to_time64ns(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
446
// SAFETY: Time64(TimeUnit::Nanosecond) is valid for Int64
447
unsafe {
448
primitive_map_is_valid(
449
from,
450
|v| (0..NANOSECONDS_IN_DAY).contains(&v),
451
ArrowDataType::Time64(TimeUnit::Nanosecond),
452
)
453
}
454
}
455
456
/// Conversion of dates
457
pub fn date32_to_date64(from: &PrimitiveArray<i32>) -> PrimitiveArray<i64> {
458
unary(
459
from,
460
|x| x as i64 * MILLISECONDS_IN_DAY,
461
ArrowDataType::Date64,
462
)
463
}
464
465
/// Conversion of dates
466
pub fn date64_to_date32(from: &PrimitiveArray<i64>) -> PrimitiveArray<i32> {
467
unary(
468
from,
469
|x| (x / MILLISECONDS_IN_DAY) as i32,
470
ArrowDataType::Date32,
471
)
472
}
473
474
/// Conversion of times
475
pub fn time32s_to_time32ms(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
476
fallible_unary(
477
from,
478
|x| x.wrapping_mul(1000),
479
|x| x.checked_mul(1000).is_none(),
480
ArrowDataType::Time32(TimeUnit::Millisecond),
481
)
482
}
483
484
/// Conversion of times
485
pub fn time32ms_to_time32s(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
486
unary(from, |x| x / 1000, ArrowDataType::Time32(TimeUnit::Second))
487
}
488
489
/// Conversion of times
490
pub fn time64us_to_time64ns(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
491
fallible_unary(
492
from,
493
|x| x.wrapping_mul(1000),
494
|x| x.checked_mul(1000).is_none(),
495
ArrowDataType::Time64(TimeUnit::Nanosecond),
496
)
497
}
498
499
/// Conversion of times
500
pub fn time64ns_to_time64us(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
501
unary(
502
from,
503
|x| x / 1000,
504
ArrowDataType::Time64(TimeUnit::Microsecond),
505
)
506
}
507
508
/// Conversion of timestamp
509
pub fn timestamp_to_date64(from: &PrimitiveArray<i64>, from_unit: TimeUnit) -> PrimitiveArray<i64> {
510
let from_size = time_unit_multiple(from_unit);
511
let to_size = MILLISECONDS;
512
let to_type = ArrowDataType::Date64;
513
514
// Scale time_array by (to_size / from_size) using a
515
// single integer operation, but need to avoid integer
516
// math rounding down to zero
517
518
match to_size.cmp(&from_size) {
519
std::cmp::Ordering::Less => unary(from, |x| x / (from_size / to_size), to_type),
520
std::cmp::Ordering::Equal => primitive_to_same_primitive(from, &to_type),
521
std::cmp::Ordering::Greater => fallible_unary(
522
from,
523
|x| x.wrapping_mul(to_size / from_size),
524
|x| x.checked_mul(to_size / from_size).is_none(),
525
to_type,
526
),
527
}
528
}
529
530
/// Conversion of timestamp
531
pub fn timestamp_to_date32(from: &PrimitiveArray<i64>, from_unit: TimeUnit) -> PrimitiveArray<i32> {
532
let from_size = time_unit_multiple(from_unit) * SECONDS_IN_DAY;
533
unary(from, |x| (x / from_size) as i32, ArrowDataType::Date32)
534
}
535
536
/// Conversion of time
537
pub fn time32_to_time64(
538
from: &PrimitiveArray<i32>,
539
from_unit: TimeUnit,
540
to_unit: TimeUnit,
541
) -> PrimitiveArray<i64> {
542
let from_size = time_unit_multiple(from_unit);
543
let to_size = time_unit_multiple(to_unit);
544
let divisor = to_size / from_size;
545
fallible_unary(
546
from,
547
|x| (x as i64).wrapping_mul(divisor),
548
|x| (x as i64).checked_mul(divisor).is_none(),
549
ArrowDataType::Time64(to_unit),
550
)
551
}
552
553
/// Conversion of time
554
pub fn time64_to_time32(
555
from: &PrimitiveArray<i64>,
556
from_unit: TimeUnit,
557
to_unit: TimeUnit,
558
) -> PrimitiveArray<i32> {
559
let from_size = time_unit_multiple(from_unit);
560
let to_size = time_unit_multiple(to_unit);
561
let divisor = from_size / to_size;
562
unary(
563
from,
564
|x| (x / divisor) as i32,
565
ArrowDataType::Time32(to_unit),
566
)
567
}
568
569
/// Conversion of timestamp
570
pub fn timestamp_to_timestamp(
571
from: &PrimitiveArray<i64>,
572
from_unit: TimeUnit,
573
to_unit: TimeUnit,
574
tz: &Option<PlSmallStr>,
575
) -> PrimitiveArray<i64> {
576
let from_size = time_unit_multiple(from_unit);
577
let to_size = time_unit_multiple(to_unit);
578
let to_type = ArrowDataType::Timestamp(to_unit, tz.clone());
579
// we either divide or multiply, depending on size of each unit
580
if from_size >= to_size {
581
unary(from, |x| x / (from_size / to_size), to_type)
582
} else {
583
fallible_unary(
584
from,
585
|x| x.wrapping_mul(to_size / from_size),
586
|x| x.checked_mul(to_size / from_size).is_none(),
587
to_type,
588
)
589
}
590
}
591
592
/// Casts f16 into f32
593
pub fn f16_to_f32(from: &PrimitiveArray<f16>) -> PrimitiveArray<f32> {
594
unary(from, |x| x.to_f32(), ArrowDataType::Float32)
595
}
596
597
/// Returns a [`Utf8Array`] where every element is the utf8 representation of the number.
598
pub(super) fn primitive_to_binview<T: NativeType + SerPrimitive>(
599
from: &PrimitiveArray<T>,
600
) -> BinaryViewArray {
601
let mut mutable = MutableBinaryViewArray::with_capacity(from.len());
602
603
let mut scratch = vec![];
604
for &x in from.values().iter() {
605
unsafe { scratch.set_len(0) };
606
T::write(&mut scratch, x);
607
mutable.push_value_ignore_validity(&scratch)
608
}
609
610
mutable.freeze().with_validity(from.validity().cloned())
611
}
612
613
pub(super) fn primitive_to_binview_dyn<T>(from: &dyn Array) -> BinaryViewArray
614
where
615
T: NativeType + SerPrimitive,
616
{
617
let from = from.as_any().downcast_ref().unwrap();
618
primitive_to_binview::<T>(from)
619
}
620
621