Path: blob/main/crates/polars-compute/src/cast/primitive_to.rs
6939 views
use std::hash::Hash;12use arrow::array::*;3use arrow::bitmap::{Bitmap, BitmapBuilder};4use arrow::compute::arity::unary;5use arrow::datatypes::{ArrowDataType, TimeUnit};6use arrow::offset::{Offset, Offsets};7use arrow::types::{NativeType, f16};8use num_traits::{AsPrimitive, Float, ToPrimitive};9use polars_error::PolarsResult;10use polars_utils::pl_str::PlSmallStr;11use polars_utils::vec::PushUnchecked;1213use super::CastOptionsImpl;14use super::temporal::*;1516pub trait SerPrimitive {17fn write(f: &mut Vec<u8>, val: Self) -> usize18where19Self: Sized;20}2122macro_rules! impl_ser_primitive {23($ptype:ident) => {24impl SerPrimitive for $ptype {25fn write(f: &mut Vec<u8>, val: Self) -> usize26where27Self: Sized,28{29let mut buffer = itoa::Buffer::new();30let value = buffer.format(val);31f.extend_from_slice(value.as_bytes());32value.len()33}34}35};36}3738impl_ser_primitive!(i8);39impl_ser_primitive!(i16);40impl_ser_primitive!(i32);41impl_ser_primitive!(i64);42impl_ser_primitive!(i128);43impl_ser_primitive!(u8);44impl_ser_primitive!(u16);45impl_ser_primitive!(u32);46impl_ser_primitive!(u64);4748impl SerPrimitive for f32 {49fn write(f: &mut Vec<u8>, val: Self) -> usize50where51Self: Sized,52{53let mut buffer = ryu::Buffer::new();54let value = buffer.format(val);55f.extend_from_slice(value.as_bytes());56value.len()57}58}5960impl SerPrimitive for f64 {61fn write(f: &mut Vec<u8>, val: Self) -> usize62where63Self: Sized,64{65let mut buffer = ryu::Buffer::new();66let value = buffer.format(val);67f.extend_from_slice(value.as_bytes());68value.len()69}70}7172fn fallible_unary<I, F, G, O>(73array: &PrimitiveArray<I>,74op: F,75fail: G,76dtype: ArrowDataType,77) -> PrimitiveArray<O>78where79I: NativeType,80O: NativeType,81F: Fn(I) -> O,82G: Fn(I) -> bool,83{84let values = array.values();85let mut out = Vec::with_capacity(array.len());86let mut i = 0;8788while i < array.len() && !fail(values[i]) {89// SAFETY: We allocated enough before.90unsafe { out.push_unchecked(op(values[i])) };91i += 1;92}9394if out.len() == array.len() {95return PrimitiveArray::<O>::new(dtype, out.into(), array.validity().cloned());96}9798let mut validity = BitmapBuilder::with_capacity(array.len());99validity.extend_constant(out.len(), true);100101for &value in &values[out.len()..] {102// SAFETY: We allocated enough before.103unsafe {104out.push_unchecked(op(value));105validity.push_unchecked(!fail(value));106}107}108109debug_assert_eq!(out.len(), array.len());110debug_assert_eq!(validity.len(), array.len());111112let validity = validity.freeze();113let validity = match array.validity() {114None => validity,115Some(arr_validity) => arrow::bitmap::and(&validity, arr_validity),116};117118PrimitiveArray::<O>::new(dtype, out.into(), Some(validity))119}120121fn primitive_to_values_and_offsets<T: NativeType + SerPrimitive, O: Offset>(122from: &PrimitiveArray<T>,123) -> (Vec<u8>, Offsets<O>) {124let mut values: Vec<u8> = Vec::with_capacity(from.len());125let mut offsets: Vec<O> = Vec::with_capacity(from.len() + 1);126offsets.push(O::default());127128let mut offset: usize = 0;129130unsafe {131for &x in from.values().iter() {132let len = T::write(&mut values, x);133134offset += len;135offsets.push(O::from_as_usize(offset));136}137values.set_len(offset);138values.shrink_to_fit();139// SAFETY: offsets _are_ monotonically increasing140let offsets = Offsets::new_unchecked(offsets);141142(values, offsets)143}144}145146/// Returns a [`BooleanArray`] where every element is different from zero.147/// Validity is preserved.148pub fn primitive_to_boolean<T: NativeType>(149from: &PrimitiveArray<T>,150to_type: ArrowDataType,151) -> BooleanArray {152let iter = from.values().iter().map(|v| *v != T::default());153let values = Bitmap::from_trusted_len_iter(iter);154155BooleanArray::new(to_type, values, from.validity().cloned())156}157158pub(super) fn primitive_to_boolean_dyn<T>(159from: &dyn Array,160to_type: ArrowDataType,161) -> PolarsResult<Box<dyn Array>>162where163T: NativeType,164{165let from = from.as_any().downcast_ref().unwrap();166Ok(Box::new(primitive_to_boolean::<T>(from, to_type)))167}168169/// Returns a [`Utf8Array`] where every element is the utf8 representation of the number.170pub(super) fn primitive_to_utf8<T: NativeType + SerPrimitive, O: Offset>(171from: &PrimitiveArray<T>,172) -> Utf8Array<O> {173let (values, offsets) = primitive_to_values_and_offsets(from);174unsafe {175Utf8Array::<O>::new_unchecked(176Utf8Array::<O>::default_dtype(),177offsets.into(),178values.into(),179from.validity().cloned(),180)181}182}183184pub(super) fn primitive_to_utf8_dyn<T, O>(from: &dyn Array) -> PolarsResult<Box<dyn Array>>185where186O: Offset,187T: NativeType + SerPrimitive,188{189let from = from.as_any().downcast_ref().unwrap();190Ok(Box::new(primitive_to_utf8::<T, O>(from)))191}192193pub(super) fn primitive_to_primitive_dyn<I, O>(194from: &dyn Array,195to_type: &ArrowDataType,196options: CastOptionsImpl,197) -> PolarsResult<Box<dyn Array>>198where199I: NativeType + num_traits::NumCast + num_traits::AsPrimitive<O>,200O: NativeType + num_traits::NumCast,201{202let from = from.as_any().downcast_ref::<PrimitiveArray<I>>().unwrap();203if options.wrapped {204Ok(Box::new(primitive_as_primitive::<I, O>(from, to_type)))205} else {206Ok(Box::new(primitive_to_primitive::<I, O>(from, to_type)))207}208}209210/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of another physical type via numeric conversion.211pub fn primitive_to_primitive<I, O>(212from: &PrimitiveArray<I>,213to_type: &ArrowDataType,214) -> PrimitiveArray<O>215where216I: NativeType + num_traits::NumCast,217O: NativeType + num_traits::NumCast,218{219let iter = from220.iter()221.map(|v| v.and_then(|x| num_traits::cast::cast::<I, O>(*x)));222PrimitiveArray::<O>::from_trusted_len_iter(iter).to(to_type.clone())223}224225/// Returns a [`PrimitiveArray<i128>`] with the cast values. Values are `None` on overflow226pub fn integer_to_decimal<T: NativeType + AsPrimitive<i128>>(227from: &PrimitiveArray<T>,228to_precision: usize,229to_scale: usize,230) -> PrimitiveArray<i128> {231assert!(to_precision <= 38);232assert!(to_scale <= 38);233234let multiplier = 10_i128.pow(to_scale as u32);235let max_for_precision = 10_i128.pow(to_precision as u32) - 1;236let min_for_precision = -max_for_precision;237238let values = from.iter().map(|x| {239x.and_then(|x| {240x.as_().checked_mul(multiplier).and_then(|x| {241if x > max_for_precision || x < min_for_precision {242None243} else {244Some(x)245}246})247})248});249250PrimitiveArray::<i128>::from_trusted_len_iter(values)251.to(ArrowDataType::Decimal(to_precision, to_scale))252}253254pub(super) fn integer_to_decimal_dyn<T>(255from: &dyn Array,256precision: usize,257scale: usize,258) -> PolarsResult<Box<dyn Array>>259where260T: NativeType + AsPrimitive<i128>,261{262let from = from.as_any().downcast_ref().unwrap();263Ok(Box::new(integer_to_decimal::<T>(from, precision, scale)))264}265266/// Returns a [`PrimitiveArray<i128>`] with the cast values. Values are `None` on overflow267pub fn float_to_decimal<T>(268from: &PrimitiveArray<T>,269to_precision: usize,270to_scale: usize,271) -> PrimitiveArray<i128>272where273T: NativeType + Float + ToPrimitive,274f64: AsPrimitive<T>,275{276assert!(to_precision <= 38);277assert!(to_scale <= 38);278279// 1.2 => 12280let multiplier: T = (10_f64).powi(to_scale as i32).as_();281let max_for_precision = 10_i128.pow(to_precision as u32) - 1;282let min_for_precision = -max_for_precision;283284let values = from.iter().map(|x| {285x.and_then(|x| {286let x = (*x * multiplier).to_i128()?;287if x > max_for_precision || x < min_for_precision {288None289} else {290Some(x)291}292})293});294295PrimitiveArray::<i128>::from_trusted_len_iter(values)296.to(ArrowDataType::Decimal(to_precision, to_scale))297}298299pub(super) fn float_to_decimal_dyn<T>(300from: &dyn Array,301precision: usize,302scale: usize,303) -> PolarsResult<Box<dyn Array>>304where305T: NativeType + Float + ToPrimitive,306f64: AsPrimitive<T>,307{308let from = from.as_any().downcast_ref().unwrap();309Ok(Box::new(float_to_decimal::<T>(from, precision, scale)))310}311312/// Cast [`PrimitiveArray`] as a [`PrimitiveArray`]313/// Same as `number as to_number_type` in rust314pub fn primitive_as_primitive<I, O>(315from: &PrimitiveArray<I>,316to_type: &ArrowDataType,317) -> PrimitiveArray<O>318where319I: NativeType + num_traits::AsPrimitive<O>,320O: NativeType,321{322unary(from, num_traits::AsPrimitive::<O>::as_, to_type.clone())323}324325/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of the same physical type.326/// This is O(1).327pub fn primitive_to_same_primitive<T>(328from: &PrimitiveArray<T>,329to_type: &ArrowDataType,330) -> PrimitiveArray<T>331where332T: NativeType,333{334PrimitiveArray::<T>::new(335to_type.clone(),336from.values().clone(),337from.validity().cloned(),338)339}340341/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of the same physical type.342/// This is O(1).343pub(super) fn primitive_to_same_primitive_dyn<T>(344from: &dyn Array,345to_type: &ArrowDataType,346) -> PolarsResult<Box<dyn Array>>347where348T: NativeType,349{350let from = from.as_any().downcast_ref().unwrap();351Ok(Box::new(primitive_to_same_primitive::<T>(from, to_type)))352}353354pub(super) fn primitive_to_dictionary_dyn<T: NativeType + Eq + Hash, K: DictionaryKey>(355from: &dyn Array,356) -> PolarsResult<Box<dyn Array>> {357let from = from.as_any().downcast_ref().unwrap();358primitive_to_dictionary::<T, K>(from).map(|x| Box::new(x) as Box<dyn Array>)359}360361/// Cast [`PrimitiveArray`] to [`DictionaryArray`]. Also known as packing.362/// # Errors363/// This function errors if the maximum key is smaller than the number of distinct elements364/// in the array.365pub fn primitive_to_dictionary<T: NativeType + Eq + Hash, K: DictionaryKey>(366from: &PrimitiveArray<T>,367) -> PolarsResult<DictionaryArray<K>> {368let iter = from.iter().map(|x| x.copied());369let mut array = MutableDictionaryArray::<K, _>::try_empty(MutablePrimitiveArray::<T>::from(370from.dtype().clone(),371))?;372array.reserve(from.len());373array.try_extend(iter)?;374375Ok(array.into())376}377378/// # Safety379///380/// `dtype` should be valid for primitive.381pub unsafe fn primitive_map_is_valid<T: NativeType>(382from: &PrimitiveArray<T>,383f: impl Fn(T) -> bool,384dtype: ArrowDataType,385) -> PrimitiveArray<T> {386let values = from.values().clone();387388let validity: Bitmap = values.iter().map(|&v| f(v)).collect();389390let validity = if validity.unset_bits() > 0 {391let new_validity = match from.validity() {392None => validity,393Some(v) => v & &validity,394};395396Some(new_validity)397} else {398from.validity().cloned()399};400401// SAFETY:402// - Validity did not change length403// - dtype should be valid404unsafe { PrimitiveArray::new_unchecked(dtype, values, validity) }405}406407/// Conversion of `Int32` to `Time32(TimeUnit::Second)`408pub fn int32_to_time32s(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {409// SAFETY: Time32(TimeUnit::Second) is valid for Int32410unsafe {411primitive_map_is_valid(412from,413|v| (0..SECONDS_IN_DAY as i32).contains(&v),414ArrowDataType::Time32(TimeUnit::Second),415)416}417}418419/// Conversion of `Int32` to `Time32(TimeUnit::Millisecond)`420pub fn int32_to_time32ms(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {421// SAFETY: Time32(TimeUnit::Millisecond) is valid for Int32422unsafe {423primitive_map_is_valid(424from,425|v| (0..MILLISECONDS_IN_DAY as i32).contains(&v),426ArrowDataType::Time32(TimeUnit::Millisecond),427)428}429}430431/// Conversion of `Int64` to `Time32(TimeUnit::Microsecond)`432pub fn int64_to_time64us(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {433// SAFETY: Time64(TimeUnit::Microsecond) is valid for Int64434unsafe {435primitive_map_is_valid(436from,437|v| (0..MICROSECONDS_IN_DAY).contains(&v),438ArrowDataType::Time32(TimeUnit::Microsecond),439)440}441}442443/// Conversion of `Int64` to `Time32(TimeUnit::Nanosecond)`444pub fn int64_to_time64ns(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {445// SAFETY: Time64(TimeUnit::Nanosecond) is valid for Int64446unsafe {447primitive_map_is_valid(448from,449|v| (0..NANOSECONDS_IN_DAY).contains(&v),450ArrowDataType::Time64(TimeUnit::Nanosecond),451)452}453}454455/// Conversion of dates456pub fn date32_to_date64(from: &PrimitiveArray<i32>) -> PrimitiveArray<i64> {457unary(458from,459|x| x as i64 * MILLISECONDS_IN_DAY,460ArrowDataType::Date64,461)462}463464/// Conversion of dates465pub fn date64_to_date32(from: &PrimitiveArray<i64>) -> PrimitiveArray<i32> {466unary(467from,468|x| (x / MILLISECONDS_IN_DAY) as i32,469ArrowDataType::Date32,470)471}472473/// Conversion of times474pub fn time32s_to_time32ms(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {475fallible_unary(476from,477|x| x.wrapping_mul(1000),478|x| x.checked_mul(1000).is_none(),479ArrowDataType::Time32(TimeUnit::Millisecond),480)481}482483/// Conversion of times484pub fn time32ms_to_time32s(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {485unary(from, |x| x / 1000, ArrowDataType::Time32(TimeUnit::Second))486}487488/// Conversion of times489pub fn time64us_to_time64ns(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {490fallible_unary(491from,492|x| x.wrapping_mul(1000),493|x| x.checked_mul(1000).is_none(),494ArrowDataType::Time64(TimeUnit::Nanosecond),495)496}497498/// Conversion of times499pub fn time64ns_to_time64us(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {500unary(501from,502|x| x / 1000,503ArrowDataType::Time64(TimeUnit::Microsecond),504)505}506507/// Conversion of timestamp508pub fn timestamp_to_date64(from: &PrimitiveArray<i64>, from_unit: TimeUnit) -> PrimitiveArray<i64> {509let from_size = time_unit_multiple(from_unit);510let to_size = MILLISECONDS;511let to_type = ArrowDataType::Date64;512513// Scale time_array by (to_size / from_size) using a514// single integer operation, but need to avoid integer515// math rounding down to zero516517match to_size.cmp(&from_size) {518std::cmp::Ordering::Less => unary(from, |x| x / (from_size / to_size), to_type),519std::cmp::Ordering::Equal => primitive_to_same_primitive(from, &to_type),520std::cmp::Ordering::Greater => fallible_unary(521from,522|x| x.wrapping_mul(to_size / from_size),523|x| x.checked_mul(to_size / from_size).is_none(),524to_type,525),526}527}528529/// Conversion of timestamp530pub fn timestamp_to_date32(from: &PrimitiveArray<i64>, from_unit: TimeUnit) -> PrimitiveArray<i32> {531let from_size = time_unit_multiple(from_unit) * SECONDS_IN_DAY;532unary(from, |x| (x / from_size) as i32, ArrowDataType::Date32)533}534535/// Conversion of time536pub fn time32_to_time64(537from: &PrimitiveArray<i32>,538from_unit: TimeUnit,539to_unit: TimeUnit,540) -> PrimitiveArray<i64> {541let from_size = time_unit_multiple(from_unit);542let to_size = time_unit_multiple(to_unit);543let divisor = to_size / from_size;544fallible_unary(545from,546|x| (x as i64).wrapping_mul(divisor),547|x| (x as i64).checked_mul(divisor).is_none(),548ArrowDataType::Time64(to_unit),549)550}551552/// Conversion of time553pub fn time64_to_time32(554from: &PrimitiveArray<i64>,555from_unit: TimeUnit,556to_unit: TimeUnit,557) -> PrimitiveArray<i32> {558let from_size = time_unit_multiple(from_unit);559let to_size = time_unit_multiple(to_unit);560let divisor = from_size / to_size;561unary(562from,563|x| (x / divisor) as i32,564ArrowDataType::Time32(to_unit),565)566}567568/// Conversion of timestamp569pub fn timestamp_to_timestamp(570from: &PrimitiveArray<i64>,571from_unit: TimeUnit,572to_unit: TimeUnit,573tz: &Option<PlSmallStr>,574) -> PrimitiveArray<i64> {575let from_size = time_unit_multiple(from_unit);576let to_size = time_unit_multiple(to_unit);577let to_type = ArrowDataType::Timestamp(to_unit, tz.clone());578// we either divide or multiply, depending on size of each unit579if from_size >= to_size {580unary(from, |x| x / (from_size / to_size), to_type)581} else {582fallible_unary(583from,584|x| x.wrapping_mul(to_size / from_size),585|x| x.checked_mul(to_size / from_size).is_none(),586to_type,587)588}589}590591/// Casts f16 into f32592pub fn f16_to_f32(from: &PrimitiveArray<f16>) -> PrimitiveArray<f32> {593unary(from, |x| x.to_f32(), ArrowDataType::Float32)594}595596/// Returns a [`Utf8Array`] where every element is the utf8 representation of the number.597pub(super) fn primitive_to_binview<T: NativeType + SerPrimitive>(598from: &PrimitiveArray<T>,599) -> BinaryViewArray {600let mut mutable = MutableBinaryViewArray::with_capacity(from.len());601602let mut scratch = vec![];603for &x in from.values().iter() {604unsafe { scratch.set_len(0) };605T::write(&mut scratch, x);606mutable.push_value_ignore_validity(&scratch)607}608609mutable.freeze().with_validity(from.validity().cloned())610}611612pub(super) fn primitive_to_binview_dyn<T>(from: &dyn Array) -> BinaryViewArray613where614T: NativeType + SerPrimitive,615{616let from = from.as_any().downcast_ref().unwrap();617primitive_to_binview::<T>(from)618}619620621