Path: blob/main/crates/polars-arrow/src/array/builder.rs
6939 views
use polars_utils::IdxSize;12use crate::array::binary::BinaryArrayBuilder;3use crate::array::binview::BinaryViewArrayGenericBuilder;4use crate::array::boolean::BooleanArrayBuilder;5use crate::array::fixed_size_binary::FixedSizeBinaryArrayBuilder;6use crate::array::fixed_size_list::FixedSizeListArrayBuilder;7use crate::array::list::ListArrayBuilder;8use crate::array::null::NullArrayBuilder;9use crate::array::struct_::StructArrayBuilder;10use crate::array::{Array, PrimitiveArrayBuilder};11use crate::datatypes::{ArrowDataType, PhysicalType};12use crate::with_match_primitive_type_full;1314/// Used for arrays which can share buffers with input arrays to appends,15/// gathers, etc.16#[derive(Copy, Clone, Debug)]17pub enum ShareStrategy {18Never,19Always,20}2122pub trait StaticArrayBuilder: Send {23type Array: Array;2425fn dtype(&self) -> &ArrowDataType;26fn reserve(&mut self, additional: usize);2728/// Consume this builder returning the built array.29fn freeze(self) -> Self::Array;3031/// Return the built array and reset to an empty state.32fn freeze_reset(&mut self) -> Self::Array;3334/// Returns the length of this builder (so far).35fn len(&self) -> usize;3637/// Extend this builder with the given number of null elements.38fn extend_nulls(&mut self, length: usize);3940/// Extends this builder with the contents of the given array. May panic if41/// other does not match the dtype of this array.42fn extend(&mut self, other: &Self::Array, share: ShareStrategy) {43self.subslice_extend(other, 0, other.len(), share);44}4546/// Extends this builder with the contents of the given array subslice. May47/// panic if other does not match the dtype of this array.48fn subslice_extend(49&mut self,50other: &Self::Array,51start: usize,52length: usize,53share: ShareStrategy,54);5556/// The same as subslice_extend, but repeats the extension `repeats` times.57fn subslice_extend_repeated(58&mut self,59other: &Self::Array,60start: usize,61length: usize,62repeats: usize,63share: ShareStrategy,64) {65self.reserve(length * repeats);66for _ in 0..repeats {67self.subslice_extend(other, start, length, share)68}69}7071/// The same as subslice_extend, but repeats each element `repeats` times.72fn subslice_extend_each_repeated(73&mut self,74other: &Self::Array,75start: usize,76length: usize,77repeats: usize,78share: ShareStrategy,79);8081/// Extends this builder with the contents of the given array at the given82/// indices. That is, `other[idxs[i]]` is appended to this array in order,83/// for each i=0..idxs.len(). May panic if other does not match the84/// dtype of this array.85///86/// # Safety87/// The indices must be in-bounds.88unsafe fn gather_extend(&mut self, other: &Self::Array, idxs: &[IdxSize], share: ShareStrategy);8990/// Extends this builder with the contents of the given array at the given91/// indices. That is, `other[idxs[i]]` is appended to this array in order,92/// for each i=0..idxs.len(). May panic if other does not match the93/// dtype of this array. Out-of-bounds indices are mapped to nulls.94fn opt_gather_extend(&mut self, other: &Self::Array, idxs: &[IdxSize], share: ShareStrategy);95}9697impl<T: StaticArrayBuilder> ArrayBuilder for T {98#[inline(always)]99fn dtype(&self) -> &ArrowDataType {100StaticArrayBuilder::dtype(self)101}102103#[inline(always)]104fn reserve(&mut self, additional: usize) {105StaticArrayBuilder::reserve(self, additional)106}107108#[inline(always)]109fn freeze(self) -> Box<dyn Array> {110Box::new(StaticArrayBuilder::freeze(self))111}112113#[inline(always)]114fn freeze_reset(&mut self) -> Box<dyn Array> {115Box::new(StaticArrayBuilder::freeze_reset(self))116}117118#[inline(always)]119fn len(&self) -> usize {120StaticArrayBuilder::len(self)121}122123#[inline(always)]124fn extend_nulls(&mut self, length: usize) {125StaticArrayBuilder::extend_nulls(self, length);126}127128#[inline(always)]129fn subslice_extend(130&mut self,131other: &dyn Array,132start: usize,133length: usize,134share: ShareStrategy,135) {136let other: &T::Array = other.as_any().downcast_ref().unwrap();137StaticArrayBuilder::subslice_extend(self, other, start, length, share);138}139140#[inline(always)]141fn subslice_extend_repeated(142&mut self,143other: &dyn Array,144start: usize,145length: usize,146repeats: usize,147share: ShareStrategy,148) {149let other: &T::Array = other.as_any().downcast_ref().unwrap();150StaticArrayBuilder::subslice_extend_repeated(self, other, start, length, repeats, share);151}152153#[inline(always)]154fn subslice_extend_each_repeated(155&mut self,156other: &dyn Array,157start: usize,158length: usize,159repeats: usize,160share: ShareStrategy,161) {162let other: &T::Array = other.as_any().downcast_ref().unwrap();163StaticArrayBuilder::subslice_extend_each_repeated(164self, other, start, length, repeats, share,165);166}167168#[inline(always)]169unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {170let other: &T::Array = other.as_any().downcast_ref().unwrap();171StaticArrayBuilder::gather_extend(self, other, idxs, share);172}173174#[inline(always)]175fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {176let other: &T::Array = other.as_any().downcast_ref().unwrap();177StaticArrayBuilder::opt_gather_extend(self, other, idxs, share);178}179}180181#[allow(private_bounds)]182pub trait ArrayBuilder: ArrayBuilderBoxedHelper + Send {183fn dtype(&self) -> &ArrowDataType;184fn reserve(&mut self, additional: usize);185186/// Consume this builder returning the built array.187fn freeze(self) -> Box<dyn Array>;188189/// Return the built array and reset to an empty state.190fn freeze_reset(&mut self) -> Box<dyn Array>;191192/// Returns the length of this builder (so far).193fn len(&self) -> usize;194195/// Extend this builder with the given number of null elements.196fn extend_nulls(&mut self, length: usize);197198/// Extends this builder with the contents of the given array. May panic if199/// other does not match the dtype of this array.200fn extend(&mut self, other: &dyn Array, share: ShareStrategy) {201self.subslice_extend(other, 0, other.len(), share);202}203204/// Extends this builder with the contents of the given array subslice. May205/// panic if other does not match the dtype of this array.206fn subslice_extend(207&mut self,208other: &dyn Array,209start: usize,210length: usize,211share: ShareStrategy,212);213214/// The same as subslice_extend, but repeats the extension `repeats` times.215fn subslice_extend_repeated(216&mut self,217other: &dyn Array,218start: usize,219length: usize,220repeats: usize,221share: ShareStrategy,222);223224/// The same as subslice_extend, but repeats each element `repeats` times.225fn subslice_extend_each_repeated(226&mut self,227other: &dyn Array,228start: usize,229length: usize,230repeats: usize,231share: ShareStrategy,232);233234/// Extends this builder with the contents of the given array at the given235/// indices. That is, `other[idxs[i]]` is appended to this array in order,236/// for each i=0..idxs.len(). May panic if other does not match the237/// dtype of this array.238///239/// # Safety240/// The indices must be in-bounds.241unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy);242243/// Extends this builder with the contents of the given array at the given244/// indices. That is, `other[idxs[i]]` is appended to this array in order,245/// for each i=0..idxs.len(). May panic if other does not match the246/// dtype of this array. Out-of-bounds indices are mapped to nulls.247fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy);248}249250/// A hack that lets us call the consuming `freeze` method on Box<dyn ArrayBuilder>.251trait ArrayBuilderBoxedHelper {252fn freeze_boxed(self: Box<Self>) -> Box<dyn Array>;253}254255impl<T: ArrayBuilder> ArrayBuilderBoxedHelper for T {256fn freeze_boxed(self: Box<Self>) -> Box<dyn Array> {257self.freeze()258}259}260261impl ArrayBuilder for Box<dyn ArrayBuilder> {262#[inline(always)]263fn dtype(&self) -> &ArrowDataType {264(**self).dtype()265}266267#[inline(always)]268fn reserve(&mut self, additional: usize) {269(**self).reserve(additional)270}271272#[inline(always)]273fn freeze(self) -> Box<dyn Array> {274self.freeze_boxed()275}276277#[inline(always)]278fn freeze_reset(&mut self) -> Box<dyn Array> {279(**self).freeze_reset()280}281282#[inline(always)]283fn len(&self) -> usize {284(**self).len()285}286287#[inline(always)]288fn extend_nulls(&mut self, length: usize) {289(**self).extend_nulls(length);290}291292#[inline(always)]293fn subslice_extend(294&mut self,295other: &dyn Array,296start: usize,297length: usize,298share: ShareStrategy,299) {300(**self).subslice_extend(other, start, length, share);301}302303#[inline(always)]304fn subslice_extend_repeated(305&mut self,306other: &dyn Array,307start: usize,308length: usize,309repeats: usize,310share: ShareStrategy,311) {312(**self).subslice_extend_repeated(other, start, length, repeats, share);313}314315#[inline(always)]316fn subslice_extend_each_repeated(317&mut self,318other: &dyn Array,319start: usize,320length: usize,321repeats: usize,322share: ShareStrategy,323) {324(**self).subslice_extend_each_repeated(other, start, length, repeats, share);325}326327#[inline(always)]328unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {329(**self).gather_extend(other, idxs, share);330}331332#[inline(always)]333fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {334(**self).opt_gather_extend(other, idxs, share);335}336}337338/// Construct an ArrayBuilder for the given type.339pub fn make_builder(dtype: &ArrowDataType) -> Box<dyn ArrayBuilder> {340use PhysicalType::*;341match dtype.to_physical_type() {342Null => Box::new(NullArrayBuilder::new(dtype.clone())),343Boolean => Box::new(BooleanArrayBuilder::new(dtype.clone())),344Primitive(prim_t) => with_match_primitive_type_full!(prim_t, |$T| {345Box::new(PrimitiveArrayBuilder::<$T>::new(dtype.clone()))346}),347LargeBinary => Box::new(BinaryArrayBuilder::<i64>::new(dtype.clone())),348FixedSizeBinary => Box::new(FixedSizeBinaryArrayBuilder::new(dtype.clone())),349LargeList => {350let ArrowDataType::LargeList(inner_dt) = dtype else {351unreachable!()352};353Box::new(ListArrayBuilder::<i64, _>::new(354dtype.clone(),355make_builder(inner_dt.dtype()),356))357},358FixedSizeList => {359let ArrowDataType::FixedSizeList(inner_dt, _) = dtype else {360unreachable!()361};362Box::new(FixedSizeListArrayBuilder::new(363dtype.clone(),364make_builder(inner_dt.dtype()),365))366},367Struct => {368let ArrowDataType::Struct(fields) = dtype else {369unreachable!()370};371let builders = fields.iter().map(|f| make_builder(f.dtype())).collect();372Box::new(StructArrayBuilder::new(dtype.clone(), builders))373},374BinaryView => Box::new(BinaryViewArrayGenericBuilder::<[u8]>::new(dtype.clone())),375Utf8View => Box::new(BinaryViewArrayGenericBuilder::<str>::new(dtype.clone())),376377List | Binary | Utf8 | LargeUtf8 | Map | Union | Dictionary(_) => {378unimplemented!()379},380}381}382383384