Path: blob/main/crates/polars-arrow/src/array/list/builder.rs
6939 views
use polars_utils::IdxSize;12use super::ListArray;3use crate::array::builder::{ArrayBuilder, ShareStrategy, StaticArrayBuilder};4use crate::bitmap::OptBitmapBuilder;5use crate::datatypes::ArrowDataType;6use crate::offset::{Offset, Offsets, OffsetsBuffer};78pub struct ListArrayBuilder<O: Offset, B: ArrayBuilder> {9dtype: ArrowDataType,10offsets: Offsets<O>,11inner_builder: B,12validity: OptBitmapBuilder,13}1415impl<O: Offset, B: ArrayBuilder> ListArrayBuilder<O, B> {16pub fn new(dtype: ArrowDataType, inner_builder: B) -> Self {17Self {18dtype,19inner_builder,20offsets: Offsets::new(),21validity: OptBitmapBuilder::default(),22}23}24}2526impl<O: Offset, B: ArrayBuilder> StaticArrayBuilder for ListArrayBuilder<O, B> {27type Array = ListArray<O>;2829fn dtype(&self) -> &ArrowDataType {30&self.dtype31}3233fn reserve(&mut self, additional: usize) {34self.offsets.reserve(additional);35self.validity.reserve(additional);36// No inner reserve, we have no idea how large it needs to be.37}3839fn freeze(self) -> ListArray<O> {40let offsets = OffsetsBuffer::from(self.offsets);41let values = self.inner_builder.freeze();42let validity = self.validity.into_opt_validity();43ListArray::new(self.dtype, offsets, values, validity)44}4546fn freeze_reset(&mut self) -> Self::Array {47let offsets = OffsetsBuffer::from(core::mem::take(&mut self.offsets));48let values = self.inner_builder.freeze_reset();49let validity = core::mem::take(&mut self.validity).into_opt_validity();50ListArray::new(self.dtype.clone(), offsets, values, validity)51}5253fn len(&self) -> usize {54self.offsets.len_proxy()55}5657fn extend_nulls(&mut self, length: usize) {58self.offsets.extend_constant(length);59self.validity.extend_constant(length, false);60}6162fn subslice_extend(63&mut self,64other: &ListArray<O>,65start: usize,66length: usize,67share: ShareStrategy,68) {69let start_offset = other.offsets()[start].to_usize();70let stop_offset = other.offsets()[start + length].to_usize();71self.offsets72.try_extend_from_slice(other.offsets(), start, length)73.unwrap();74self.inner_builder.subslice_extend(75&**other.values(),76start_offset,77stop_offset - start_offset,78share,79);80self.validity81.subslice_extend_from_opt_validity(other.validity(), start, length);82}8384fn subslice_extend_each_repeated(85&mut self,86other: &ListArray<O>,87start: usize,88length: usize,89repeats: usize,90share: ShareStrategy,91) {92let other_offsets = other.offsets();93let other_values = &**other.values();9495let start_offset = other.offsets()[start].to_usize();96let stop_offset = other.offsets()[start + length].to_usize();97self.offsets.reserve(length * repeats);98self.inner_builder99.reserve((stop_offset - start_offset) * repeats);100for offset_idx in start..start + length {101let sublist_start = other_offsets[offset_idx].to_usize();102let sublist_stop = other_offsets[offset_idx + 1].to_usize();103for _ in 0..repeats {104self.offsets.try_push(sublist_stop - sublist_start).unwrap();105}106self.inner_builder.subslice_extend_repeated(107other_values,108sublist_start,109sublist_stop - sublist_start,110repeats,111share,112);113}114self.validity115.subslice_extend_each_repeated_from_opt_validity(116other.validity(),117start,118length,119repeats,120);121}122123unsafe fn gather_extend(124&mut self,125other: &ListArray<O>,126idxs: &[IdxSize],127share: ShareStrategy,128) {129let other_values = &**other.values();130let other_offsets = other.offsets();131132// Pre-compute proper length for reserve.133let total_len: usize = idxs134.iter()135.map(|i| {136let start = other_offsets.get_unchecked(*i as usize).to_usize();137let stop = other_offsets.get_unchecked(*i as usize + 1).to_usize();138stop - start139})140.sum();141self.inner_builder.reserve(total_len);142143// Group consecutive indices into larger copies.144let mut group_start = 0;145while group_start < idxs.len() {146let start_idx = idxs[group_start] as usize;147let mut group_len = 1;148while group_start + group_len < idxs.len()149&& idxs[group_start + group_len] as usize == start_idx + group_len150{151group_len += 1;152}153154let start_offset = other_offsets.get_unchecked(start_idx).to_usize();155let stop_offset = other_offsets156.get_unchecked(start_idx + group_len)157.to_usize();158self.offsets159.try_extend_from_slice(other_offsets, start_idx, group_len)160.unwrap();161self.inner_builder.subslice_extend(162other_values,163start_offset,164stop_offset - start_offset,165share,166);167group_start += group_len;168}169170self.validity171.gather_extend_from_opt_validity(other.validity(), idxs);172}173174fn opt_gather_extend(&mut self, other: &ListArray<O>, idxs: &[IdxSize], share: ShareStrategy) {175let other_values = &**other.values();176let other_offsets = other.offsets();177178unsafe {179// Pre-compute proper length for reserve.180let total_len: usize = idxs181.iter()182.map(|idx| {183if (*idx as usize) < other.len() {184let start = other_offsets.get_unchecked(*idx as usize).to_usize();185let stop = other_offsets.get_unchecked(*idx as usize + 1).to_usize();186stop - start187} else {1880189}190})191.sum();192self.inner_builder.reserve(total_len);193194// Group consecutive indices into larger copies.195let mut group_start = 0;196while group_start < idxs.len() {197let start_idx = idxs[group_start] as usize;198let mut group_len = 1;199let in_bounds = start_idx < other.len();200201if in_bounds {202while group_start + group_len < idxs.len()203&& idxs[group_start + group_len] as usize == start_idx + group_len204&& start_idx + group_len < other.len()205{206group_len += 1;207}208209let start_offset = other_offsets.get_unchecked(start_idx).to_usize();210let stop_offset = other_offsets211.get_unchecked(start_idx + group_len)212.to_usize();213self.offsets214.try_extend_from_slice(other_offsets, start_idx, group_len)215.unwrap();216self.inner_builder.subslice_extend(217other_values,218start_offset,219stop_offset - start_offset,220share,221);222} else {223while group_start + group_len < idxs.len()224&& idxs[group_start + group_len] as usize >= other.len()225{226group_len += 1;227}228self.offsets.extend_constant(group_len);229}230group_start += group_len;231}232233self.validity234.opt_gather_extend_from_opt_validity(other.validity(), idxs, other.len());235}236}237}238239240