Path: blob/main/crates/polars-compute/src/gather/generic_binary.rs
6939 views
use arrow::array::{GenericBinaryArray, PrimitiveArray};1use arrow::bitmap::{Bitmap, BitmapBuilder};2use arrow::buffer::Buffer;3use arrow::offset::{Offset, Offsets, OffsetsBuffer};4use polars_utils::vec::{CapacityByFactor, PushUnchecked};56use super::Index;78fn create_offsets<I: Iterator<Item = usize>, O: Offset>(9lengths: I,10idx_len: usize,11) -> OffsetsBuffer<O> {12let mut length_so_far = O::default();13let mut offsets = Vec::with_capacity(idx_len + 1);14offsets.push(length_so_far);1516for len in lengths {17unsafe {18length_so_far += O::from_usize(len).unwrap_unchecked();19offsets.push_unchecked(length_so_far)20};21}22unsafe { Offsets::new_unchecked(offsets).into() }23}2425pub(super) unsafe fn take_values<O: Offset>(26length: O,27starts: &[O],28offsets: &OffsetsBuffer<O>,29values: &[u8],30) -> Buffer<u8> {31let new_len = length.to_usize();32let mut buffer = Vec::with_capacity(new_len);33starts34.iter()35.map(|start| start.to_usize())36.zip(offsets.lengths())37.for_each(|(start, length)| {38let end = start + length;39buffer.extend_from_slice(values.get_unchecked(start..end));40});41buffer.into()42}4344// take implementation when neither values nor indices contain nulls45pub(super) unsafe fn take_no_validity_unchecked<O: Offset, I: Index>(46offsets: &OffsetsBuffer<O>,47values: &[u8],48indices: &[I],49) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {50let values_len = offsets.last().to_usize();51let fraction_estimate = indices.len() as f64 / offsets.len() as f64 + 0.3;52let mut buffer = Vec::<u8>::with_capacity_by_factor(values_len, fraction_estimate);5354let lengths = indices.iter().map(|index| index.to_usize()).map(|index| {55let (start, end) = offsets.start_end_unchecked(index);56buffer.extend_from_slice(values.get_unchecked(start..end));57end - start58});59let offsets = create_offsets(lengths, indices.len());6061(offsets, buffer.into(), None)62}6364// take implementation when only values contain nulls65pub(super) unsafe fn take_values_validity<O: Offset, I: Index, A: GenericBinaryArray<O>>(66values: &A,67indices: &[I],68) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {69let validity_values = values.validity().unwrap();70let validity = indices71.iter()72.map(|index| validity_values.get_bit_unchecked(index.to_usize()));73let validity = Bitmap::from_trusted_len_iter(validity);7475let mut total_length = O::default();7677let offsets = values.offsets();78let values_values = values.values();7980let mut starts = Vec::<O>::with_capacity(indices.len());81let lengths = indices.iter().map(|index| {82let index = index.to_usize();83let start = *offsets.get_unchecked(index);84let length = *offsets.get_unchecked(index + 1) - start;85total_length += length;86starts.push_unchecked(start);87length.to_usize()88});89let offsets = create_offsets(lengths, indices.len());90let buffer = take_values(total_length, starts.as_slice(), &offsets, values_values);9192(offsets, buffer, validity.into())93}9495// take implementation when only indices contain nulls96pub(super) unsafe fn take_indices_validity<O: Offset, I: Index>(97offsets: &OffsetsBuffer<O>,98values: &[u8],99indices: &PrimitiveArray<I>,100) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {101let mut total_length = O::default();102103let offsets = offsets.buffer();104105let mut starts = Vec::<O>::with_capacity(indices.len());106let lengths = indices.values().iter().map(|index| {107let index = index.to_usize();108let length;109match offsets.get(index + 1) {110Some(&next) => {111let start = *offsets.get_unchecked(index);112length = next - start;113total_length += length;114starts.push_unchecked(start);115},116None => {117length = O::zero();118starts.push_unchecked(O::default());119},120};121length.to_usize()122});123let offsets = create_offsets(lengths, indices.len());124125let buffer = take_values(total_length, &starts, &offsets, values);126127(offsets, buffer, indices.validity().cloned())128}129130// take implementation when both indices and values contain nulls131pub(super) unsafe fn take_values_indices_validity<O: Offset, I: Index, A: GenericBinaryArray<O>>(132values: &A,133indices: &PrimitiveArray<I>,134) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {135let mut total_length = O::default();136let mut validity = BitmapBuilder::with_capacity(indices.len());137138let values_validity = values.validity().unwrap();139let offsets = values.offsets();140let values_values = values.values();141142let mut starts = Vec::<O>::with_capacity(indices.len());143let lengths = indices.iter().map(|index| {144let length;145match index {146Some(index) => {147let index = index.to_usize();148if values_validity.get_bit(index) {149validity.push(true);150length = *offsets.get_unchecked(index + 1) - *offsets.get_unchecked(index);151starts.push_unchecked(*offsets.get_unchecked(index));152} else {153validity.push(false);154length = O::zero();155starts.push_unchecked(O::default());156}157},158None => {159validity.push(false);160length = O::zero();161starts.push_unchecked(O::default());162},163};164total_length += length;165length.to_usize()166});167let offsets = create_offsets(lengths, indices.len());168169let buffer = take_values(total_length, &starts, &offsets, values_values);170171(offsets, buffer, validity.into_opt_validity())172}173174175