Path: blob/main/crates/polars-arrow/src/array/physical_binary.rs
6939 views
use crate::bitmap::{BitmapBuilder, MutableBitmap};1use crate::offset::{Offset, Offsets};23/// # Safety4/// The caller must ensure that `iterator` is `TrustedLen`.5#[inline]6#[allow(clippy::type_complexity)]7pub(crate) unsafe fn try_trusted_len_unzip<E, I, P, O>(8iterator: I,9) -> std::result::Result<(Option<MutableBitmap>, Offsets<O>, Vec<u8>), E>10where11O: Offset,12P: AsRef<[u8]>,13I: Iterator<Item = std::result::Result<Option<P>, E>>,14{15let (_, upper) = iterator.size_hint();16let len = upper.expect("trusted_len_unzip requires an upper limit");1718let mut null = BitmapBuilder::with_capacity(len);19let mut offsets = Vec::<O>::with_capacity(len + 1);20let mut values = Vec::<u8>::new();2122let mut length = O::default();23let mut dst = offsets.as_mut_ptr();24std::ptr::write(dst, length);25dst = dst.add(1);26for item in iterator {27if let Some(item) = item? {28null.push_unchecked(true);29let s = item.as_ref();30length += O::from_as_usize(s.len());31values.extend_from_slice(s);32} else {33null.push_unchecked(false);34};3536std::ptr::write(dst, length);37dst = dst.add(1);38}39assert_eq!(40dst.offset_from(offsets.as_ptr()) as usize,41len + 1,42"Trusted iterator length was not accurately reported"43);44offsets.set_len(len + 1);4546Ok((47null.into_opt_mut_validity(),48Offsets::new_unchecked(offsets),49values,50))51}5253/// Creates [`MutableBitmap`] and two [`Vec`]s from an iterator of `Option`.54/// The first buffer corresponds to a offset buffer, the second one55/// corresponds to a values buffer.56/// # Safety57/// The caller must ensure that `iterator` is `TrustedLen`.58#[inline]59pub(crate) unsafe fn trusted_len_unzip<O, I, P>(60iterator: I,61) -> (Option<MutableBitmap>, Offsets<O>, Vec<u8>)62where63O: Offset,64P: AsRef<[u8]>,65I: Iterator<Item = Option<P>>,66{67let (_, upper) = iterator.size_hint();68let len = upper.expect("trusted_len_unzip requires an upper limit");6970let mut offsets = Offsets::<O>::with_capacity(len);71let mut values = Vec::<u8>::new();72let mut validity = MutableBitmap::new();7374extend_from_trusted_len_iter(&mut offsets, &mut values, &mut validity, iterator);7576let validity = if validity.unset_bits() > 0 {77Some(validity)78} else {79None80};8182(validity, offsets, values)83}8485/// Creates two [`Buffer`]s from an iterator of `&[u8]`.86/// The first buffer corresponds to a offset buffer, the second to a values buffer.87/// # Safety88/// The caller must ensure that `iterator` is [`TrustedLen`].89#[inline]90pub(crate) unsafe fn trusted_len_values_iter<O, I, P>(iterator: I) -> (Offsets<O>, Vec<u8>)91where92O: Offset,93P: AsRef<[u8]>,94I: Iterator<Item = P>,95{96let (_, upper) = iterator.size_hint();97let len = upper.expect("trusted_len_unzip requires an upper limit");9899let mut offsets = Offsets::<O>::with_capacity(len);100let mut values = Vec::<u8>::new();101102extend_from_trusted_len_values_iter(&mut offsets, &mut values, iterator);103104(offsets, values)105}106107// Populates `offsets` and `values` [`Vec`]s with information extracted108// from the incoming `iterator`.109// # Safety110// The caller must ensure the `iterator` is [`TrustedLen`]111#[inline]112pub(crate) unsafe fn extend_from_trusted_len_values_iter<I, P, O>(113offsets: &mut Offsets<O>,114values: &mut Vec<u8>,115iterator: I,116) where117O: Offset,118P: AsRef<[u8]>,119I: Iterator<Item = P>,120{121let lengths = iterator.map(|item| {122let s = item.as_ref();123// Push new entries for both `values` and `offsets` buffer124values.extend_from_slice(s);125s.len()126});127offsets.try_extend_from_lengths(lengths).unwrap();128}129130// Populates `offsets` and `values` [`Vec`]s with information extracted131// from the incoming `iterator`.132// the return value indicates how many items were added.133#[inline]134pub(crate) fn extend_from_values_iter<I, P, O>(135offsets: &mut Offsets<O>,136values: &mut Vec<u8>,137iterator: I,138) -> usize139where140O: Offset,141P: AsRef<[u8]>,142I: Iterator<Item = P>,143{144let (size_hint, _) = iterator.size_hint();145146offsets.reserve(size_hint);147148let start_index = offsets.len_proxy();149150for item in iterator {151let bytes = item.as_ref();152values.extend_from_slice(bytes);153offsets.try_push(bytes.len()).unwrap();154}155offsets.len_proxy() - start_index156}157158// Populates `offsets`, `values`, and `validity` [`Vec`]s with159// information extracted from the incoming `iterator`.160//161// # Safety162// The caller must ensure that `iterator` is [`TrustedLen`]163#[inline]164pub(crate) unsafe fn extend_from_trusted_len_iter<O, I, P>(165offsets: &mut Offsets<O>,166values: &mut Vec<u8>,167validity: &mut MutableBitmap,168iterator: I,169) where170O: Offset,171P: AsRef<[u8]>,172I: Iterator<Item = Option<P>>,173{174let (_, upper) = iterator.size_hint();175let additional = upper.expect("extend_from_trusted_len_iter requires an upper limit");176177offsets.reserve(additional);178validity.reserve(additional);179180let lengths = iterator.map(|item| {181if let Some(item) = item {182let bytes = item.as_ref();183values.extend_from_slice(bytes);184validity.push_unchecked(true);185bytes.len()186} else {187validity.push_unchecked(false);1880189}190});191offsets.try_extend_from_lengths(lengths).unwrap();192}193194/// Creates two [`Vec`]s from an iterator of `&[u8]`.195/// The first buffer corresponds to a offset buffer, the second to a values buffer.196#[inline]197pub(crate) fn values_iter<O, I, P>(iterator: I) -> (Offsets<O>, Vec<u8>)198where199O: Offset,200P: AsRef<[u8]>,201I: Iterator<Item = P>,202{203let (lower, _) = iterator.size_hint();204205let mut offsets = Offsets::<O>::with_capacity(lower);206let mut values = Vec::<u8>::new();207208for item in iterator {209let s = item.as_ref();210values.extend_from_slice(s);211offsets.try_push(s.len()).unwrap();212}213(offsets, values)214}215216/// Extends `validity` with all items from `other`217pub(crate) fn extend_validity(218length: usize,219validity: &mut Option<MutableBitmap>,220other: &Option<MutableBitmap>,221) {222if let Some(other) = other {223if let Some(validity) = validity {224let slice = other.as_slice();225// SAFETY: invariant offset + length <= slice.len()226unsafe { validity.extend_from_slice_unchecked(slice, 0, other.len()) }227} else {228let mut new_validity = MutableBitmap::from_len_set(length);229new_validity.extend_from_slice(other.as_slice(), 0, other.len());230*validity = Some(new_validity);231}232}233}234235236