Path: blob/main/crates/polars-arrow/src/legacy/array/utf8.rs
6939 views
use crate::array::{BinaryArray, Utf8Array};1use crate::datatypes::ArrowDataType;2use crate::legacy::trusted_len::TrustedLenPush;3use crate::offset::Offsets;45#[inline]6unsafe fn extend_from_trusted_len_values_iter<I, P>(7offsets: &mut Vec<i64>,8values: &mut Vec<u8>,9iterator: I,10) where11P: AsRef<[u8]>,12I: Iterator<Item = P>,13{14let mut total_length = 0;15offsets.push(total_length);16iterator.for_each(|item| {17let s = item.as_ref();18// Push new entries for both `values` and `offsets` buffer19values.extend_from_slice(s);2021total_length += s.len() as i64;22offsets.push_unchecked(total_length);23});24}2526/// # Safety27/// reported `len` must be correct.28#[inline]29unsafe fn fill_offsets_and_values<I, P>(30iterator: I,31value_capacity: usize,32len: usize,33) -> (Offsets<i64>, Vec<u8>)34where35P: AsRef<[u8]>,36I: Iterator<Item = P>,37{38let mut offsets = Vec::with_capacity(len + 1);39let mut values = Vec::<u8>::with_capacity(value_capacity);4041extend_from_trusted_len_values_iter(&mut offsets, &mut values, iterator);4243(Offsets::new_unchecked(offsets), values)44}4546struct StrAsBytes<P>(P);47impl<T: AsRef<str>> AsRef<[u8]> for StrAsBytes<T> {48#[inline(always)]49fn as_ref(&self) -> &[u8] {50self.0.as_ref().as_bytes()51}52}5354pub trait Utf8FromIter {55#[inline]56fn from_values_iter<I, S>(iter: I, len: usize, size_hint: usize) -> Utf8Array<i64>57where58S: AsRef<str>,59I: Iterator<Item = S>,60{61let iter = iter.map(StrAsBytes);62let (offsets, values) = unsafe { fill_offsets_and_values(iter, size_hint, len) };63unsafe {64Utf8Array::new_unchecked(65ArrowDataType::LargeUtf8,66offsets.into(),67values.into(),68None,69)70}71}72}7374impl Utf8FromIter for Utf8Array<i64> {}7576pub trait BinaryFromIter {77#[inline]78fn from_values_iter<I, S>(iter: I, len: usize, value_cap: usize) -> BinaryArray<i64>79where80S: AsRef<[u8]>,81I: Iterator<Item = S>,82{83let (offsets, values) = unsafe { fill_offsets_and_values(iter, value_cap, len) };84BinaryArray::new(85ArrowDataType::LargeBinary,86offsets.into(),87values.into(),88None,89)90}91}9293impl BinaryFromIter for BinaryArray<i64> {}949596