Path: blob/main/crates/polars-core/src/chunked_array/from.rs
6940 views
use super::*;12#[allow(clippy::all)]3fn from_chunks_list_dtype(chunks: &mut Vec<ArrayRef>, dtype: DataType) -> DataType {4// ensure we don't get List<null>5if let Some(arr) = chunks.get(0) {6DataType::from_arrow_dtype(arr.dtype())7} else {8dtype9}10}1112impl<T, A> From<A> for ChunkedArray<T>13where14T: PolarsDataType<Array = A>,15A: Array,16{17fn from(arr: A) -> Self {18Self::with_chunk(PlSmallStr::EMPTY, arr)19}20}2122impl<T> ChunkedArray<T>23where24T: PolarsDataType,25{26pub fn with_chunk<A>(name: PlSmallStr, arr: A) -> Self27where28A: Array,29T: PolarsDataType<Array = A>,30{31unsafe { Self::from_chunks(name, vec![Box::new(arr)]) }32}3334pub fn with_chunk_like<A>(ca: &Self, arr: A) -> Self35where36A: Array,37T: PolarsDataType<Array = A>,38{39Self::from_chunk_iter_like(ca, std::iter::once(arr))40}4142pub fn from_chunk_iter<I>(name: PlSmallStr, iter: I) -> Self43where44I: IntoIterator,45T: PolarsDataType<Array = <I as IntoIterator>::Item>,46<I as IntoIterator>::Item: Array,47{48let chunks = iter49.into_iter()50.map(|x| Box::new(x) as Box<dyn Array>)51.collect();52unsafe { Self::from_chunks(name, chunks) }53}5455pub fn from_chunk_iter_like<I>(ca: &Self, iter: I) -> Self56where57I: IntoIterator,58T: PolarsDataType<Array = <I as IntoIterator>::Item>,59<I as IntoIterator>::Item: Array,60{61let chunks = iter62.into_iter()63.map(|x| Box::new(x) as Box<dyn Array>)64.collect();65unsafe {66Self::from_chunks_and_dtype_unchecked(ca.name().clone(), chunks, ca.dtype().clone())67}68}6970pub fn try_from_chunk_iter<I, A, E>(name: PlSmallStr, iter: I) -> Result<Self, E>71where72I: IntoIterator<Item = Result<A, E>>,73T: PolarsDataType<Array = A>,74A: Array,75{76let chunks: Result<_, _> = iter77.into_iter()78.map(|x| Ok(Box::new(x?) as Box<dyn Array>))79.collect();80unsafe { Ok(Self::from_chunks(name, chunks?)) }81}8283pub(crate) fn from_chunk_iter_and_field<I>(field: Arc<Field>, chunks: I) -> Self84where85I: IntoIterator,86T: PolarsDataType<Array = <I as IntoIterator>::Item>,87<I as IntoIterator>::Item: Array,88{89assert_eq!(90std::mem::discriminant(&T::get_static_dtype()),91std::mem::discriminant(&field.dtype)92);9394let mut length = 0;95let mut null_count = 0;96let chunks = chunks97.into_iter()98.map(|x| {99length += x.len();100null_count += x.null_count();101Box::new(x) as Box<dyn Array>102})103.collect();104105unsafe { ChunkedArray::new_with_dims(field, chunks, length, null_count) }106}107108/// Create a new [`ChunkedArray`] from existing chunks.109///110/// # Safety111/// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.112pub unsafe fn from_chunks(name: PlSmallStr, mut chunks: Vec<ArrayRef>) -> Self {113let dtype = match T::get_static_dtype() {114dtype @ DataType::List(_) => from_chunks_list_dtype(&mut chunks, dtype),115#[cfg(feature = "dtype-array")]116dtype @ DataType::Array(_, _) => from_chunks_list_dtype(&mut chunks, dtype),117#[cfg(feature = "dtype-struct")]118dtype @ DataType::Struct(_) => from_chunks_list_dtype(&mut chunks, dtype),119dt => dt,120};121Self::from_chunks_and_dtype(name, chunks, dtype)122}123124/// # Safety125/// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.126pub unsafe fn with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {127ChunkedArray::new_with_compute_len(self.field.clone(), chunks)128}129130/// Create a new [`ChunkedArray`] from existing chunks.131///132/// # Safety133///134/// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.135pub unsafe fn from_chunks_and_dtype(136name: PlSmallStr,137chunks: Vec<ArrayRef>,138dtype: DataType,139) -> Self {140// assertions in debug mode141// that check if the data types in the arrays are as expected142#[cfg(debug_assertions)]143{144if !chunks.is_empty() && !chunks[0].is_empty() && dtype.is_primitive() {145assert_eq!(chunks[0].dtype(), &dtype.to_arrow(CompatLevel::newest()))146}147}148149Self::from_chunks_and_dtype_unchecked(name, chunks, dtype)150}151152/// Create a new [`ChunkedArray`] from existing chunks.153///154/// # Safety155///156/// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.157pub(crate) unsafe fn from_chunks_and_dtype_unchecked(158name: PlSmallStr,159chunks: Vec<ArrayRef>,160dtype: DataType,161) -> Self {162let field = Arc::new(Field::new(name, dtype));163ChunkedArray::new_with_compute_len(field, chunks)164}165166pub fn full_null_like(ca: &Self, length: usize) -> Self {167let chunks = std::iter::once(T::Array::full_null(168length,169ca.dtype().to_arrow(CompatLevel::newest()),170));171Self::from_chunk_iter_like(ca, chunks)172}173}174175impl<T> ChunkedArray<T>176where177T: PolarsNumericType,178{179/// Create a new ChunkedArray by taking ownership of the Vec. This operation is zero copy.180pub fn from_vec(name: PlSmallStr, v: Vec<T::Native>) -> Self {181Self::with_chunk(name, to_primitive::<T>(v, None))182}183184/// Create a new ChunkedArray from a Vec and a validity mask.185pub fn from_vec_validity(186name: PlSmallStr,187values: Vec<T::Native>,188buffer: Option<Bitmap>,189) -> Self {190let arr = to_array::<T>(values, buffer);191ChunkedArray::new_with_compute_len(192Arc::new(Field::new(name, T::get_static_dtype())),193vec![arr],194)195}196197/// Create a temporary [`ChunkedArray`] from a slice.198///199/// # Safety200/// The lifetime will be bound to the lifetime of the slice.201/// This will not be checked by the borrowchecker.202pub unsafe fn mmap_slice(name: PlSmallStr, values: &[T::Native]) -> Self {203Self::with_chunk(name, arrow::ffi::mmap::slice(values))204}205}206207impl BooleanChunked {208/// Create a temporary [`ChunkedArray`] from a slice.209///210/// # Safety211/// The lifetime will be bound to the lifetime of the slice.212/// This will not be checked by the borrowchecker.213pub unsafe fn mmap_slice(name: PlSmallStr, values: &[u8], offset: usize, len: usize) -> Self {214let arr = arrow::ffi::mmap::bitmap(values, offset, len).unwrap();215Self::with_chunk(name, arr)216}217218pub fn from_bitmap(name: PlSmallStr, bitmap: Bitmap) -> Self {219Self::with_chunk(220name,221BooleanArray::new(ArrowDataType::Boolean, bitmap, None),222)223}224}225226impl<'a, T> From<&'a ChunkedArray<T>> for Vec<Option<T::Physical<'a>>>227where228T: PolarsDataType,229{230fn from(ca: &'a ChunkedArray<T>) -> Self {231let mut out = Vec::with_capacity(ca.len());232for arr in ca.downcast_iter() {233out.extend(arr.iter())234}235out236}237}238impl From<StringChunked> for Vec<Option<String>> {239fn from(ca: StringChunked) -> Self {240ca.iter().map(|opt| opt.map(|s| s.to_string())).collect()241}242}243244impl From<BooleanChunked> for Vec<Option<bool>> {245fn from(ca: BooleanChunked) -> Self {246let mut out = Vec::with_capacity(ca.len());247for arr in ca.downcast_iter() {248out.extend(arr.iter())249}250out251}252}253254255