// Path: blob/main/crates/polars-arrow/src/array/map/mod.rs
// 6939 views
use super::specification::try_check_offsets_bounds;1use super::{Array, Splitable, new_empty_array};2use crate::bitmap::Bitmap;3use crate::datatypes::{ArrowDataType, Field};4use crate::offset::OffsetsBuffer;56mod ffi;7pub(super) mod fmt;8mod iterator;910use polars_error::{PolarsResult, polars_bail};1112/// An array representing a (key, value), both of arbitrary logical types.13#[derive(Clone)]14pub struct MapArray {15dtype: ArrowDataType,16// invariant: field.len() == offsets.len()17offsets: OffsetsBuffer<i32>,18field: Box<dyn Array>,19// invariant: offsets.len() - 1 == Bitmap::len()20validity: Option<Bitmap>,21}2223impl MapArray {24/// Returns a new [`MapArray`].25/// # Errors26/// This function errors iff:27/// * `offsets.last()` is greater than `field.len()`28/// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Map`]29/// * The fields' `dtype` is not equal to the inner field of `dtype`30/// * The validity is not `None` and its length is different from `offsets.len() - 1`.31pub fn try_new(32dtype: ArrowDataType,33offsets: OffsetsBuffer<i32>,34field: Box<dyn Array>,35validity: Option<Bitmap>,36) -> PolarsResult<Self> {37try_check_offsets_bounds(&offsets, field.len())?;3839let inner_field = Self::try_get_field(&dtype)?;40if let ArrowDataType::Struct(inner) = inner_field.dtype() {41if inner.len() != 2 {42polars_bail!(ComputeError: "MapArray's inner `Struct` must have 2 fields (keys and maps)")43}44} else {45polars_bail!(ComputeError: "MapArray expects `DataType::Struct` as its inner logical type")46}47if field.dtype() != inner_field.dtype() {48polars_bail!(ComputeError: "MapArray expects `field.dtype` to match its inner DataType")49}5051if validity52.as_ref()53.is_some_and(|validity| validity.len() != offsets.len_proxy())54{55polars_bail!(ComputeError: "validity mask length must match the number of values")56}5758Ok(Self {59dtype,60field,61offsets,62validity,63})64}6566/// Creates a new [`MapArray`].67/// # Panics68/// * `offsets.last()` is 
greater than `field.len()`.69/// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Map`],70/// * The validity is not `None` and its length is different from `offsets.len() - 1`.71pub fn new(72dtype: ArrowDataType,73offsets: OffsetsBuffer<i32>,74field: Box<dyn Array>,75validity: Option<Bitmap>,76) -> Self {77Self::try_new(dtype, offsets, field, validity).unwrap()78}7980/// Returns a new null [`MapArray`] of `length`.81pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {82let field = new_empty_array(Self::get_field(&dtype).dtype().clone());83Self::new(84dtype,85vec![0i32; 1 + length].try_into().unwrap(),86field,87Some(Bitmap::new_zeroed(length)),88)89}9091/// Returns a new empty [`MapArray`].92pub fn new_empty(dtype: ArrowDataType) -> Self {93let field = new_empty_array(Self::get_field(&dtype).dtype().clone());94Self::new(dtype, OffsetsBuffer::default(), field, None)95}96}9798impl MapArray {99/// Returns a slice of this [`MapArray`].100/// # Panics101/// panics iff `offset + length > self.len()`102pub fn slice(&mut self, offset: usize, length: usize) {103assert!(104offset + length <= self.len(),105"the offset of the new Buffer cannot exceed the existing length"106);107unsafe { self.slice_unchecked(offset, length) }108}109110/// Returns a slice of this [`MapArray`].111///112/// # Safety113/// The caller must ensure that `offset + length < self.len()`.114#[inline]115pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {116self.validity = self117.validity118.take()119.map(|bitmap| bitmap.sliced_unchecked(offset, length))120.filter(|bitmap| bitmap.unset_bits() > 0);121self.offsets.slice_unchecked(offset, length + 1);122}123124impl_sliced!();125impl_mut_validity!();126impl_into_array!();127128pub(crate) fn try_get_field(dtype: &ArrowDataType) -> PolarsResult<&Field> {129if let ArrowDataType::Map(field, _) = dtype.to_logical_type() {130Ok(field.as_ref())131} else {132polars_bail!(ComputeError: "The dtype's logical type must 
be DataType::Map")133}134}135136pub(crate) fn get_field(dtype: &ArrowDataType) -> &Field {137Self::try_get_field(dtype).unwrap()138}139}140141// Accessors142impl MapArray {143/// Returns the length of this array144#[inline]145pub fn len(&self) -> usize {146self.offsets.len_proxy()147}148149/// returns the offsets150#[inline]151pub fn offsets(&self) -> &OffsetsBuffer<i32> {152&self.offsets153}154155/// Returns the field (guaranteed to be a `Struct`)156#[inline]157pub fn field(&self) -> &Box<dyn Array> {158&self.field159}160161/// Returns the element at index `i`.162#[inline]163pub fn value(&self, i: usize) -> Box<dyn Array> {164assert!(i < self.len());165unsafe { self.value_unchecked(i) }166}167168/// Returns the element at index `i`.169///170/// # Safety171/// Assumes that the `i < self.len`.172#[inline]173pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {174// soundness: the invariant of the function175let (start, end) = self.offsets.start_end_unchecked(i);176let length = end - start;177178// soundness: the invariant of the struct179self.field.sliced_unchecked(start, length)180}181}182183impl Array for MapArray {184impl_common_array!();185186fn validity(&self) -> Option<&Bitmap> {187self.validity.as_ref()188}189190#[inline]191fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {192Box::new(self.clone().with_validity(validity))193}194}195196impl Splitable for MapArray {197fn check_bound(&self, offset: usize) -> bool {198offset <= self.len()199}200201unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {202let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };203let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };204205(206Self {207dtype: self.dtype.clone(),208offsets: lhs_offsets,209field: self.field.clone(),210validity: lhs_validity,211},212Self {213dtype: self.dtype.clone(),214offsets: rhs_offsets,215field: self.field.clone(),216validity: 
rhs_validity,217},218)219}220}221222223