// Path: blob/main/crates/polars-arrow/src/array/dictionary/mutable.rs
// 8382 views
use std::hash::Hash;1use std::sync::Arc;23use polars_error::PolarsResult;45use super::value_map::ValueMap;6use super::{DictionaryArray, DictionaryKey};7use crate::array::indexable::{AsIndexed, Indexable};8use crate::array::primitive::MutablePrimitiveArray;9use crate::array::{Array, MutableArray, TryExtend, TryPush};10use crate::bitmap::MutableBitmap;11use crate::datatypes::ArrowDataType;1213#[derive(Debug)]14pub struct MutableDictionaryArray<K: DictionaryKey, M: MutableArray> {15dtype: ArrowDataType,16map: ValueMap<K, M>,17// invariant: `max(keys) < map.values().len()`18keys: MutablePrimitiveArray<K>,19}2021impl<K: DictionaryKey, M: MutableArray> From<MutableDictionaryArray<K, M>> for DictionaryArray<K> {22fn from(other: MutableDictionaryArray<K, M>) -> Self {23// SAFETY: the invariant of this struct ensures that this is up-held24unsafe {25DictionaryArray::<K>::try_new_unchecked(26other.dtype,27other.keys.into(),28other.map.into_values().as_box(),29)30.unwrap()31}32}33}3435impl<K: DictionaryKey, M: MutableArray + Default> MutableDictionaryArray<K, M> {36/// Creates an empty [`MutableDictionaryArray`].37pub fn new() -> Self {38Self::try_empty(M::default()).unwrap()39}4041/// Creates an empty [`MutableDictionaryArray`] with the given value dtype.42pub fn empty_with_value_dtype(value_dtype: ArrowDataType) -> Self {43let keys = MutablePrimitiveArray::<K>::new();44let dtype = ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_dtype), false);45Self {46dtype,47map: ValueMap::<K, M>::try_empty(M::default()).unwrap(),48keys,49}50}51}5253impl<K: DictionaryKey, M: MutableArray + Default> Default for MutableDictionaryArray<K, M> {54fn default() -> Self {55Self::new()56}57}5859impl<K: DictionaryKey, M: MutableArray> MutableDictionaryArray<K, M> {60/// Creates an empty [`MutableDictionaryArray`] from a given empty values array.61/// # Errors62/// Errors if the array is non-empty.63pub fn try_empty(values: M) -> PolarsResult<Self> {64Ok(Self::from_value_map(ValueMap::<K, 
M>::try_empty(values)?))65}6667/// Creates an empty [`MutableDictionaryArray`] preloaded with a given dictionary of values.68/// Indices associated with those values are automatically assigned based on the order of69/// the values.70/// # Errors71/// Errors if there's more values than the maximum value of `K` or if values are not unique.72pub fn from_values(values: M) -> PolarsResult<Self>73where74M: Indexable,75M::Type: Eq + Hash,76{77Ok(Self::from_value_map(ValueMap::<K, M>::from_values(values)?))78}7980fn from_value_map(value_map: ValueMap<K, M>) -> Self {81let keys = MutablePrimitiveArray::<K>::new();82let dtype =83ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_map.dtype().clone()), false);84Self {85dtype,86map: value_map,87keys,88}89}9091/// Creates an empty [`MutableDictionaryArray`] retaining the same dictionary as the current92/// mutable dictionary array, but with no data. This may come useful when serializing the93/// array into multiple chunks, where there's a requirement that the dictionary is the same.94/// No copying is performed, the value map is moved over to the new array.95pub fn into_empty(self) -> Self {96Self::from_value_map(self.map)97}9899/// Same as `into_empty` but clones the inner value map instead of taking full ownership.100pub fn to_empty(&self) -> Self101where102M: Clone,103{104Self::from_value_map(self.map.clone())105}106107/// pushes a null value108pub fn push_null(&mut self) {109self.keys.push(None)110}111112/// returns a reference to the inner values.113pub fn values(&self) -> &M {114self.map.values()115}116117/// converts itself into [`Arc<dyn Array>`]118pub fn into_arc(self) -> Arc<dyn Array> {119let a: DictionaryArray<K> = self.into();120Arc::new(a)121}122123/// converts itself into [`Box<dyn Array>`]124pub fn into_box(self) -> Box<dyn Array> {125let a: DictionaryArray<K> = self.into();126Box::new(a)127}128129/// Reserves `additional` slots.130pub fn reserve(&mut self, additional: usize) 
{131self.keys.reserve(additional);132}133134/// Shrinks the capacity of the [`MutableDictionaryArray`] to fit its current length.135pub fn shrink_to_fit(&mut self) {136self.map.shrink_to_fit();137self.keys.shrink_to_fit();138}139140/// Returns the dictionary keys141pub fn keys(&self) -> &MutablePrimitiveArray<K> {142&self.keys143}144145fn take_into(&mut self) -> DictionaryArray<K> {146DictionaryArray::<K>::try_new(147self.dtype.clone(),148std::mem::take(&mut self.keys).into(),149self.map.take_into(),150)151.unwrap()152}153}154155impl<K: DictionaryKey, M: 'static + MutableArray> MutableArray for MutableDictionaryArray<K, M> {156fn len(&self) -> usize {157self.keys.len()158}159160fn validity(&self) -> Option<&MutableBitmap> {161self.keys.validity()162}163164fn as_box(&mut self) -> Box<dyn Array> {165Box::new(self.take_into())166}167168fn as_arc(&mut self) -> Arc<dyn Array> {169Arc::new(self.take_into())170}171172fn dtype(&self) -> &ArrowDataType {173&self.dtype174}175176fn as_any(&self) -> &dyn std::any::Any {177self178}179180fn as_mut_any(&mut self) -> &mut dyn std::any::Any {181self182}183184fn push_null(&mut self) {185self.keys.push(None)186}187188fn reserve(&mut self, additional: usize) {189self.reserve(additional)190}191192fn shrink_to_fit(&mut self) {193self.shrink_to_fit()194}195}196197impl<K, M, T> TryExtend<Option<T>> for MutableDictionaryArray<K, M>198where199K: DictionaryKey,200M: MutableArray + Indexable + TryExtend<Option<T>> + TryPush<Option<T>>,201T: AsIndexed<M>,202M::Type: Eq + Hash,203{204fn try_extend<II: IntoIterator<Item = Option<T>>>(&mut self, iter: II) -> PolarsResult<()> {205for value in iter {206if let Some(value) = value {207let key = self208.map209.try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;210self.keys.try_push(Some(key))?;211} else {212self.push_null();213}214}215Ok(())216}217}218219impl<K, M, T> TryPush<Option<T>> for MutableDictionaryArray<K, M>220where221K: DictionaryKey,222M: MutableArray + Indexable + 
TryPush<Option<T>>,223T: AsIndexed<M>,224M::Type: Eq + Hash,225{226fn try_push(&mut self, item: Option<T>) -> PolarsResult<()> {227if let Some(value) = item {228let key = self229.map230.try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;231self.keys.try_push(Some(key))?;232} else {233self.push_null();234}235Ok(())236}237}238239240