Path: blob/main/crates/polars-arrow/src/array/dictionary/mutable.rs
6939 views
use std::hash::Hash;1use std::sync::Arc;23use polars_error::PolarsResult;45use super::value_map::ValueMap;6use super::{DictionaryArray, DictionaryKey};7use crate::array::indexable::{AsIndexed, Indexable};8use crate::array::primitive::MutablePrimitiveArray;9use crate::array::{Array, MutableArray, TryExtend, TryPush};10use crate::bitmap::MutableBitmap;11use crate::datatypes::ArrowDataType;1213#[derive(Debug)]14pub struct MutableDictionaryArray<K: DictionaryKey, M: MutableArray> {15dtype: ArrowDataType,16map: ValueMap<K, M>,17// invariant: `max(keys) < map.values().len()`18keys: MutablePrimitiveArray<K>,19}2021impl<K: DictionaryKey, M: MutableArray> From<MutableDictionaryArray<K, M>> for DictionaryArray<K> {22fn from(other: MutableDictionaryArray<K, M>) -> Self {23// SAFETY: the invariant of this struct ensures that this is up-held24unsafe {25DictionaryArray::<K>::try_new_unchecked(26other.dtype,27other.keys.into(),28other.map.into_values().as_box(),29)30.unwrap()31}32}33}3435impl<K: DictionaryKey, M: MutableArray + Default> MutableDictionaryArray<K, M> {36/// Creates an empty [`MutableDictionaryArray`].37pub fn new() -> Self {38Self::try_empty(M::default()).unwrap()39}40}4142impl<K: DictionaryKey, M: MutableArray + Default> Default for MutableDictionaryArray<K, M> {43fn default() -> Self {44Self::new()45}46}4748impl<K: DictionaryKey, M: MutableArray> MutableDictionaryArray<K, M> {49/// Creates an empty [`MutableDictionaryArray`] from a given empty values array.50/// # Errors51/// Errors if the array is non-empty.52pub fn try_empty(values: M) -> PolarsResult<Self> {53Ok(Self::from_value_map(ValueMap::<K, M>::try_empty(values)?))54}5556/// Creates an empty [`MutableDictionaryArray`] preloaded with a given dictionary of values.57/// Indices associated with those values are automatically assigned based on the order of58/// the values.59/// # Errors60/// Errors if there's more values than the maximum value of `K` or if values are not unique.61pub fn from_values(values: M) -> PolarsResult<Self>62where63M: Indexable,64M::Type: Eq + Hash,65{66Ok(Self::from_value_map(ValueMap::<K, M>::from_values(values)?))67}6869fn from_value_map(value_map: ValueMap<K, M>) -> Self {70let keys = MutablePrimitiveArray::<K>::new();71let dtype =72ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_map.dtype().clone()), false);73Self {74dtype,75map: value_map,76keys,77}78}7980/// Creates an empty [`MutableDictionaryArray`] retaining the same dictionary as the current81/// mutable dictionary array, but with no data. This may come useful when serializing the82/// array into multiple chunks, where there's a requirement that the dictionary is the same.83/// No copying is performed, the value map is moved over to the new array.84pub fn into_empty(self) -> Self {85Self::from_value_map(self.map)86}8788/// Same as `into_empty` but clones the inner value map instead of taking full ownership.89pub fn to_empty(&self) -> Self90where91M: Clone,92{93Self::from_value_map(self.map.clone())94}9596/// pushes a null value97pub fn push_null(&mut self) {98self.keys.push(None)99}100101/// returns a reference to the inner values.102pub fn values(&self) -> &M {103self.map.values()104}105106/// converts itself into [`Arc<dyn Array>`]107pub fn into_arc(self) -> Arc<dyn Array> {108let a: DictionaryArray<K> = self.into();109Arc::new(a)110}111112/// converts itself into [`Box<dyn Array>`]113pub fn into_box(self) -> Box<dyn Array> {114let a: DictionaryArray<K> = self.into();115Box::new(a)116}117118/// Reserves `additional` slots.119pub fn reserve(&mut self, additional: usize) {120self.keys.reserve(additional);121}122123/// Shrinks the capacity of the [`MutableDictionaryArray`] to fit its current length.124pub fn shrink_to_fit(&mut self) {125self.map.shrink_to_fit();126self.keys.shrink_to_fit();127}128129/// Returns the dictionary keys130pub fn keys(&self) -> &MutablePrimitiveArray<K> {131&self.keys132}133134fn take_into(&mut self) -> DictionaryArray<K> {135DictionaryArray::<K>::try_new(136self.dtype.clone(),137std::mem::take(&mut self.keys).into(),138self.map.take_into(),139)140.unwrap()141}142}143144impl<K: DictionaryKey, M: 'static + MutableArray> MutableArray for MutableDictionaryArray<K, M> {145fn len(&self) -> usize {146self.keys.len()147}148149fn validity(&self) -> Option<&MutableBitmap> {150self.keys.validity()151}152153fn as_box(&mut self) -> Box<dyn Array> {154Box::new(self.take_into())155}156157fn as_arc(&mut self) -> Arc<dyn Array> {158Arc::new(self.take_into())159}160161fn dtype(&self) -> &ArrowDataType {162&self.dtype163}164165fn as_any(&self) -> &dyn std::any::Any {166self167}168169fn as_mut_any(&mut self) -> &mut dyn std::any::Any {170self171}172173fn push_null(&mut self) {174self.keys.push(None)175}176177fn reserve(&mut self, additional: usize) {178self.reserve(additional)179}180181fn shrink_to_fit(&mut self) {182self.shrink_to_fit()183}184}185186impl<K, M, T> TryExtend<Option<T>> for MutableDictionaryArray<K, M>187where188K: DictionaryKey,189M: MutableArray + Indexable + TryExtend<Option<T>> + TryPush<Option<T>>,190T: AsIndexed<M>,191M::Type: Eq + Hash,192{193fn try_extend<II: IntoIterator<Item = Option<T>>>(&mut self, iter: II) -> PolarsResult<()> {194for value in iter {195if let Some(value) = value {196let key = self197.map198.try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;199self.keys.try_push(Some(key))?;200} else {201self.push_null();202}203}204Ok(())205}206}207208impl<K, M, T> TryPush<Option<T>> for MutableDictionaryArray<K, M>209where210K: DictionaryKey,211M: MutableArray + Indexable + TryPush<Option<T>>,212T: AsIndexed<M>,213M::Type: Eq + Hash,214{215fn try_push(&mut self, item: Option<T>) -> PolarsResult<()> {216if let Some(value) = item {217let key = self218.map219.try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;220self.keys.try_push(Some(key))?;221} else {222self.push_null();223}224Ok(())225}226}227228229