Path: blob/main/crates/polars-core/src/series/implementations/null.rs
8420 views
use std::any::Any;

use polars_error::constants::LENGTH_LIMIT_MSG;

use self::compare_inner::TotalOrdInner;
use super::*;
use crate::chunked_array::ops::compare_inner::{IntoTotalEqInner, NonNull, TotalEqInner};
use crate::chunked_array::ops::sort::arg_sort_multiple::arg_sort_multiple_impl;
use crate::prelude::*;
use crate::series::private::{PrivateSeries, PrivateSeriesNumeric};
use crate::series::*;

impl Series {
    /// Create a `Series` of length `len` whose dtype is `Null` (every value is null).
    pub fn new_null(name: PlSmallStr, len: usize) -> Series {
        NullChunked::new(name, len).into_series()
    }
}

/// Backing implementation for a `Series` of `DataType::Null`: only a name and a
/// length are semantically meaningful, since every element is null.
#[derive(Clone)]
pub struct NullChunked {
    pub(crate) name: PlSmallStr,
    // Cached total length across all chunks, kept in sync by `compute_len`,
    // `append`, `append_owned`, etc.
    length: IdxSize,
    // we still need chunks as many series consumers expect
    // chunks to be there
    chunks: Vec<ArrayRef>,
}

impl NullChunked {
    /// Construct a single-chunk null series of length `len`, backed by one
    /// `arrow::array::NullArray`.
    pub(crate) fn new(name: PlSmallStr, len: usize) -> Self {
        Self {
            name,
            length: len as IdxSize,
            chunks: vec![Box::new(arrow::array::NullArray::new(
                ArrowDataType::Null,
                len,
            ))],
        }
    }

    /// Total number of (null) elements.
    pub fn len(&self) -> usize {
        self.length as usize
    }

    /// True when the series holds no elements at all.
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }
}

impl PrivateSeriesNumeric for NullChunked {
    // Represent the null series as an all-null u32 array for code paths that
    // require a bit representation.
    fn bit_repr(&self) -> Option<BitRepr> {
        Some(BitRepr::U32(UInt32Chunked::full_null(
            self.name.clone(),
            self.len(),
        )))
    }
}

impl PrivateSeries for NullChunked {
    // Recompute the cached `length` from the chunks; panics (via `expect`) if
    // the summed length does not fit in `IdxSize`.
    fn compute_len(&mut self) {
        fn inner(chunks: &[ArrayRef]) -> usize {
            match chunks.len() {
                // fast path
                1 => chunks[0].len(),
                _ => chunks.iter().fold(0, |acc, arr| acc + arr.len()),
            }
        }
        self.length = IdxSize::try_from(inner(&self.chunks)).expect(LENGTH_LIMIT_MSG);
    }
    fn _field(&self) -> Cow<'_, Field> {
        Cow::Owned(Field::new(self.name().clone(), DataType::Null))
    }

    // Statistics flags carry no information for an all-null series; setting is a no-op.
    #[allow(unused)]
    fn _set_flags(&mut self, flags: StatisticsFlags) {}

    fn _dtype(&self) -> &DataType {
        &DataType::Null
    }

    #[cfg(feature = "zip_with")]
    fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
        // The result is null either way; only the output length must be derived.
        // The match arms encode the allowed length/broadcast combinations:
        // all equal, one unit-length side broadcast against two equal lengths,
        // two unit-length sides, or an empty mask (empty result).
        let len = match (self.len(), mask.len(), other.len()) {
            (a, b, c) if a == b && b == c => a,
            (1, a, b) | (a, 1, b) | (a, b, 1) if a == b => a,
            (a, 1, 1) | (1, a, 1) | (1, 1, a) => a,
            (_, 0, _) => 0,
            _ => {
                polars_bail!(ShapeMismatch: "shapes of `self`, `mask` and `other` are not suitable for `zip_with` operation")
            },
        };

        Ok(Self::new(self.name().clone(), len).into_series())
    }

    fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
        IntoTotalEqInner::into_total_eq_inner(self)
    }
    fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
        IntoTotalOrdInner::into_total_ord_inner(self)
    }

    // All arithmetic on a null series yields a null series; only length
    // validation/broadcasting is performed (see `null_arithmetic`).
    fn subtract(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "subtract")
    }

    fn add_to(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "add_to")
    }
    fn multiply(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "multiply")
    }
    fn divide(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "divide")
    }
    fn remainder(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "remainder")
    }

    #[cfg(feature = "algorithm_group_by")]
    fn group_tuples(&self, _multithreaded: bool, _sorted: bool) -> PolarsResult<GroupsType> {
        // Every value is null, so all rows fall into one group: a single
        // slice [0, length) (empty input yields no groups).
        Ok(if self.is_empty() {
            GroupsType::default()
        } else {
            GroupsType::new_slice(vec![[0, self.length]], false, true)
        })
    }

    #[cfg(feature = "algorithm_group_by")]
    unsafe fn agg_list(&self, groups: &GroupsType) -> Series {
        AggList::agg_list(self, groups)
    }

    fn _get_flags(&self) -> StatisticsFlags {
        StatisticsFlags::empty()
    }

    fn vec_hash(
        &self,
        random_state: PlSeedableRandomStateQuality,
        buf: &mut Vec<u64>,
    ) -> PolarsResult<()> {
        VecHash::vec_hash(self, random_state, buf)?;
        Ok(())
    }

    fn vec_hash_combine(
        &self,
        build_hasher: PlSeedableRandomStateQuality,
        hashes: &mut [u64],
    ) -> PolarsResult<()> {
        VecHash::vec_hash_combine(self, build_hasher, hashes)?;
        Ok(())
    }

    fn arg_sort_multiple(
        &self,
        by: &[Column],
        options: &SortMultipleOptions,
    ) -> PolarsResult<IdxCa> {
        // Nulls compare equal among themselves, so this column contributes a
        // unit value (`NonNull(())`) per row; ordering is decided by `by`.
        let vals = (0..self.len())
            .map(|i| (i as IdxSize, NonNull(())))
            .collect();
        arg_sort_multiple_impl(vals, by, options)
    }
}

/// Shared length/broadcast check for arithmetic with a null series: the result
/// is always a null series whose length follows scalar-broadcast rules
/// (either side of length 1 broadcasts; otherwise lengths must match).
fn null_arithmetic(lhs: &NullChunked, rhs: &Series, op: &str) -> PolarsResult<Series> {
    let output_len = match (lhs.len(), rhs.len()) {
        (1, len_r) => len_r,
        (len_l, 1) => len_l,
        (len_l, len_r) if len_l == len_r => len_l,
        _ => polars_bail!(ComputeError: "Cannot {:?} two series of different lengths.", op),
    };
    Ok(NullChunked::new(lhs.name().clone(), output_len).into_series())
}

impl SeriesTrait for NullChunked {
    fn name(&self) -> &PlSmallStr {
        &self.name
    }

    fn rename(&mut self, name: PlSmallStr) {
        self.name = name
    }

    fn chunks(&self) -> &Vec<ArrayRef> {
        &self.chunks
    }
    unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
        &mut self.chunks
    }

    fn chunk_lengths(&self) -> ChunkLenIter<'_> {
        self.chunks.iter().map(|chunk| chunk.len())
    }

    // All take/gather variants only need the number of indices — the values
    // are nulls regardless — so no bounds checks are required and the
    // `_unchecked` variants are trivially safe here.
    fn take(&self, indices: &IdxCa) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    unsafe fn take_unchecked(&self, indices: &IdxCa) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    fn deposit(&self, validity: &Bitmap) -> Series {
        // A null series can only accept an all-null validity mask
        // (no set/valid bits); anything else is a caller bug.
        assert_eq!(validity.set_bits(), 0);
        self.clone().into_series()
    }

    fn len(&self) -> usize {
        self.length as usize
    }

    // Every element is null, so the series has nulls iff it is non-empty.
    fn has_nulls(&self) -> bool {
        !self.is_empty()
    }

    // Collapse into a single fresh NullArray chunk of the same length.
    fn rechunk(&self) -> Series {
        NullChunked::new(self.name.clone(), self.len()).into_series()
    }

    // Dropping nulls from an all-null series always leaves an empty series.
    fn drop_nulls(&self) -> Series {
        NullChunked::new(self.name.clone(), 0).into_series()
    }

    fn cast(&self, dtype: &DataType, _cast_options: CastOptions) -> PolarsResult<Series> {
        Ok(Series::full_null(self.name.clone(), self.len(), dtype))
    }

    fn null_count(&self) -> usize {
        self.len()
    }

    #[cfg(feature = "algorithm_group_by")]
    fn unique(&self) -> PolarsResult<Series> {
        // `n_unique` is infallible for this type (returns Ok below), so
        // the unwrap cannot panic.
        let ca = NullChunked::new(self.name.clone(), self.n_unique().unwrap());
        Ok(ca.into_series())
    }

    #[cfg(feature = "algorithm_group_by")]
    fn n_unique(&self) -> PolarsResult<usize> {
        // There is at most one distinct value: null.
        let n = if self.is_empty() { 0 } else { 1 };
        Ok(n)
    }

    #[cfg(feature = "algorithm_group_by")]
    fn arg_unique(&self) -> PolarsResult<IdxCa> {
        // Either empty (no rows) or `[0]` (the first null represents all rows).
        let idxs: Vec<IdxSize> = (0..self.n_unique().unwrap() as IdxSize).collect();
        Ok(IdxCa::new(self.name().clone(), idxs))
    }

    fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
        // One unique value (null) shared by all rows, or none when empty.
        if self.is_empty() {
            Ok((0, Vec::new()))
        } else {
            Ok((1, vec![0; self.len()]))
        }
    }

    fn new_from_index(&self, _index: usize, length: usize) -> Series {
        // The indexed value is null whatever the index, so only `length` matters.
        NullChunked::new(self.name.clone(), length).into_series()
    }

    unsafe fn get_unchecked(&self, _index: usize) -> AnyValue<'_> {
        AnyValue::Null
    }

    fn slice(&self, offset: i64, length: usize) -> Series {
        // Delegate the offset/length normalization to the shared chunk slicer
        // and rebuild from the returned chunks.
        let (chunks, len) = chunkops::slice(&self.chunks, offset, length, self.len());
        NullChunked {
            name: self.name.clone(),
            length: len as IdxSize,
            chunks,
        }
        .into_series()
    }

    fn split_at(&self, offset: i64) -> (Series, Series) {
        let (l, r) = chunkops::split_at(self.chunks(), offset, self.len());
        (
            NullChunked {
                name: self.name.clone(),
                length: l.iter().map(|arr| arr.len() as IdxSize).sum(),
                chunks: l,
            }
            .into_series(),
            NullChunked {
                name: self.name.clone(),
                length: r.iter().map(|arr| arr.len() as IdxSize).sum(),
                chunks: r,
            }
            .into_series(),
        )
    }

    // Sorting / reversing / shifting an all-null series is the identity
    // (every permutation of nulls is equal), so these return clones.
    fn sort_with(&self, _options: SortOptions) -> PolarsResult<Series> {
        Ok(self.clone().into_series())
    }

    fn arg_sort(&self, _options: SortOptions) -> IdxCa {
        IdxCa::from_vec(self.name().clone(), (0..self.len() as IdxSize).collect())
    }

    fn is_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), true, self.len())
    }

    fn is_not_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), false, self.len())
    }

    fn reverse(&self) -> Series {
        self.clone().into_series()
    }

    fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Series> {
        let len = if self.is_empty() {
            // We still allow a length of `1` because it could be `lit(true)`.
            polars_ensure!(filter.len() <= 1, ShapeMismatch: "filter's length: {} differs from that of the series: 0", filter.len());
            0
        } else if filter.len() == 1 {
            // Broadcast a scalar mask: keep everything on `true`,
            // nothing on `false`/null.
            return match filter.get(0) {
                Some(true) => Ok(self.clone().into_series()),
                None | Some(false) => Ok(NullChunked::new(self.name.clone(), 0).into_series()),
            };
        } else {
            polars_ensure!(filter.len() == self.len(), ShapeMismatch: "filter's length: {} differs from that of the series: {}", filter.len(), self.len());
            // Output length = number of `true` bits in the mask.
            filter.sum().unwrap_or(0) as usize
        };
        Ok(NullChunked::new(self.name.clone(), len).into_series())
    }

    fn shift(&self, _periods: i64) -> Series {
        self.clone().into_series()
    }

    // All scalar reductions over nulls are null.
    fn sum_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn min_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn max_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn mean_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn median_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn std_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn var_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn append(&mut self, other: &Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        self.length += other.len() as IdxSize;
        self.chunks.extend(other.chunks().iter().cloned());
        Ok(())
    }
    fn append_owned(&mut self, mut other: Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        // The dtype check above guarantees the inner type is NullChunked,
        // so the downcast cannot fail.
        let other: &mut NullChunked = other._get_inner_mut().as_any_mut().downcast_mut().unwrap();
        self.length += other.len() as IdxSize;
        // Move the chunks out of `other` instead of cloning them.
        self.chunks.extend(std::mem::take(&mut other.chunks));
        Ok(())
    }

    fn extend(&mut self, other: &Series) -> PolarsResult<()> {
        // Unlike `append`, rebuild as a single chunk of the combined length.
        // NOTE(review): `other`'s dtype is not validated here, only its length
        // is used — presumably callers guarantee a matching dtype; confirm.
        *self = NullChunked::new(self.name.clone(), self.len() + other.len());
        Ok(())
    }

    #[cfg(feature = "approx_unique")]
    fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
        // Exact, not approximate: at most one distinct value (null).
        Ok(if self.is_empty() { 0 } else { 1 })
    }

    fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
        Arc::new(self.clone())
    }

    fn find_validity_mismatch(&self, other: &Series, idxs: &mut Vec<IdxSize>) {
        ChunkNestingUtils::find_validity_mismatch(self, other, idxs)
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }

    fn as_phys_any(&self) -> &dyn Any {
        self
    }

    fn as_arc_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self as _
    }
}

unsafe impl IntoSeries for NullChunked {
    fn into_series(self) -> Series
    where
        Self: Sized,
    {
        Series(Arc::new(self))
    }
}