Path: blob/main/crates/polars-ops/src/chunked_array/array/namespace.rs
6939 views
use arrow::array::builder::{ShareStrategy, make_builder};1use arrow::array::{Array, FixedSizeListArray};2use arrow::bitmap::BitmapBuilder;3use polars_core::prelude::arity::unary_kernel;4use polars_core::utils::slice_offsets;56use super::min_max::AggType;7use super::*;8#[cfg(feature = "array_count")]9use crate::chunked_array::array::count::array_count_matches;10use crate::chunked_array::array::count::count_boolean_bits;11use crate::chunked_array::array::sum_mean::sum_with_nulls;12#[cfg(feature = "array_any_all")]13use crate::prelude::array::any_all::{array_all, array_any};14use crate::prelude::array::get::array_get;15use crate::prelude::array::join::array_join;16use crate::prelude::array::sum_mean::sum_array_numerical;17use crate::series::ArgAgg;1819pub fn has_inner_nulls(ca: &ArrayChunked) -> bool {20for arr in ca.downcast_iter() {21if arr.values().null_count() > 0 {22return true;23}24}25false26}2728fn get_agg(ca: &ArrayChunked, agg_type: AggType) -> Series {29let values = ca.get_inner();30let width = ca.width();31min_max::array_dispatch(ca.name().clone(), &values, width, agg_type)32}3334pub trait ArrayNameSpace: AsArray {35fn array_max(&self) -> Series {36let ca = self.as_array();37get_agg(ca, AggType::Max)38}3940fn array_min(&self) -> Series {41let ca = self.as_array();42get_agg(ca, AggType::Min)43}4445fn array_sum(&self) -> PolarsResult<Series> {46let ca = self.as_array();4748if has_inner_nulls(ca) {49return sum_with_nulls(ca, ca.inner_dtype());50};5152match ca.inner_dtype() {53DataType::Boolean => Ok(count_boolean_bits(ca).into_series()),54dt if dt.is_primitive_numeric() => Ok(sum_array_numerical(ca, dt)),55dt => sum_with_nulls(ca, dt),56}57}5859fn array_mean(&self) -> PolarsResult<Series> {60let ca = self.as_array();61dispersion::mean_with_nulls(ca)62}6364fn array_median(&self) -> PolarsResult<Series> {65let ca = self.as_array();66dispersion::median_with_nulls(ca)67}6869fn array_std(&self, ddof: u8) -> PolarsResult<Series> {70let ca = self.as_array();71dispersion::std_with_nulls(ca, ddof)72}7374fn array_var(&self, ddof: u8) -> PolarsResult<Series> {75let ca = self.as_array();76dispersion::var_with_nulls(ca, ddof)77}7879fn array_unique(&self) -> PolarsResult<ListChunked> {80let ca = self.as_array();81ca.try_apply_amortized_to_list(|s| s.as_ref().unique())82}8384fn array_unique_stable(&self) -> PolarsResult<ListChunked> {85let ca = self.as_array();86ca.try_apply_amortized_to_list(|s| s.as_ref().unique_stable())87}8889fn array_n_unique(&self) -> PolarsResult<IdxCa> {90let ca = self.as_array();91ca.try_apply_amortized_generic(|opt_s| {92let opt_v = opt_s.map(|s| s.as_ref().n_unique()).transpose()?;93Ok(opt_v.map(|idx| idx as IdxSize))94})95}9697#[cfg(feature = "array_any_all")]98fn array_any(&self) -> PolarsResult<Series> {99let ca = self.as_array();100array_any(ca)101}102103#[cfg(feature = "array_any_all")]104fn array_all(&self) -> PolarsResult<Series> {105let ca = self.as_array();106array_all(ca)107}108109fn array_sort(&self, options: SortOptions) -> PolarsResult<ArrayChunked> {110let ca = self.as_array();111// SAFETY: Sort only changes the order of the elements in each subarray.112unsafe { ca.try_apply_amortized_same_type(|s| s.as_ref().sort_with(options)) }113}114115fn array_reverse(&self) -> ArrayChunked {116let ca = self.as_array();117// SAFETY: Reverse only changes the order of the elements in each subarray118unsafe { ca.apply_amortized_same_type(|s| s.as_ref().reverse()) }119}120121fn array_arg_min(&self) -> IdxCa {122let ca = self.as_array();123ca.apply_amortized_generic(|opt_s| {124opt_s.and_then(|s| s.as_ref().arg_min().map(|idx| idx as IdxSize))125})126}127128fn array_arg_max(&self) -> IdxCa {129let ca = self.as_array();130ca.apply_amortized_generic(|opt_s| {131opt_s.and_then(|s| s.as_ref().arg_max().map(|idx| idx as IdxSize))132})133}134135fn array_get(&self, index: &Int64Chunked, null_on_oob: bool) -> PolarsResult<Series> {136let ca = self.as_array();137array_get(ca, index, null_on_oob)138}139140fn array_join(&self, separator: &StringChunked, ignore_nulls: bool) -> PolarsResult<Series> {141let ca = self.as_array();142array_join(ca, separator, ignore_nulls).map(|ok| ok.into_series())143}144145#[cfg(feature = "array_count")]146fn array_count_matches(&self, element: AnyValue) -> PolarsResult<Series> {147let ca = self.as_array();148array_count_matches(ca, element)149}150151fn array_shift(&self, n: &Series) -> PolarsResult<Series> {152let ca = self.as_array();153let n_s = n.cast(&DataType::Int64)?;154let n = n_s.i64()?;155let out = match (ca.len(), n.len()) {156(a, b) if a == b => {157// SAFETY: Shift does not change the dtype and number of elements of sub-array.158unsafe {159ca.zip_and_apply_amortized_same_type(n, |opt_s, opt_periods| {160match (opt_s, opt_periods) {161(Some(s), Some(n)) => Some(s.as_ref().shift(n)),162_ => None,163}164})165}166},167(_, 1) => {168if let Some(n) = n.get(0) {169// SAFETY: Shift does not change the dtype and number of elements of sub-array.170unsafe { ca.apply_amortized_same_type(|s| s.as_ref().shift(n)) }171} else {172ArrayChunked::full_null_with_dtype(173ca.name().clone(),174ca.len(),175ca.inner_dtype(),176ca.width(),177)178}179},180(1, _) => {181if ca.get(0).is_some() {182// Optimize: This does not need to broadcast first.183let ca = ca.new_from_index(0, n.len());184// SAFETY: Shift does not change the dtype and number of elements of sub-array.185unsafe {186ca.zip_and_apply_amortized_same_type(n, |opt_s, opt_periods| {187match (opt_s, opt_periods) {188(Some(s), Some(n)) => Some(s.as_ref().shift(n)),189_ => None,190}191})192}193} else {194ArrayChunked::full_null_with_dtype(195ca.name().clone(),196ca.len(),197ca.inner_dtype(),198ca.width(),199)200}201},202_ => polars_bail!(length_mismatch = "arr.shift", ca.len(), n.len()),203};204Ok(out.into_series())205}206207fn array_slice(&self, offset: i64, length: i64) -> PolarsResult<Series> {208let slice_arr: ArrayChunked = unary_kernel(209self.as_array(),210move |arr: &FixedSizeListArray| -> FixedSizeListArray {211let length: usize = if length < 0 {212(arr.size() as i64 + length).max(0)213} else {214length215}216.try_into()217.expect("Length can not be larger than i64::MAX");218let (raw_offset, slice_len) = slice_offsets(offset, length, arr.size());219220let mut builder = make_builder(arr.values().dtype());221builder.reserve(slice_len * arr.len());222223let mut validity = BitmapBuilder::with_capacity(arr.len());224225let values = arr.values().as_ref();226for row in 0..arr.len() {227if !arr.is_valid(row) {228validity.push(false);229continue;230}231let inner_offset = row * arr.size() + raw_offset;232builder.subslice_extend(values, inner_offset, slice_len, ShareStrategy::Always);233validity.push(true);234}235let values = builder.freeze_reset();236let sliced_dtype = match arr.dtype() {237ArrowDataType::FixedSizeList(inner, _) => {238ArrowDataType::FixedSizeList(inner.clone(), slice_len)239},240_ => unreachable!(),241};242FixedSizeListArray::new(243sliced_dtype,244arr.len(),245values,246validity.into_opt_validity(),247)248},249);250Ok(slice_arr.into_series())251}252}253254impl ArrayNameSpace for ArrayChunked {}255256257