// Path: blob/main/crates/polars-python/src/series/general.rs
// 7889 views
use polars_core::chunked_array::cast::CastOptions;1use polars_core::series::IsSorted;2use polars_core::utils::flatten::flatten_series;3use polars_utils::python_function::PythonObject;4use pyo3::exceptions::{PyIndexError, PyRuntimeError, PyTypeError, PyValueError};5use pyo3::prelude::*;6use pyo3::types::PyBytes;7use pyo3::{IntoPyObjectExt, Python};89use super::PySeries;10use crate::dataframe::PyDataFrame;11use crate::error::PyPolarsErr;12use crate::prelude::*;13use crate::py_modules::polars;14use crate::utils::EnterPolarsExt;1516#[pymethods]17impl PySeries {18fn struct_unnest(&self, py: Python) -> PyResult<PyDataFrame> {19py.enter_polars_df(|| Ok(self.series.read().struct_()?.clone().unnest()))20}2122fn struct_fields(&self) -> PyResult<Vec<String>> {23let s = self.series.read();24let ca = s.struct_().map_err(PyPolarsErr::from)?;25Ok(ca26.struct_fields()27.iter()28.map(|s| s.name().to_string())29.collect())30}3132fn is_sorted_ascending_flag(&self) -> bool {33matches!(self.series.read().is_sorted_flag(), IsSorted::Ascending)34}3536fn is_sorted_descending_flag(&self) -> bool {37matches!(self.series.read().is_sorted_flag(), IsSorted::Descending)38}3940fn can_fast_explode_flag(&self) -> bool {41match self.series.read().list() {42Err(_) => false,43Ok(list) => list._can_fast_explode(),44}45}4647pub fn cat_uses_lexical_ordering(&self) -> PyResult<bool> {48Ok(true)49}5051pub fn cat_is_local(&self) -> PyResult<bool> {52Ok(false)53}5455pub fn cat_to_local(&self, _py: Python) -> PyResult<Self> {56Ok(self.clone())57}5859fn estimated_size(&self) -> usize {60self.series.read().estimated_size()61}6263#[cfg(feature = "object")]64fn get_object<'py>(&self, py: Python<'py>, index: usize) -> PyResult<Bound<'py, PyAny>> {65let s = self.series.read();66if matches!(s.dtype(), DataType::Object(_)) {67let obj: Option<&ObjectValue> = s.get_object(index).map(|any| any.into());68Ok(obj.into_pyobject(py)?)69} else {70Ok(py.None().into_bound(py))71}72}7374#[cfg(feature = "dtype-array")]75fn 
reshape(&self, py: Python<'_>, dims: Vec<i64>) -> PyResult<Self> {76let dims = dims77.into_iter()78.map(ReshapeDimension::new)79.collect::<Vec<_>>();8081py.enter_polars_series(|| self.series.read().reshape_array(&dims))82}8384/// Returns the string format of a single element of the Series.85fn get_fmt(&self, index: usize, str_len_limit: usize) -> String {86let s = self.series.read();87let v = format!("{}", s.get(index).unwrap());88if let DataType::String | DataType::Categorical(_, _) | DataType::Enum(_, _) = s.dtype() {89let v_no_quotes = &v[1..v.len() - 1];90let v_trunc = &v_no_quotes[..v_no_quotes91.char_indices()92.take(str_len_limit)93.last()94.map(|(i, c)| i + c.len_utf8())95.unwrap_or(0)];96if v_no_quotes == v_trunc {97v98} else {99format!("\"{v_trunc}…")100}101} else {102v103}104}105106pub fn rechunk(&self, py: Python<'_>, in_place: bool) -> PyResult<Option<Self>> {107let series = py.enter_polars_ok(|| self.series.read().rechunk())?;108if in_place {109*self.series.write() = series;110Ok(None)111} else {112Ok(Some(series.into()))113}114}115116/// Get a value by index.117fn get_index(&self, py: Python<'_>, index: usize) -> PyResult<Py<PyAny>> {118let s = self.series.read();119let av = match s.get(index) {120Ok(v) => v,121Err(PolarsError::OutOfBounds(err)) => {122return Err(PyIndexError::new_err(err.to_string()));123},124Err(e) => return Err(PyPolarsErr::from(e).into()),125};126127match av {128AnyValue::List(s) | AnyValue::Array(s, _) => {129let pyseries = PySeries::new(s);130polars(py).getattr(py, "wrap_s")?.call1(py, (pyseries,))131},132_ => Wrap(av).into_py_any(py),133}134}135136/// Get a value by index, allowing negative indices.137fn get_index_signed(&self, py: Python<'_>, index: isize) -> PyResult<Py<PyAny>> {138let index = if index < 0 {139match self.len().checked_sub(index.unsigned_abs()) {140Some(v) => v,141None => {142return Err(PyIndexError::new_err(143polars_err!(oob = index, self.len()).to_string(),144));145},146}147} else 
{148usize::try_from(index).unwrap()149};150self.get_index(py, index)151}152153fn bitand(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {154py.enter_polars_series(|| &*self.series.read() & &*other.series.read())155}156157fn bitor(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {158py.enter_polars_series(|| &*self.series.read() | &*other.series.read())159}160161fn bitxor(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {162py.enter_polars_series(|| &*self.series.read() ^ &*other.series.read())163}164165fn chunk_lengths(&self) -> Vec<usize> {166self.series.read().chunk_lengths().collect()167}168169pub fn name(&self) -> String {170self.series.read().name().to_string()171}172173fn rename(&self, name: &str) {174self.series.write().rename(name.into());175}176177fn dtype<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {178Wrap(self.series.read().dtype().clone()).into_pyobject(py)179}180181fn set_sorted_flag(&self, descending: bool) -> Self {182let mut out = self.series.read().clone();183if descending {184out.set_sorted_flag(IsSorted::Descending);185} else {186out.set_sorted_flag(IsSorted::Ascending)187}188out.into()189}190191fn n_chunks(&self) -> usize {192self.series.read().n_chunks()193}194195fn append(&self, py: Python<'_>, other: &PySeries) -> PyResult<()> {196py.enter_polars(|| {197// Prevent self-append deadlocks.198let other = other.series.read().clone();199let mut s = self.series.write();200s.append(&other)?;201PolarsResult::Ok(())202})203}204205fn extend(&self, py: Python<'_>, other: &PySeries) -> PyResult<()> {206py.enter_polars(|| {207// Prevent self-extend deadlocks.208let other = other.series.read().clone();209let mut s = self.series.write();210s.extend(&other)?;211PolarsResult::Ok(())212})213}214215fn new_from_index(&self, py: Python<'_>, index: usize, length: usize) -> PyResult<Self> {216let s = self.series.read();217if index >= s.len() {218Err(PyValueError::new_err("index is out of bounds"))219} else 
{220py.enter_polars_series(|| Ok(s.new_from_index(index, length)))221}222}223224fn filter(&self, py: Python<'_>, filter: &PySeries) -> PyResult<Self> {225let filter_series = &filter.series.read();226if let Ok(ca) = filter_series.bool() {227py.enter_polars_series(|| self.series.read().filter(ca))228} else {229Err(PyRuntimeError::new_err("Expected a boolean mask"))230}231}232233fn sort(234&self,235py: Python<'_>,236descending: bool,237nulls_last: bool,238multithreaded: bool,239) -> PyResult<Self> {240py.enter_polars_series(|| {241self.series.read().sort(242SortOptions::default()243.with_order_descending(descending)244.with_nulls_last(nulls_last)245.with_multithreaded(multithreaded),246)247})248}249250fn gather_with_series(&self, py: Python<'_>, indices: &PySeries) -> PyResult<Self> {251py.enter_polars_series(|| self.series.read().take(indices.series.read().idx()?))252}253254fn null_count(&self) -> PyResult<usize> {255Ok(self.series.read().null_count())256}257258fn has_nulls(&self) -> bool {259self.series.read().has_nulls()260}261262fn equals(263&self,264py: Python<'_>,265other: &PySeries,266check_dtypes: bool,267check_names: bool,268null_equal: bool,269) -> PyResult<bool> {270let s = self.series.read();271let o = other.series.read();272if check_dtypes && (s.dtype() != o.dtype()) {273return Ok(false);274}275if check_names && (s.name() != o.name()) {276return Ok(false);277}278if null_equal {279py.enter_polars_ok(|| s.equals_missing(&o))280} else {281py.enter_polars_ok(|| s.equals(&o))282}283}284285fn as_str(&self) -> PyResult<String> {286Ok(format!("{:?}", self.series.read()))287}288289#[allow(clippy::len_without_is_empty)]290pub fn len(&self) -> usize {291self.series.read().len()292}293294/// Rechunk and return a pointer to the start of the Series.295/// Only implemented for numeric types296fn as_single_ptr(&self, py: Python) -> PyResult<usize> {297py.enter_polars(|| self.series.write().as_single_ptr())298}299300fn clone(&self) -> Self 
{301Clone::clone(self)302}303304fn zip_with(&self, py: Python<'_>, mask: &PySeries, other: &PySeries) -> PyResult<Self> {305let ms = mask.series.read();306let mask = ms.bool().map_err(PyPolarsErr::from)?;307py.enter_polars_series(|| self.series.read().zip_with(mask, &other.series.read()))308}309310#[pyo3(signature = (separator, drop_first, drop_nulls))]311fn to_dummies(312&self,313py: Python<'_>,314separator: Option<&str>,315drop_first: bool,316drop_nulls: bool,317) -> PyResult<PyDataFrame> {318py.enter_polars_df(|| {319self.series320.read()321.to_dummies(separator, drop_first, drop_nulls)322})323}324325fn get_list(&self, index: usize) -> Option<Self> {326let s = self.series.read();327let ca = s.list().ok()?;328Some(ca.get_as_series(index)?.into())329}330331fn n_unique(&self, py: Python) -> PyResult<usize> {332py.enter_polars(|| self.series.read().n_unique())333}334335fn floor(&self, py: Python) -> PyResult<Self> {336py.enter_polars_series(|| self.series.read().floor())337}338339fn shrink_to_fit(&self, py: Python) -> PyResult<()> {340py.enter_polars_ok(|| self.series.write().shrink_to_fit())341}342343fn dot<'py>(&self, other: &PySeries, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {344let s = &*self.series.read();345let o = &*other.series.read();346let lhs_dtype = s.dtype();347let rhs_dtype = o.dtype();348349if !lhs_dtype.is_primitive_numeric() {350return Err(PyPolarsErr::from(polars_err!(opq = dot, lhs_dtype)).into());351};352if !rhs_dtype.is_primitive_numeric() {353return Err(PyPolarsErr::from(polars_err!(opq = dot, rhs_dtype)).into());354}355356let result: AnyValue = if lhs_dtype.is_float() || rhs_dtype.is_float() {357py.enter_polars(|| (s * o)?.sum::<f64>())?.into()358} else {359py.enter_polars(|| (s * o)?.sum::<i64>())?.into()360};361362Wrap(result).into_pyobject(py)363}364365fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {366// Used in pickle/pickling367Ok(PyBytes::new(368py,369&py.enter_polars(|| 
self.series.read().serialize_to_bytes())?,370))371}372373fn __setstate__(&self, py: Python<'_>, state: Py<PyAny>) -> PyResult<()> {374// Used in pickle/pickling375use pyo3::pybacked::PyBackedBytes;376match state.extract::<PyBackedBytes>(py) {377Ok(bytes) => py.enter_polars(|| {378let mut reader = std::io::Cursor::new(&*bytes);379*self.series.write() = Series::deserialize_from_reader(&mut reader)?;380PolarsResult::Ok(())381}),382Err(e) => Err(e),383}384}385386fn skew(&self, py: Python<'_>, bias: bool) -> PyResult<Option<f64>> {387py.enter_polars(|| self.series.read().skew(bias))388}389390fn kurtosis(&self, py: Python<'_>, fisher: bool, bias: bool) -> PyResult<Option<f64>> {391py.enter_polars(|| self.series.read().kurtosis(fisher, bias))392}393394fn cast(395&self,396py: Python<'_>,397dtype: Wrap<DataType>,398strict: bool,399wrap_numerical: bool,400) -> PyResult<Self> {401let options = if wrap_numerical {402CastOptions::Overflowing403} else if strict {404CastOptions::Strict405} else {406CastOptions::NonStrict407};408py.enter_polars_series(|| self.series.read().cast_with_options(&dtype.0, options))409}410411fn get_chunks(&self) -> PyResult<Vec<Py<PyAny>>> {412Python::attach(|py| {413let wrap_s = py_modules::polars(py).getattr(py, "wrap_s").unwrap();414flatten_series(&self.series.read())415.into_iter()416.map(|s| wrap_s.call1(py, (Self::new(s),)))417.collect()418})419}420421fn is_sorted(&self, py: Python<'_>, descending: bool, nulls_last: bool) -> PyResult<bool> {422let options = SortOptions {423descending,424nulls_last,425multithreaded: true,426maintain_order: false,427limit: None,428};429py.enter_polars(|| self.series.read().is_sorted(options))430}431432fn clear(&self) -> Self {433self.series.read().clear().into()434}435436fn head(&self, py: Python<'_>, n: usize) -> PyResult<Self> {437py.enter_polars_series(|| Ok(self.series.read().head(Some(n))))438}439440fn tail(&self, py: Python<'_>, n: usize) -> PyResult<Self> {441py.enter_polars_series(|| 
Ok(self.series.read().tail(Some(n))))442}443444fn value_counts(445&self,446py: Python<'_>,447sort: bool,448parallel: bool,449name: String,450normalize: bool,451) -> PyResult<PyDataFrame> {452py.enter_polars_df(|| {453self.series454.read()455.value_counts(sort, parallel, name.into(), normalize)456})457}458459#[pyo3(signature = (offset, length))]460fn slice(&self, offset: i64, length: Option<usize>) -> Self {461let s = self.series.read();462let length = length.unwrap_or_else(|| s.len());463s.slice(offset, length).into()464}465466pub fn not_(&self, py: Python) -> PyResult<Self> {467py.enter_polars_series(|| polars_ops::series::negate_bitwise(&self.series.read()))468}469470pub fn shrink_dtype(&self, py: Python<'_>) -> PyResult<Self> {471py.enter_polars(|| {472self.series473.read()474.shrink_type()475.map(Into::into)476.map_err(PyPolarsErr::from)477.map_err(PyErr::from)478})479}480481fn str_to_datetime_infer(482&self,483py: Python,484time_unit: Option<Wrap<TimeUnit>>,485strict: bool,486exact: bool,487ambiguous: PySeries,488) -> PyResult<Self> {489Ok(py490.enter_polars(|| {491let s = self.series.read();492let datetime_strings = s.str()?;493let ambiguous = ambiguous.series.into_inner();494let ambiguous = ambiguous.str()?;495496polars_time::prelude::string::infer::to_datetime_with_inferred_tz(497datetime_strings,498time_unit.map_or(TimeUnit::Microseconds, |v| v.0),499strict,500exact,501ambiguous,502)503})?504.into_series()505.into())506}507508pub fn str_to_decimal_infer(&self, py: Python, inference_length: usize) -> PyResult<Self> {509py.enter_polars_series(|| {510let s = self.series.read();511let ca = s.str()?;512ca.to_decimal_infer(inference_length).map(Series::from)513})514}515516pub fn list_to_struct(517&self,518py: Python<'_>,519width_strat: Wrap<ListToStructWidthStrategy>,520name_gen: Option<Py<PyAny>>,521) -> PyResult<Self> {522py.enter_polars(|| {523let get_index_name =524name_gen.map(|f| PlanCallback::<usize, String>::new_python(PythonObject(f)));525let 
get_index_name = get_index_name.map(|f| {526NameGenerator(Arc::new(move |i| f.call(i).map(PlSmallStr::from)) as Arc<_>)527});528self.series529.read()530.list()?531.to_struct(&ListToStructArgs::InferWidth {532infer_field_strategy: width_strat.0,533get_index_name,534max_fields: None,535})536.map(IntoSeries::into_series)537})538.map(Into::into)539.map_err(PyPolarsErr::from)540.map_err(PyErr::from)541}542543#[cfg(feature = "extract_jsonpath")]544fn str_json_decode(545&self,546py: Python<'_>,547infer_schema_length: Option<usize>,548) -> PyResult<Self> {549py.enter_polars(|| {550let lock = self.series.read();551lock.str()?552.json_decode(None, infer_schema_length)553.map(|s| s.with_name(lock.name().clone()))554})555.map(Into::into)556.map_err(PyPolarsErr::from)557.map_err(PyErr::from)558}559560fn ext_to(&self, dtype: Wrap<DataType>) -> PyResult<Self> {561let DataType::Extension(typ, storage) = &dtype.0 else {562return Err(PyTypeError::new_err(563"ext.to(dtype) can only be used with Extension dtypes",564));565};566567let s = self.series.read();568569if storage.as_ref() != s.dtype() {570return Err(PyErr::from(PyPolarsErr::from(polars_err!(SchemaMismatch:571"storage type mismatch in ext.to(): expected {}, got {}",572storage,573s.dtype()574))));575}576577Ok(s.clone().into_extension(typ.clone()).into())578}579580fn ext_storage(&self) -> Self {581self.series.read().to_storage().clone().into()582}583584fn set(&self, py: Python<'_>, mask: PySeries, value: PySeries) -> PyResult<Self> {585assert_eq!(value.len(), 1);586py.enter_polars(|| {587let slf = self.series.read();588let mask = mask.series.read();589let value = value.series.read();590591let mask = mask.bool()?;592593PolarsResult::Ok(594value595.zip_with_same_type(mask, &slf)?596.with_name(slf.name().clone()),597)598})599.map(Into::into)600.map_err(PyPolarsErr::from)601.map_err(PyErr::from)602}603}604605macro_rules! 
impl_get {606($name:ident, $series_variant:ident, $type:ty) => {607#[pymethods]608impl PySeries {609fn $name(&self, index: i64) -> Option<$type> {610let s = self.series.read();611if let Ok(ca) = s.$series_variant() {612let index = if index < 0 {613(ca.len() as i64 + index) as usize614} else {615index as usize616};617ca.get(index).map(|r| r.to_owned())618} else {619None620}621}622}623};624}625626impl_get!(get_f32, f32, f32);627impl_get!(get_f64, f64, f64);628impl_get!(get_u8, u8, u8);629impl_get!(get_u16, u16, u16);630impl_get!(get_u32, u32, u32);631impl_get!(get_u64, u64, u64);632impl_get!(get_i8, i8, i8);633impl_get!(get_i16, i16, i16);634impl_get!(get_i32, i32, i32);635impl_get!(get_i64, i64, i64);636impl_get!(get_str, str, String);637638macro_rules! impl_get_phys {639($name:ident, $series_variant:ident, $type:ty) => {640#[pymethods]641impl PySeries {642fn $name(&self, index: i64) -> Option<$type> {643let s = self.series.read();644if let Ok(ca) = s.$series_variant() {645let index = if index < 0 {646(ca.len() as i64 + index) as usize647} else {648index as usize649};650ca.physical().get(index)651} else {652None653}654}655}656};657}658659impl_get_phys!(get_date, date, i32);660impl_get_phys!(get_datetime, datetime, i64);661impl_get_phys!(get_duration, duration, i64);662663664