// Path: blob/main/crates/polars-python/src/series/construction.rs
// 7889 views
use std::borrow::Cow;12use arrow::array::Array;3use arrow::bitmap::BitmapBuilder;4use arrow::types::NativeType;5use num_traits::AsPrimitive;6use numpy::{Element, PyArray1, PyArrayMethods, PyUntypedArrayMethods};7use polars::prelude::*;8use polars_core::utils::CustomIterTools;9use pyo3::exceptions::{PyTypeError, PyValueError};10use pyo3::prelude::*;1112use crate::PySeries;13use crate::conversion::Wrap;14use crate::conversion::any_value::py_object_to_any_value;15use crate::error::PyPolarsErr;16use crate::interop::arrow::to_rust::array_to_rust;17use crate::prelude::ObjectValue;18use crate::utils::EnterPolarsExt;1920// Init with numpy arrays.21macro_rules! init_method {22($name:ident, $type:ty) => {23#[pymethods]24impl PySeries {25#[staticmethod]26fn $name(name: &str, array: &Bound<PyArray1<$type>>, _strict: bool) -> Self {27mmap_numpy_array(name, array)28}29}30};31}3233init_method!(new_i8, i8);34init_method!(new_i16, i16);35init_method!(new_i32, i32);36init_method!(new_i64, i64);37init_method!(new_u8, u8);38init_method!(new_u16, u16);39init_method!(new_u32, u32);40init_method!(new_u64, u64);4142fn mmap_numpy_array<T: Element + NativeType>(name: &str, array: &Bound<PyArray1<T>>) -> PySeries {43let vals = unsafe { array.as_slice().unwrap() };4445let arr = unsafe { arrow::ffi::mmap::slice_and_owner(vals, array.clone().unbind()) };46Series::from_arrow(name.into(), arr.to_boxed())47.unwrap()48.into()49}5051#[cfg(feature = "object")]52pub fn series_from_objects(py: Python<'_>, name: PlSmallStr, objects: Vec<ObjectValue>) -> Series {53let mut validity = BitmapBuilder::with_capacity(objects.len());54for v in &objects {55let is_valid = !v.inner.is_none(py);56// SAFETY: we can ensure that validity has correct capacity.57unsafe { validity.push_unchecked(is_valid) };58}59ObjectChunked::<ObjectValue>::new_from_vec_and_validity(60name,61objects,62validity.into_opt_validity(),63)64.into_series()65}6667#[pymethods]68impl PySeries {69#[staticmethod]70fn new_bool(71py: 
Python<'_>,72name: &str,73array: &Bound<PyArray1<bool>>,74_strict: bool,75) -> PyResult<Self> {76let array = array.readonly();7778// We use raw ptr methods to read this as a u8 slice to work around PyO3/rust-numpy#509.79assert!(array.is_contiguous());80let data_ptr = array.data().cast::<u8>();81let data_len = array.len();82let vals = unsafe { core::slice::from_raw_parts(data_ptr, data_len) };83py.enter_polars_series(|| Series::new(name.into(), vals).cast(&DataType::Boolean))84}8586#[staticmethod]87fn new_f16(88py: Python<'_>,89name: &str,90array: &Bound<PyArray1<pf16>>,91nan_is_null: bool,92) -> PyResult<Self> {93if nan_is_null {94let array = array.readonly();95let vals = array.as_slice().unwrap();96py.enter_polars_series(|| {97let ca: Float16Chunked = vals98.iter()99.map(|&val| if pf16::is_nan(val) { None } else { Some(val) })100.collect_trusted();101Ok(ca.with_name(name.into()))102})103} else {104Ok(mmap_numpy_array(name, array))105}106}107108#[staticmethod]109fn new_f32(110py: Python<'_>,111name: &str,112array: &Bound<PyArray1<f32>>,113nan_is_null: bool,114) -> PyResult<Self> {115if nan_is_null {116let array = array.readonly();117let vals = array.as_slice().unwrap();118py.enter_polars_series(|| {119let ca: Float32Chunked = vals120.iter()121.map(|&val| if f32::is_nan(val) { None } else { Some(val) })122.collect_trusted();123Ok(ca.with_name(name.into()))124})125} else {126Ok(mmap_numpy_array(name, array))127}128}129130#[staticmethod]131fn new_f64(132py: Python<'_>,133name: &str,134array: &Bound<PyArray1<f64>>,135nan_is_null: bool,136) -> PyResult<Self> {137if nan_is_null {138let array = array.readonly();139let vals = array.as_slice().unwrap();140py.enter_polars_series(|| {141let ca: Float64Chunked = vals142.iter()143.map(|&val| if f64::is_nan(val) { None } else { Some(val) })144.collect_trusted();145Ok(ca.with_name(name.into()))146})147} else {148Ok(mmap_numpy_array(name, array))149}150}151}152153#[pymethods]154impl PySeries {155#[staticmethod]156fn 
new_opt_bool(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {157let len = values.len()?;158let mut builder = BooleanChunkedBuilder::new(name.into(), len);159160for res in values.try_iter()? {161let value = res?;162if value.is_none() {163builder.append_null()164} else {165let v = value.extract::<bool>()?;166builder.append_value(v)167}168}169170let ca = builder.finish();171let s = ca.into_series();172Ok(s.into())173}174}175176fn new_primitive<'py, T, F>(177name: &str,178values: &Bound<'py, PyAny>,179_strict: bool,180extract: F,181) -> PyResult<PySeries>182where183T: PolarsNumericType,184F: Fn(Bound<'py, PyAny>) -> PyResult<T::Native>,185{186let len = values.len()?;187let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);188189for res in values.try_iter()? {190let value = res?;191if value.is_none() {192builder.append_null()193} else {194let v = extract(value)?;195builder.append_value(v)196}197}198199let ca = builder.finish();200let s = ca.into_series();201Ok(s.into())202}203204// Init with lists that can contain Nones205macro_rules! 
init_method_opt {206($name:ident, $type:ty, $native: ty) => {207#[pymethods]208impl PySeries {209#[staticmethod]210fn $name(name: &str, obj: &Bound<PyAny>, strict: bool) -> PyResult<Self> {211new_primitive::<$type, _>(name, obj, strict, |v| v.extract::<$native>())212}213}214};215}216217init_method_opt!(new_opt_u8, UInt8Type, u8);218init_method_opt!(new_opt_u16, UInt16Type, u16);219init_method_opt!(new_opt_u32, UInt32Type, u32);220init_method_opt!(new_opt_u64, UInt64Type, u64);221init_method_opt!(new_opt_u128, UInt128Type, u128);222init_method_opt!(new_opt_i8, Int8Type, i8);223init_method_opt!(new_opt_i16, Int16Type, i16);224init_method_opt!(new_opt_i32, Int32Type, i32);225init_method_opt!(new_opt_i64, Int64Type, i64);226init_method_opt!(new_opt_i128, Int128Type, i128);227init_method_opt!(new_opt_f32, Float32Type, f32);228init_method_opt!(new_opt_f64, Float64Type, f64);229230#[pymethods]231impl PySeries {232#[staticmethod]233fn new_opt_f16(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {234new_primitive::<Float16Type, _>(name, values, false, |v| {235Ok(AsPrimitive::<pf16>::as_(v.extract::<f64>()?))236})237}238}239240fn convert_to_avs(241values: &Bound<'_, PyAny>,242strict: bool,243allow_object: bool,244) -> PyResult<Vec<AnyValue<'static>>> {245values246.try_iter()?247.map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, allow_object))248.collect()249}250251#[pymethods]252impl PySeries {253#[staticmethod]254fn new_from_any_values(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {255let any_values_result = values256.try_iter()?257.map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, true))258.collect::<PyResult<Vec<AnyValue>>>();259260let result = any_values_result.and_then(|avs| {261let s = Series::from_any_values(name.into(), avs.as_slice(), strict).map_err(|e| {262PyTypeError::new_err(format!(263"{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed 
types."264))265})?;266Ok(s.into())267});268269// Fall back to Object type for non-strict construction.270if !strict && result.is_err() {271return Python::attach(|py| {272let objects = values273.try_iter()?274.map(|v| v?.extract())275.collect::<PyResult<Vec<ObjectValue>>>()?;276Ok(Self::new_object(py, name, objects, strict))277});278}279280result281}282283#[staticmethod]284fn new_from_any_values_and_dtype(285name: &str,286values: &Bound<PyAny>,287dtype: Wrap<DataType>,288strict: bool,289) -> PyResult<Self> {290let avs = convert_to_avs(values, strict, false)?;291let s = Series::from_any_values_and_dtype(name.into(), avs.as_slice(), &dtype.0, strict)292.map_err(|e| {293PyTypeError::new_err(format!(294"{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."295))296})?;297Ok(s.into())298}299300#[staticmethod]301fn new_str(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {302let len = values.len()?;303let mut builder = StringChunkedBuilder::new(name.into(), len);304305for res in values.try_iter()? {306let value = res?;307if value.is_none() {308builder.append_null()309} else {310let v = value.extract::<Cow<str>>()?;311builder.append_value(v)312}313}314315let ca = builder.finish();316let s = ca.into_series();317Ok(s.into())318}319320#[staticmethod]321fn new_binary(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {322let len = values.len()?;323let mut builder = BinaryChunkedBuilder::new(name.into(), len);324325for res in values.try_iter()? 
{326let value = res?;327if value.is_none() {328builder.append_null()329} else {330let v = value.extract::<&[u8]>()?;331builder.append_value(v)332}333}334335let ca = builder.finish();336let s = ca.into_series();337Ok(s.into())338}339340#[staticmethod]341fn new_decimal(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {342Self::new_from_any_values(name, values, strict)343}344345#[staticmethod]346fn new_series_list(name: &str, values: Vec<Option<PySeries>>, _strict: bool) -> PyResult<Self> {347let series: Vec<_> = values348.into_iter()349.map(|ops| ops.map(|ps| ps.series.into_inner()))350.collect();351if let Some(s) = series.iter().flatten().next() {352if s.dtype().is_object() {353return Err(PyValueError::new_err(354"list of objects isn't supported; try building a 'object' only series",355));356}357}358Ok(Series::new(name.into(), series).into())359}360361#[staticmethod]362#[pyo3(signature = (name, values, strict, dtype))]363fn new_array(364name: &str,365values: &Bound<PyAny>,366strict: bool,367dtype: Wrap<DataType>,368) -> PyResult<Self> {369Self::new_from_any_values_and_dtype(name, values, dtype, strict)370}371372#[staticmethod]373pub fn new_object(py: Python<'_>, name: &str, values: Vec<ObjectValue>, _strict: bool) -> Self {374#[cfg(feature = "object")]375{376PySeries::from(series_from_objects(py, name.into(), values))377}378#[cfg(not(feature = "object"))]379panic!("activate 'object' feature")380}381382#[staticmethod]383fn new_null(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {384let len = values.len()?;385Ok(Series::new_null(name.into(), len).into())386}387388#[staticmethod]389fn from_arrow(name: &str, array: &Bound<PyAny>) -> PyResult<Self> {390let arr = array_to_rust(array)?;391392match arr.dtype() {393ArrowDataType::LargeList(_) => {394let array = arr.as_any().downcast_ref::<LargeListArray>().unwrap();395let fast_explode = array.offsets().as_slice().windows(2).all(|w| w[0] != w[1]);396397let mut out = 
ListChunked::with_chunk(name.into(), array.clone());398if fast_explode {399out.set_fast_explode()400}401Ok(out.into_series().into())402},403_ => {404let series: Series =405Series::try_new(name.into(), arr).map_err(PyPolarsErr::from)?;406Ok(series.into())407},408}409}410}411412413