// Path: blob/main/crates/polars-python/src/series/construction.rs
// 8368 views
use std::borrow::Cow;12use arrow::array::{Array, PrimitiveArray};3use arrow::bitmap::BitmapBuilder;4use arrow::types::NativeType;5use num_traits::AsPrimitive;6use numpy::{Element, PyArray1, PyArrayMethods, PyUntypedArrayMethods};7use polars::prelude::*;8use polars_buffer::{Buffer, SharedStorage};9use pyo3::exceptions::{PyTypeError, PyValueError};10use pyo3::prelude::*;1112use crate::PySeries;13use crate::conversion::Wrap;14use crate::conversion::any_value::py_object_to_any_value;15use crate::error::PyPolarsErr;16use crate::interop::arrow::to_rust::array_to_rust;17use crate::prelude::ObjectValue;18use crate::utils::EnterPolarsExt;1920// Init with numpy arrays.21macro_rules! init_method {22($name:ident, $type:ty) => {23#[pymethods]24impl PySeries {25#[staticmethod]26fn $name(name: &str, array: &Bound<PyArray1<$type>>, _strict: bool) -> Self {27let arr = numpy_array_to_arrow(array);28Series::from_arrow(name.into(), arr.to_boxed())29.unwrap()30.into()31}32}33};34}3536init_method!(new_i8, i8);37init_method!(new_i16, i16);38init_method!(new_i32, i32);39init_method!(new_i64, i64);40init_method!(new_u8, u8);41init_method!(new_u16, u16);42init_method!(new_u32, u32);43init_method!(new_u64, u64);4445fn numpy_array_to_arrow<T: Element + NativeType>(array: &Bound<PyArray1<T>>) -> PrimitiveArray<T> {46let owner = array.clone().unbind();47let ro = array.readonly();48let vals = ro.as_slice().unwrap();49unsafe {50let storage = SharedStorage::from_slice_with_owner(vals, owner);51let buffer = Buffer::from_storage(storage);52PrimitiveArray::new_unchecked(T::PRIMITIVE.into(), buffer, None)53}54}5556#[cfg(feature = "object")]57pub fn series_from_objects(py: Python<'_>, name: PlSmallStr, objects: Vec<ObjectValue>) -> Series {58let mut validity = BitmapBuilder::with_capacity(objects.len());59for v in &objects {60let is_valid = !v.inner.is_none(py);61// SAFETY: we can ensure that validity has correct capacity.62unsafe { validity.push_unchecked(is_valid) 
};63}64ObjectChunked::<ObjectValue>::new_from_vec_and_validity(65name,66objects,67validity.into_opt_validity(),68)69.into_series()70}7172#[pymethods]73impl PySeries {74#[staticmethod]75fn new_bool(76py: Python<'_>,77name: &str,78array: &Bound<PyArray1<bool>>,79_strict: bool,80) -> PyResult<Self> {81let array = array.readonly();8283// We use raw ptr methods to read this as a u8 slice to work around PyO3/rust-numpy#509.84assert!(array.is_contiguous());85let data_ptr = array.data().cast::<u8>();86let data_len = array.len();87let vals = unsafe { core::slice::from_raw_parts(data_ptr, data_len) };88py.enter_polars_series(|| Series::new(name.into(), vals).cast(&DataType::Boolean))89}9091#[staticmethod]92fn new_f16(93py: Python<'_>,94name: &str,95array: &Bound<PyArray1<pf16>>,96nan_is_null: bool,97) -> PyResult<Self> {98let arr = numpy_array_to_arrow(array);99if nan_is_null {100py.enter_polars_series(|| {101let validity = polars_compute::nan::is_not_nan(arr.values());102Ok(Series::from_array(name.into(), arr.with_validity(validity)))103})104} else {105Ok(Series::from_array(name.into(), arr).into())106}107}108109#[staticmethod]110fn new_f32(111py: Python<'_>,112name: &str,113array: &Bound<PyArray1<f32>>,114nan_is_null: bool,115) -> PyResult<Self> {116let arr = numpy_array_to_arrow(array);117if nan_is_null {118py.enter_polars_series(|| {119let validity = polars_compute::nan::is_not_nan(arr.values());120Ok(Series::from_array(name.into(), arr.with_validity(validity)))121})122} else {123Ok(Series::from_array(name.into(), arr).into())124}125}126127#[staticmethod]128fn new_f64(129py: Python<'_>,130name: &str,131array: &Bound<PyArray1<f64>>,132nan_is_null: bool,133) -> PyResult<Self> {134let arr = numpy_array_to_arrow(array);135if nan_is_null {136py.enter_polars_series(|| {137let validity = polars_compute::nan::is_not_nan(arr.values());138Ok(Series::from_array(name.into(), arr.with_validity(validity)))139})140} else {141Ok(Series::from_array(name.into(), 
arr).into())142}143}144}145146#[pymethods]147impl PySeries {148#[staticmethod]149fn new_opt_bool(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {150let len = values.len()?;151let mut builder = BooleanChunkedBuilder::new(name.into(), len);152153for res in values.try_iter()? {154let value = res?;155if value.is_none() {156builder.append_null()157} else {158let v = value.extract::<bool>()?;159builder.append_value(v)160}161}162163let ca = builder.finish();164let s = ca.into_series();165Ok(s.into())166}167}168169fn new_primitive<'py, T, F>(170name: &str,171values: &Bound<'py, PyAny>,172_strict: bool,173extract: F,174) -> PyResult<PySeries>175where176T: PolarsNumericType,177F: Fn(Bound<'py, PyAny>) -> PyResult<T::Native>,178{179let len = values.len()?;180let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);181182for res in values.try_iter()? {183let value = res?;184if value.is_none() {185builder.append_null()186} else {187let v = extract(value)?;188builder.append_value(v)189}190}191192let ca = builder.finish();193let s = ca.into_series();194Ok(s.into())195}196197// Init with lists that can contain Nones198macro_rules! 
init_method_opt {199($name:ident, $type:ty, $native: ty) => {200#[pymethods]201impl PySeries {202#[staticmethod]203fn $name(name: &str, obj: &Bound<PyAny>, strict: bool) -> PyResult<Self> {204new_primitive::<$type, _>(name, obj, strict, |v| v.extract::<$native>())205}206}207};208}209210init_method_opt!(new_opt_u8, UInt8Type, u8);211init_method_opt!(new_opt_u16, UInt16Type, u16);212init_method_opt!(new_opt_u32, UInt32Type, u32);213init_method_opt!(new_opt_u64, UInt64Type, u64);214init_method_opt!(new_opt_u128, UInt128Type, u128);215init_method_opt!(new_opt_i8, Int8Type, i8);216init_method_opt!(new_opt_i16, Int16Type, i16);217init_method_opt!(new_opt_i32, Int32Type, i32);218init_method_opt!(new_opt_i64, Int64Type, i64);219init_method_opt!(new_opt_i128, Int128Type, i128);220init_method_opt!(new_opt_f32, Float32Type, f32);221init_method_opt!(new_opt_f64, Float64Type, f64);222223#[pymethods]224impl PySeries {225#[staticmethod]226fn new_opt_f16(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {227new_primitive::<Float16Type, _>(name, values, false, |v| {228Ok(AsPrimitive::<pf16>::as_(v.extract::<f64>()?))229})230}231}232233fn convert_to_avs(234values: &Bound<'_, PyAny>,235strict: bool,236allow_object: bool,237) -> PyResult<Vec<AnyValue<'static>>> {238values239.try_iter()?240.map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, allow_object))241.collect()242}243244#[pymethods]245impl PySeries {246#[staticmethod]247fn new_from_any_values(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {248let any_values_result = values249.try_iter()?250.map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, true))251.collect::<PyResult<Vec<AnyValue>>>();252253let result = any_values_result.and_then(|avs| {254let s = Series::from_any_values(name.into(), avs.as_slice(), strict).map_err(|e| {255PyTypeError::new_err(format!(256"{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed 
types."257))258})?;259Ok(s.into())260});261262// Fall back to Object type for non-strict construction.263if !strict && result.is_err() {264return Python::attach(|py| {265let objects = values266.try_iter()?267.map(|v| v?.extract())268.collect::<PyResult<Vec<ObjectValue>>>()?;269Ok(Self::new_object(py, name, objects, strict))270});271}272273result274}275276#[staticmethod]277fn new_from_any_values_and_dtype(278name: &str,279values: &Bound<PyAny>,280dtype: Wrap<DataType>,281strict: bool,282) -> PyResult<Self> {283let avs = convert_to_avs(values, strict, false)?;284let s = Series::from_any_values_and_dtype(name.into(), avs.as_slice(), &dtype.0, strict)285.map_err(|e| {286PyTypeError::new_err(format!(287"{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."288))289})?;290Ok(s.into())291}292293#[staticmethod]294fn new_str(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {295let len = values.len()?;296let mut builder = StringChunkedBuilder::new(name.into(), len);297298for res in values.try_iter()? {299let value = res?;300if value.is_none() {301builder.append_null()302} else {303let v = value.extract::<Cow<str>>()?;304builder.append_value(v)305}306}307308let ca = builder.finish();309let s = ca.into_series();310Ok(s.into())311}312313#[staticmethod]314fn new_binary(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {315let len = values.len()?;316let mut builder = BinaryChunkedBuilder::new(name.into(), len);317318for res in values.try_iter()? 
{319let value = res?;320if value.is_none() {321builder.append_null()322} else {323let v = value.extract::<&[u8]>()?;324builder.append_value(v)325}326}327328let ca = builder.finish();329let s = ca.into_series();330Ok(s.into())331}332333#[staticmethod]334fn new_decimal(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {335Self::new_from_any_values(name, values, strict)336}337338#[staticmethod]339fn new_series_list(name: &str, values: Vec<Option<PySeries>>, _strict: bool) -> PyResult<Self> {340let series: Vec<_> = values341.into_iter()342.map(|ops| ops.map(|ps| ps.series.into_inner()))343.collect();344if let Some(s) = series.iter().flatten().next() {345if s.dtype().is_object() {346return Err(PyValueError::new_err(347"list of objects isn't supported; try building a 'object' only series",348));349}350}351Ok(Series::new(name.into(), series).into())352}353354#[staticmethod]355#[pyo3(signature = (name, values, strict, dtype))]356fn new_array(357name: &str,358values: &Bound<PyAny>,359strict: bool,360dtype: Wrap<DataType>,361) -> PyResult<Self> {362Self::new_from_any_values_and_dtype(name, values, dtype, strict)363}364365#[staticmethod]366pub fn new_object(py: Python<'_>, name: &str, values: Vec<ObjectValue>, _strict: bool) -> Self {367#[cfg(feature = "object")]368{369PySeries::from(series_from_objects(py, name.into(), values))370}371#[cfg(not(feature = "object"))]372panic!("activate 'object' feature")373}374375#[staticmethod]376fn new_null(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {377let len = values.len()?;378Ok(Series::new_null(name.into(), len).into())379}380381#[staticmethod]382fn from_arrow(name: &str, array: &Bound<PyAny>) -> PyResult<Self> {383let arr = array_to_rust(array)?;384385match arr.dtype() {386ArrowDataType::LargeList(_) => {387let array = arr.as_any().downcast_ref::<LargeListArray>().unwrap();388let fast_explode = array.offsets().as_slice().windows(2).all(|w| w[0] != w[1]);389390let mut out = 
ListChunked::with_chunk(name.into(), array.clone());391if fast_explode {392out.set_fast_explode()393}394Ok(out.into_series().into())395},396_ => {397let series: Series =398Series::try_new(name.into(), arr).map_err(PyPolarsErr::from)?;399Ok(series.into())400},401}402}403}404405406