// Path: blob/main/crates/polars-python/src/interop/numpy/to_numpy_series.rs
// 7892 views
use ndarray::IntoDimension;1use num_traits::{Float, NumCast};2use numpy::npyffi::flags;3use numpy::{Element, PyArray1};4use polars::prelude::*;5use pyo3::exceptions::PyRuntimeError;6use pyo3::prelude::*;7use pyo3::{IntoPyObjectExt, intern};89use super::to_numpy_df::df_to_numpy;10use super::utils::{11create_borrowed_np_array, dtype_supports_view, polars_dtype_to_np_temporal_dtype,12reshape_numpy_array, series_contains_null,13};14use crate::conversion::ObjectValue;15use crate::conversion::chunked_array::{decimal_to_pyobject_iter, time_to_pyobject_iter};16use crate::series::PySeries;1718#[pymethods]19impl PySeries {20/// Convert this Series to a NumPy ndarray.21///22/// This method copies data only when necessary. Set `allow_copy` to raise an error if copy23/// is required. Set `writable` to make sure the resulting array is writable, possibly requiring24/// copying the data.25fn to_numpy(&self, py: Python<'_>, writable: bool, allow_copy: bool) -> PyResult<Py<PyAny>> {26series_to_numpy(py, &self.series.read(), writable, allow_copy)27}2829/// Create a view of the data as a NumPy ndarray.30///31/// WARNING: The resulting view will show the underlying value for nulls,32/// which may be any value. 
The caller is responsible for handling nulls33/// appropriately.34fn to_numpy_view(&self, py: Python) -> Option<Py<PyAny>> {35let (view, _) = try_series_to_numpy_view(py, &self.series.read(), true, false)?;36Some(view)37}38}3940/// Convert a Series to a NumPy ndarray.41pub(super) fn series_to_numpy(42py: Python<'_>,43s: &Series,44writable: bool,45allow_copy: bool,46) -> PyResult<Py<PyAny>> {47if s.is_empty() {48// Take this path to ensure a writable array.49// This does not actually copy data for an empty Series.50return Ok(series_to_numpy_with_copy(py, s, true));51}52if let Some((mut arr, writable_flag)) = try_series_to_numpy_view(py, s, false, allow_copy) {53if writable && !writable_flag {54if !allow_copy {55return Err(PyRuntimeError::new_err(56"copy not allowed: cannot create a writable array without copying data",57));58}59arr = arr.call_method0(py, intern!(py, "copy"))?;60}61return Ok(arr);62}6364if !allow_copy {65return Err(PyRuntimeError::new_err(66"copy not allowed: cannot convert to a NumPy array without copying data",67));68}6970Ok(series_to_numpy_with_copy(py, s, writable))71}7273/// Create a NumPy view of the given Series.74fn try_series_to_numpy_view(75py: Python<'_>,76s: &Series,77allow_nulls: bool,78allow_rechunk: bool,79) -> Option<(Py<PyAny>, bool)> {80if !dtype_supports_view(s.dtype()) {81return None;82}83if !allow_nulls && series_contains_null(s) {84return None;85}86let (s_owned, writable_flag) = handle_chunks(py, s, allow_rechunk)?;87let array = series_to_numpy_view_recursive(py, s_owned, writable_flag);88Some((array, writable_flag))89}9091/// Rechunk the Series if required.92///93/// NumPy arrays are always contiguous, so we may have to rechunk before creating a view.94/// If we do so, we can flag the resulting array as writable.95fn handle_chunks(py: Python<'_>, s: &Series, allow_rechunk: bool) -> Option<(Series, bool)> {96let is_chunked = s.n_chunks() > 1;97match (is_chunked, allow_rechunk) {98(true, false) => None,99(true, true) => 
Some((py.detach(|| s.rechunk()), true)),100(false, _) => Some((s.clone(), false)),101}102}103104/// Create a NumPy view of the given Series without checking for data types, chunks, or nulls.105fn series_to_numpy_view_recursive(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {106debug_assert!(s.n_chunks() == 1);107match s.dtype() {108dt if dt.is_primitive_numeric() => numeric_series_to_numpy_view(py, s, writable),109DataType::Datetime(_, _) | DataType::Duration(_) => {110temporal_series_to_numpy_view(py, s, writable)111},112DataType::Array(_, _) => array_series_to_numpy_view(py, &s, writable),113_ => panic!("invalid data type"),114}115}116117/// Create a NumPy view of a numeric Series.118fn numeric_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {119let dims = [s.len()].into_dimension();120with_match_physical_numpy_polars_type!(s.dtype(), |$T| {121let np_dtype = <$T as PolarsNumericType>::Native::get_dtype(py);122let ca: &ChunkedArray<$T> = s.unpack::<$T>().unwrap();123let flags = if writable {124flags::NPY_ARRAY_FARRAY125} else {126flags::NPY_ARRAY_FARRAY_RO127};128129let slice = ca.data_views().next().unwrap();130131unsafe {132create_borrowed_np_array::<_>(133py,134np_dtype,135dims,136flags,137slice.as_ptr() as _,138PySeries::from(s).into_py_any(py).unwrap(), // Keep the Series memory alive.,139)140}141})142}143144/// Create a NumPy view of a Datetime or Duration Series.145fn temporal_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {146let np_dtype = polars_dtype_to_np_temporal_dtype(py, s.dtype());147148let phys = s.to_physical_repr();149let ca = phys.i64().unwrap();150let slice = ca.data_views().next().unwrap();151let dims = [s.len()].into_dimension();152let flags = if writable {153flags::NPY_ARRAY_FARRAY154} else {155flags::NPY_ARRAY_FARRAY_RO156};157158unsafe {159create_borrowed_np_array::<_>(160py,161np_dtype,162dims,163flags,164slice.as_ptr() as _,165PySeries::from(s).into_py_any(py).unwrap(), 
// Keep the Series memory alive.,166)167}168}169170/// Create a NumPy view of an Array Series.171fn array_series_to_numpy_view(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {172let ca = s.array().unwrap();173let s_inner = ca.get_inner();174let np_array_flat = series_to_numpy_view_recursive(py, s_inner, writable);175176// Reshape to the original shape.177let DataType::Array(_, width) = s.dtype() else {178unreachable!()179};180reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()181}182183/// Convert a Series to a NumPy ndarray, copying data in the process.184///185/// This method will cast integers to floats so that `null = np.nan`.186fn series_to_numpy_with_copy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {187use DataType::*;188match s.dtype() {189Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),190Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),191Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),192Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),193Int128 => {194let s = s.cast(&DataType::Float64).unwrap();195series_to_numpy(py, &s, writable, true).unwrap()196},197UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),198UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),199UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),200UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),201UInt128 => {202let s = s.cast(&DataType::Float64).unwrap();203series_to_numpy(py, &s, writable, true).unwrap()204},205Float16 => numeric_series_to_numpy::<Float16Type, pf16>(py, s),206Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),207Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),208Boolean => boolean_series_to_numpy(py, s),209Date => date_series_to_numpy(py, s),210Datetime(tu, _) => {211use numpy::datetime::{Datetime, units};212match tu {213TimeUnit::Milliseconds => {214temporal_series_to_numpy::<Datetime<units::Milliseconds>>(py, 
s)215},216TimeUnit::Microseconds => {217temporal_series_to_numpy::<Datetime<units::Microseconds>>(py, s)218},219TimeUnit::Nanoseconds => {220temporal_series_to_numpy::<Datetime<units::Nanoseconds>>(py, s)221},222}223},224Duration(tu) => {225use numpy::datetime::{Timedelta, units};226match tu {227TimeUnit::Milliseconds => {228temporal_series_to_numpy::<Timedelta<units::Milliseconds>>(py, s)229},230TimeUnit::Microseconds => {231temporal_series_to_numpy::<Timedelta<units::Microseconds>>(py, s)232},233TimeUnit::Nanoseconds => {234temporal_series_to_numpy::<Timedelta<units::Nanoseconds>>(py, s)235},236}237},238Time => {239let ca = s.time().unwrap();240let values = time_to_pyobject_iter(ca).map(|v| v.into_py_any(py).unwrap());241PyArray1::from_iter(py, values).into_py_any(py).unwrap()242},243String => {244let ca = s.str().unwrap();245let values = ca.iter().map(|s| s.into_py_any(py).unwrap());246PyArray1::from_iter(py, values).into_py_any(py).unwrap()247},248Binary => {249let ca = s.binary().unwrap();250let values = ca.iter().map(|s| s.into_py_any(py).unwrap());251PyArray1::from_iter(py, values).into_py_any(py).unwrap()252},253Categorical(_, _) | Enum(_, _) => {254with_match_categorical_physical_type!(s.dtype().cat_physical().unwrap(), |$C| {255let ca = s.cat::<$C>().unwrap();256let values = ca.iter_str().map(|s| s.into_py_any(py).unwrap());257PyArray1::from_iter(py, values).into_py_any(py).unwrap()258})259},260Decimal(_, _) => {261let ca = s.decimal().unwrap();262let values = decimal_to_pyobject_iter(py, ca)263.unwrap()264.map(|v| v.into_py_any(py).unwrap());265PyArray1::from_iter(py, values).into_py_any(py).unwrap()266},267List(_) => list_series_to_numpy(py, s, writable),268Array(_, _) => array_series_to_numpy(py, s, writable),269Struct(_) => {270let ca = s.struct_().unwrap();271let df = ca.clone().unnest();272df_to_numpy(py, &df, IndexOrder::Fortran, writable, true).unwrap()273},274#[cfg(feature = "object")]275Object(_) => {276let ca = 
s277.as_any()278.downcast_ref::<ObjectChunked<ObjectValue>>()279.unwrap();280let values = ca.iter().map(|v| v.into_py_any(py).unwrap());281PyArray1::from_iter(py, values).into_py_any(py).unwrap()282},283Null => {284let n = s.len();285let values = std::iter::repeat_n(f32::NAN, n);286PyArray1::from_iter(py, values).into_py_any(py).unwrap()287},288Extension(_, _) => series_to_numpy_with_copy(py, s.ext().unwrap().storage(), writable),289Unknown(_) | BinaryOffset => unreachable!(),290}291}292293/// Convert numeric types to f32 or f64 with NaN representing a null value.294fn numeric_series_to_numpy<T, U>(py: Python<'_>, s: &Series) -> Py<PyAny>295where296T: PolarsNumericType,297T::Native: numpy::Element,298U: Float + numpy::Element,299{300let ca: &ChunkedArray<T> = s.as_ref().as_ref();301if s.null_count() == 0 {302let values = ca.into_no_null_iter();303PyArray1::<T::Native>::from_iter(py, values)304.into_py_any(py)305.unwrap()306} else {307let mapper = |opt_v: Option<T::Native>| match opt_v {308Some(v) => NumCast::from(v).unwrap(),309None => U::nan(),310};311let values = ca.iter().map(mapper);312PyArray1::from_iter(py, values).into_py_any(py).unwrap()313}314}315316/// Convert booleans to u8 if no nulls are present, otherwise convert to objects.317fn boolean_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {318let ca = s.bool().unwrap();319if s.null_count() == 0 {320let values = ca.into_no_null_iter();321PyArray1::<bool>::from_iter(py, values)322.into_py_any(py)323.unwrap()324} else {325let values = ca.iter().map(|opt_v| opt_v.into_py_any(py).unwrap());326PyArray1::from_iter(py, values).into_py_any(py).unwrap()327}328}329330/// Convert dates directly to i64 with i64::MIN representing a null value.331fn date_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {332use numpy::datetime::{Datetime, units};333334let s_phys = s.to_physical_repr();335let ca = s_phys.i32().unwrap();336337if s.null_count() == 0 {338let mapper = |v: i32| (v as i64).into();339let values 
= ca.into_no_null_iter().map(mapper);340PyArray1::<Datetime<units::Days>>::from_iter(py, values)341.into_py_any(py)342.unwrap()343} else {344let mapper = |opt_v: Option<i32>| {345match opt_v {346Some(v) => v as i64,347None => i64::MIN,348}349.into()350};351let values = ca.iter().map(mapper);352PyArray1::<Datetime<units::Days>>::from_iter(py, values)353.into_py_any(py)354.unwrap()355}356}357358/// Convert datetimes and durations with i64::MIN representing a null value.359fn temporal_series_to_numpy<T>(py: Python<'_>, s: &Series) -> Py<PyAny>360where361T: From<i64> + numpy::Element,362{363let s_phys = s.to_physical_repr();364let ca = s_phys.i64().unwrap();365let values = ca.iter().map(|v| v.unwrap_or(i64::MIN).into());366PyArray1::<T>::from_iter(py, values)367.into_py_any(py)368.unwrap()369}370fn list_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {371let ca = s.list().unwrap();372373let iter = ca.amortized_iter().map(|opt_s| match opt_s {374None => py.None(),375Some(s) => series_to_numpy(py, s.as_ref(), writable, true).unwrap(),376});377PyArray1::from_iter(py, iter).into_py_any(py).unwrap()378}379380/// Convert arrays by flattening first, converting the flat Series, and then reshaping.381fn array_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {382let ca = s.array().unwrap();383let s_inner = ca.get_inner();384let np_array_flat = series_to_numpy_with_copy(py, &s_inner, writable);385386// Reshape to the original shape.387let DataType::Array(_, width) = s.dtype() else {388unreachable!()389};390reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()391}392393394