Path: blob/main/crates/polars-python/src/interop/numpy/utils.rs
7892 views
#![allow(unsafe_op_in_unsafe_fn)]1use std::ffi::{c_int, c_void};23use ndarray::{Dim, Dimension};4use numpy::npyffi::PyArrayObject;5use numpy::{Element, PY_ARRAY_API, PyArrayDescr, PyArrayDescrMethods, ToNpyDims, npyffi};6use polars_core::prelude::*;7use pyo3::intern;8use pyo3::prelude::*;9use pyo3::types::PyTuple;1011/// Create a NumPy ndarray view of the data.12pub(super) unsafe fn create_borrowed_np_array<I>(13py: Python<'_>,14dtype: Bound<PyArrayDescr>,15mut shape: Dim<I>,16flags: c_int,17data: *mut c_void,18owner: Py<PyAny>,19) -> Py<PyAny>20where21Dim<I>: Dimension + ToNpyDims,22{23// See: https://numpy.org/doc/stable/reference/c-api/array.html24let array = PY_ARRAY_API.PyArray_NewFromDescr(25py,26PY_ARRAY_API.get_type_object(py, npyffi::NpyTypes::PyArray_Type),27dtype.into_dtype_ptr(),28shape.ndim_cint(),29shape.as_dims_ptr(),30// We don't provide strides, but provide flags that tell c/f-order31std::ptr::null_mut(),32data,33flags,34std::ptr::null_mut(),35);3637// This keeps the memory alive38let owner_ptr = owner.as_ptr();39// SetBaseObject steals a reference40// so we can forget.41std::mem::forget(owner);42PY_ARRAY_API.PyArray_SetBaseObject(py, array as *mut PyArrayObject, owner_ptr);4344Py::from_owned_ptr(py, array)45}4647/// Returns whether the data type supports creating a NumPy view.48pub(super) fn dtype_supports_view(dtype: &DataType) -> bool {49match dtype {50dt if dt.is_primitive_numeric() => true,51DataType::Datetime(_, _) | DataType::Duration(_) => true,52DataType::Array(inner, _) => dtype_supports_view(inner.as_ref()),53_ => false,54}55}5657/// Returns whether the Series contains nulls at any level of nesting.58///59/// Of the nested types, only Array types are handled since only those are relevant for NumPy views.60pub(super) fn series_contains_null(s: &Series) -> bool {61if s.null_count() > 0 {62true63} else if let Ok(ca) = s.array() {64let s_inner = ca.get_inner();65series_contains_null(&s_inner)66} else {67false68}69}7071/// Reshape the first dimension of a NumPy array to the given height and width.72pub(super) fn reshape_numpy_array(73py: Python<'_>,74arr: Py<PyAny>,75height: usize,76width: usize,77) -> PyResult<Py<PyAny>> {78let shape = arr79.getattr(py, intern!(py, "shape"))?80.extract::<Vec<usize>>(py)?;8182if shape.len() == 1 {83// In this case, we can avoid allocating a Vec.84let new_shape = (height, width);85arr.call_method1(py, intern!(py, "reshape"), new_shape)86} else {87let mut new_shape_vec = vec![height, width];88for v in &shape[1..] {89new_shape_vec.push(*v)90}91let new_shape = PyTuple::new(py, new_shape_vec)?;92arr.call_method1(py, intern!(py, "reshape"), new_shape)93}94}9596/// Get the NumPy temporal data type associated with the given Polars [`DataType`].97pub(super) fn polars_dtype_to_np_temporal_dtype<'py>(98py: Python<'py>,99dtype: &DataType,100) -> Bound<'py, PyArrayDescr> {101use numpy::datetime::{Datetime, Timedelta, units};102match dtype {103DataType::Datetime(TimeUnit::Milliseconds, _) => {104Datetime::<units::Milliseconds>::get_dtype(py)105},106DataType::Datetime(TimeUnit::Microseconds, _) => {107Datetime::<units::Microseconds>::get_dtype(py)108},109DataType::Datetime(TimeUnit::Nanoseconds, _) => {110Datetime::<units::Nanoseconds>::get_dtype(py)111},112DataType::Duration(TimeUnit::Milliseconds) => {113Timedelta::<units::Milliseconds>::get_dtype(py)114},115DataType::Duration(TimeUnit::Microseconds) => {116Timedelta::<units::Microseconds>::get_dtype(py)117},118DataType::Duration(TimeUnit::Nanoseconds) => Timedelta::<units::Nanoseconds>::get_dtype(py),119_ => panic!("only Datetime/Duration inputs supported, got {dtype}"),120}121}122123124