Path: blob/main/crates/polars-python/src/interop/numpy/utils.rs
8346 views
#![allow(unsafe_op_in_unsafe_fn)]1use std::ffi::{c_int, c_void};23use ndarray::{Dim, Dimension};4use numpy::npyffi::PyArrayObject;5use numpy::{Element, PY_ARRAY_API, PyArrayDescr, PyArrayDescrMethods, ToNpyDims, npyffi};6use polars_core::prelude::*;7use pyo3::intern;8use pyo3::prelude::*;9use pyo3::types::PyTuple;1011pub(super) fn get_numpy_module(py: Python) -> PyResult<Bound<PyModule>> {12PyModule::import(py, intern!(py, "numpy"))13}1415/// Create a NumPy ndarray view of the data.16pub(super) unsafe fn create_borrowed_np_array<I>(17py: Python<'_>,18dtype: Bound<PyArrayDescr>,19mut shape: Dim<I>,20flags: c_int,21data: *mut c_void,22owner: Py<PyAny>,23) -> Py<PyAny>24where25Dim<I>: Dimension + ToNpyDims,26{27// See: https://numpy.org/doc/stable/reference/c-api/array.html28let array = PY_ARRAY_API.PyArray_NewFromDescr(29py,30PY_ARRAY_API.get_type_object(py, npyffi::NpyTypes::PyArray_Type),31dtype.into_dtype_ptr(),32shape.ndim_cint(),33shape.as_dims_ptr(),34// We don't provide strides, but provide flags that tell c/f-order35std::ptr::null_mut(),36data,37flags,38std::ptr::null_mut(),39);4041// This keeps the memory alive42let owner_ptr = owner.as_ptr();43// SetBaseObject steals a reference44// so we can forget.45std::mem::forget(owner);46PY_ARRAY_API.PyArray_SetBaseObject(py, array as *mut PyArrayObject, owner_ptr);4748Py::from_owned_ptr(py, array)49}5051/// Returns whether the data type supports creating a NumPy view.52pub(super) fn dtype_supports_view(dtype: &DataType) -> bool {53match dtype {54dt if dt.is_primitive_numeric() => true,55DataType::Datetime(_, _) | DataType::Duration(_) => true,56DataType::Array(inner, _) => dtype_supports_view(inner.as_ref()),57_ => false,58}59}6061/// Returns whether the Series contains nulls at any level of nesting.62///63/// Of the nested types, only Array types are handled since only those are relevant for NumPy views.64pub(super) fn series_contains_null(s: &Series) -> bool {65if s.null_count() > 0 {66true67} else if let Ok(ca) = s.array() {68let s_inner = ca.get_inner();69series_contains_null(&s_inner)70} else {71false72}73}7475/// Reshape the first dimension of a NumPy array to the given height and width.76pub(super) fn reshape_numpy_array(77py: Python<'_>,78arr: Py<PyAny>,79height: usize,80width: usize,81) -> PyResult<Py<PyAny>> {82let shape = arr83.getattr(py, intern!(py, "shape"))?84.extract::<Vec<usize>>(py)?;8586if shape.len() == 1 {87// In this case, we can avoid allocating a Vec.88let new_shape = (height, width);89arr.call_method1(py, intern!(py, "reshape"), new_shape)90} else {91let mut new_shape_vec = vec![height, width];92for v in &shape[1..] {93new_shape_vec.push(*v)94}95let new_shape = PyTuple::new(py, new_shape_vec)?;96arr.call_method1(py, intern!(py, "reshape"), new_shape)97}98}99100/// Get the NumPy temporal data type associated with the given Polars [`DataType`].101pub(super) fn polars_dtype_to_np_temporal_dtype<'py>(102py: Python<'py>,103dtype: &DataType,104) -> Bound<'py, PyArrayDescr> {105use numpy::datetime::{Datetime, Timedelta, units};106match dtype {107DataType::Datetime(TimeUnit::Milliseconds, _) => {108Datetime::<units::Milliseconds>::get_dtype(py)109},110DataType::Datetime(TimeUnit::Microseconds, _) => {111Datetime::<units::Microseconds>::get_dtype(py)112},113DataType::Datetime(TimeUnit::Nanoseconds, _) => {114Datetime::<units::Nanoseconds>::get_dtype(py)115},116DataType::Duration(TimeUnit::Milliseconds) => {117Timedelta::<units::Milliseconds>::get_dtype(py)118},119DataType::Duration(TimeUnit::Microseconds) => {120Timedelta::<units::Microseconds>::get_dtype(py)121},122DataType::Duration(TimeUnit::Nanoseconds) => Timedelta::<units::Nanoseconds>::get_dtype(py),123_ => panic!("only Datetime/Duration inputs supported, got {dtype}"),124}125}126127128