Path: blob/main/crates/polars-core/src/chunked_array/ndarray.rs
6940 views
use ndarray::prelude::*;1use rayon::prelude::*;2#[cfg(feature = "serde")]3use serde::{Deserialize, Serialize};45use crate::POOL;6use crate::prelude::*;78#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]9#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]10pub enum IndexOrder {11C,12#[default]13Fortran,14}1516impl<T> ChunkedArray<T>17where18T: PolarsNumericType,19{20/// If data is aligned in a single chunk and has no Null values a zero copy view is returned21/// as an [ndarray]22pub fn to_ndarray(&self) -> PolarsResult<ArrayView1<'_, T::Native>> {23let slice = self.cont_slice()?;24Ok(aview1(slice))25}26}2728impl ListChunked {29/// If all nested [`Series`] have the same length, a 2 dimensional [`ndarray::Array`] is returned.30pub fn to_ndarray<N>(&self) -> PolarsResult<Array2<N::Native>>31where32N: PolarsNumericType,33{34polars_ensure!(35self.null_count() == 0,36ComputeError: "creation of ndarray with null values is not supported"37);3839// first iteration determine the size40let mut iter = self.into_no_null_iter();41let series = iter42.next()43.ok_or_else(|| polars_err!(NoData: "unable to create ndarray of empty ListChunked"))?;4445let width = series.len();46let mut row_idx = 0;47let mut ndarray = ndarray::Array::uninit((self.len(), width));4849let series = series.cast(&N::get_static_dtype())?;50let ca = series.unpack::<N>()?;51let a = ca.to_ndarray()?;52let mut row = ndarray.slice_mut(s![row_idx, ..]);53a.assign_to(&mut row);54row_idx += 1;5556for series in iter {57polars_ensure!(58series.len() == width,59ShapeMismatch: "unable to create a 2-D array, series have different lengths"60);61let series = series.cast(&N::get_static_dtype())?;62let ca = series.unpack::<N>()?;63let a = ca.to_ndarray()?;64let mut row = ndarray.slice_mut(s![row_idx, ..]);65a.assign_to(&mut row);66row_idx += 1;67}6869debug_assert_eq!(row_idx, self.len());70// SAFETY:71// We have assigned to every row and element of the array72unsafe { Ok(ndarray.assume_init()) }73}74}7576impl DataFrame {77/// Create a 2D [`ndarray::Array`] from this [`DataFrame`]. This requires all columns in the78/// [`DataFrame`] to be non-null and numeric. They will be cast to the same data type79/// (if they aren't already).80///81/// For floating point data we implicitly convert `None` to `NaN` without failure.82///83/// ```rust84/// use polars_core::prelude::*;85/// let a = UInt32Chunked::new("a".into(), &[1, 2, 3]).into_column();86/// let b = Float64Chunked::new("b".into(), &[10., 8., 6.]).into_column();87///88/// let df = DataFrame::new(vec![a, b]).unwrap();89/// let ndarray = df.to_ndarray::<Float64Type>(IndexOrder::Fortran).unwrap();90/// println!("{:?}", ndarray);91/// ```92/// Outputs:93/// ```text94/// [[1.0, 10.0],95/// [2.0, 8.0],96/// [3.0, 6.0]], shape=[3, 2], strides=[1, 3], layout=Ff (0xa), const ndim=297/// ```98pub fn to_ndarray<N>(&self, ordering: IndexOrder) -> PolarsResult<Array2<N::Native>>99where100N: PolarsNumericType,101{102let shape = self.shape();103let height = self.height();104let mut membuf = Vec::with_capacity(shape.0 * shape.1);105let ptr = membuf.as_ptr() as usize;106107let columns = self.get_columns();108POOL.install(|| {109columns.par_iter().enumerate().try_for_each(|(col_idx, s)| {110let s = s.as_materialized_series().cast(&N::get_static_dtype())?;111let s = match s.dtype() {112DataType::Float32 => {113let ca = s.f32().unwrap();114ca.none_to_nan().into_series()115},116DataType::Float64 => {117let ca = s.f64().unwrap();118ca.none_to_nan().into_series()119},120_ => s,121};122polars_ensure!(123s.null_count() == 0,124ComputeError: "creation of ndarray with null values is not supported"125);126let ca = s.unpack::<N>()?;127128let mut chunk_offset = 0;129for arr in ca.downcast_iter() {130let vals = arr.values();131132// Depending on the desired order, we add items to the buffer.133// SAFETY:134// We get parallel access to the vector by offsetting index access accordingly.135// For C-order, we only operate on every num-col-th element, starting from the136// column index. For Fortran-order we only operate on n contiguous elements,137// offset by n * the column index.138match ordering {139IndexOrder::C => unsafe {140let num_cols = columns.len();141let mut offset =142(ptr as *mut N::Native).add(col_idx + chunk_offset * num_cols);143for v in vals.iter() {144*offset = *v;145offset = offset.add(num_cols);146}147},148IndexOrder::Fortran => unsafe {149let offset_ptr =150(ptr as *mut N::Native).add(col_idx * height + chunk_offset);151// SAFETY:152// this is uninitialized memory, so we must never read from this data153// copy_from_slice does not read154let buf = std::slice::from_raw_parts_mut(offset_ptr, vals.len());155buf.copy_from_slice(vals)156},157}158chunk_offset += vals.len();159}160161Ok(())162})163})?;164165// SAFETY:166// we have written all data, so we can now safely set length167unsafe {168membuf.set_len(shape.0 * shape.1);169}170// Depending on the desired order, we can either return the array buffer as-is or reverse171// the axes.172match ordering {173IndexOrder::C => Ok(Array2::from_shape_vec((shape.0, shape.1), membuf).unwrap()),174IndexOrder::Fortran => {175let ndarr = Array2::from_shape_vec((shape.1, shape.0), membuf).unwrap();176Ok(ndarr.reversed_axes())177},178}179}180}181182#[cfg(test)]183mod test {184use super::*;185186#[test]187fn test_ndarray_from_ca() -> PolarsResult<()> {188let ca = Float64Chunked::new(PlSmallStr::EMPTY, &[1.0, 2.0, 3.0]);189let ndarr = ca.to_ndarray()?;190assert_eq!(ndarr, ArrayView1::from(&[1.0, 2.0, 3.0]));191192let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(193PlSmallStr::EMPTY,19410,19510,196DataType::Float64,197);198builder.append_opt_slice(Some(&[1.0, 2.0, 3.0]));199builder.append_opt_slice(Some(&[2.0, 4.0, 5.0]));200builder.append_opt_slice(Some(&[6.0, 7.0, 8.0]));201let list = builder.finish();202203let ndarr = list.to_ndarray::<Float64Type>()?;204let expected = array![[1.0, 2.0, 3.0], [2.0, 4.0, 5.0], [6.0, 7.0, 8.0]];205assert_eq!(ndarr, expected);206207// test list array that is not square208let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(209PlSmallStr::EMPTY,21010,21110,212DataType::Float64,213);214builder.append_opt_slice(Some(&[1.0, 2.0, 3.0]));215builder.append_opt_slice(Some(&[2.0]));216builder.append_opt_slice(Some(&[6.0, 7.0, 8.0]));217let list = builder.finish();218assert!(list.to_ndarray::<Float64Type>().is_err());219Ok(())220}221222#[test]223fn test_ndarray_from_df_order_fortran() -> PolarsResult<()> {224let df = df!["a"=> [1.0, 2.0, 3.0],225"b" => [2.0, 3.0, 4.0]226]?;227228let ndarr = df.to_ndarray::<Float64Type>(IndexOrder::Fortran)?;229let expected = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];230assert!(!ndarr.is_standard_layout());231assert_eq!(ndarr, expected);232233Ok(())234}235236#[test]237fn test_ndarray_from_df_order_c() -> PolarsResult<()> {238let df = df!["a"=> [1.0, 2.0, 3.0],239"b" => [2.0, 3.0, 4.0]240]?;241242let ndarr = df.to_ndarray::<Float64Type>(IndexOrder::C)?;243let expected = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];244assert!(ndarr.is_standard_layout());245assert_eq!(ndarr, expected);246247Ok(())248}249}250251252