Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/chunked_array/ndarray.rs
6940 views
1
use ndarray::prelude::*;
2
use rayon::prelude::*;
3
#[cfg(feature = "serde")]
4
use serde::{Deserialize, Serialize};
5
6
use crate::POOL;
7
use crate::prelude::*;
8
9
/// Memory layout requested for a 2-D array produced from a [`DataFrame`].
///
/// The variant is passed to `DataFrame::to_ndarray` and decides how the
/// column values are laid out in the output buffer.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum IndexOrder {
    /// Row-major ("C") layout: the values of one row are adjacent in memory.
    C,
    /// Column-major ("Fortran") layout: the values of one column are adjacent
    /// in memory. This is the default variant.
    #[default]
    Fortran,
}
16
17
impl<T> ChunkedArray<T>
18
where
19
T: PolarsNumericType,
20
{
21
/// If data is aligned in a single chunk and has no Null values a zero copy view is returned
22
/// as an [ndarray]
23
pub fn to_ndarray(&self) -> PolarsResult<ArrayView1<'_, T::Native>> {
24
let slice = self.cont_slice()?;
25
Ok(aview1(slice))
26
}
27
}
28
29
impl ListChunked {
30
/// If all nested [`Series`] have the same length, a 2 dimensional [`ndarray::Array`] is returned.
31
pub fn to_ndarray<N>(&self) -> PolarsResult<Array2<N::Native>>
32
where
33
N: PolarsNumericType,
34
{
35
polars_ensure!(
36
self.null_count() == 0,
37
ComputeError: "creation of ndarray with null values is not supported"
38
);
39
40
// first iteration determine the size
41
let mut iter = self.into_no_null_iter();
42
let series = iter
43
.next()
44
.ok_or_else(|| polars_err!(NoData: "unable to create ndarray of empty ListChunked"))?;
45
46
let width = series.len();
47
let mut row_idx = 0;
48
let mut ndarray = ndarray::Array::uninit((self.len(), width));
49
50
let series = series.cast(&N::get_static_dtype())?;
51
let ca = series.unpack::<N>()?;
52
let a = ca.to_ndarray()?;
53
let mut row = ndarray.slice_mut(s![row_idx, ..]);
54
a.assign_to(&mut row);
55
row_idx += 1;
56
57
for series in iter {
58
polars_ensure!(
59
series.len() == width,
60
ShapeMismatch: "unable to create a 2-D array, series have different lengths"
61
);
62
let series = series.cast(&N::get_static_dtype())?;
63
let ca = series.unpack::<N>()?;
64
let a = ca.to_ndarray()?;
65
let mut row = ndarray.slice_mut(s![row_idx, ..]);
66
a.assign_to(&mut row);
67
row_idx += 1;
68
}
69
70
debug_assert_eq!(row_idx, self.len());
71
// SAFETY:
72
// We have assigned to every row and element of the array
73
unsafe { Ok(ndarray.assume_init()) }
74
}
75
}
76
77
impl DataFrame {
78
/// Create a 2D [`ndarray::Array`] from this [`DataFrame`]. This requires all columns in the
79
/// [`DataFrame`] to be non-null and numeric. They will be cast to the same data type
80
/// (if they aren't already).
81
///
82
/// For floating point data we implicitly convert `None` to `NaN` without failure.
83
///
84
/// ```rust
85
/// use polars_core::prelude::*;
86
/// let a = UInt32Chunked::new("a".into(), &[1, 2, 3]).into_column();
87
/// let b = Float64Chunked::new("b".into(), &[10., 8., 6.]).into_column();
88
///
89
/// let df = DataFrame::new(vec![a, b]).unwrap();
90
/// let ndarray = df.to_ndarray::<Float64Type>(IndexOrder::Fortran).unwrap();
91
/// println!("{:?}", ndarray);
92
/// ```
93
/// Outputs:
94
/// ```text
95
/// [[1.0, 10.0],
96
/// [2.0, 8.0],
97
/// [3.0, 6.0]], shape=[3, 2], strides=[1, 3], layout=Ff (0xa), const ndim=2
98
/// ```
99
pub fn to_ndarray<N>(&self, ordering: IndexOrder) -> PolarsResult<Array2<N::Native>>
100
where
101
N: PolarsNumericType,
102
{
103
let shape = self.shape();
104
let height = self.height();
105
let mut membuf = Vec::with_capacity(shape.0 * shape.1);
106
let ptr = membuf.as_ptr() as usize;
107
108
let columns = self.get_columns();
109
POOL.install(|| {
110
columns.par_iter().enumerate().try_for_each(|(col_idx, s)| {
111
let s = s.as_materialized_series().cast(&N::get_static_dtype())?;
112
let s = match s.dtype() {
113
DataType::Float32 => {
114
let ca = s.f32().unwrap();
115
ca.none_to_nan().into_series()
116
},
117
DataType::Float64 => {
118
let ca = s.f64().unwrap();
119
ca.none_to_nan().into_series()
120
},
121
_ => s,
122
};
123
polars_ensure!(
124
s.null_count() == 0,
125
ComputeError: "creation of ndarray with null values is not supported"
126
);
127
let ca = s.unpack::<N>()?;
128
129
let mut chunk_offset = 0;
130
for arr in ca.downcast_iter() {
131
let vals = arr.values();
132
133
// Depending on the desired order, we add items to the buffer.
134
// SAFETY:
135
// We get parallel access to the vector by offsetting index access accordingly.
136
// For C-order, we only operate on every num-col-th element, starting from the
137
// column index. For Fortran-order we only operate on n contiguous elements,
138
// offset by n * the column index.
139
match ordering {
140
IndexOrder::C => unsafe {
141
let num_cols = columns.len();
142
let mut offset =
143
(ptr as *mut N::Native).add(col_idx + chunk_offset * num_cols);
144
for v in vals.iter() {
145
*offset = *v;
146
offset = offset.add(num_cols);
147
}
148
},
149
IndexOrder::Fortran => unsafe {
150
let offset_ptr =
151
(ptr as *mut N::Native).add(col_idx * height + chunk_offset);
152
// SAFETY:
153
// this is uninitialized memory, so we must never read from this data
154
// copy_from_slice does not read
155
let buf = std::slice::from_raw_parts_mut(offset_ptr, vals.len());
156
buf.copy_from_slice(vals)
157
},
158
}
159
chunk_offset += vals.len();
160
}
161
162
Ok(())
163
})
164
})?;
165
166
// SAFETY:
167
// we have written all data, so we can now safely set length
168
unsafe {
169
membuf.set_len(shape.0 * shape.1);
170
}
171
// Depending on the desired order, we can either return the array buffer as-is or reverse
172
// the axes.
173
match ordering {
174
IndexOrder::C => Ok(Array2::from_shape_vec((shape.0, shape.1), membuf).unwrap()),
175
IndexOrder::Fortran => {
176
let ndarr = Array2::from_shape_vec((shape.1, shape.0), membuf).unwrap();
177
Ok(ndarr.reversed_axes())
178
},
179
}
180
}
181
}
182
183
#[cfg(test)]
mod test {
    use super::*;

    /// Build a Float64 list column holding one list entry per slice in `rows`.
    fn float_list(rows: &[&[f64]]) -> ListChunked {
        let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
            PlSmallStr::EMPTY,
            10,
            10,
            DataType::Float64,
        );
        for row in rows {
            builder.append_opt_slice(Some(*row));
        }
        builder.finish()
    }

    /// Two Float64 columns of three rows each, shared by the DataFrame tests.
    fn two_column_frame() -> PolarsResult<DataFrame> {
        df![
            "a" => [1.0, 2.0, 3.0],
            "b" => [2.0, 3.0, 4.0]
        ]
    }

    #[test]
    fn test_ndarray_from_ca() -> PolarsResult<()> {
        // A flat numeric array becomes a 1-D view.
        let flat = Float64Chunked::new(PlSmallStr::EMPTY, &[1.0, 2.0, 3.0]);
        let view = flat.to_ndarray()?;
        assert_eq!(view, ArrayView1::from(&[1.0, 2.0, 3.0]));

        // A list column with equally sized entries becomes a 2-D array.
        let square = float_list(&[&[1.0, 2.0, 3.0], &[2.0, 4.0, 5.0], &[6.0, 7.0, 8.0]]);
        let matrix = square.to_ndarray::<Float64Type>()?;
        let expected = array![[1.0, 2.0, 3.0], [2.0, 4.0, 5.0], [6.0, 7.0, 8.0]];
        assert_eq!(matrix, expected);

        // test list array that is not square
        let ragged = float_list(&[&[1.0, 2.0, 3.0], &[2.0], &[6.0, 7.0, 8.0]]);
        assert!(ragged.to_ndarray::<Float64Type>().is_err());
        Ok(())
    }

    #[test]
    fn test_ndarray_from_df_order_fortran() -> PolarsResult<()> {
        let frame = two_column_frame()?;

        let matrix = frame.to_ndarray::<Float64Type>(IndexOrder::Fortran)?;
        let expected = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];
        assert!(!matrix.is_standard_layout());
        assert_eq!(matrix, expected);

        Ok(())
    }

    #[test]
    fn test_ndarray_from_df_order_c() -> PolarsResult<()> {
        let frame = two_column_frame()?;

        let matrix = frame.to_ndarray::<Float64Type>(IndexOrder::C)?;
        let expected = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];
        assert!(matrix.is_standard_layout());
        assert_eq!(matrix, expected);

        Ok(())
    }
}
251
252