// Source: GitHub repository pola-rs/polars
// Path: crates/polars-python/src/interop/numpy/to_numpy_series.rs
use ndarray::IntoDimension;
2
use num_traits::{Float, NumCast};
3
use numpy::npyffi::flags;
4
use numpy::{Element, PyArray1};
5
use polars::prelude::*;
6
use pyo3::exceptions::PyRuntimeError;
7
use pyo3::prelude::*;
8
use pyo3::{IntoPyObjectExt, intern};
9
10
use super::to_numpy_df::df_to_numpy;
11
use super::utils::{
12
create_borrowed_np_array, dtype_supports_view, polars_dtype_to_np_temporal_dtype,
13
reshape_numpy_array, series_contains_null,
14
};
15
use crate::conversion::ObjectValue;
16
use crate::conversion::chunked_array::{decimal_to_pyobject_iter, time_to_pyobject_iter};
17
use crate::series::PySeries;
18
19
#[pymethods]
20
impl PySeries {
21
/// Convert this Series to a NumPy ndarray.
22
///
23
/// This method copies data only when necessary. Set `allow_copy` to raise an error if copy
24
/// is required. Set `writable` to make sure the resulting array is writable, possibly requiring
25
/// copying the data.
26
fn to_numpy(&self, py: Python<'_>, writable: bool, allow_copy: bool) -> PyResult<Py<PyAny>> {
27
series_to_numpy(py, &self.series.read(), writable, allow_copy)
28
}
29
30
/// Create a view of the data as a NumPy ndarray.
31
///
32
/// WARNING: The resulting view will show the underlying value for nulls,
33
/// which may be any value. The caller is responsible for handling nulls
34
/// appropriately.
35
fn to_numpy_view(&self, py: Python) -> Option<Py<PyAny>> {
36
let (view, _) = try_series_to_numpy_view(py, &self.series.read(), true, false)?;
37
Some(view)
38
}
39
}
40
41
/// Convert a Series to a NumPy ndarray.
42
pub(super) fn series_to_numpy(
43
py: Python<'_>,
44
s: &Series,
45
writable: bool,
46
allow_copy: bool,
47
) -> PyResult<Py<PyAny>> {
48
if s.is_empty() {
49
// Take this path to ensure a writable array.
50
// This does not actually copy data for an empty Series.
51
return Ok(series_to_numpy_with_copy(py, s, true));
52
}
53
if let Some((mut arr, writable_flag)) = try_series_to_numpy_view(py, s, false, allow_copy) {
54
if writable && !writable_flag {
55
if !allow_copy {
56
return Err(PyRuntimeError::new_err(
57
"copy not allowed: cannot create a writable array without copying data",
58
));
59
}
60
arr = arr.call_method0(py, intern!(py, "copy"))?;
61
}
62
return Ok(arr);
63
}
64
65
if !allow_copy {
66
return Err(PyRuntimeError::new_err(
67
"copy not allowed: cannot convert to a NumPy array without copying data",
68
));
69
}
70
71
Ok(series_to_numpy_with_copy(py, s, writable))
72
}
73
74
/// Create a NumPy view of the given Series.
75
fn try_series_to_numpy_view(
76
py: Python<'_>,
77
s: &Series,
78
allow_nulls: bool,
79
allow_rechunk: bool,
80
) -> Option<(Py<PyAny>, bool)> {
81
if !dtype_supports_view(s.dtype()) {
82
return None;
83
}
84
if !allow_nulls && series_contains_null(s) {
85
return None;
86
}
87
let (s_owned, writable_flag) = handle_chunks(py, s, allow_rechunk)?;
88
let array = series_to_numpy_view_recursive(py, s_owned, writable_flag);
89
Some((array, writable_flag))
90
}
91
92
/// Rechunk the Series if required.
93
///
94
/// NumPy arrays are always contiguous, so we may have to rechunk before creating a view.
95
/// If we do so, we can flag the resulting array as writable.
96
fn handle_chunks(py: Python<'_>, s: &Series, allow_rechunk: bool) -> Option<(Series, bool)> {
97
let is_chunked = s.n_chunks() > 1;
98
match (is_chunked, allow_rechunk) {
99
(true, false) => None,
100
(true, true) => Some((py.detach(|| s.rechunk()), true)),
101
(false, _) => Some((s.clone(), false)),
102
}
103
}
104
105
/// Create a NumPy view of the given Series without checking for data types, chunks, or nulls.
106
fn series_to_numpy_view_recursive(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
107
debug_assert!(s.n_chunks() == 1);
108
match s.dtype() {
109
dt if dt.is_primitive_numeric() => numeric_series_to_numpy_view(py, s, writable),
110
DataType::Datetime(_, _) | DataType::Duration(_) => {
111
temporal_series_to_numpy_view(py, s, writable)
112
},
113
DataType::Array(_, _) => array_series_to_numpy_view(py, &s, writable),
114
_ => panic!("invalid data type"),
115
}
116
}
117
118
/// Create a NumPy view of a numeric Series.
119
fn numeric_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
120
let dims = [s.len()].into_dimension();
121
with_match_physical_numpy_polars_type!(s.dtype(), |$T| {
122
let np_dtype = <$T as PolarsNumericType>::Native::get_dtype(py);
123
let ca: &ChunkedArray<$T> = s.unpack::<$T>().unwrap();
124
let flags = if writable {
125
flags::NPY_ARRAY_FARRAY
126
} else {
127
flags::NPY_ARRAY_FARRAY_RO
128
};
129
130
let slice = ca.data_views().next().unwrap();
131
132
unsafe {
133
create_borrowed_np_array::<_>(
134
py,
135
np_dtype,
136
dims,
137
flags,
138
slice.as_ptr() as _,
139
PySeries::from(s).into_py_any(py).unwrap(), // Keep the Series memory alive.,
140
)
141
}
142
})
143
}
144
145
/// Create a NumPy view of a Datetime or Duration Series.
146
fn temporal_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
147
let np_dtype = polars_dtype_to_np_temporal_dtype(py, s.dtype());
148
149
let phys = s.to_physical_repr();
150
let ca = phys.i64().unwrap();
151
let slice = ca.data_views().next().unwrap();
152
let dims = [s.len()].into_dimension();
153
let flags = if writable {
154
flags::NPY_ARRAY_FARRAY
155
} else {
156
flags::NPY_ARRAY_FARRAY_RO
157
};
158
159
unsafe {
160
create_borrowed_np_array::<_>(
161
py,
162
np_dtype,
163
dims,
164
flags,
165
slice.as_ptr() as _,
166
PySeries::from(s).into_py_any(py).unwrap(), // Keep the Series memory alive.,
167
)
168
}
169
}
170
171
/// Create a NumPy view of an Array Series.
172
fn array_series_to_numpy_view(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
173
let ca = s.array().unwrap();
174
let s_inner = ca.get_inner();
175
let np_array_flat = series_to_numpy_view_recursive(py, s_inner, writable);
176
177
// Reshape to the original shape.
178
let DataType::Array(_, width) = s.dtype() else {
179
unreachable!()
180
};
181
reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
182
}
183
184
/// Convert a Series to a NumPy ndarray, copying data in the process.
185
///
186
/// This method will cast integers to floats so that `null = np.nan`.
187
fn series_to_numpy_with_copy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
188
use DataType::*;
189
match s.dtype() {
190
Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
191
Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
192
Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
193
Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
194
Int128 => {
195
let s = s.cast(&DataType::Float64).unwrap();
196
series_to_numpy(py, &s, writable, true).unwrap()
197
},
198
UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
199
UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
200
UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
201
UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
202
UInt128 => {
203
let s = s.cast(&DataType::Float64).unwrap();
204
series_to_numpy(py, &s, writable, true).unwrap()
205
},
206
Float16 => numeric_series_to_numpy::<Float16Type, pf16>(py, s),
207
Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
208
Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
209
Boolean => boolean_series_to_numpy(py, s),
210
Date => date_series_to_numpy(py, s),
211
Datetime(tu, _) => {
212
use numpy::datetime::{Datetime, units};
213
match tu {
214
TimeUnit::Milliseconds => {
215
temporal_series_to_numpy::<Datetime<units::Milliseconds>>(py, s)
216
},
217
TimeUnit::Microseconds => {
218
temporal_series_to_numpy::<Datetime<units::Microseconds>>(py, s)
219
},
220
TimeUnit::Nanoseconds => {
221
temporal_series_to_numpy::<Datetime<units::Nanoseconds>>(py, s)
222
},
223
}
224
},
225
Duration(tu) => {
226
use numpy::datetime::{Timedelta, units};
227
match tu {
228
TimeUnit::Milliseconds => {
229
temporal_series_to_numpy::<Timedelta<units::Milliseconds>>(py, s)
230
},
231
TimeUnit::Microseconds => {
232
temporal_series_to_numpy::<Timedelta<units::Microseconds>>(py, s)
233
},
234
TimeUnit::Nanoseconds => {
235
temporal_series_to_numpy::<Timedelta<units::Nanoseconds>>(py, s)
236
},
237
}
238
},
239
Time => {
240
let ca = s.time().unwrap();
241
let values = time_to_pyobject_iter(ca).map(|v| v.into_py_any(py).unwrap());
242
PyArray1::from_iter(py, values).into_py_any(py).unwrap()
243
},
244
String => {
245
let ca = s.str().unwrap();
246
let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
247
PyArray1::from_iter(py, values).into_py_any(py).unwrap()
248
},
249
Binary => {
250
let ca = s.binary().unwrap();
251
let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
252
PyArray1::from_iter(py, values).into_py_any(py).unwrap()
253
},
254
Categorical(_, _) | Enum(_, _) => {
255
with_match_categorical_physical_type!(s.dtype().cat_physical().unwrap(), |$C| {
256
let ca = s.cat::<$C>().unwrap();
257
let values = ca.iter_str().map(|s| s.into_py_any(py).unwrap());
258
PyArray1::from_iter(py, values).into_py_any(py).unwrap()
259
})
260
},
261
Decimal(_, _) => {
262
let ca = s.decimal().unwrap();
263
let values = decimal_to_pyobject_iter(py, ca)
264
.unwrap()
265
.map(|v| v.into_py_any(py).unwrap());
266
PyArray1::from_iter(py, values).into_py_any(py).unwrap()
267
},
268
List(_) => list_series_to_numpy(py, s, writable),
269
Array(_, _) => array_series_to_numpy(py, s, writable),
270
Struct(_) => {
271
let ca = s.struct_().unwrap();
272
let df = ca.clone().unnest();
273
df_to_numpy(py, &df, IndexOrder::Fortran, writable, true).unwrap()
274
},
275
#[cfg(feature = "object")]
276
Object(_) => {
277
let ca = s
278
.as_any()
279
.downcast_ref::<ObjectChunked<ObjectValue>>()
280
.unwrap();
281
let values = ca.iter().map(|v| v.into_py_any(py).unwrap());
282
PyArray1::from_iter(py, values).into_py_any(py).unwrap()
283
},
284
Null => {
285
let n = s.len();
286
let values = std::iter::repeat_n(f32::NAN, n);
287
PyArray1::from_iter(py, values).into_py_any(py).unwrap()
288
},
289
Extension(_, _) => series_to_numpy_with_copy(py, s.ext().unwrap().storage(), writable),
290
Unknown(_) | BinaryOffset => unreachable!(),
291
}
292
}
293
294
/// Convert numeric types to f32 or f64 with NaN representing a null value.
295
fn numeric_series_to_numpy<T, U>(py: Python<'_>, s: &Series) -> Py<PyAny>
296
where
297
T: PolarsNumericType,
298
T::Native: numpy::Element,
299
U: Float + numpy::Element,
300
{
301
let ca: &ChunkedArray<T> = s.as_ref().as_ref();
302
if s.null_count() == 0 {
303
let values = ca.into_no_null_iter();
304
PyArray1::<T::Native>::from_iter(py, values)
305
.into_py_any(py)
306
.unwrap()
307
} else {
308
let mapper = |opt_v: Option<T::Native>| match opt_v {
309
Some(v) => NumCast::from(v).unwrap(),
310
None => U::nan(),
311
};
312
let values = ca.iter().map(mapper);
313
PyArray1::from_iter(py, values).into_py_any(py).unwrap()
314
}
315
}
316
317
/// Convert booleans to u8 if no nulls are present, otherwise convert to objects.
318
fn boolean_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {
319
let ca = s.bool().unwrap();
320
if s.null_count() == 0 {
321
let values = ca.into_no_null_iter();
322
PyArray1::<bool>::from_iter(py, values)
323
.into_py_any(py)
324
.unwrap()
325
} else {
326
let values = ca.iter().map(|opt_v| opt_v.into_py_any(py).unwrap());
327
PyArray1::from_iter(py, values).into_py_any(py).unwrap()
328
}
329
}
330
331
/// Convert dates directly to i64 with i64::MIN representing a null value.
332
fn date_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {
333
use numpy::datetime::{Datetime, units};
334
335
let s_phys = s.to_physical_repr();
336
let ca = s_phys.i32().unwrap();
337
338
if s.null_count() == 0 {
339
let mapper = |v: i32| (v as i64).into();
340
let values = ca.into_no_null_iter().map(mapper);
341
PyArray1::<Datetime<units::Days>>::from_iter(py, values)
342
.into_py_any(py)
343
.unwrap()
344
} else {
345
let mapper = |opt_v: Option<i32>| {
346
match opt_v {
347
Some(v) => v as i64,
348
None => i64::MIN,
349
}
350
.into()
351
};
352
let values = ca.iter().map(mapper);
353
PyArray1::<Datetime<units::Days>>::from_iter(py, values)
354
.into_py_any(py)
355
.unwrap()
356
}
357
}
358
359
/// Convert datetimes and durations with i64::MIN representing a null value.
360
fn temporal_series_to_numpy<T>(py: Python<'_>, s: &Series) -> Py<PyAny>
361
where
362
T: From<i64> + numpy::Element,
363
{
364
let s_phys = s.to_physical_repr();
365
let ca = s_phys.i64().unwrap();
366
let values = ca.iter().map(|v| v.unwrap_or(i64::MIN).into());
367
PyArray1::<T>::from_iter(py, values)
368
.into_py_any(py)
369
.unwrap()
370
}
371
fn list_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
372
let ca = s.list().unwrap();
373
374
let iter = ca.amortized_iter().map(|opt_s| match opt_s {
375
None => py.None(),
376
Some(s) => series_to_numpy(py, s.as_ref(), writable, true).unwrap(),
377
});
378
PyArray1::from_iter(py, iter).into_py_any(py).unwrap()
379
}
380
381
/// Convert arrays by flattening first, converting the flat Series, and then reshaping.
382
fn array_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
383
let ca = s.array().unwrap();
384
let s_inner = ca.get_inner();
385
let np_array_flat = series_to_numpy_with_copy(py, &s_inner, writable);
386
387
// Reshape to the original shape.
388
let DataType::Array(_, width) = s.dtype() else {
389
unreachable!()
390
};
391
reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
392
}
393
394