Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-python/src/series/construction.rs
7889 views
1
use std::borrow::Cow;
2
3
use arrow::array::Array;
4
use arrow::bitmap::BitmapBuilder;
5
use arrow::types::NativeType;
6
use num_traits::AsPrimitive;
7
use numpy::{Element, PyArray1, PyArrayMethods, PyUntypedArrayMethods};
8
use polars::prelude::*;
9
use polars_core::utils::CustomIterTools;
10
use pyo3::exceptions::{PyTypeError, PyValueError};
11
use pyo3::prelude::*;
12
13
use crate::PySeries;
14
use crate::conversion::Wrap;
15
use crate::conversion::any_value::py_object_to_any_value;
16
use crate::error::PyPolarsErr;
17
use crate::interop::arrow::to_rust::array_to_rust;
18
use crate::prelude::ObjectValue;
19
use crate::utils::EnterPolarsExt;
20
21
// Init with numpy arrays.
22
macro_rules! init_method {
23
($name:ident, $type:ty) => {
24
#[pymethods]
25
impl PySeries {
26
#[staticmethod]
27
fn $name(name: &str, array: &Bound<PyArray1<$type>>, _strict: bool) -> Self {
28
mmap_numpy_array(name, array)
29
}
30
}
31
};
32
}
33
34
init_method!(new_i8, i8);
35
init_method!(new_i16, i16);
36
init_method!(new_i32, i32);
37
init_method!(new_i64, i64);
38
init_method!(new_u8, u8);
39
init_method!(new_u16, u16);
40
init_method!(new_u32, u32);
41
init_method!(new_u64, u64);
42
43
fn mmap_numpy_array<T: Element + NativeType>(name: &str, array: &Bound<PyArray1<T>>) -> PySeries {
44
let vals = unsafe { array.as_slice().unwrap() };
45
46
let arr = unsafe { arrow::ffi::mmap::slice_and_owner(vals, array.clone().unbind()) };
47
Series::from_arrow(name.into(), arr.to_boxed())
48
.unwrap()
49
.into()
50
}
51
52
/// Builds an object `Series` called `name` from `objects`.
///
/// Every entry whose wrapped Python object is `None` is marked invalid in the
/// validity bitmap; all other entries are marked valid.
#[cfg(feature = "object")]
pub fn series_from_objects(py: Python<'_>, name: PlSmallStr, objects: Vec<ObjectValue>) -> Series {
    let count = objects.len();
    let mut mask = BitmapBuilder::with_capacity(count);

    objects.iter().for_each(|obj| {
        let present = !obj.inner.is_none(py);
        // SAFETY: capacity for `count` bits was reserved above and exactly one
        // bit is pushed per object.
        unsafe { mask.push_unchecked(present) };
    });

    let chunked = ObjectChunked::<ObjectValue>::new_from_vec_and_validity(
        name,
        objects,
        mask.into_opt_validity(),
    );
    chunked.into_series()
}
67
68
#[pymethods]
69
impl PySeries {
70
#[staticmethod]
71
fn new_bool(
72
py: Python<'_>,
73
name: &str,
74
array: &Bound<PyArray1<bool>>,
75
_strict: bool,
76
) -> PyResult<Self> {
77
let array = array.readonly();
78
79
// We use raw ptr methods to read this as a u8 slice to work around PyO3/rust-numpy#509.
80
assert!(array.is_contiguous());
81
let data_ptr = array.data().cast::<u8>();
82
let data_len = array.len();
83
let vals = unsafe { core::slice::from_raw_parts(data_ptr, data_len) };
84
py.enter_polars_series(|| Series::new(name.into(), vals).cast(&DataType::Boolean))
85
}
86
87
#[staticmethod]
88
fn new_f16(
89
py: Python<'_>,
90
name: &str,
91
array: &Bound<PyArray1<pf16>>,
92
nan_is_null: bool,
93
) -> PyResult<Self> {
94
if nan_is_null {
95
let array = array.readonly();
96
let vals = array.as_slice().unwrap();
97
py.enter_polars_series(|| {
98
let ca: Float16Chunked = vals
99
.iter()
100
.map(|&val| if pf16::is_nan(val) { None } else { Some(val) })
101
.collect_trusted();
102
Ok(ca.with_name(name.into()))
103
})
104
} else {
105
Ok(mmap_numpy_array(name, array))
106
}
107
}
108
109
#[staticmethod]
110
fn new_f32(
111
py: Python<'_>,
112
name: &str,
113
array: &Bound<PyArray1<f32>>,
114
nan_is_null: bool,
115
) -> PyResult<Self> {
116
if nan_is_null {
117
let array = array.readonly();
118
let vals = array.as_slice().unwrap();
119
py.enter_polars_series(|| {
120
let ca: Float32Chunked = vals
121
.iter()
122
.map(|&val| if f32::is_nan(val) { None } else { Some(val) })
123
.collect_trusted();
124
Ok(ca.with_name(name.into()))
125
})
126
} else {
127
Ok(mmap_numpy_array(name, array))
128
}
129
}
130
131
#[staticmethod]
132
fn new_f64(
133
py: Python<'_>,
134
name: &str,
135
array: &Bound<PyArray1<f64>>,
136
nan_is_null: bool,
137
) -> PyResult<Self> {
138
if nan_is_null {
139
let array = array.readonly();
140
let vals = array.as_slice().unwrap();
141
py.enter_polars_series(|| {
142
let ca: Float64Chunked = vals
143
.iter()
144
.map(|&val| if f64::is_nan(val) { None } else { Some(val) })
145
.collect_trusted();
146
Ok(ca.with_name(name.into()))
147
})
148
} else {
149
Ok(mmap_numpy_array(name, array))
150
}
151
}
152
}
153
154
#[pymethods]
155
impl PySeries {
156
#[staticmethod]
157
fn new_opt_bool(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
158
let len = values.len()?;
159
let mut builder = BooleanChunkedBuilder::new(name.into(), len);
160
161
for res in values.try_iter()? {
162
let value = res?;
163
if value.is_none() {
164
builder.append_null()
165
} else {
166
let v = value.extract::<bool>()?;
167
builder.append_value(v)
168
}
169
}
170
171
let ca = builder.finish();
172
let s = ca.into_series();
173
Ok(s.into())
174
}
175
}
176
177
fn new_primitive<'py, T, F>(
178
name: &str,
179
values: &Bound<'py, PyAny>,
180
_strict: bool,
181
extract: F,
182
) -> PyResult<PySeries>
183
where
184
T: PolarsNumericType,
185
F: Fn(Bound<'py, PyAny>) -> PyResult<T::Native>,
186
{
187
let len = values.len()?;
188
let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);
189
190
for res in values.try_iter()? {
191
let value = res?;
192
if value.is_none() {
193
builder.append_null()
194
} else {
195
let v = extract(value)?;
196
builder.append_value(v)
197
}
198
}
199
200
let ca = builder.finish();
201
let s = ca.into_series();
202
Ok(s.into())
203
}
204
205
// Init with lists that can contain Nones
206
macro_rules! init_method_opt {
207
($name:ident, $type:ty, $native: ty) => {
208
#[pymethods]
209
impl PySeries {
210
#[staticmethod]
211
fn $name(name: &str, obj: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
212
new_primitive::<$type, _>(name, obj, strict, |v| v.extract::<$native>())
213
}
214
}
215
};
216
}
217
218
init_method_opt!(new_opt_u8, UInt8Type, u8);
219
init_method_opt!(new_opt_u16, UInt16Type, u16);
220
init_method_opt!(new_opt_u32, UInt32Type, u32);
221
init_method_opt!(new_opt_u64, UInt64Type, u64);
222
init_method_opt!(new_opt_u128, UInt128Type, u128);
223
init_method_opt!(new_opt_i8, Int8Type, i8);
224
init_method_opt!(new_opt_i16, Int16Type, i16);
225
init_method_opt!(new_opt_i32, Int32Type, i32);
226
init_method_opt!(new_opt_i64, Int64Type, i64);
227
init_method_opt!(new_opt_i128, Int128Type, i128);
228
init_method_opt!(new_opt_f32, Float32Type, f32);
229
init_method_opt!(new_opt_f64, Float64Type, f64);
230
231
#[pymethods]
232
impl PySeries {
233
#[staticmethod]
234
fn new_opt_f16(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
235
new_primitive::<Float16Type, _>(name, values, false, |v| {
236
Ok(AsPrimitive::<pf16>::as_(v.extract::<f64>()?))
237
})
238
}
239
}
240
241
fn convert_to_avs(
242
values: &Bound<'_, PyAny>,
243
strict: bool,
244
allow_object: bool,
245
) -> PyResult<Vec<AnyValue<'static>>> {
246
values
247
.try_iter()?
248
.map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, allow_object))
249
.collect()
250
}
251
252
#[pymethods]
253
impl PySeries {
254
#[staticmethod]
255
fn new_from_any_values(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
256
let any_values_result = values
257
.try_iter()?
258
.map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, true))
259
.collect::<PyResult<Vec<AnyValue>>>();
260
261
let result = any_values_result.and_then(|avs| {
262
let s = Series::from_any_values(name.into(), avs.as_slice(), strict).map_err(|e| {
263
PyTypeError::new_err(format!(
264
"{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
265
))
266
})?;
267
Ok(s.into())
268
});
269
270
// Fall back to Object type for non-strict construction.
271
if !strict && result.is_err() {
272
return Python::attach(|py| {
273
let objects = values
274
.try_iter()?
275
.map(|v| v?.extract())
276
.collect::<PyResult<Vec<ObjectValue>>>()?;
277
Ok(Self::new_object(py, name, objects, strict))
278
});
279
}
280
281
result
282
}
283
284
#[staticmethod]
285
fn new_from_any_values_and_dtype(
286
name: &str,
287
values: &Bound<PyAny>,
288
dtype: Wrap<DataType>,
289
strict: bool,
290
) -> PyResult<Self> {
291
let avs = convert_to_avs(values, strict, false)?;
292
let s = Series::from_any_values_and_dtype(name.into(), avs.as_slice(), &dtype.0, strict)
293
.map_err(|e| {
294
PyTypeError::new_err(format!(
295
"{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
296
))
297
})?;
298
Ok(s.into())
299
}
300
301
#[staticmethod]
302
fn new_str(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
303
let len = values.len()?;
304
let mut builder = StringChunkedBuilder::new(name.into(), len);
305
306
for res in values.try_iter()? {
307
let value = res?;
308
if value.is_none() {
309
builder.append_null()
310
} else {
311
let v = value.extract::<Cow<str>>()?;
312
builder.append_value(v)
313
}
314
}
315
316
let ca = builder.finish();
317
let s = ca.into_series();
318
Ok(s.into())
319
}
320
321
#[staticmethod]
322
fn new_binary(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
323
let len = values.len()?;
324
let mut builder = BinaryChunkedBuilder::new(name.into(), len);
325
326
for res in values.try_iter()? {
327
let value = res?;
328
if value.is_none() {
329
builder.append_null()
330
} else {
331
let v = value.extract::<&[u8]>()?;
332
builder.append_value(v)
333
}
334
}
335
336
let ca = builder.finish();
337
let s = ca.into_series();
338
Ok(s.into())
339
}
340
341
#[staticmethod]
342
fn new_decimal(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
343
Self::new_from_any_values(name, values, strict)
344
}
345
346
#[staticmethod]
347
fn new_series_list(name: &str, values: Vec<Option<PySeries>>, _strict: bool) -> PyResult<Self> {
348
let series: Vec<_> = values
349
.into_iter()
350
.map(|ops| ops.map(|ps| ps.series.into_inner()))
351
.collect();
352
if let Some(s) = series.iter().flatten().next() {
353
if s.dtype().is_object() {
354
return Err(PyValueError::new_err(
355
"list of objects isn't supported; try building a 'object' only series",
356
));
357
}
358
}
359
Ok(Series::new(name.into(), series).into())
360
}
361
362
#[staticmethod]
363
#[pyo3(signature = (name, values, strict, dtype))]
364
fn new_array(
365
name: &str,
366
values: &Bound<PyAny>,
367
strict: bool,
368
dtype: Wrap<DataType>,
369
) -> PyResult<Self> {
370
Self::new_from_any_values_and_dtype(name, values, dtype, strict)
371
}
372
373
#[staticmethod]
374
pub fn new_object(py: Python<'_>, name: &str, values: Vec<ObjectValue>, _strict: bool) -> Self {
375
#[cfg(feature = "object")]
376
{
377
PySeries::from(series_from_objects(py, name.into(), values))
378
}
379
#[cfg(not(feature = "object"))]
380
panic!("activate 'object' feature")
381
}
382
383
#[staticmethod]
384
fn new_null(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
385
let len = values.len()?;
386
Ok(Series::new_null(name.into(), len).into())
387
}
388
389
#[staticmethod]
390
fn from_arrow(name: &str, array: &Bound<PyAny>) -> PyResult<Self> {
391
let arr = array_to_rust(array)?;
392
393
match arr.dtype() {
394
ArrowDataType::LargeList(_) => {
395
let array = arr.as_any().downcast_ref::<LargeListArray>().unwrap();
396
let fast_explode = array.offsets().as_slice().windows(2).all(|w| w[0] != w[1]);
397
398
let mut out = ListChunked::with_chunk(name.into(), array.clone());
399
if fast_explode {
400
out.set_fast_explode()
401
}
402
Ok(out.into_series().into())
403
},
404
_ => {
405
let series: Series =
406
Series::try_new(name.into(), arr).map_err(PyPolarsErr::from)?;
407
Ok(series.into())
408
},
409
}
410
}
411
}
412
413