Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-python/src/series/construction.rs
8368 views
1
use std::borrow::Cow;
2
3
use arrow::array::{Array, PrimitiveArray};
4
use arrow::bitmap::BitmapBuilder;
5
use arrow::types::NativeType;
6
use num_traits::AsPrimitive;
7
use numpy::{Element, PyArray1, PyArrayMethods, PyUntypedArrayMethods};
8
use polars::prelude::*;
9
use polars_buffer::{Buffer, SharedStorage};
10
use pyo3::exceptions::{PyTypeError, PyValueError};
11
use pyo3::prelude::*;
12
13
use crate::PySeries;
14
use crate::conversion::Wrap;
15
use crate::conversion::any_value::py_object_to_any_value;
16
use crate::error::PyPolarsErr;
17
use crate::interop::arrow::to_rust::array_to_rust;
18
use crate::prelude::ObjectValue;
19
use crate::utils::EnterPolarsExt;
20
21
// Init with numpy arrays.
22
macro_rules! init_method {
23
($name:ident, $type:ty) => {
24
#[pymethods]
25
impl PySeries {
26
#[staticmethod]
27
fn $name(name: &str, array: &Bound<PyArray1<$type>>, _strict: bool) -> Self {
28
let arr = numpy_array_to_arrow(array);
29
Series::from_arrow(name.into(), arr.to_boxed())
30
.unwrap()
31
.into()
32
}
33
}
34
};
35
}
36
37
init_method!(new_i8, i8);
38
init_method!(new_i16, i16);
39
init_method!(new_i32, i32);
40
init_method!(new_i64, i64);
41
init_method!(new_u8, u8);
42
init_method!(new_u16, u16);
43
init_method!(new_u32, u32);
44
init_method!(new_u64, u64);
45
46
fn numpy_array_to_arrow<T: Element + NativeType>(array: &Bound<PyArray1<T>>) -> PrimitiveArray<T> {
47
let owner = array.clone().unbind();
48
let ro = array.readonly();
49
let vals = ro.as_slice().unwrap();
50
unsafe {
51
let storage = SharedStorage::from_slice_with_owner(vals, owner);
52
let buffer = Buffer::from_storage(storage);
53
PrimitiveArray::new_unchecked(T::PRIMITIVE.into(), buffer, None)
54
}
55
}
56
57
/// Builds an object `Series` called `name` from a vector of Python object
/// wrappers.
///
/// Every entry whose inner Python value is `None` is marked as null in the
/// validity mask; all other entries are marked as valid.
#[cfg(feature = "object")]
pub fn series_from_objects(py: Python<'_>, name: PlSmallStr, objects: Vec<ObjectValue>) -> Series {
    let mut mask = BitmapBuilder::with_capacity(objects.len());
    objects.iter().for_each(|obj| {
        let present = !obj.inner.is_none(py);
        // SAFETY: the builder was created with capacity `objects.len()` and
        // exactly one bit is pushed per object.
        unsafe { mask.push_unchecked(present) };
    });

    let validity = mask.into_opt_validity();
    ObjectChunked::<ObjectValue>::new_from_vec_and_validity(name, objects, validity).into_series()
}
72
73
#[pymethods]
74
impl PySeries {
75
#[staticmethod]
76
fn new_bool(
77
py: Python<'_>,
78
name: &str,
79
array: &Bound<PyArray1<bool>>,
80
_strict: bool,
81
) -> PyResult<Self> {
82
let array = array.readonly();
83
84
// We use raw ptr methods to read this as a u8 slice to work around PyO3/rust-numpy#509.
85
assert!(array.is_contiguous());
86
let data_ptr = array.data().cast::<u8>();
87
let data_len = array.len();
88
let vals = unsafe { core::slice::from_raw_parts(data_ptr, data_len) };
89
py.enter_polars_series(|| Series::new(name.into(), vals).cast(&DataType::Boolean))
90
}
91
92
#[staticmethod]
93
fn new_f16(
94
py: Python<'_>,
95
name: &str,
96
array: &Bound<PyArray1<pf16>>,
97
nan_is_null: bool,
98
) -> PyResult<Self> {
99
let arr = numpy_array_to_arrow(array);
100
if nan_is_null {
101
py.enter_polars_series(|| {
102
let validity = polars_compute::nan::is_not_nan(arr.values());
103
Ok(Series::from_array(name.into(), arr.with_validity(validity)))
104
})
105
} else {
106
Ok(Series::from_array(name.into(), arr).into())
107
}
108
}
109
110
#[staticmethod]
111
fn new_f32(
112
py: Python<'_>,
113
name: &str,
114
array: &Bound<PyArray1<f32>>,
115
nan_is_null: bool,
116
) -> PyResult<Self> {
117
let arr = numpy_array_to_arrow(array);
118
if nan_is_null {
119
py.enter_polars_series(|| {
120
let validity = polars_compute::nan::is_not_nan(arr.values());
121
Ok(Series::from_array(name.into(), arr.with_validity(validity)))
122
})
123
} else {
124
Ok(Series::from_array(name.into(), arr).into())
125
}
126
}
127
128
#[staticmethod]
129
fn new_f64(
130
py: Python<'_>,
131
name: &str,
132
array: &Bound<PyArray1<f64>>,
133
nan_is_null: bool,
134
) -> PyResult<Self> {
135
let arr = numpy_array_to_arrow(array);
136
if nan_is_null {
137
py.enter_polars_series(|| {
138
let validity = polars_compute::nan::is_not_nan(arr.values());
139
Ok(Series::from_array(name.into(), arr.with_validity(validity)))
140
})
141
} else {
142
Ok(Series::from_array(name.into(), arr).into())
143
}
144
}
145
}
146
147
#[pymethods]
148
impl PySeries {
149
#[staticmethod]
150
fn new_opt_bool(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
151
let len = values.len()?;
152
let mut builder = BooleanChunkedBuilder::new(name.into(), len);
153
154
for res in values.try_iter()? {
155
let value = res?;
156
if value.is_none() {
157
builder.append_null()
158
} else {
159
let v = value.extract::<bool>()?;
160
builder.append_value(v)
161
}
162
}
163
164
let ca = builder.finish();
165
let s = ca.into_series();
166
Ok(s.into())
167
}
168
}
169
170
fn new_primitive<'py, T, F>(
171
name: &str,
172
values: &Bound<'py, PyAny>,
173
_strict: bool,
174
extract: F,
175
) -> PyResult<PySeries>
176
where
177
T: PolarsNumericType,
178
F: Fn(Bound<'py, PyAny>) -> PyResult<T::Native>,
179
{
180
let len = values.len()?;
181
let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);
182
183
for res in values.try_iter()? {
184
let value = res?;
185
if value.is_none() {
186
builder.append_null()
187
} else {
188
let v = extract(value)?;
189
builder.append_value(v)
190
}
191
}
192
193
let ca = builder.finish();
194
let s = ca.into_series();
195
Ok(s.into())
196
}
197
198
// Init with lists that can contain Nones
199
macro_rules! init_method_opt {
200
($name:ident, $type:ty, $native: ty) => {
201
#[pymethods]
202
impl PySeries {
203
#[staticmethod]
204
fn $name(name: &str, obj: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
205
new_primitive::<$type, _>(name, obj, strict, |v| v.extract::<$native>())
206
}
207
}
208
};
209
}
210
211
init_method_opt!(new_opt_u8, UInt8Type, u8);
212
init_method_opt!(new_opt_u16, UInt16Type, u16);
213
init_method_opt!(new_opt_u32, UInt32Type, u32);
214
init_method_opt!(new_opt_u64, UInt64Type, u64);
215
init_method_opt!(new_opt_u128, UInt128Type, u128);
216
init_method_opt!(new_opt_i8, Int8Type, i8);
217
init_method_opt!(new_opt_i16, Int16Type, i16);
218
init_method_opt!(new_opt_i32, Int32Type, i32);
219
init_method_opt!(new_opt_i64, Int64Type, i64);
220
init_method_opt!(new_opt_i128, Int128Type, i128);
221
init_method_opt!(new_opt_f32, Float32Type, f32);
222
init_method_opt!(new_opt_f64, Float64Type, f64);
223
224
#[pymethods]
225
impl PySeries {
226
#[staticmethod]
227
fn new_opt_f16(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
228
new_primitive::<Float16Type, _>(name, values, false, |v| {
229
Ok(AsPrimitive::<pf16>::as_(v.extract::<f64>()?))
230
})
231
}
232
}
233
234
fn convert_to_avs(
235
values: &Bound<'_, PyAny>,
236
strict: bool,
237
allow_object: bool,
238
) -> PyResult<Vec<AnyValue<'static>>> {
239
values
240
.try_iter()?
241
.map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, allow_object))
242
.collect()
243
}
244
245
#[pymethods]
246
impl PySeries {
247
#[staticmethod]
248
fn new_from_any_values(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
249
let any_values_result = values
250
.try_iter()?
251
.map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, true))
252
.collect::<PyResult<Vec<AnyValue>>>();
253
254
let result = any_values_result.and_then(|avs| {
255
let s = Series::from_any_values(name.into(), avs.as_slice(), strict).map_err(|e| {
256
PyTypeError::new_err(format!(
257
"{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
258
))
259
})?;
260
Ok(s.into())
261
});
262
263
// Fall back to Object type for non-strict construction.
264
if !strict && result.is_err() {
265
return Python::attach(|py| {
266
let objects = values
267
.try_iter()?
268
.map(|v| v?.extract())
269
.collect::<PyResult<Vec<ObjectValue>>>()?;
270
Ok(Self::new_object(py, name, objects, strict))
271
});
272
}
273
274
result
275
}
276
277
#[staticmethod]
278
fn new_from_any_values_and_dtype(
279
name: &str,
280
values: &Bound<PyAny>,
281
dtype: Wrap<DataType>,
282
strict: bool,
283
) -> PyResult<Self> {
284
let avs = convert_to_avs(values, strict, false)?;
285
let s = Series::from_any_values_and_dtype(name.into(), avs.as_slice(), &dtype.0, strict)
286
.map_err(|e| {
287
PyTypeError::new_err(format!(
288
"{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
289
))
290
})?;
291
Ok(s.into())
292
}
293
294
#[staticmethod]
295
fn new_str(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
296
let len = values.len()?;
297
let mut builder = StringChunkedBuilder::new(name.into(), len);
298
299
for res in values.try_iter()? {
300
let value = res?;
301
if value.is_none() {
302
builder.append_null()
303
} else {
304
let v = value.extract::<Cow<str>>()?;
305
builder.append_value(v)
306
}
307
}
308
309
let ca = builder.finish();
310
let s = ca.into_series();
311
Ok(s.into())
312
}
313
314
#[staticmethod]
315
fn new_binary(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
316
let len = values.len()?;
317
let mut builder = BinaryChunkedBuilder::new(name.into(), len);
318
319
for res in values.try_iter()? {
320
let value = res?;
321
if value.is_none() {
322
builder.append_null()
323
} else {
324
let v = value.extract::<&[u8]>()?;
325
builder.append_value(v)
326
}
327
}
328
329
let ca = builder.finish();
330
let s = ca.into_series();
331
Ok(s.into())
332
}
333
334
#[staticmethod]
335
fn new_decimal(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
336
Self::new_from_any_values(name, values, strict)
337
}
338
339
#[staticmethod]
340
fn new_series_list(name: &str, values: Vec<Option<PySeries>>, _strict: bool) -> PyResult<Self> {
341
let series: Vec<_> = values
342
.into_iter()
343
.map(|ops| ops.map(|ps| ps.series.into_inner()))
344
.collect();
345
if let Some(s) = series.iter().flatten().next() {
346
if s.dtype().is_object() {
347
return Err(PyValueError::new_err(
348
"list of objects isn't supported; try building a 'object' only series",
349
));
350
}
351
}
352
Ok(Series::new(name.into(), series).into())
353
}
354
355
#[staticmethod]
356
#[pyo3(signature = (name, values, strict, dtype))]
357
fn new_array(
358
name: &str,
359
values: &Bound<PyAny>,
360
strict: bool,
361
dtype: Wrap<DataType>,
362
) -> PyResult<Self> {
363
Self::new_from_any_values_and_dtype(name, values, dtype, strict)
364
}
365
366
#[staticmethod]
367
pub fn new_object(py: Python<'_>, name: &str, values: Vec<ObjectValue>, _strict: bool) -> Self {
368
#[cfg(feature = "object")]
369
{
370
PySeries::from(series_from_objects(py, name.into(), values))
371
}
372
#[cfg(not(feature = "object"))]
373
panic!("activate 'object' feature")
374
}
375
376
#[staticmethod]
377
fn new_null(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
378
let len = values.len()?;
379
Ok(Series::new_null(name.into(), len).into())
380
}
381
382
#[staticmethod]
383
fn from_arrow(name: &str, array: &Bound<PyAny>) -> PyResult<Self> {
384
let arr = array_to_rust(array)?;
385
386
match arr.dtype() {
387
ArrowDataType::LargeList(_) => {
388
let array = arr.as_any().downcast_ref::<LargeListArray>().unwrap();
389
let fast_explode = array.offsets().as_slice().windows(2).all(|w| w[0] != w[1]);
390
391
let mut out = ListChunked::with_chunk(name.into(), array.clone());
392
if fast_explode {
393
out.set_fast_explode()
394
}
395
Ok(out.into_series().into())
396
},
397
_ => {
398
let series: Series =
399
Series::try_new(name.into(), arr).map_err(PyPolarsErr::from)?;
400
Ok(series.into())
401
},
402
}
403
}
404
}
405
406