Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-python/src/series/general.rs
7889 views
1
use polars_core::chunked_array::cast::CastOptions;
2
use polars_core::series::IsSorted;
3
use polars_core::utils::flatten::flatten_series;
4
use polars_utils::python_function::PythonObject;
5
use pyo3::exceptions::{PyIndexError, PyRuntimeError, PyTypeError, PyValueError};
6
use pyo3::prelude::*;
7
use pyo3::types::PyBytes;
8
use pyo3::{IntoPyObjectExt, Python};
9
10
use super::PySeries;
11
use crate::dataframe::PyDataFrame;
12
use crate::error::PyPolarsErr;
13
use crate::prelude::*;
14
use crate::py_modules::polars;
15
use crate::utils::EnterPolarsExt;
16
17
#[pymethods]
18
impl PySeries {
19
/// Turn a struct Series into a DataFrame via `unnest`.
///
/// Fails when `struct_()` rejects the Series.
fn struct_unnest(&self, py: Python) -> PyResult<PyDataFrame> {
    py.enter_polars_df(|| {
        let guard = self.series.read();
        let struct_ca = guard.struct_()?.clone();
        Ok(struct_ca.unnest())
    })
}
22
23
/// Collect the field names of a struct Series, in the order `struct_fields()` yields them.
fn struct_fields(&self) -> PyResult<Vec<String>> {
    let guard = self.series.read();
    let struct_ca = guard.struct_().map_err(PyPolarsErr::from)?;
    let mut names = Vec::new();
    for field in struct_ca.struct_fields().iter() {
        names.push(field.name().to_string());
    }
    Ok(names)
}
32
33
fn is_sorted_ascending_flag(&self) -> bool {
34
matches!(self.series.read().is_sorted_flag(), IsSorted::Ascending)
35
}
36
37
fn is_sorted_descending_flag(&self) -> bool {
38
matches!(self.series.read().is_sorted_flag(), IsSorted::Descending)
39
}
40
41
fn can_fast_explode_flag(&self) -> bool {
42
match self.series.read().list() {
43
Err(_) => false,
44
Ok(list) => list._can_fast_explode(),
45
}
46
}
47
48
/// Always reports `true`; the Series itself is not inspected.
pub fn cat_uses_lexical_ordering(&self) -> PyResult<bool> {
    let uses_lexical = true;
    Ok(uses_lexical)
}
51
52
/// Always reports `false`; the Series itself is not inspected.
pub fn cat_is_local(&self) -> PyResult<bool> {
    let is_local = false;
    Ok(is_local)
}
55
56
/// Return a clone of this Series; no conversion is performed.
pub fn cat_to_local(&self, _py: Python) -> PyResult<Self> {
    let copy = self.clone();
    Ok(copy)
}
59
60
fn estimated_size(&self) -> usize {
61
self.series.read().estimated_size()
62
}
63
64
/// Fetch the Python object stored at `index`.
///
/// Returns Python `None` when the dtype is not `Object`.
#[cfg(feature = "object")]
fn get_object<'py>(&self, py: Python<'py>, index: usize) -> PyResult<Bound<'py, PyAny>> {
    let guard = self.series.read();
    if !matches!(guard.dtype(), DataType::Object(_)) {
        return Ok(py.None().into_bound(py));
    }
    let value: Option<&ObjectValue> = guard.get_object(index).map(|any| any.into());
    Ok(value.into_pyobject(py)?)
}
74
75
/// Reshape via `reshape_array`, converting every entry of `dims` with `ReshapeDimension::new`.
#[cfg(feature = "dtype-array")]
fn reshape(&self, py: Python<'_>, dims: Vec<i64>) -> PyResult<Self> {
    let mut shape = Vec::with_capacity(dims.len());
    for dim in dims {
        shape.push(ReshapeDimension::new(dim));
    }

    py.enter_polars_series(|| self.series.read().reshape_array(&shape))
}
84
85
/// Returns the string format of a single element of the Series.
86
fn get_fmt(&self, index: usize, str_len_limit: usize) -> String {
87
let s = self.series.read();
88
let v = format!("{}", s.get(index).unwrap());
89
if let DataType::String | DataType::Categorical(_, _) | DataType::Enum(_, _) = s.dtype() {
90
let v_no_quotes = &v[1..v.len() - 1];
91
let v_trunc = &v_no_quotes[..v_no_quotes
92
.char_indices()
93
.take(str_len_limit)
94
.last()
95
.map(|(i, c)| i + c.len_utf8())
96
.unwrap_or(0)];
97
if v_no_quotes == v_trunc {
98
v
99
} else {
100
format!("\"{v_trunc}…")
101
}
102
} else {
103
v
104
}
105
}
106
107
pub fn rechunk(&self, py: Python<'_>, in_place: bool) -> PyResult<Option<Self>> {
108
let series = py.enter_polars_ok(|| self.series.read().rechunk())?;
109
if in_place {
110
*self.series.write() = series;
111
Ok(None)
112
} else {
113
Ok(Some(series.into()))
114
}
115
}
116
117
/// Get a value by index.
118
fn get_index(&self, py: Python<'_>, index: usize) -> PyResult<Py<PyAny>> {
119
let s = self.series.read();
120
let av = match s.get(index) {
121
Ok(v) => v,
122
Err(PolarsError::OutOfBounds(err)) => {
123
return Err(PyIndexError::new_err(err.to_string()));
124
},
125
Err(e) => return Err(PyPolarsErr::from(e).into()),
126
};
127
128
match av {
129
AnyValue::List(s) | AnyValue::Array(s, _) => {
130
let pyseries = PySeries::new(s);
131
polars(py).getattr(py, "wrap_s")?.call1(py, (pyseries,))
132
},
133
_ => Wrap(av).into_py_any(py),
134
}
135
}
136
137
/// Get a value by index, allowing negative indices.
138
fn get_index_signed(&self, py: Python<'_>, index: isize) -> PyResult<Py<PyAny>> {
139
let index = if index < 0 {
140
match self.len().checked_sub(index.unsigned_abs()) {
141
Some(v) => v,
142
None => {
143
return Err(PyIndexError::new_err(
144
polars_err!(oob = index, self.len()).to_string(),
145
));
146
},
147
}
148
} else {
149
usize::try_from(index).unwrap()
150
};
151
self.get_index(py, index)
152
}
153
154
/// Apply `&` between this Series and `other`.
fn bitand(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {
    py.enter_polars_series(|| {
        let lhs = self.series.read();
        let rhs = other.series.read();
        &*lhs & &*rhs
    })
}
157
158
/// Apply `|` between this Series and `other`.
fn bitor(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {
    py.enter_polars_series(|| {
        let lhs = self.series.read();
        let rhs = other.series.read();
        &*lhs | &*rhs
    })
}
161
162
/// Apply `^` between this Series and `other`.
fn bitxor(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {
    py.enter_polars_series(|| {
        let lhs = self.series.read();
        let rhs = other.series.read();
        &*lhs ^ &*rhs
    })
}
165
166
fn chunk_lengths(&self) -> Vec<usize> {
167
self.series.read().chunk_lengths().collect()
168
}
169
170
pub fn name(&self) -> String {
171
self.series.read().name().to_string()
172
}
173
174
fn rename(&self, name: &str) {
175
self.series.write().rename(name.into());
176
}
177
178
fn dtype<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
179
Wrap(self.series.read().dtype().clone()).into_pyobject(py)
180
}
181
182
fn set_sorted_flag(&self, descending: bool) -> Self {
183
let mut out = self.series.read().clone();
184
if descending {
185
out.set_sorted_flag(IsSorted::Descending);
186
} else {
187
out.set_sorted_flag(IsSorted::Ascending)
188
}
189
out.into()
190
}
191
192
fn n_chunks(&self) -> usize {
193
self.series.read().n_chunks()
194
}
195
196
fn append(&self, py: Python<'_>, other: &PySeries) -> PyResult<()> {
197
py.enter_polars(|| {
198
// Prevent self-append deadlocks.
199
let other = other.series.read().clone();
200
let mut s = self.series.write();
201
s.append(&other)?;
202
PolarsResult::Ok(())
203
})
204
}
205
206
fn extend(&self, py: Python<'_>, other: &PySeries) -> PyResult<()> {
207
py.enter_polars(|| {
208
// Prevent self-extend deadlocks.
209
let other = other.series.read().clone();
210
let mut s = self.series.write();
211
s.extend(&other)?;
212
PolarsResult::Ok(())
213
})
214
}
215
216
fn new_from_index(&self, py: Python<'_>, index: usize, length: usize) -> PyResult<Self> {
217
let s = self.series.read();
218
if index >= s.len() {
219
Err(PyValueError::new_err("index is out of bounds"))
220
} else {
221
py.enter_polars_series(|| Ok(s.new_from_index(index, length)))
222
}
223
}
224
225
fn filter(&self, py: Python<'_>, filter: &PySeries) -> PyResult<Self> {
226
let filter_series = &filter.series.read();
227
if let Ok(ca) = filter_series.bool() {
228
py.enter_polars_series(|| self.series.read().filter(ca))
229
} else {
230
Err(PyRuntimeError::new_err("Expected a boolean mask"))
231
}
232
}
233
234
fn sort(
235
&self,
236
py: Python<'_>,
237
descending: bool,
238
nulls_last: bool,
239
multithreaded: bool,
240
) -> PyResult<Self> {
241
py.enter_polars_series(|| {
242
self.series.read().sort(
243
SortOptions::default()
244
.with_order_descending(descending)
245
.with_nulls_last(nulls_last)
246
.with_multithreaded(multithreaded),
247
)
248
})
249
}
250
251
/// Take the values at the positions given by `indices`.
///
/// Fails when `idx()` rejects the `indices` Series.
fn gather_with_series(&self, py: Python<'_>, indices: &PySeries) -> PyResult<Self> {
    py.enter_polars_series(|| {
        let source = self.series.read();
        let idx_guard = indices.series.read();
        let positions = idx_guard.idx()?;
        source.take(positions)
    })
}
254
255
/// Forward to `null_count()` of the underlying Series.
fn null_count(&self) -> PyResult<usize> {
    let guard = self.series.read();
    Ok(guard.null_count())
}
258
259
fn has_nulls(&self) -> bool {
260
self.series.read().has_nulls()
261
}
262
263
fn equals(
264
&self,
265
py: Python<'_>,
266
other: &PySeries,
267
check_dtypes: bool,
268
check_names: bool,
269
null_equal: bool,
270
) -> PyResult<bool> {
271
let s = self.series.read();
272
let o = other.series.read();
273
if check_dtypes && (s.dtype() != o.dtype()) {
274
return Ok(false);
275
}
276
if check_names && (s.name() != o.name()) {
277
return Ok(false);
278
}
279
if null_equal {
280
py.enter_polars_ok(|| s.equals_missing(&o))
281
} else {
282
py.enter_polars_ok(|| s.equals(&o))
283
}
284
}
285
286
/// Render the Series with its `Debug` formatting.
fn as_str(&self) -> PyResult<String> {
    let guard = self.series.read();
    Ok(format!("{guard:?}"))
}
289
290
#[allow(clippy::len_without_is_empty)]
291
pub fn len(&self) -> usize {
292
self.series.read().len()
293
}
294
295
/// Rechunk and return a pointer to the start of the Series.
296
/// Only implemented for numeric types
297
fn as_single_ptr(&self, py: Python) -> PyResult<usize> {
298
py.enter_polars(|| self.series.write().as_single_ptr())
299
}
300
301
/// Clone this wrapper through its `Clone` implementation.
fn clone(&self) -> Self {
    <Self as Clone>::clone(self)
}
304
305
/// Combine this Series with `other` through `Series::zip_with`, driven by a boolean `mask`.
///
/// Fails when `mask` is not a boolean Series.
fn zip_with(&self, py: Python<'_>, mask: &PySeries, other: &PySeries) -> PyResult<Self> {
    let mask_guard = mask.series.read();
    let mask_ca = mask_guard.bool().map_err(PyPolarsErr::from)?;
    py.enter_polars_series(|| {
        let lhs = self.series.read();
        let rhs = other.series.read();
        lhs.zip_with(mask_ca, &rhs)
    })
}
310
311
#[pyo3(signature = (separator, drop_first, drop_nulls))]
312
fn to_dummies(
313
&self,
314
py: Python<'_>,
315
separator: Option<&str>,
316
drop_first: bool,
317
drop_nulls: bool,
318
) -> PyResult<PyDataFrame> {
319
py.enter_polars_df(|| {
320
self.series
321
.read()
322
.to_dummies(separator, drop_first, drop_nulls)
323
})
324
}
325
326
/// Return the sub-Series at `index` of a list Series.
///
/// Yields `None` when the Series is not a list or `get_as_series` returns nothing.
fn get_list(&self, index: usize) -> Option<Self> {
    let guard = self.series.read();
    let list_ca = guard.list().ok()?;
    let inner = list_ca.get_as_series(index)?;
    Some(inner.into())
}
331
332
/// Forward to `n_unique()` of the underlying Series.
fn n_unique(&self, py: Python) -> PyResult<usize> {
    py.enter_polars(|| {
        let guard = self.series.read();
        guard.n_unique()
    })
}
335
336
/// Forward to `floor()` of the underlying Series.
fn floor(&self, py: Python) -> PyResult<Self> {
    py.enter_polars_series(|| {
        let guard = self.series.read();
        guard.floor()
    })
}
339
340
/// Call `shrink_to_fit()` on the stored Series under the write lock.
fn shrink_to_fit(&self, py: Python) -> PyResult<()> {
    py.enter_polars_ok(|| {
        let mut guard = self.series.write();
        guard.shrink_to_fit()
    })
}
343
344
fn dot<'py>(&self, other: &PySeries, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
345
let s = &*self.series.read();
346
let o = &*other.series.read();
347
let lhs_dtype = s.dtype();
348
let rhs_dtype = o.dtype();
349
350
if !lhs_dtype.is_primitive_numeric() {
351
return Err(PyPolarsErr::from(polars_err!(opq = dot, lhs_dtype)).into());
352
};
353
if !rhs_dtype.is_primitive_numeric() {
354
return Err(PyPolarsErr::from(polars_err!(opq = dot, rhs_dtype)).into());
355
}
356
357
let result: AnyValue = if lhs_dtype.is_float() || rhs_dtype.is_float() {
358
py.enter_polars(|| (s * o)?.sum::<f64>())?.into()
359
} else {
360
py.enter_polars(|| (s * o)?.sum::<i64>())?.into()
361
};
362
363
Wrap(result).into_pyobject(py)
364
}
365
366
fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
367
// Used in pickle/pickling
368
Ok(PyBytes::new(
369
py,
370
&py.enter_polars(|| self.series.read().serialize_to_bytes())?,
371
))
372
}
373
374
fn __setstate__(&self, py: Python<'_>, state: Py<PyAny>) -> PyResult<()> {
375
// Used in pickle/pickling
376
use pyo3::pybacked::PyBackedBytes;
377
match state.extract::<PyBackedBytes>(py) {
378
Ok(bytes) => py.enter_polars(|| {
379
let mut reader = std::io::Cursor::new(&*bytes);
380
*self.series.write() = Series::deserialize_from_reader(&mut reader)?;
381
PolarsResult::Ok(())
382
}),
383
Err(e) => Err(e),
384
}
385
}
386
387
/// Forward to `skew(bias)` of the underlying Series.
fn skew(&self, py: Python<'_>, bias: bool) -> PyResult<Option<f64>> {
    py.enter_polars(|| {
        let guard = self.series.read();
        guard.skew(bias)
    })
}
390
391
/// Forward to `kurtosis(fisher, bias)` of the underlying Series.
fn kurtosis(&self, py: Python<'_>, fisher: bool, bias: bool) -> PyResult<Option<f64>> {
    py.enter_polars(|| {
        let guard = self.series.read();
        guard.kurtosis(fisher, bias)
    })
}
394
395
fn cast(
396
&self,
397
py: Python<'_>,
398
dtype: Wrap<DataType>,
399
strict: bool,
400
wrap_numerical: bool,
401
) -> PyResult<Self> {
402
let options = if wrap_numerical {
403
CastOptions::Overflowing
404
} else if strict {
405
CastOptions::Strict
406
} else {
407
CastOptions::NonStrict
408
};
409
py.enter_polars_series(|| self.series.read().cast_with_options(&dtype.0, options))
410
}
411
412
fn get_chunks(&self) -> PyResult<Vec<Py<PyAny>>> {
413
Python::attach(|py| {
414
let wrap_s = py_modules::polars(py).getattr(py, "wrap_s").unwrap();
415
flatten_series(&self.series.read())
416
.into_iter()
417
.map(|s| wrap_s.call1(py, (Self::new(s),)))
418
.collect()
419
})
420
}
421
422
/// Check whether the Series is sorted for the given direction and null placement.
///
/// The check always runs with `multithreaded: true`, `maintain_order: false` and no limit.
fn is_sorted(&self, py: Python<'_>, descending: bool, nulls_last: bool) -> PyResult<bool> {
    let options = SortOptions {
        descending,
        nulls_last,
        multithreaded: true,
        maintain_order: false,
        limit: None,
    };
    py.enter_polars(|| {
        let guard = self.series.read();
        guard.is_sorted(options)
    })
}
432
433
/// Return the result of `clear()` as a new wrapper; the stored Series is only read.
fn clear(&self) -> Self {
    let guard = self.series.read();
    let cleared = guard.clear();
    cleared.into()
}
436
437
/// Forward to `head(Some(n))` of the underlying Series.
fn head(&self, py: Python<'_>, n: usize) -> PyResult<Self> {
    py.enter_polars_series(|| {
        let guard = self.series.read();
        Ok(guard.head(Some(n)))
    })
}
440
441
/// Forward to `tail(Some(n))` of the underlying Series.
fn tail(&self, py: Python<'_>, n: usize) -> PyResult<Self> {
    py.enter_polars_series(|| {
        let guard = self.series.read();
        Ok(guard.tail(Some(n)))
    })
}
444
445
fn value_counts(
446
&self,
447
py: Python<'_>,
448
sort: bool,
449
parallel: bool,
450
name: String,
451
normalize: bool,
452
) -> PyResult<PyDataFrame> {
453
py.enter_polars_df(|| {
454
self.series
455
.read()
456
.value_counts(sort, parallel, name.into(), normalize)
457
})
458
}
459
460
#[pyo3(signature = (offset, length))]
461
fn slice(&self, offset: i64, length: Option<usize>) -> Self {
462
let s = self.series.read();
463
let length = length.unwrap_or_else(|| s.len());
464
s.slice(offset, length).into()
465
}
466
467
pub fn not_(&self, py: Python) -> PyResult<Self> {
468
py.enter_polars_series(|| polars_ops::series::negate_bitwise(&self.series.read()))
469
}
470
471
pub fn shrink_dtype(&self, py: Python<'_>) -> PyResult<Self> {
472
py.enter_polars(|| {
473
self.series
474
.read()
475
.shrink_type()
476
.map(Into::into)
477
.map_err(PyPolarsErr::from)
478
.map_err(PyErr::from)
479
})
480
}
481
482
fn str_to_datetime_infer(
483
&self,
484
py: Python,
485
time_unit: Option<Wrap<TimeUnit>>,
486
strict: bool,
487
exact: bool,
488
ambiguous: PySeries,
489
) -> PyResult<Self> {
490
Ok(py
491
.enter_polars(|| {
492
let s = self.series.read();
493
let datetime_strings = s.str()?;
494
let ambiguous = ambiguous.series.into_inner();
495
let ambiguous = ambiguous.str()?;
496
497
polars_time::prelude::string::infer::to_datetime_with_inferred_tz(
498
datetime_strings,
499
time_unit.map_or(TimeUnit::Microseconds, |v| v.0),
500
strict,
501
exact,
502
ambiguous,
503
)
504
})?
505
.into_series()
506
.into())
507
}
508
509
pub fn str_to_decimal_infer(&self, py: Python, inference_length: usize) -> PyResult<Self> {
510
py.enter_polars_series(|| {
511
let s = self.series.read();
512
let ca = s.str()?;
513
ca.to_decimal_infer(inference_length).map(Series::from)
514
})
515
}
516
517
pub fn list_to_struct(
518
&self,
519
py: Python<'_>,
520
width_strat: Wrap<ListToStructWidthStrategy>,
521
name_gen: Option<Py<PyAny>>,
522
) -> PyResult<Self> {
523
py.enter_polars(|| {
524
let get_index_name =
525
name_gen.map(|f| PlanCallback::<usize, String>::new_python(PythonObject(f)));
526
let get_index_name = get_index_name.map(|f| {
527
NameGenerator(Arc::new(move |i| f.call(i).map(PlSmallStr::from)) as Arc<_>)
528
});
529
self.series
530
.read()
531
.list()?
532
.to_struct(&ListToStructArgs::InferWidth {
533
infer_field_strategy: width_strat.0,
534
get_index_name,
535
max_fields: None,
536
})
537
.map(IntoSeries::into_series)
538
})
539
.map(Into::into)
540
.map_err(PyPolarsErr::from)
541
.map_err(PyErr::from)
542
}
543
544
/// Decode a string Series with `json_decode`; the result keeps this Series' name.
///
/// Fails when the Series is not a string Series.
#[cfg(feature = "extract_jsonpath")]
fn str_json_decode(
    &self,
    py: Python<'_>,
    infer_schema_length: Option<usize>,
) -> PyResult<Self> {
    py.enter_polars(|| {
        let guard = self.series.read();
        let decoded = guard.str()?.json_decode(None, infer_schema_length)?;
        PolarsResult::Ok(decoded.with_name(guard.name().clone()))
    })
    .map(Into::into)
    .map_err(PyPolarsErr::from)
    .map_err(PyErr::from)
}
560
561
fn ext_to(&self, dtype: Wrap<DataType>) -> PyResult<Self> {
562
let DataType::Extension(typ, storage) = &dtype.0 else {
563
return Err(PyTypeError::new_err(
564
"ext.to(dtype) can only be used with Extension dtypes",
565
));
566
};
567
568
let s = self.series.read();
569
570
if storage.as_ref() != s.dtype() {
571
return Err(PyErr::from(PyPolarsErr::from(polars_err!(SchemaMismatch:
572
"storage type mismatch in ext.to(): expected {}, got {}",
573
storage,
574
s.dtype()
575
))));
576
}
577
578
Ok(s.clone().into_extension(typ.clone()).into())
579
}
580
581
/// Return a clone of what `to_storage()` yields for this Series.
fn ext_storage(&self) -> Self {
    let guard = self.series.read();
    let storage = guard.to_storage().clone();
    storage.into()
}
584
585
fn set(&self, py: Python<'_>, mask: PySeries, value: PySeries) -> PyResult<Self> {
586
assert_eq!(value.len(), 1);
587
py.enter_polars(|| {
588
let slf = self.series.read();
589
let mask = mask.series.read();
590
let value = value.series.read();
591
592
let mask = mask.bool()?;
593
594
PolarsResult::Ok(
595
value
596
.zip_with_same_type(mask, &slf)?
597
.with_name(slf.name().clone()),
598
)
599
})
600
.map(Into::into)
601
.map_err(PyPolarsErr::from)
602
.map_err(PyErr::from)
603
}
604
}
605
606
macro_rules! impl_get {
607
($name:ident, $series_variant:ident, $type:ty) => {
608
#[pymethods]
609
impl PySeries {
610
fn $name(&self, index: i64) -> Option<$type> {
611
let s = self.series.read();
612
if let Ok(ca) = s.$series_variant() {
613
let index = if index < 0 {
614
(ca.len() as i64 + index) as usize
615
} else {
616
index as usize
617
};
618
ca.get(index).map(|r| r.to_owned())
619
} else {
620
None
621
}
622
}
623
}
624
};
625
}
626
627
impl_get!(get_f32, f32, f32);
628
impl_get!(get_f64, f64, f64);
629
impl_get!(get_u8, u8, u8);
630
impl_get!(get_u16, u16, u16);
631
impl_get!(get_u32, u32, u32);
632
impl_get!(get_u64, u64, u64);
633
impl_get!(get_i8, i8, i8);
634
impl_get!(get_i16, i16, i16);
635
impl_get!(get_i32, i32, i32);
636
impl_get!(get_i64, i64, i64);
637
impl_get!(get_str, str, String);
638
639
macro_rules! impl_get_phys {
640
($name:ident, $series_variant:ident, $type:ty) => {
641
#[pymethods]
642
impl PySeries {
643
fn $name(&self, index: i64) -> Option<$type> {
644
let s = self.series.read();
645
if let Ok(ca) = s.$series_variant() {
646
let index = if index < 0 {
647
(ca.len() as i64 + index) as usize
648
} else {
649
index as usize
650
};
651
ca.physical().get(index)
652
} else {
653
None
654
}
655
}
656
}
657
};
658
}
659
660
impl_get_phys!(get_date, date, i32);
661
impl_get_phys!(get_datetime, datetime, i64);
662
impl_get_phys!(get_duration, duration, i64);
663
664