Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/utils/series.rs
8424 views
1
use std::rc::Rc;
2
3
use polars_compute::find_validity_mismatch::find_validity_mismatch;
4
use polars_compute::gather::take_unchecked;
5
6
use crate::prelude::*;
7
use crate::series::amortized_iter::AmortSeries;
8
9
/// A utility that allocates an [`AmortSeries`]. The applied function can then use that
10
/// series container to save heap allocations and swap arrow arrays.
11
pub fn with_unstable_series<F, T>(dtype: &DataType, f: F) -> T
12
where
13
F: Fn(&mut AmortSeries) -> T,
14
{
15
let container = Series::full_null(PlSmallStr::EMPTY, 0, dtype);
16
let mut us = AmortSeries::new(Rc::new(container));
17
18
f(&mut us)
19
}
20
21
pub fn is_deprecated_cast(input_dtype: &DataType, output_dtype: &DataType) -> bool {
22
use DataType as D;
23
24
#[allow(clippy::single_match)]
25
match (input_dtype, output_dtype) {
26
#[cfg(feature = "dtype-struct")]
27
(D::Struct(l_fields), D::Struct(r_fields)) => {
28
l_fields.len() != r_fields.len()
29
|| l_fields
30
.iter()
31
.zip(r_fields.iter())
32
.any(|(l, r)| l.name() != r.name() || is_deprecated_cast(l.dtype(), r.dtype()))
33
},
34
(D::List(input_dtype), D::List(output_dtype)) => {
35
is_deprecated_cast(input_dtype, output_dtype)
36
},
37
#[cfg(feature = "dtype-array")]
38
(D::Array(input_dtype, _), D::Array(output_dtype, _)) => {
39
is_deprecated_cast(input_dtype, output_dtype)
40
},
41
#[cfg(feature = "dtype-array")]
42
(D::List(input_dtype), D::Array(output_dtype, _))
43
| (D::Array(input_dtype, _), D::List(output_dtype)) => {
44
is_deprecated_cast(input_dtype, output_dtype)
45
},
46
_ => false,
47
}
48
}
49
50
pub fn handle_casting_failures(input: &Series, output: &Series) -> PolarsResult<()> {
51
// @Hack to deal with deprecated cast
52
// @2.0
53
if is_deprecated_cast(input.dtype(), output.dtype()) {
54
return Ok(());
55
}
56
57
let mut idxs = Vec::new();
58
input.find_validity_mismatch(output, &mut idxs);
59
60
if idxs.is_empty() {
61
return Ok(());
62
}
63
64
let num_failures = idxs.len();
65
let failures = input.take_slice(&idxs[..num_failures.min(10)])?;
66
67
let additional_info = match (input.dtype(), output.dtype()) {
68
(DataType::String, DataType::Date | DataType::Datetime(_, _)) => {
69
"\n\nYou might want to try:\n\
70
- setting `strict=False` to set values that cannot be converted to `null`\n\
71
- using `str.strptime`, `str.to_date`, or `str.to_datetime` and providing a format string"
72
},
73
#[cfg(feature = "dtype-categorical")]
74
(DataType::String, DataType::Enum(_, _)) => {
75
"\n\nEnsure that all values in the input column are present in the categories of the enum datatype."
76
},
77
_ if failures.len() < num_failures => {
78
"\n\nDid not show all failed cases as there were too many."
79
},
80
_ => "",
81
};
82
83
polars_bail!(
84
InvalidOperation:
85
"conversion from `{}` to `{}` failed in column '{}' for {} out of {} values: {}{}",
86
input.dtype(),
87
output.dtype(),
88
output.name(),
89
num_failures,
90
input.len(),
91
failures.fmt_list(),
92
additional_info,
93
)
94
}
95
96
pub fn handle_array_casting_failures(input: &dyn Array, output: &dyn Array) -> PolarsResult<()> {
97
let mut idxs = Vec::new();
98
find_validity_mismatch(input, output, &mut idxs);
99
if idxs.is_empty() {
100
return Ok(());
101
}
102
103
let num_failures = idxs.len();
104
let failures = PrimitiveArray::with_slice(&idxs[..num_failures.min(10)], |idxs| unsafe {
105
take_unchecked(input, &idxs)
106
});
107
108
polars_bail!(
109
InvalidOperation:
110
"conversion from `{}` to `{}` failed for {} out of {} values: {}",
111
DataType::from_arrow(input.dtype(), None),
112
DataType::from_arrow(output.dtype(), None),
113
num_failures,
114
input.len(),
115
Series::try_from((PlSmallStr::EMPTY, failures))?,
116
)
117
}
118
119