Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/dsl/udf.rs
8430 views
1
use polars_core::prelude::{AnyValue, Column, DataType, Field};
2
use polars_core::scalar::Scalar;
3
use polars_error::{PolarsResult, polars_err};
4
use polars_utils::pl_str::PlSmallStr;
5
6
use super::{AnonymousColumnsUdf, Expr, OpaqueColumnUdf};
7
use crate::prelude::{FunctionOptions, new_column_udf};
8
9
/// Represents a user-defined function
10
#[derive(Clone)]
11
pub struct UserDefinedFunction {
12
/// name
13
pub name: PlSmallStr,
14
/// The function implementation.
15
pub fun: OpaqueColumnUdf,
16
/// Options for the function.
17
pub options: FunctionOptions,
18
}
19
20
impl std::fmt::Debug for UserDefinedFunction {
21
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
22
f.debug_struct("UserDefinedFunction")
23
.field("name", &self.name)
24
.field("fun", &"<FUNC>")
25
.field("options", &self.options)
26
.finish()
27
}
28
}
29
30
impl UserDefinedFunction {
31
/// Create a new UserDefinedFunction
32
pub fn new(name: PlSmallStr, fun: impl AnonymousColumnsUdf + 'static) -> Self {
33
Self {
34
name,
35
fun: new_column_udf(fun),
36
options: FunctionOptions::default(),
37
}
38
}
39
40
/// creates a logical expression with a call of the UDF
41
pub fn call(self, args: Vec<Expr>) -> Expr {
42
Expr::AnonymousFunction {
43
input: args,
44
function: self.fun,
45
options: self.options,
46
fmt_str: Box::new(PlSmallStr::EMPTY),
47
}
48
}
49
}
50
51
/// Try to infer the output datatype of a UDF.
52
///
53
/// This will call the UDF in a few ways and see if it can get an output type without erroring.
54
pub fn infer_udf_output_dtype(
55
f: &dyn Fn(&[Column]) -> PolarsResult<Column>,
56
input_fields: &[Field],
57
) -> Option<DataType> {
58
// NOTE! It is important that this does not start having less capability as that would mess
59
// API. We can add more passes though.
60
61
// Pass 1: Provide default values for all columns.
62
{
63
let numeric_to_one = true; // A lot of functions error on 0, just give a 1.
64
let num_list_values = 1; // Give at least 1 value, so UDFs have something to go off.
65
let params: Option<Vec<_>> = input_fields
66
.iter()
67
.map(|f| {
68
// Materialize `Unknown` dtypes (e.g., from literals like `pl.lit(10)`)
69
// to concrete types before we try to create default values for them.
70
let dtype = f.dtype().clone().materialize_unknown(true).ok()?;
71
if !dtype.is_known() {
72
return None;
73
}
74
let av = AnyValue::default_value(&dtype, numeric_to_one, num_list_values);
75
let scalar = Scalar::new(dtype, av);
76
77
// Give each column with 2 dummy values.
78
Some(Column::new_scalar(f.name().clone(), scalar, 2))
79
})
80
.collect();
81
82
let params = params?;
83
if let Ok(c) = f(&params) {
84
return Some(c.dtype().clone());
85
}
86
}
87
None
88
}
89
90
/// Try to infer the output datatype of a UDF.
91
///
92
/// This will call the UDF in a few ways and see if it can get an output type without erroring.
93
pub fn try_infer_udf_output_dtype(
94
f: &dyn Fn(&[Column]) -> PolarsResult<Column>,
95
input_fields: &[Field],
96
) -> PolarsResult<DataType> {
97
infer_udf_output_dtype(f, input_fields).ok_or_else(||
98
polars_err!(
99
InvalidOperation:
100
"UDF called without return type, but was not able to infer the output type.\n\nThis used to be allowed but lead to unpredictable results. To fix this problem, either provide a return datatype or execute the UDF in an eager context (e.g. in `map_columns`)."
101
)
102
)
103
}
104
105