Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/dsl/udf.rs
6939 views
1
use polars_core::prelude::{AnyValue, Column, DataType, Field};
2
use polars_core::scalar::Scalar;
3
use polars_error::{PolarsResult, polars_err};
4
use polars_utils::pl_str::PlSmallStr;
5
6
use super::{AnonymousColumnsUdf, Expr, OpaqueColumnUdf};
7
use crate::prelude::{FunctionOptions, new_column_udf};
8
9
/// Represents a user-defined function
10
#[derive(Clone)]
11
pub struct UserDefinedFunction {
12
/// name
13
pub name: PlSmallStr,
14
/// The function implementation.
15
pub fun: OpaqueColumnUdf,
16
/// Options for the function.
17
pub options: FunctionOptions,
18
}
19
20
impl std::fmt::Debug for UserDefinedFunction {
21
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
22
f.debug_struct("UserDefinedFunction")
23
.field("name", &self.name)
24
.field("fun", &"<FUNC>")
25
.field("options", &self.options)
26
.finish()
27
}
28
}
29
30
impl UserDefinedFunction {
31
/// Create a new UserDefinedFunction
32
pub fn new(name: PlSmallStr, fun: impl AnonymousColumnsUdf + 'static) -> Self {
33
Self {
34
name,
35
fun: new_column_udf(fun),
36
options: FunctionOptions::default(),
37
}
38
}
39
40
/// creates a logical expression with a call of the UDF
41
pub fn call(self, args: Vec<Expr>) -> Expr {
42
Expr::AnonymousFunction {
43
input: args,
44
function: self.fun,
45
options: self.options,
46
fmt_str: Box::new(PlSmallStr::EMPTY),
47
}
48
}
49
}
50
51
/// Try to infer the output datatype of a UDF.
52
///
53
/// This will call the UDF in a few ways and see if it can get an output type without erroring.
54
pub fn infer_udf_output_dtype(
55
f: &dyn Fn(&[Column]) -> PolarsResult<Column>,
56
input_fields: &[Field],
57
) -> Option<DataType> {
58
// NOTE! It is important that this does not start having less capability as that would mess
59
// API. We can add more passes though.
60
61
// Pass 1: Provide default values for all columns.
62
{
63
let numeric_to_one = true; // A lot of functions error on 0, just give a 1.
64
let num_list_values = 1; // Give at least 1 value, so UDFs have something to go off.
65
let params = input_fields
66
.iter()
67
.map(|f| {
68
let av = AnyValue::default_value(f.dtype(), numeric_to_one, num_list_values);
69
let scalar = Scalar::new(f.dtype().clone(), av);
70
71
// Give each column with 2 dummy values.
72
Column::new_scalar(f.name().clone(), scalar, 2)
73
})
74
.collect::<Vec<_>>();
75
76
if let Ok(c) = f(&params) {
77
return Some(c.dtype().clone());
78
}
79
}
80
81
None
82
}
83
84
/// Try to infer the output datatype of a UDF.
85
///
86
/// This will call the UDF in a few ways and see if it can get an output type without erroring.
87
pub fn try_infer_udf_output_dtype(
88
f: &dyn Fn(&[Column]) -> PolarsResult<Column>,
89
input_fields: &[Field],
90
) -> PolarsResult<DataType> {
91
infer_udf_output_dtype(f, input_fields).ok_or_else(||
92
polars_err!(
93
InvalidOperation:
94
"UDF called without return type, but was not able to infer the output type.\n\nThis used to be allowed but lead to unpredictable results. To fix this problem, either provide a return datatype or execute the UDF in an eager context (e.g. in `map_columns`)."
95
)
96
)
97
}
98
99