Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/dsl/list.rs
8430 views
1
use polars_core::prelude::*;
2
#[cfg(feature = "diff")]
3
use polars_core::series::ops::NullBehavior;
4
5
use crate::dsl::functions::lit;
6
use crate::prelude::function_expr::ListFunction;
7
use crate::prelude::*;
8
9
/// Specialized expressions for [`Series`] of [`DataType::List`].
10
pub struct ListNameSpace(pub Expr);
11
12
impl ListNameSpace {
13
#[cfg(feature = "list_any_all")]
14
pub fn any(self) -> Expr {
15
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Any))
16
}
17
18
#[cfg(feature = "list_any_all")]
19
pub fn all(self) -> Expr {
20
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::All))
21
}
22
23
#[cfg(feature = "list_drop_nulls")]
24
pub fn drop_nulls(self) -> Expr {
25
self.0
26
.map_unary(FunctionExpr::ListExpr(ListFunction::DropNulls))
27
}
28
29
#[cfg(feature = "list_sample")]
30
pub fn sample_n(
31
self,
32
n: Expr,
33
with_replacement: bool,
34
shuffle: bool,
35
seed: Option<u64>,
36
) -> Expr {
37
self.0.map_binary(
38
FunctionExpr::ListExpr(ListFunction::Sample {
39
is_fraction: false,
40
with_replacement,
41
shuffle,
42
seed,
43
}),
44
n,
45
)
46
}
47
48
#[cfg(feature = "list_sample")]
49
pub fn sample_fraction(
50
self,
51
fraction: Expr,
52
with_replacement: bool,
53
shuffle: bool,
54
seed: Option<u64>,
55
) -> Expr {
56
self.0.map_binary(
57
FunctionExpr::ListExpr(ListFunction::Sample {
58
is_fraction: true,
59
with_replacement,
60
shuffle,
61
seed,
62
}),
63
fraction,
64
)
65
}
66
67
/// Return the number of elements in each list.
68
///
69
/// Null values are treated like regular elements in this context.
70
pub fn len(self) -> Expr {
71
self.0
72
.map_unary(FunctionExpr::ListExpr(ListFunction::Length))
73
}
74
75
/// Compute the maximum of the items in every sublist.
76
pub fn max(self) -> Expr {
77
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Max))
78
}
79
80
/// Compute the minimum of the items in every sublist.
81
pub fn min(self) -> Expr {
82
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Min))
83
}
84
85
/// Compute the sum the items in every sublist.
86
pub fn sum(self) -> Expr {
87
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Sum))
88
}
89
90
/// Compute the mean of every sublist and return a `Series` of dtype `Float64`
91
pub fn mean(self) -> Expr {
92
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Mean))
93
}
94
95
pub fn median(self) -> Expr {
96
self.0
97
.map_unary(FunctionExpr::ListExpr(ListFunction::Median))
98
}
99
100
pub fn std(self, ddof: u8) -> Expr {
101
self.0
102
.map_unary(FunctionExpr::ListExpr(ListFunction::Std(ddof)))
103
}
104
105
pub fn var(self, ddof: u8) -> Expr {
106
self.0
107
.map_unary(FunctionExpr::ListExpr(ListFunction::Var(ddof)))
108
}
109
110
/// Sort every sublist.
111
pub fn sort(self, options: SortOptions) -> Expr {
112
self.0
113
.map_unary(FunctionExpr::ListExpr(ListFunction::Sort(options)))
114
}
115
116
/// Reverse every sublist
117
pub fn reverse(self) -> Expr {
118
self.0
119
.map_unary(FunctionExpr::ListExpr(ListFunction::Reverse))
120
}
121
122
/// Keep only the unique values in every sublist.
123
pub fn unique(self) -> Expr {
124
self.0
125
.map_unary(FunctionExpr::ListExpr(ListFunction::Unique(false)))
126
}
127
128
/// Keep only the unique values in every sublist.
129
pub fn unique_stable(self) -> Expr {
130
self.0
131
.map_unary(FunctionExpr::ListExpr(ListFunction::Unique(true)))
132
}
133
134
pub fn n_unique(self) -> Expr {
135
self.0
136
.map_unary(FunctionExpr::ListExpr(ListFunction::NUnique))
137
}
138
139
/// Get items in every sublist by index.
140
pub fn get(self, index: Expr, null_on_oob: bool) -> Expr {
141
self.0.map_binary(
142
FunctionExpr::ListExpr(ListFunction::Get(null_on_oob)),
143
index,
144
)
145
}
146
147
/// Get items in every sublist by multiple indexes.
148
///
149
/// # Arguments
150
/// - `null_on_oob`: Return a null when an index is out of bounds.
151
/// This behavior is more expensive than defaulting to returning an `Error`.
152
#[cfg(feature = "list_gather")]
153
pub fn gather(self, index: Expr, null_on_oob: bool) -> Expr {
154
self.0.map_binary(
155
FunctionExpr::ListExpr(ListFunction::Gather(null_on_oob)),
156
index,
157
)
158
}
159
160
#[cfg(feature = "list_gather")]
161
pub fn gather_every(self, n: Expr, offset: Expr) -> Expr {
162
self.0
163
.map_ternary(FunctionExpr::ListExpr(ListFunction::GatherEvery), n, offset)
164
}
165
166
/// Get first item of every sublist.
167
pub fn first(self) -> Expr {
168
self.get(lit(0i64), true)
169
}
170
171
/// Get last item of every sublist.
172
pub fn last(self) -> Expr {
173
self.get(lit(-1i64), true)
174
}
175
176
/// Join all string items in a sublist and place a separator between them.
177
/// # Error
178
/// This errors if inner type of list `!= DataType::String`.
179
pub fn join(self, separator: Expr, ignore_nulls: bool) -> Expr {
180
self.0.map_binary(
181
FunctionExpr::ListExpr(ListFunction::Join(ignore_nulls)),
182
separator,
183
)
184
}
185
186
/// Return the index of the minimal value of every sublist
187
pub fn arg_min(self) -> Expr {
188
self.0
189
.map_unary(FunctionExpr::ListExpr(ListFunction::ArgMin))
190
}
191
192
/// Return the index of the maximum value of every sublist
193
pub fn arg_max(self) -> Expr {
194
self.0
195
.map_unary(FunctionExpr::ListExpr(ListFunction::ArgMax))
196
}
197
198
/// Diff every sublist.
199
#[cfg(feature = "diff")]
200
pub fn diff(self, n: i64, null_behavior: NullBehavior) -> Expr {
201
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Diff {
202
n,
203
null_behavior,
204
}))
205
}
206
207
/// Shift every sublist.
208
pub fn shift(self, periods: Expr) -> Expr {
209
self.0
210
.map_binary(FunctionExpr::ListExpr(ListFunction::Shift), periods)
211
}
212
213
/// Slice every sublist.
214
pub fn slice(self, offset: Expr, length: Expr) -> Expr {
215
self.0
216
.map_ternary(FunctionExpr::ListExpr(ListFunction::Slice), offset, length)
217
}
218
219
/// Get the head of every sublist
220
pub fn head(self, n: Expr) -> Expr {
221
self.slice(lit(0), n)
222
}
223
224
/// Get the tail of every sublist
225
pub fn tail(self, n: Expr) -> Expr {
226
self.slice(lit(0i64) - n.clone().cast(DataType::Int64), n)
227
}
228
229
#[cfg(feature = "dtype-array")]
230
/// Convert a List column into an Array column with the same inner data type.
231
pub fn to_array(self, width: usize) -> Expr {
232
self.0
233
.map_unary(FunctionExpr::ListExpr(ListFunction::ToArray(width)))
234
}
235
236
#[cfg(feature = "list_to_struct")]
237
#[allow(clippy::wrong_self_convention)]
238
/// Convert this `List` to a `Series` of type `Struct`. The width will be determined according to
239
/// `ListToStructWidthStrategy` and the names of the fields determined by the given `name_generator`.
240
///
241
/// # Schema
242
///
243
/// A polars `LazyFrame` needs to know the schema at all time. The caller therefore must provide
244
/// an `upper_bound` of struct fields that will be set.
245
/// If this is incorrectly downstream operation may fail. For instance an `all().sum()` expression
246
/// will look in the current schema to determine which columns to select.
247
pub fn to_struct(self, names: Arc<[PlSmallStr]>) -> Expr {
248
self.0.map_unary(ListFunction::ToStruct(names))
249
}
250
251
#[cfg(feature = "is_in")]
252
/// Check if the list array contain an element
253
pub fn contains<E: Into<Expr>>(self, other: E, nulls_equal: bool) -> Expr {
254
self.0.map_binary(
255
FunctionExpr::ListExpr(ListFunction::Contains { nulls_equal }),
256
other.into(),
257
)
258
}
259
260
#[cfg(feature = "list_count")]
261
/// Count how often the value produced by ``element`` occurs.
262
pub fn count_matches<E: Into<Expr>>(self, element: E) -> Expr {
263
self.0.map_binary(
264
FunctionExpr::ListExpr(ListFunction::CountMatches),
265
element.into(),
266
)
267
}
268
269
#[cfg(feature = "list_sets")]
270
fn set_operation(self, other: Expr, set_operation: SetOperation) -> Expr {
271
self.0.map_binary(
272
FunctionExpr::ListExpr(ListFunction::SetOperation(set_operation)),
273
other,
274
)
275
}
276
277
/// Return the SET UNION between both list arrays.
278
#[cfg(feature = "list_sets")]
279
pub fn union<E: Into<Expr>>(self, other: E) -> Expr {
280
self.set_operation(other.into(), SetOperation::Union)
281
}
282
283
/// Return the SET DIFFERENCE between both list arrays.
284
#[cfg(feature = "list_sets")]
285
pub fn set_difference<E: Into<Expr>>(self, other: E) -> Expr {
286
self.set_operation(other.into(), SetOperation::Difference)
287
}
288
289
/// Return the SET INTERSECTION between both list arrays.
290
#[cfg(feature = "list_sets")]
291
pub fn set_intersection<E: Into<Expr>>(self, other: E) -> Expr {
292
self.set_operation(other.into(), SetOperation::Intersection)
293
}
294
295
/// Return the SET SYMMETRIC DIFFERENCE between both list arrays.
296
#[cfg(feature = "list_sets")]
297
pub fn set_symmetric_difference<E: Into<Expr>>(self, other: E) -> Expr {
298
self.set_operation(other.into(), SetOperation::SymmetricDifference)
299
}
300
301
pub fn eval<E: Into<Expr>>(self, other: E) -> Expr {
302
Expr::Eval {
303
expr: Arc::new(self.0),
304
evaluation: Arc::new(other.into()),
305
variant: EvalVariant::List,
306
}
307
}
308
309
pub fn agg<E: Into<Expr>>(self, other: E) -> Expr {
310
Expr::Eval {
311
expr: Arc::new(self.0),
312
evaluation: Arc::new(other.into()),
313
variant: EvalVariant::ListAgg,
314
}
315
}
316
}
317
318