// Source: GitHub repository pola-rs/polars
// Path: blob/main/crates/polars-plan/src/dsl/list.rs
1
use polars_core::prelude::*;
2
#[cfg(feature = "diff")]
3
use polars_core::series::ops::NullBehavior;
4
5
use crate::prelude::function_expr::ListFunction;
6
use crate::prelude::*;
7
8
/// Specialized expressions for [`Series`] of [`DataType::List`].
9
pub struct ListNameSpace(pub Expr);
10
11
impl ListNameSpace {
12
#[cfg(feature = "list_any_all")]
13
pub fn any(self) -> Expr {
14
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Any))
15
}
16
17
#[cfg(feature = "list_any_all")]
18
pub fn all(self) -> Expr {
19
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::All))
20
}
21
22
#[cfg(feature = "list_drop_nulls")]
23
pub fn drop_nulls(self) -> Expr {
24
self.0
25
.map_unary(FunctionExpr::ListExpr(ListFunction::DropNulls))
26
}
27
28
#[cfg(feature = "list_sample")]
29
pub fn sample_n(
30
self,
31
n: Expr,
32
with_replacement: bool,
33
shuffle: bool,
34
seed: Option<u64>,
35
) -> Expr {
36
self.0.map_binary(
37
FunctionExpr::ListExpr(ListFunction::Sample {
38
is_fraction: false,
39
with_replacement,
40
shuffle,
41
seed,
42
}),
43
n,
44
)
45
}
46
47
#[cfg(feature = "list_sample")]
48
pub fn sample_fraction(
49
self,
50
fraction: Expr,
51
with_replacement: bool,
52
shuffle: bool,
53
seed: Option<u64>,
54
) -> Expr {
55
self.0.map_binary(
56
FunctionExpr::ListExpr(ListFunction::Sample {
57
is_fraction: true,
58
with_replacement,
59
shuffle,
60
seed,
61
}),
62
fraction,
63
)
64
}
65
66
/// Return the number of elements in each list.
67
///
68
/// Null values are treated like regular elements in this context.
69
pub fn len(self) -> Expr {
70
self.0
71
.map_unary(FunctionExpr::ListExpr(ListFunction::Length))
72
}
73
74
/// Compute the maximum of the items in every sublist.
75
pub fn max(self) -> Expr {
76
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Max))
77
}
78
79
/// Compute the minimum of the items in every sublist.
80
pub fn min(self) -> Expr {
81
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Min))
82
}
83
84
/// Compute the sum the items in every sublist.
85
pub fn sum(self) -> Expr {
86
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Sum))
87
}
88
89
/// Compute the mean of every sublist and return a `Series` of dtype `Float64`
90
pub fn mean(self) -> Expr {
91
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Mean))
92
}
93
94
pub fn median(self) -> Expr {
95
self.0
96
.map_unary(FunctionExpr::ListExpr(ListFunction::Median))
97
}
98
99
pub fn std(self, ddof: u8) -> Expr {
100
self.0
101
.map_unary(FunctionExpr::ListExpr(ListFunction::Std(ddof)))
102
}
103
104
pub fn var(self, ddof: u8) -> Expr {
105
self.0
106
.map_unary(FunctionExpr::ListExpr(ListFunction::Var(ddof)))
107
}
108
109
/// Sort every sublist.
110
pub fn sort(self, options: SortOptions) -> Expr {
111
self.0
112
.map_unary(FunctionExpr::ListExpr(ListFunction::Sort(options)))
113
}
114
115
/// Reverse every sublist
116
pub fn reverse(self) -> Expr {
117
self.0
118
.map_unary(FunctionExpr::ListExpr(ListFunction::Reverse))
119
}
120
121
/// Keep only the unique values in every sublist.
122
pub fn unique(self) -> Expr {
123
self.0
124
.map_unary(FunctionExpr::ListExpr(ListFunction::Unique(false)))
125
}
126
127
/// Keep only the unique values in every sublist.
128
pub fn unique_stable(self) -> Expr {
129
self.0
130
.map_unary(FunctionExpr::ListExpr(ListFunction::Unique(true)))
131
}
132
133
pub fn n_unique(self) -> Expr {
134
self.0
135
.map_unary(FunctionExpr::ListExpr(ListFunction::NUnique))
136
}
137
138
/// Get items in every sublist by index.
139
pub fn get(self, index: Expr, null_on_oob: bool) -> Expr {
140
self.0.map_binary(
141
FunctionExpr::ListExpr(ListFunction::Get(null_on_oob)),
142
index,
143
)
144
}
145
146
/// Get items in every sublist by multiple indexes.
147
///
148
/// # Arguments
149
/// - `null_on_oob`: Return a null when an index is out of bounds.
150
/// This behavior is more expensive than defaulting to returning an `Error`.
151
#[cfg(feature = "list_gather")]
152
pub fn gather(self, index: Expr, null_on_oob: bool) -> Expr {
153
self.0.map_binary(
154
FunctionExpr::ListExpr(ListFunction::Gather(null_on_oob)),
155
index,
156
)
157
}
158
159
#[cfg(feature = "list_gather")]
160
pub fn gather_every(self, n: Expr, offset: Expr) -> Expr {
161
self.0
162
.map_ternary(FunctionExpr::ListExpr(ListFunction::GatherEvery), n, offset)
163
}
164
165
/// Get first item of every sublist.
166
pub fn first(self) -> Expr {
167
self.get(lit(0i64), true)
168
}
169
170
/// Get last item of every sublist.
171
pub fn last(self) -> Expr {
172
self.get(lit(-1i64), true)
173
}
174
175
/// Join all string items in a sublist and place a separator between them.
176
/// # Error
177
/// This errors if inner type of list `!= DataType::String`.
178
pub fn join(self, separator: Expr, ignore_nulls: bool) -> Expr {
179
self.0.map_binary(
180
FunctionExpr::ListExpr(ListFunction::Join(ignore_nulls)),
181
separator,
182
)
183
}
184
185
/// Return the index of the minimal value of every sublist
186
pub fn arg_min(self) -> Expr {
187
self.0
188
.map_unary(FunctionExpr::ListExpr(ListFunction::ArgMin))
189
}
190
191
/// Return the index of the maximum value of every sublist
192
pub fn arg_max(self) -> Expr {
193
self.0
194
.map_unary(FunctionExpr::ListExpr(ListFunction::ArgMax))
195
}
196
197
/// Diff every sublist.
198
#[cfg(feature = "diff")]
199
pub fn diff(self, n: i64, null_behavior: NullBehavior) -> Expr {
200
self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Diff {
201
n,
202
null_behavior,
203
}))
204
}
205
206
/// Shift every sublist.
207
pub fn shift(self, periods: Expr) -> Expr {
208
self.0
209
.map_binary(FunctionExpr::ListExpr(ListFunction::Shift), periods)
210
}
211
212
/// Slice every sublist.
213
pub fn slice(self, offset: Expr, length: Expr) -> Expr {
214
self.0
215
.map_ternary(FunctionExpr::ListExpr(ListFunction::Slice), offset, length)
216
}
217
218
/// Get the head of every sublist
219
pub fn head(self, n: Expr) -> Expr {
220
self.slice(lit(0), n)
221
}
222
223
/// Get the tail of every sublist
224
pub fn tail(self, n: Expr) -> Expr {
225
self.slice(lit(0i64) - n.clone().cast(DataType::Int64), n)
226
}
227
228
#[cfg(feature = "dtype-array")]
229
/// Convert a List column into an Array column with the same inner data type.
230
pub fn to_array(self, width: usize) -> Expr {
231
self.0
232
.map_unary(FunctionExpr::ListExpr(ListFunction::ToArray(width)))
233
}
234
235
#[cfg(feature = "list_to_struct")]
236
#[allow(clippy::wrong_self_convention)]
237
/// Convert this `List` to a `Series` of type `Struct`. The width will be determined according to
238
/// `ListToStructWidthStrategy` and the names of the fields determined by the given `name_generator`.
239
///
240
/// # Schema
241
///
242
/// A polars `LazyFrame` needs to know the schema at all time. The caller therefore must provide
243
/// an `upper_bound` of struct fields that will be set.
244
/// If this is incorrectly downstream operation may fail. For instance an `all().sum()` expression
245
/// will look in the current schema to determine which columns to select.
246
pub fn to_struct(self, names: Arc<[PlSmallStr]>) -> Expr {
247
self.0.map_unary(ListFunction::ToStruct(names))
248
}
249
250
#[cfg(feature = "is_in")]
251
/// Check if the list array contain an element
252
pub fn contains<E: Into<Expr>>(self, other: E, nulls_equal: bool) -> Expr {
253
self.0.map_binary(
254
FunctionExpr::ListExpr(ListFunction::Contains { nulls_equal }),
255
other.into(),
256
)
257
}
258
259
#[cfg(feature = "list_count")]
260
/// Count how often the value produced by ``element`` occurs.
261
pub fn count_matches<E: Into<Expr>>(self, element: E) -> Expr {
262
self.0.map_binary(
263
FunctionExpr::ListExpr(ListFunction::CountMatches),
264
element.into(),
265
)
266
}
267
268
#[cfg(feature = "list_sets")]
269
fn set_operation(self, other: Expr, set_operation: SetOperation) -> Expr {
270
self.0.map_binary(
271
FunctionExpr::ListExpr(ListFunction::SetOperation(set_operation)),
272
other,
273
)
274
}
275
276
/// Return the SET UNION between both list arrays.
277
#[cfg(feature = "list_sets")]
278
pub fn union<E: Into<Expr>>(self, other: E) -> Expr {
279
self.set_operation(other.into(), SetOperation::Union)
280
}
281
282
/// Return the SET DIFFERENCE between both list arrays.
283
#[cfg(feature = "list_sets")]
284
pub fn set_difference<E: Into<Expr>>(self, other: E) -> Expr {
285
self.set_operation(other.into(), SetOperation::Difference)
286
}
287
288
/// Return the SET INTERSECTION between both list arrays.
289
#[cfg(feature = "list_sets")]
290
pub fn set_intersection<E: Into<Expr>>(self, other: E) -> Expr {
291
self.set_operation(other.into(), SetOperation::Intersection)
292
}
293
294
/// Return the SET SYMMETRIC DIFFERENCE between both list arrays.
295
#[cfg(feature = "list_sets")]
296
pub fn set_symmetric_difference<E: Into<Expr>>(self, other: E) -> Expr {
297
self.set_operation(other.into(), SetOperation::SymmetricDifference)
298
}
299
300
pub fn eval<E: Into<Expr>>(self, other: E) -> Expr {
301
Expr::Eval {
302
expr: Arc::new(self.0),
303
evaluation: Arc::new(other.into()),
304
variant: EvalVariant::List,
305
}
306
}
307
}
308
309