Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/rust/user-guide/expressions/aggregation.rs
7889 views
1
fn main() -> Result<(), Box<dyn std::error::Error>> {
2
// --8<-- [start:dataframe]
3
use std::io::Cursor;
4
5
use polars::prelude::*;
6
use reqwest::blocking::Client;
7
8
let url = "https://huggingface.co/datasets/nameexhaustion/polars-docs/resolve/main/legislators-historical.csv";
9
10
let mut schema = Schema::default();
11
schema.with_column(
12
"first_name".into(),
13
DataType::from_categories(Categories::global()),
14
);
15
schema.with_column(
16
"gender".into(),
17
DataType::from_categories(Categories::global()),
18
);
19
schema.with_column(
20
"type".into(),
21
DataType::from_categories(Categories::global()),
22
);
23
schema.with_column(
24
"state".into(),
25
DataType::from_categories(Categories::global()),
26
);
27
schema.with_column(
28
"party".into(),
29
DataType::from_categories(Categories::global()),
30
);
31
schema.with_column("birthday".into(), DataType::Date);
32
33
let data = Client::new().get(url).send()?.bytes()?;
34
35
let dataset = CsvReadOptions::default()
36
.with_has_header(true)
37
.with_schema_overwrite(Some(Arc::new(schema)))
38
.map_parse_options(|parse_options| parse_options.with_try_parse_dates(true))
39
.into_reader_with_file_handle(Cursor::new(data))
40
.finish()?
41
.lazy()
42
.with_columns([
43
col("first").name().suffix("_name"),
44
col("middle").name().suffix("_name"),
45
col("last").name().suffix("_name"),
46
])
47
.collect()?;
48
49
println!("{}", &dataset);
50
// --8<-- [end:dataframe]
51
52
// --8<-- [start:basic]
53
let df = dataset
54
.clone()
55
.lazy()
56
.group_by(["first_name"])
57
.agg([len(), col("gender"), col("last_name").first()])
58
.sort(
59
["len"],
60
SortMultipleOptions::default()
61
.with_order_descending(true)
62
.with_nulls_last(true),
63
)
64
.limit(5)
65
.collect()?;
66
67
println!("{df}");
68
// --8<-- [end:basic]
69
70
// --8<-- [start:conditional]
71
let df = dataset
72
.clone()
73
.lazy()
74
.group_by(["state"])
75
.agg([
76
(col("party").eq(lit("Anti-Administration")))
77
.sum()
78
.alias("anti"),
79
(col("party").eq(lit("Pro-Administration")))
80
.sum()
81
.alias("pro"),
82
])
83
.sort(
84
["pro"],
85
SortMultipleOptions::default().with_order_descending(true),
86
)
87
.limit(5)
88
.collect()?;
89
90
println!("{df}");
91
// --8<-- [end:conditional]
92
93
// --8<-- [start:nested]
94
let df = dataset
95
.clone()
96
.lazy()
97
.group_by(["state", "party"])
98
.agg([len().alias("count")])
99
.filter(
100
col("party")
101
.eq(lit("Anti-Administration"))
102
.or(col("party").eq(lit("Pro-Administration"))),
103
)
104
.sort(
105
["count"],
106
SortMultipleOptions::default()
107
.with_order_descending(true)
108
.with_nulls_last(true),
109
)
110
.limit(5)
111
.collect()?;
112
113
println!("{df}");
114
// --8<-- [end:nested]
115
116
// --8<-- [start:filter]
117
fn compute_age() -> Expr {
118
lit(2024) - col("birthday").dt().year()
119
}
120
121
fn avg_birthday(gender: &str) -> Expr {
122
compute_age()
123
.filter(col("gender").eq(lit(gender)))
124
.mean()
125
.alias(format!("avg {gender} birthday"))
126
}
127
128
let df = dataset
129
.clone()
130
.lazy()
131
.group_by(["state"])
132
.agg([
133
avg_birthday("M"),
134
avg_birthday("F"),
135
(col("gender").eq(lit("M"))).sum().alias("# male"),
136
(col("gender").eq(lit("F"))).sum().alias("# female"),
137
])
138
.limit(5)
139
.collect()?;
140
141
println!("{df}");
142
// --8<-- [end:filter]
143
144
// --8<-- [start:filter-nested]
145
let df = dataset
146
.clone()
147
.lazy()
148
.group_by(["state", "gender"])
149
.agg([compute_age().mean().alias("avg birthday"), len().alias("#")])
150
.sort(
151
["#"],
152
SortMultipleOptions::default()
153
.with_order_descending(true)
154
.with_nulls_last(true),
155
)
156
.limit(5)
157
.collect()?;
158
159
println!("{df}");
160
// --8<-- [end:filter-nested]
161
162
// --8<-- [start:sort]
163
fn get_name() -> Expr {
164
col("first_name") + lit(" ") + col("last_name")
165
}
166
167
let df = dataset
168
.clone()
169
.lazy()
170
.sort(
171
["birthday"],
172
SortMultipleOptions::default()
173
.with_order_descending(true)
174
.with_nulls_last(true),
175
)
176
.group_by(["state"])
177
.agg([
178
get_name().first().alias("youngest"),
179
get_name().last().alias("oldest"),
180
])
181
.limit(5)
182
.collect()?;
183
184
println!("{df}");
185
// --8<-- [end:sort]
186
187
// --8<-- [start:sort2]
188
let df = dataset
189
.clone()
190
.lazy()
191
.sort(
192
["birthday"],
193
SortMultipleOptions::default()
194
.with_order_descending(true)
195
.with_nulls_last(true),
196
)
197
.group_by(["state"])
198
.agg([
199
get_name().first().alias("youngest"),
200
get_name().last().alias("oldest"),
201
get_name()
202
.sort(Default::default())
203
.first()
204
.alias("alphabetical_first"),
205
])
206
.limit(5)
207
.collect()?;
208
209
println!("{df}");
210
// --8<-- [end:sort2]
211
212
// --8<-- [start:sort3]
213
let df = dataset
214
.lazy()
215
.sort(
216
["birthday"],
217
SortMultipleOptions::default()
218
.with_order_descending(true)
219
.with_nulls_last(true),
220
)
221
.group_by(["state"])
222
.agg([
223
get_name().first().alias("youngest"),
224
get_name().last().alias("oldest"),
225
get_name()
226
.sort(Default::default())
227
.first()
228
.alias("alphabetical_first"),
229
col("gender")
230
.sort_by(["first_name"], SortMultipleOptions::default())
231
.first(),
232
])
233
.sort(["state"], SortMultipleOptions::default())
234
.limit(5)
235
.collect()?;
236
237
println!("{df}");
238
// --8<-- [end:sort3]
239
240
Ok(())
241
}
242
243