// GitHub repository: pola-rs/polars
// Path: docs/source/src/rust/user-guide/getting-started.rs
fn main() -> Result<(), Box<dyn std::error::Error>> {
2
// --8<-- [start:df]
3
use chrono::prelude::*;
4
use polars::prelude::*;
5
6
let mut df: DataFrame = df!(
7
"name" => ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
8
"birthdate" => [
9
NaiveDate::from_ymd_opt(1997, 1, 10).unwrap(),
10
NaiveDate::from_ymd_opt(1985, 2, 15).unwrap(),
11
NaiveDate::from_ymd_opt(1983, 3, 22).unwrap(),
12
NaiveDate::from_ymd_opt(1981, 4, 30).unwrap(),
13
],
14
"weight" => [57.9, 72.5, 53.6, 83.1], // (kg)
15
"height" => [1.56, 1.77, 1.65, 1.75], // (m)
16
)
17
.unwrap();
18
println!("{df}");
19
// --8<-- [end:df]
20
21
// --8<-- [start:csv]
22
use std::fs::File;
23
24
let mut file = File::create("docs/assets/data/output.csv").expect("could not create file");
25
CsvWriter::new(&mut file)
26
.include_header(true)
27
.with_separator(b',')
28
.finish(&mut df)?;
29
let df_csv = CsvReadOptions::default()
30
.with_has_header(true)
31
.with_parse_options(CsvParseOptions::default().with_try_parse_dates(true))
32
.try_into_reader_with_file_path(Some("docs/assets/data/output.csv".into()))?
33
.finish()?;
34
println!("{df_csv}");
35
// --8<-- [end:csv]
36
37
// --8<-- [start:select]
38
let result = df
39
.clone()
40
.lazy()
41
.select([
42
col("name"),
43
col("birthdate").dt().year().alias("birth_year"),
44
(col("weight") / col("height").pow(2)).alias("bmi"),
45
])
46
.collect()?;
47
println!("{result}");
48
// --8<-- [end:select]
49
50
// --8<-- [start:expression-expansion]
51
let result = df
52
.clone()
53
.lazy()
54
.select([
55
col("name"),
56
(cols(["weight", "height"]).as_expr() * lit(0.95))
57
.round(2, RoundMode::default())
58
.name()
59
.suffix("-5%"),
60
])
61
.collect()?;
62
println!("{result}");
63
// --8<-- [end:expression-expansion]
64
65
// --8<-- [start:with_columns]
66
let result = df
67
.clone()
68
.lazy()
69
.with_columns([
70
col("birthdate").dt().year().alias("birth_year"),
71
(col("weight") / col("height").pow(2)).alias("bmi"),
72
])
73
.collect()?;
74
println!("{result}");
75
// --8<-- [end:with_columns]
76
77
// --8<-- [start:filter]
78
let result = df
79
.clone()
80
.lazy()
81
.filter(col("birthdate").dt().year().lt(lit(1990)))
82
.collect()?;
83
println!("{result}");
84
// --8<-- [end:filter]
85
86
// --8<-- [start:filter-multiple]
87
let result = df
88
.clone()
89
.lazy()
90
.filter(
91
col("birthdate")
92
.is_between(
93
lit(NaiveDate::from_ymd_opt(1982, 12, 31).unwrap()),
94
lit(NaiveDate::from_ymd_opt(1996, 1, 1).unwrap()),
95
ClosedInterval::Both,
96
)
97
.and(col("height").gt(lit(1.7))),
98
)
99
.collect()?;
100
println!("{result}");
101
// --8<-- [end:filter-multiple]
102
103
// --8<-- [start:group_by]
104
// Use `group_by_stable` if you want the Python behaviour of `maintain_order=True`.
105
let result = df
106
.clone()
107
.lazy()
108
.group_by([(col("birthdate").dt().year() / lit(10) * lit(10)).alias("decade")])
109
.agg([len()])
110
.collect()?;
111
println!("{result}");
112
// --8<-- [end:group_by]
113
114
// --8<-- [start:group_by-agg]
115
let result = df
116
.clone()
117
.lazy()
118
.group_by([(col("birthdate").dt().year() / lit(10) * lit(10)).alias("decade")])
119
.agg([
120
len().alias("sample_size"),
121
col("weight")
122
.mean()
123
.round(2, RoundMode::default())
124
.alias("avg_weight"),
125
col("height").max().alias("tallest"),
126
])
127
.collect()?;
128
println!("{result}");
129
// --8<-- [end:group_by-agg]
130
131
// --8<-- [start:complex]
132
let result = df
133
.clone()
134
.lazy()
135
.with_columns([
136
(col("birthdate").dt().year() / lit(10) * lit(10)).alias("decade"),
137
col("name").str().split(lit(" ")).list().first(),
138
])
139
.select([all().exclude_cols(["birthdate"]).as_expr()])
140
.group_by([col("decade")])
141
.agg([
142
col("name"),
143
cols(["weight", "height"])
144
.as_expr()
145
.mean()
146
.round(2, RoundMode::default())
147
.name()
148
.prefix("avg_"),
149
])
150
.collect()?;
151
println!("{result}");
152
// --8<-- [end:complex]
153
154
// --8<-- [start:join]
155
let df2: DataFrame = df!(
156
"name" => ["Ben Brown", "Daniel Donovan", "Alice Archer", "Chloe Cooper"],
157
"parent" => [true, false, false, false],
158
"siblings" => [1, 2, 3, 4],
159
)
160
.unwrap();
161
162
let result = df
163
.clone()
164
.lazy()
165
.join(
166
df2.lazy(),
167
[col("name")],
168
[col("name")],
169
JoinArgs::new(JoinType::Left),
170
)
171
.collect()?;
172
173
println!("{result}");
174
// --8<-- [end:join]
175
176
// --8<-- [start:concat]
177
let df3: DataFrame = df!(
178
"name" => ["Ethan Edwards", "Fiona Foster", "Grace Gibson", "Henry Harris"],
179
"birthdate" => [
180
NaiveDate::from_ymd_opt(1977, 5, 10).unwrap(),
181
NaiveDate::from_ymd_opt(1975, 6, 23).unwrap(),
182
NaiveDate::from_ymd_opt(1973, 7, 22).unwrap(),
183
NaiveDate::from_ymd_opt(1971, 8, 3).unwrap(),
184
],
185
"weight" => [67.9, 72.5, 57.6, 93.1], // (kg)
186
"height" => [1.76, 1.6, 1.66, 1.8], // (m)
187
)
188
.unwrap();
189
190
let result = concat([df.clone().lazy(), df3.lazy()], UnionArgs::default())?.collect()?;
191
println!("{result}");
192
// --8<-- [end:concat]
193
194
Ok(())
195
}
196
197