Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/rust/user-guide/transformations/joins.rs
7889 views
1
// --8<-- [start:setup]
2
use polars::prelude::*;
3
// --8<-- [end:setup]
4
5
fn main() -> Result<(), Box<dyn std::error::Error>> {
6
// NOTE: This assumes the data has been downloaded and is available.
7
// See the corresponding Python script for the remote location of the data.
8
9
// --8<-- [start:props_groups]
10
let props_groups = CsvReadOptions::default()
11
.with_has_header(true)
12
.try_into_reader_with_file_path(Some("docs/assets/data/monopoly_props_groups.csv".into()))?
13
.finish()?
14
.head(Some(5));
15
println!("{props_groups}");
16
// --8<-- [end:props_groups]
17
18
// --8<-- [start:props_prices]
19
let props_prices = CsvReadOptions::default()
20
.with_has_header(true)
21
.try_into_reader_with_file_path(Some("docs/assets/data/monopoly_props_prices.csv".into()))?
22
.finish()?
23
.head(Some(5));
24
println!("{props_prices}");
25
// --8<-- [end:props_prices]
26
27
// --8<-- [start:equi-join]
28
// In Rust, we cannot use the shorthand of specifying a common
29
// column name just once.
30
let result = props_groups
31
.clone()
32
.lazy()
33
.join(
34
props_prices.clone().lazy(),
35
[col("property_name")],
36
[col("property_name")],
37
JoinArgs::default(),
38
)
39
.collect()?;
40
println!("{result}");
41
// --8<-- [end:equi-join]
42
43
// --8<-- [start:props_groups2]
44
let props_groups2 = props_groups
45
.clone()
46
.lazy()
47
.with_column(col("property_name").str().to_lowercase())
48
.collect()?;
49
println!("{props_groups2}");
50
// --8<-- [end:props_groups2]
51
52
// --8<-- [start:props_prices2]
53
let props_prices2 = props_prices
54
.clone()
55
.lazy()
56
.select([col("property_name").alias("name"), col("cost")])
57
.collect()?;
58
println!("{props_prices2}");
59
// --8<-- [end:props_prices2]
60
61
// --8<-- [start:join-key-expression]
62
let result = props_groups2
63
.lazy()
64
.join(
65
props_prices2.lazy(),
66
[col("property_name")],
67
[col("name").str().to_lowercase()],
68
JoinArgs::default(),
69
)
70
.collect()?;
71
println!("{result}");
72
// --8<-- [end:join-key-expression]
73
74
// --8<-- [start:inner-join]
75
let result = props_groups
76
.clone()
77
.lazy()
78
.join(
79
props_prices.clone().lazy(),
80
[col("property_name")],
81
[col("property_name")],
82
JoinArgs::new(JoinType::Inner),
83
)
84
.collect()?;
85
println!("{result}");
86
// --8<-- [end:inner-join]
87
88
// --8<-- [start:left-join]
89
let result = props_groups
90
.clone()
91
.lazy()
92
.join(
93
props_prices.clone().lazy(),
94
[col("property_name")],
95
[col("property_name")],
96
JoinArgs::new(JoinType::Left),
97
)
98
.collect()?;
99
println!("{result}");
100
// --8<-- [end:left-join]
101
102
// --8<-- [start:right-join]
103
let result = props_groups
104
.clone()
105
.lazy()
106
.join(
107
props_prices.clone().lazy(),
108
[col("property_name")],
109
[col("property_name")],
110
JoinArgs::new(JoinType::Right),
111
)
112
.collect()?;
113
println!("{result}");
114
// --8<-- [end:right-join]
115
116
// --8<-- [start:left-right-join-equals]
117
// `equals_missing` is needed instead of `equals`
118
// so that missing values compare as equal.
119
let dfs_match = result.equals_missing(
120
&props_prices
121
.clone()
122
.lazy()
123
.join(
124
props_groups.clone().lazy(),
125
[col("property_name")],
126
[col("property_name")],
127
JoinArgs::new(JoinType::Left),
128
)
129
.select([
130
// Reorder the columns to match the order of `result`.
131
col("group"),
132
col("property_name"),
133
col("cost"),
134
])
135
.collect()?,
136
);
137
println!("{dfs_match}");
138
// --8<-- [end:left-right-join-equals]
139
140
// --8<-- [start:full-join]
141
let result = props_groups
142
.clone()
143
.lazy()
144
.join(
145
props_prices.clone().lazy(),
146
[col("property_name")],
147
[col("property_name")],
148
JoinArgs::new(JoinType::Full),
149
)
150
.collect()?;
151
println!("{result}");
152
// --8<-- [end:full-join]
153
154
// --8<-- [start:full-join-coalesce]
155
let result = props_groups
156
.clone()
157
.lazy()
158
.join(
159
props_prices.clone().lazy(),
160
[col("property_name")],
161
[col("property_name")],
162
JoinArgs::new(JoinType::Full).with_coalesce(JoinCoalesce::CoalesceColumns),
163
)
164
.collect()?;
165
println!("{result}");
166
// --8<-- [end:full-join-coalesce]
167
168
// --8<-- [start:semi-join]
169
let result = props_groups
170
.clone()
171
.lazy()
172
.join(
173
props_prices.clone().lazy(),
174
[col("property_name")],
175
[col("property_name")],
176
JoinArgs::new(JoinType::Semi),
177
)
178
.collect()?;
179
println!("{result}");
180
// --8<-- [end:semi-join]
181
182
// --8<-- [start:anti-join]
183
let result = props_groups
184
.lazy()
185
.join(
186
props_prices.clone().lazy(),
187
[col("property_name")],
188
[col("property_name")],
189
JoinArgs::new(JoinType::Anti),
190
)
191
.collect()?;
192
println!("{result}");
193
// --8<-- [end:anti-join]
194
195
// --8<-- [start:players]
196
let players = df!(
197
"name" => ["Alice", "Bob"],
198
"cash" => [78, 135],
199
)?;
200
println!("{players}");
201
// --8<-- [end:players]
202
203
// --8<-- [start:non-equi]
204
let result = players
205
.clone()
206
.lazy()
207
.join_builder()
208
.with(props_prices.lazy())
209
.join_where(vec![col("cash").cast(DataType::Int64).gt(col("cost"))])
210
.collect()?;
211
println!("{result}");
212
// --8<-- [end:non-equi]
213
214
// --8<-- [start:df_trades]
215
use chrono::prelude::*;
216
217
let df_trades = df!(
218
"time" => [
219
NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 1, 0).unwrap(),
220
NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 1, 0).unwrap(),
221
NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 3, 0).unwrap(),
222
NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 6, 0).unwrap(),
223
],
224
"stock" => ["A", "B", "B", "C"],
225
"trade" => [101, 299, 301, 500],
226
)?;
227
println!("{df_trades}");
228
// --8<-- [end:df_trades]
229
230
// --8<-- [start:df_quotes]
231
let df_quotes = df!(
232
"time" => [
233
NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 1, 0).unwrap(),
234
NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 2, 0).unwrap(),
235
NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 4, 0).unwrap(),
236
NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 6, 0).unwrap(),
237
],
238
"stock" => ["A", "B", "C", "A"],
239
"quote" => [100, 300, 501, 102],
240
)?;
241
println!("{df_quotes}");
242
// --8<-- [end:df_quotes]
243
244
// --8<-- [start:asof]
245
let result = df_trades.join_asof_by(
246
&df_quotes,
247
"time",
248
"time",
249
["stock"],
250
["stock"],
251
AsofStrategy::Backward,
252
None,
253
true,
254
true,
255
)?;
256
println!("{result}");
257
// --8<-- [end:asof]
258
259
// --8<-- [start:asof-tolerance]
260
let result = df_trades.join_asof_by(
261
&df_quotes,
262
"time",
263
"time",
264
["stock"],
265
["stock"],
266
AsofStrategy::Backward,
267
Some(AnyValue::Duration(60000, TimeUnit::Milliseconds)),
268
true,
269
true,
270
)?;
271
println!("{result}");
272
// --8<-- [end:asof-tolerance]
273
274
// --8<-- [start:cartesian-product]
275
let tokens = df!(
276
"monopoly_token" => ["hat", "shoe", "boat"],
277
)?;
278
279
let result = players
280
.lazy()
281
.select([col("name")])
282
.cross_join(tokens.lazy(), None)
283
.collect()?;
284
println!("{result}");
285
// --8<-- [end:cartesian-product]
286
287
Ok(())
288
}
289
290