Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/python/user-guide/transformations/joins.py
7890 views
1
# --8<-- [start:prep-data]
2
import pathlib
3
import requests
4
5
6
DATA = [
7
(
8
"https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/data/monopoly_props_groups.csv",
9
"docs/assets/data/monopoly_props_groups.csv",
10
),
11
(
12
"https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/data/monopoly_props_prices.csv",
13
"docs/assets/data/monopoly_props_prices.csv",
14
),
15
]
16
17
18
for url, dest in DATA:
19
if pathlib.Path(dest).exists():
20
continue
21
with open(dest, "wb") as f:
22
f.write(requests.get(url, timeout=10).content)
23
# --8<-- [end:prep-data]
24
25
# --8<-- [start:props_groups]
26
import polars as pl
27
28
props_groups = pl.read_csv("docs/assets/data/monopoly_props_groups.csv").head(5)
29
print(props_groups)
30
# --8<-- [end:props_groups]
31
32
# --8<-- [start:props_prices]
33
props_prices = pl.read_csv("docs/assets/data/monopoly_props_prices.csv").head(5)
34
print(props_prices)
35
# --8<-- [end:props_prices]
36
37
# --8<-- [start:equi-join]
38
result = props_groups.join(props_prices, on="property_name")
39
print(result)
40
# --8<-- [end:equi-join]
41
42
# --8<-- [start:props_groups2]
43
props_groups2 = props_groups.with_columns(
44
pl.col("property_name").str.to_lowercase(),
45
)
46
print(props_groups2)
47
# --8<-- [end:props_groups2]
48
49
# --8<-- [start:props_prices2]
50
props_prices2 = props_prices.select(
51
pl.col("property_name").alias("name"), pl.col("cost")
52
)
53
print(props_prices2)
54
# --8<-- [end:props_prices2]
55
56
# --8<-- [start:join-key-expression]
57
result = props_groups2.join(
58
props_prices2,
59
left_on="property_name",
60
right_on=pl.col("name").str.to_lowercase(),
61
)
62
print(result)
63
# --8<-- [end:join-key-expression]
64
65
# --8<-- [start:inner-join]
66
result = props_groups.join(props_prices, on="property_name", how="inner")
67
print(result)
68
# --8<-- [end:inner-join]
69
70
# --8<-- [start:left-join]
71
result = props_groups.join(props_prices, on="property_name", how="left")
72
print(result)
73
# --8<-- [end:left-join]
74
75
# --8<-- [start:right-join]
76
result = props_groups.join(props_prices, on="property_name", how="right")
77
print(result)
78
# --8<-- [end:right-join]
79
80
# --8<-- [start:left-right-join-equals]
81
print(
82
result.equals(
83
props_prices.join(
84
props_groups,
85
on="property_name",
86
how="left",
87
# Reorder the columns to match the order from above.
88
).select(pl.col("group"), pl.col("property_name"), pl.col("cost"))
89
)
90
)
91
# --8<-- [end:left-right-join-equals]
92
93
# --8<-- [start:full-join]
94
result = props_groups.join(props_prices, on="property_name", how="full")
95
print(result)
96
# --8<-- [end:full-join]
97
98
# --8<-- [start:full-join-coalesce]
99
result = props_groups.join(
100
props_prices,
101
on="property_name",
102
how="full",
103
coalesce=True,
104
)
105
print(result)
106
# --8<-- [end:full-join-coalesce]
107
108
# --8<-- [start:semi-join]
109
result = props_groups.join(props_prices, on="property_name", how="semi")
110
print(result)
111
# --8<-- [end:semi-join]
112
113
# --8<-- [start:anti-join]
114
result = props_groups.join(props_prices, on="property_name", how="anti")
115
print(result)
116
# --8<-- [end:anti-join]
117
118
# --8<-- [start:players]
119
players = pl.DataFrame(
120
{
121
"name": ["Alice", "Bob"],
122
"cash": [78, 135],
123
}
124
)
125
print(players)
126
# --8<-- [end:players]
127
128
# --8<-- [start:non-equi]
129
result = players.join_where(props_prices, pl.col("cash") > pl.col("cost"))
130
print(result)
131
# --8<-- [end:non-equi]
132
133
# --8<-- [start:df_trades]
134
from datetime import datetime
135
136
df_trades = pl.DataFrame(
137
{
138
"time": [
139
datetime(2020, 1, 1, 9, 1, 0),
140
datetime(2020, 1, 1, 9, 1, 0),
141
datetime(2020, 1, 1, 9, 3, 0),
142
datetime(2020, 1, 1, 9, 6, 0),
143
],
144
"stock": ["A", "B", "B", "C"],
145
"trade": [101, 299, 301, 500],
146
}
147
)
148
print(df_trades)
149
# --8<-- [end:df_trades]
150
151
# --8<-- [start:df_quotes]
152
df_quotes = pl.DataFrame(
153
{
154
"time": [
155
datetime(2020, 1, 1, 9, 0, 0),
156
datetime(2020, 1, 1, 9, 2, 0),
157
datetime(2020, 1, 1, 9, 4, 0),
158
datetime(2020, 1, 1, 9, 6, 0),
159
],
160
"stock": ["A", "B", "C", "A"],
161
"quote": [100, 300, 501, 102],
162
}
163
)
164
165
print(df_quotes)
166
# --8<-- [end:df_quotes]
167
168
# --8<-- [start:asof]
169
df_asof_join = df_trades.join_asof(df_quotes, on="time", by="stock")
170
print(df_asof_join)
171
# --8<-- [end:asof]
172
173
# --8<-- [start:asof-tolerance]
174
df_asof_tolerance_join = df_trades.join_asof(
175
df_quotes, on="time", by="stock", tolerance="1m"
176
)
177
print(df_asof_tolerance_join)
178
# --8<-- [end:asof-tolerance]
179
180
# --8<-- [start:cartesian-product]
181
tokens = pl.DataFrame({"monopoly_token": ["hat", "shoe", "boat"]})
182
183
result = players.select(pl.col("name")).join(tokens, how="cross")
184
print(result)
185
# --8<-- [end:cartesian-product]
186
187