Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/python/user-guide/expressions/expression-expansion.py
7890 views
1
# --8<-- [start:df]
2
import polars as pl
3
4
# Sample stock data reused by the snippets below: one row per ticker, with
# the current price plus the day's and the year's high/low values.
df = pl.DataFrame(
    dict(  # As of 14th October 2024, ~3pm UTC
        ticker=["AAPL", "NVDA", "MSFT", "GOOG", "AMZN"],
        company_name=["Apple", "NVIDIA", "Microsoft", "Alphabet (Google)", "Amazon"],
        price=[229.9, 138.93, 420.56, 166.41, 188.4],
        day_high=[231.31, 139.6, 424.04, 167.62, 189.83],
        day_low=[228.6, 136.3, 417.52, 164.78, 188.44],
        year_high=[237.23, 140.76, 468.35, 193.31, 201.2],
        year_low=[164.08, 39.23, 324.39, 121.46, 118.35],
    )
)

print(df)
17
# --8<-- [end:df]
18
19
# --8<-- [start:col-with-names]
20
eur_usd_rate = 1.09  # As of 14th October 2024

# One `pl.col` call naming several columns expands into one expression per
# column; each is divided by the exchange rate and rounded to 2 decimals.
result = df.with_columns(
    pl.col(["price", "day_high", "day_low", "year_high", "year_low"])
    .truediv(eur_usd_rate)
    .round(2)
)
print(result)
35
# --8<-- [end:col-with-names]
36
37
# --8<-- [start:expression-list]
38
# The same conversion written as one explicit expression per column, to
# compare against the expanded `pl.col(...)` form used for `result`.
exprs = [
    (pl.col(column) / eur_usd_rate).round(2)
    for column in ("price", "day_high", "day_low", "year_high", "year_low")
]

result2 = df.with_columns(exprs)
print(result.equals(result2))
48
# --8<-- [end:expression-list]
49
50
# --8<-- [start:col-with-dtype]
51
# Passing a data type to `pl.col` targets every column of that type.
result = df.with_columns(
    pl.col(pl.Float64).truediv(eur_usd_rate).round(2),
)
print(result)
53
# --8<-- [end:col-with-dtype]
54
55
# --8<-- [start:col-with-dtypes]
56
# Several data types can be given at once; the outcome is compared with the
# Float64-only `result` computed in the previous snippet.
result2 = df.with_columns(
    (pl.col([pl.Float32, pl.Float64]) / eur_usd_rate).round(2),
)
print(result.equals(result2))
66
# --8<-- [end:col-with-dtypes]
67
68
# --8<-- [start:col-with-regex]
69
# Names wrapped in ^...$ are treated as regular expressions, and can be
# mixed with plain column names in the same `pl.col` call.
result = df.select(
    pl.col(["ticker", "^.*_high$", "^.*_low$"]),
)
print(result)
71
# --8<-- [end:col-with-regex]
72
73
# --8<-- [start:col-error]
74
# Column names and data types cannot be mixed in a single `pl.col` call;
# the resulting TypeError is caught and printed.
try:
    df.select(pl.col("ticker", pl.Float64))
except TypeError as exc:
    print("TypeError:", exc)
78
# --8<-- [end:col-error]
79
80
# --8<-- [start:all]
81
# `pl.all()` selects every column, so the selection should equal `df`.
result = df.select(
    pl.all(),
)
print(result.equals(df))
83
# --8<-- [end:all]
84
85
# --8<-- [start:all-exclude]
86
# Start from every column, then drop those whose name matches the regex.
result = df.select(
    pl.all().exclude("^day_.*$"),
)
print(result)
88
# --8<-- [end:all-exclude]
89
90
# --8<-- [start:col-exclude]
91
# `exclude` also works after a data-type selection: Float64 columns minus
# those whose name matches the regex.
result = df.select(
    pl.col(pl.Float64).exclude("^day_.*$"),
)
print(result)
93
# --8<-- [end:col-exclude]
94
95
# --8<-- [start:duplicate-error]
96
from polars.exceptions import DuplicateError
97
98
gbp_usd_rate = 1.31  # As of 14th October 2024

# Both expressions derive from "price" and are not renamed, so selecting
# them together is expected to raise DuplicateError, which is printed.
try:
    df.select(
        [
            pl.col("price") / gbp_usd_rate,  # This would be named "price"...
            pl.col("price") / eur_usd_rate,  # And so would this.
        ]
    )
except DuplicateError as exc:
    print("DuplicateError:", exc)
107
# --8<-- [end:duplicate-error]
108
109
# --8<-- [start:alias]
110
# Giving each expression its own name with `alias` lets both conversions of
# "price" be selected together.
result = df.select(
    (pl.col("price") / gbp_usd_rate).alias("price (GBP)"),
    (pl.col("price") / eur_usd_rate).alias("price (EUR)"),
)
# Print the frame like every other snippet does; without this the example
# computes `result` but shows no output.
print(result)
114
# --8<-- [end:alias]
115
116
# --8<-- [start:prefix-suffix]
117
# When an expression expands to several columns, `.name.prefix` and
# `.name.suffix` rename each output based on its original column name.
result = df.select(
    [
        (pl.col("^year_.*$") / eur_usd_rate).name.prefix("in_eur_"),
        (pl.col(["day_high", "day_low"]) / gbp_usd_rate).name.suffix("_gbp"),
    ]
)
print(result)
122
# --8<-- [end:prefix-suffix]
123
124
# --8<-- [start:name-map]
125
# `.name.map` takes a callable that receives each column name. For plain
# upper-casing `.name.to_uppercase` already exists, so `.map` is only used
# here to illustrate the mechanism.
result = df.select(
    pl.all().name.map(str.upper),
)
print(result)
128
# --8<-- [end:name-map]
129
130
# --8<-- [start:for-with_columns]
131
# Add one "<period>_amplitude" column per time period by calling
# `with_columns` once per loop iteration.
result = df
for tp in ("day", "year"):
    result = result.with_columns(
        (pl.col(f"{tp}_high") - pl.col(f"{tp}_low")).alias(f"{tp}_amplitude"),
    )
print(result)
137
# --8<-- [end:for-with_columns]
138
139
140
# --8<-- [start:yield-expressions]
141
def amplitude_expressions(time_periods):
    """Yield one amplitude expression per entry of ``time_periods``.

    For each period ``p``, the yielded expression subtracts column
    ``{p}_low`` from column ``{p}_high`` and is aliased ``{p}_amplitude``.
    Expressions are produced lazily, one per iteration.
    """
    for period in time_periods:
        high = pl.col(f"{period}_high")
        low = pl.col(f"{period}_low")
        yield (high - low).alias(f"{period}_amplitude")
144
145
146
# The generator is handed straight to a single `with_columns` call, which
# consumes every expression it yields.
result = df.with_columns(
    amplitude_expressions(["day", "year"]),
)
print(result)
148
# --8<-- [end:yield-expressions]
149
150
# --8<-- [start:selectors]
151
import polars.selectors as cs
152
153
# Selectors combine with `|`: string columns together with columns whose
# name ends in "_high".
result = df.select(
    cs.string() | cs.ends_with("_high"),
)
print(result)
155
# --8<-- [end:selectors]
156
157
# --8<-- [start:selectors-set-operations]
158
# Selector difference: columns with an underscore in their name, minus the
# string columns.
result = df.select(
    cs.contains("_") - cs.string(),
)
print(result)
160
# --8<-- [end:selectors-set-operations]
161
162
# --8<-- [start:selectors-expressions]
163
# A combined selector can be used like an expression: here the selected
# columns are divided by the exchange rate.
result = df.select(
    (cs.contains("_") - cs.string()) / eur_usd_rate,
)
print(result)
165
# --8<-- [end:selectors-expressions]
166
167
# --8<-- [start:selector-ambiguity]
168
# Small frame of boolean flags used to show how `~` behaves on a selector.
people = pl.DataFrame(
    dict(
        name=["Anna", "Bob"],
        has_partner=[True, False],
        has_kids=[False, False],
        has_tattoos=[True, False],
        is_alive=[True, True],
    )
)

# `~` is applied to the selector itself here (not to the column values),
# which is why the outcome is stored as `wrong_result`.
wrong_result = people.select(
    (~cs.starts_with("has_")).name.prefix("not_"),
)
print(wrong_result)
180
# --8<-- [end:selector-ambiguity]
181
182
# --8<-- [start:as_expr]
183
# Converting the selector with `.as_expr()` first makes `~` apply to the
# resulting expression rather than to the selector.
result = people.select(
    (~cs.starts_with("has_").as_expr()).name.prefix("not_"),
)
print(result)
185
# --8<-- [end:as_expr]
186
187
# --8<-- [start:is_selector]
188
# Check whether the negated `.as_expr()` result still counts as a selector.
print(
    cs.is_selector(
        ~cs.starts_with("has_").as_expr(),
    )
)
189
# --8<-- [end:is_selector]
190
191
# --8<-- [start:expand_selector]
192
# Show which columns of `people` the selector resolves to.
print(cs.expand_selector(people, cs.starts_with("has_")))
198
# --8<-- [end:expand_selector]
199
200