CoCalc -- expression-expansion.py

GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/python/user-guide/expressions/expression-expansion.py
7890 views
# --8<-- [start:df]
import polars as pl

# Sample stock data used by the examples in this file: two string columns
# (ticker and company name) and five float price columns.
df = pl.DataFrame(
    {  # As of 14th October 2024, ~3pm UTC
        "ticker": ["AAPL", "NVDA", "MSFT", "GOOG", "AMZN"],
        "company_name": ["Apple", "NVIDIA", "Microsoft", "Alphabet (Google)", "Amazon"],
        "price": [229.9, 138.93, 420.56, 166.41, 188.4],
        "day_high": [231.31, 139.6, 424.04, 167.62, 189.83],
        "day_low": [228.6, 136.3, 417.52, 164.78, 188.44],
        "year_high": [237.23, 140.76, 468.35, 193.31, 201.2],
        "year_low": [164.08, 39.23, 324.39, 121.46, 118.35],
    }
)

print(df)
# --8<-- [end:df]

# --8<-- [start:col-with-names]
eur_usd_rate = 1.09  # As of 14th October 2024

# One `pl.col` call with several column names: the division and rounding
# are written once and applied to every listed column.
result = df.with_columns(
    (
        pl.col(
            "price",
            "day_high",
            "day_low",
            "year_high",
            "year_low",
        )
        / eur_usd_rate
    ).round(2)
)
print(result)
# --8<-- [end:col-with-names]

# --8<-- [start:expression-list]
# The same conversion spelled out as one explicit expression per column.
exprs = [
    (pl.col("price") / eur_usd_rate).round(2),
    (pl.col("day_high") / eur_usd_rate).round(2),
    (pl.col("day_low") / eur_usd_rate).round(2),
    (pl.col("year_high") / eur_usd_rate).round(2),
    (pl.col("year_low") / eur_usd_rate).round(2),
]

result2 = df.with_columns(exprs)
# Compare against the `result` computed with the multi-name `pl.col` call.
print(result.equals(result2))
# --8<-- [end:expression-list]

# --8<-- [start:col-with-dtype]
# `pl.col` is given a data type here instead of column names.
result = df.with_columns((pl.col(pl.Float64) / eur_usd_rate).round(2))
print(result)
# --8<-- [end:col-with-dtype]

# --8<-- [start:col-with-dtypes]
# Several data types can be passed to a single `pl.col` call.
result2 = df.with_columns(
    (
        pl.col(
            pl.Float32,
            pl.Float64,
        )
        / eur_usd_rate
    ).round(2)
)
# Compare against the `result` computed with `pl.col(pl.Float64)` alone.
print(result.equals(result2))
# --8<-- [end:col-with-dtypes]

# --8<-- [start:col-with-regex]
# A plain column name mixed with two `^...$` patterns in one `pl.col` call.
result = df.select(pl.col("ticker", "^.*_high$", "^.*_low$"))
print(result)
# --8<-- [end:col-with-regex]

# --8<-- [start:col-error]
# Mixing a column name and a data type in one `pl.col` call; the resulting
# TypeError is caught so its message can be printed.
try:
    df.select(pl.col("ticker", pl.Float64))
except TypeError as err:
    print("TypeError:", err)
# --8<-- [end:col-error]

# --8<-- [start:all]
# Select with `pl.all()` and compare the result against the original frame.
result = df.select(pl.all())
print(result.equals(df))
# --8<-- [end:all]

# --8<-- [start:all-exclude]
# Start from all columns and exclude those matching the `^day_.*$` pattern.
result = df.select(pl.all().exclude("^day_.*$"))
print(result)
# --8<-- [end:all-exclude]

# --8<-- [start:col-exclude]
# `.exclude` applied to a dtype-based `pl.col` rather than to `pl.all()`.
result = df.select(pl.col(pl.Float64).exclude("^day_.*$"))
print(result)
# --8<-- [end:col-exclude]

# --8<-- [start:duplicate-error]
from polars.exceptions import DuplicateError

gbp_usd_rate = 1.31  # As of 14th October 2024

# Both expressions derive from "price" without renaming; the resulting
# DuplicateError is caught so its message can be printed.
try:
    df.select(
        pl.col("price") / gbp_usd_rate,  # This would be named "price"...
        pl.col("price") / eur_usd_rate,  # And so would this.
    )
except DuplicateError as err:
    print("DuplicateError:", err)
# --8<-- [end:duplicate-error]

# --8<-- [start:alias]
# Give each derived column its own explicit name with `.alias`.
result = df.select(
    (pl.col("price") / gbp_usd_rate).alias("price (GBP)"),
    (pl.col("price") / eur_usd_rate).alias("price (EUR)"),
)
# --8<-- [end:alias]

# --8<-- [start:prefix-suffix]
# Rename expanded columns programmatically: a prefix for the `year_*`
# columns and a suffix for the two `day_*` columns.
result = df.select(
    (pl.col("^year_.*$") / eur_usd_rate).name.prefix("in_eur_"),
    (pl.col("day_high", "day_low") / gbp_usd_rate).name.suffix("_gbp"),
)
print(result)
# --8<-- [end:prefix-suffix]

# --8<-- [start:name-map]
# There is also `.name.to_uppercase`, so this usage of `.map` is moot.
# `str.upper` is passed as the callable that renames each column.
result = df.select(pl.all().name.map(str.upper))
print(result)
# --8<-- [end:name-map]

# --8<-- [start:for-with_columns]
# Add one amplitude column (high minus low) per time period, calling
# `with_columns` once per loop iteration.
result = df
for tp in ["day", "year"]:
    result = result.with_columns(
        (pl.col(f"{tp}_high") - pl.col(f"{tp}_low")).alias(f"{tp}_amplitude")
    )
print(result)
# --8<-- [end:for-with_columns]

# --8<-- [start:yield-expressions]
def amplitude_expressions(time_periods):
    """Yield one amplitude expression per time period.

    For each prefix ``tp`` in ``time_periods`` this yields the expression
    ``<tp>_high - <tp>_low`` aliased as ``<tp>_amplitude``. Nothing is
    built until the returned generator is iterated.
    """
    for tp in time_periods:
        yield (pl.col(f"{tp}_high") - pl.col(f"{tp}_low")).alias(f"{tp}_amplitude")

# The generator is passed directly to a single `with_columns` call.
result = df.with_columns(amplitude_expressions(["day", "year"]))
print(result)
# --8<-- [end:yield-expressions]

# --8<-- [start:selectors]
import polars.selectors as cs

# Combine two selectors with `|`: string columns, or names ending in "_high".
result = df.select(cs.string() | cs.ends_with("_high"))
print(result)
# --8<-- [end:selectors]

# --8<-- [start:selectors-set-operations]
# Combine two selectors with `-`: names containing "_", minus string columns.
result = df.select(cs.contains("_") - cs.string())
print(result)
# --8<-- [end:selectors-set-operations]

# --8<-- [start:selectors-expressions]
# The combined selector is used as an operand in an arithmetic expression.
result = df.select((cs.contains("_") - cs.string()) / eur_usd_rate)
print(result)
# --8<-- [end:selectors-expressions]

# --8<-- [start:selector-ambiguity]
people = pl.DataFrame(
    {
        "name": ["Anna", "Bob"],
        "has_partner": [True, False],
        "has_kids": [False, False],
        "has_tattoos": [True, False],
        "is_alive": [True, True],
    }
)

# `~` is applied to the selector itself here, not to an expression, so it
# is treated as a selector operation rather than a negation of the Boolean
# values -- hence the name `wrong_result`.
wrong_result = people.select((~cs.starts_with("has_")).name.prefix("not_"))
print(wrong_result)
# --8<-- [end:selector-ambiguity]

# --8<-- [start:as_expr]
# `.as_expr()` is called on the selector first, so `~` is applied to an
# expression rather than to the selector.
result = people.select((~cs.starts_with("has_").as_expr()).name.prefix("not_"))
print(result)
# --8<-- [end:as_expr]

# --8<-- [start:is_selector]
# Check whether the negated `.as_expr()` result still counts as a selector.
print(cs.is_selector(~cs.starts_with("has_").as_expr()))
# --8<-- [end:is_selector]

# --8<-- [start:expand_selector]
# Print what the `has_` selector expands to when resolved against `people`.
print(
    cs.expand_selector(
        people,
        cs.starts_with("has_"),
    )
)
# --8<-- [end:expand_selector]
Product

Resources

Company