# Source: GitHub repository pola-rs/polars
# Path: blob/main/docs/source/src/python/user-guide/concepts/expressions.py
# Build the BMI expression (weight divided by height squared). The result is
# not bound to a name in this snippet.
# --8<-- [start:expression]
import polars as pl

pl.col("weight") / (pl.col("height") ** 2)
# --8<-- [end:expression]

# Bind the BMI expression to `bmi_expr` and print the expression object itself.
# --8<-- [start:print-expr]
bmi_expr = pl.col("weight") / (pl.col("height") ** 2)
print(bmi_expr)
# --8<-- [end:print-expr]

# Sample data for the examples below: four people, each with a name,
# a birthdate, a weight and a height.
# --8<-- [start:df]
from datetime import date

df = pl.DataFrame(
    {
        "name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
        "birthdate": [
            date(1997, 1, 10),
            date(1985, 2, 15),
            date(1983, 3, 22),
            date(1981, 4, 30),
        ],
        "weight": [57.9, 72.5, 53.6, 83.1],  # (kg)
        "height": [1.56, 1.77, 1.65, 1.75],  # (m)
    }
)

print(df)
# --8<-- [end:df]

# Evaluate three keyword-named expressions with `select`: the per-row BMI,
# the mean of the BMI, and the constant 25.
# --8<-- [start:select-1]
result = df.select(
    bmi=bmi_expr,
    avg_bmi=bmi_expr.mean(),
    ideal_max_bmi=25,
)
print(result)
# --8<-- [end:select-1]

# Standardise the BMI: subtract its mean, then divide by its standard
# deviation; the output column is named `deviation`.
# --8<-- [start:select-2]
result = df.select(deviation=(bmi_expr - bmi_expr.mean()) / bmi_expr.std())
print(result)
# --8<-- [end:select-2]

# Pass the same three keyword-named expressions as the `select-1` snippet to
# `with_columns` instead of `select`.
# --8<-- [start:with_columns-1]
result = df.with_columns(
    bmi=bmi_expr,
    avg_bmi=bmi_expr.mean(),
    ideal_max_bmi=25,
)
print(result)
# --8<-- [end:with_columns-1]

# Filter on two predicates: birthdate between 1982-12-31 and 1996-01-01, and
# height greater than 1.7 (m).
# --8<-- [start:filter-1]
result = df.filter(
    pl.col("birthdate").is_between(date(1982, 12, 31), date(1996, 1, 1)),
    pl.col("height") > 1.7,
)
print(result)
# --8<-- [end:filter-1]

# Group by birth decade (birth year floor-divided by 10, then multiplied by
# 10) and aggregate the `name` column per group.
# --8<-- [start:group_by-1]
result = df.group_by(
    (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
).agg(pl.col("name"))
print(result)
# --8<-- [end:group_by-1]

# Group by two computed keys: the birth decade and a boolean `short?` flag
# (height below 1.7), then aggregate the `name` column per group.
# --8<-- [start:group_by-2]
result = df.group_by(
    (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
    (pl.col("height") < 1.7).alias("short?"),
).agg(pl.col("name"))
print(result)
# --8<-- [end:group_by-2]

# Same grouping keys as `group_by-2`, with three aggregations per group: the
# group length, the maximum height (as `tallest`), and the means of `weight`
# and `height` with their names prefixed by `avg_`.
# --8<-- [start:group_by-3]
result = df.group_by(
    (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
    (pl.col("height") < 1.7).alias("short?"),
).agg(
    pl.len(),
    pl.col("height").max().alias("tallest"),
    pl.col("weight", "height").mean().name.prefix("avg_"),
)
print(result)
# --8<-- [end:group_by-3]

# One expression addressed by dtype rather than by column name: multiply the
# `pl.Float64` selection by 1.1 and append "*1.1" to the output name.
# --8<-- [start:expression-expansion-1]
expr = (pl.col(pl.Float64) * 1.1).name.suffix("*1.1")
result = df.select(expr)
print(result)
# --8<-- [end:expression-expansion-1]

# Reuse `expr` on a second frame built only from integers and strings.
# NOTE(review): presumably no column matches `pl.Float64` here, so the
# selection would be empty; confirm against the rendered output.
# --8<-- [start:expression-expansion-2]
df2 = pl.DataFrame(
    {
        "ints": [1, 2, 3, 4],
        "letters": ["A", "B", "C", "D"],
    }
)
result = df2.select(expr)
print(result)
# --8<-- [end:expression-expansion-2]