# Book a Demo!
# CoCalc Logo Icon
# Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
# pola-rs
# GitHub Repository: pola-rs/polars
# Path: blob/main/docs/source/src/python/user-guide/expressions/operations.py
# 7890 views
# --8<-- [start:dataframe]
import polars as pl
import numpy as np

np.random.seed(42)  # For reproducibility.

# Demo frame used by the later snippets in this file: an integer column with
# one missing value, a string column with a repeated entry ("spam"), five
# seeded random floats, and a two-valued group label.
df = pl.DataFrame(
    {
        "nrs": [1, 2, 3, None, 5],
        "names": ["foo", "ham", "spam", "egg", "spam"],
        "random": np.random.rand(5),
        "groups": ["A", "A", "B", "A", "B"],
    }
)
print(df)
# --8<-- [end:dataframe]

# --8<-- [start:arithmetic]
# Apply each arithmetic operator to "nrs" (against a literal or the "random"
# column); every output column is aliased with the operation that produced it.
result = df.select(
    (pl.col("nrs") + 5).alias("nrs + 5"),
    (pl.col("nrs") - 5).alias("nrs - 5"),
    (pl.col("nrs") * pl.col("random")).alias("nrs * random"),
    (pl.col("nrs") / pl.col("random")).alias("nrs / random"),
    (pl.col("nrs") ** 2).alias("nrs ** 2"),
    (pl.col("nrs") % 3).alias("nrs % 3"),
)

print(result)
# --8<-- [end:arithmetic]

# --8<-- [start:operator-overloading]
# Python only:
# The same selection as the arithmetic snippet, spelled with the named
# expression methods instead of the overloaded operators. The aliases are
# identical so the two frames can be compared directly.
result_named_operators = df.select(
    (pl.col("nrs").add(5)).alias("nrs + 5"),
    (pl.col("nrs").sub(5)).alias("nrs - 5"),
    (pl.col("nrs").mul(pl.col("random"))).alias("nrs * random"),
    (pl.col("nrs").truediv(pl.col("random"))).alias("nrs / random"),
    (pl.col("nrs").pow(2)).alias("nrs ** 2"),
    (pl.col("nrs").mod(3)).alias("nrs % 3"),
)

# `result` still holds the operator-based frame from the arithmetic snippet.
print(result.equals(result_named_operators))
# --8<-- [end:operator-overloading]

# --8<-- [start:comparison]
# Element-wise comparisons against literals; the trailing comment on each line
# names the equivalent expression method.
result = df.select(
    (pl.col("nrs") > 1).alias("nrs > 1"),  # .gt
    (pl.col("nrs") >= 3).alias("nrs >= 3"),  # .ge
    (pl.col("random") < 0.2).alias("random < .2"),  # .lt
    (pl.col("random") <= 0.5).alias("random <= .5"),  # .le
    (pl.col("nrs") != 1).alias("nrs != 1"),  # .ne
    (pl.col("nrs") == 1).alias("nrs == 1"),  # .eq
)
print(result)
# --8<-- [end:comparison]

# --8<-- [start:boolean]
# Boolean operators & | ~
# Each operand is parenthesised because `&`, `|` and `~` bind more tightly
# than comparison operators such as `==` and `<` in Python.
result = df.select(
    ((~pl.col("nrs").is_null()) & (pl.col("groups") == "A")).alias(
        "number not null and group A"
    ),
    ((pl.col("random") < 0.5) | (pl.col("groups") == "B")).alias(
        "random < 0.5 or group B"
    ),
)

print(result)

# Corresponding named functions `and_`, `or_`, and `not_`.
result2 = df.select(
    (pl.col("nrs").is_null().not_().and_(pl.col("groups") == "A")).alias(
        "number not null and group A"
    ),
    ((pl.col("random") < 0.5).or_(pl.col("groups") == "B")).alias(
        "random < 0.5 or group B"
    ),
)
# Both spellings use the same aliases, so the frames are directly comparable.
print(result.equals(result2))
# --8<-- [end:boolean]

# --8<-- [start:bitwise]
# The same operator symbols applied to the integer column "nrs" and the
# literal 6; the original column is selected alongside for reference.
result = df.select(
    pl.col("nrs"),
    (pl.col("nrs") & 6).alias("nrs & 6"),
    (pl.col("nrs") | 6).alias("nrs | 6"),
    (~pl.col("nrs")).alias("not nrs"),
    (pl.col("nrs") ^ 6).alias("nrs ^ 6"),
)

print(result)
# --8<-- [end:bitwise]

# --8<-- [start:count]
# 100,000 random integers drawn from [0, 100_000), so duplicates are expected.
long_df = pl.DataFrame({"numbers": np.random.randint(0, 100_000, 100_000)})

# Compare the two distinct-value counts side by side.
result = long_df.select(
    pl.col("numbers").n_unique().alias("n_unique"),
    pl.col("numbers").approx_n_unique().alias("approx_n_unique"),
)

print(result)
# --8<-- [end:count]

# --8<-- [start:value_counts]
# Count occurrences of each value in "names" ("spam" appears twice in `df`).
result = df.select(
    pl.col("names").value_counts().alias("value_counts"),
)

print(result)
# --8<-- [end:value_counts]

# --8<-- [start:unique_counts]
# Select the unique names (keeping their original order) next to the result
# of `unique_counts` on the same column.
result = df.select(
    pl.col("names").unique(maintain_order=True).alias("unique"),
    pl.col("names").unique_counts().alias("unique_counts"),
)

print(result)
# --8<-- [end:unique_counts]

# --8<-- [start:collatz]
# One step of the Collatz rule per row, expressed as a conditional expression.
result = df.select(
    pl.col("nrs"),
    pl.when(pl.col("nrs") % 2 == 1)  # Is the number odd?
    .then(3 * pl.col("nrs") + 1)  # If so, multiply by 3 and add 1.
    .otherwise(pl.col("nrs") // 2)  # If not, divide by 2.
    .alias("Collatz"),
)

print(result)
# --8<-- [end:collatz]
133
134