Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/python/user-guide/expressions/window.py
7890 views
# --8<-- [start:pokemon]
import polars as pl

# The 18 type names; they become the categories of the Enum that both
# "Type" columns are cast to below.
types = (
    "Grass Water Fire Normal Ground Electric Psychic Fighting Bug Steel "
    "Flying Dragon Dark Ghost Poison Rock Ice Fairy".split()
)
type_enum = pl.Enum(types)
# then let's load some csv data with information about pokemon
pokemon = pl.read_csv(
    "https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv",
).cast({"Type 1": type_enum, "Type 2": type_enum})
print(pokemon.head())
# --8<-- [end:pokemon]

# --8<-- [start:rank]
# Dense-rank "Speed" inside each "Type 1" window; with descending=True the
# highest speed of a group gets rank 1.
result = pokemon.select(
    pl.col("Name", "Type 1"),
    pl.col("Speed").rank("dense", descending=True).over("Type 1").alias("Speed rank"),
)

print(result)
# --8<-- [end:rank]

# --8<-- [start:rank-multiple]
# Same dense ranking, but the window is now defined by the combination of
# "Type 1" and "Type 2".
result = pokemon.select(
    pl.col("Name", "Type 1", "Type 2"),
    pl.col("Speed")
    .rank("dense", descending=True)
    .over("Type 1", "Type 2")
    .alias("Speed rank"),
)

print(result)
# --8<-- [end:rank-multiple]

# --8<-- [start:rank-explode]
# The ranking written with group_by instead of a window function: `agg`
# collects "Name" and the per-group rank into one list per "Type 1" group,
# and `explode` unpacks those lists back into one row per pokemon.
result = (
    pokemon.group_by("Type 1")
    .agg(
        pl.col("Name"),
        pl.col("Speed").rank("dense", descending=True).alias("Speed rank"),
    )
    .select(pl.col("Name"), pl.col("Type 1"), pl.col("Speed rank"))
    .explode("Name", "Speed rank")
)

print(result)
# --8<-- [end:rank-explode]

# --8<-- [start:athletes]
# Small hand-made frame: six athletes from two countries, each with a rank.
athletes = pl.DataFrame(
    {
        "athlete": list("ABCDEF"),
        "country": ["PT", "NL", "NL", "PT", "PT", "NL"],
        "rank": [6, 1, 5, 4, 2, 3],
    }
)
print(athletes)
# --8<-- [end:athletes]

# --8<-- [start:athletes-sort-over-country]
# Sort "athlete" and "rank" by "rank" within each "country" window; the
# "country" column itself is selected unchanged.
result = athletes.select(
    pl.col("athlete", "rank").sort_by(pl.col("rank")).over(pl.col("country")),
    pl.col("country"),
)

print(result)
# --8<-- [end:athletes-sort-over-country]

# --8<-- [start:athletes-explode]
# Per-country sort of every column, this time with
# mapping_strategy="explode" instead of the default mapping strategy.
result = athletes.select(
    pl.all()
    .sort_by(pl.col("rank"))
    .over(pl.col("country"), mapping_strategy="explode"),
)

print(result)
# --8<-- [end:athletes-explode]

# --8<-- [start:athletes-join]
# Replace "rank" with the sorted ranks of the row's country, using
# mapping_strategy="join".
result = athletes.with_columns(
    pl.col("rank").sort().over(pl.col("country"), mapping_strategy="join"),
)

print(result)
# --8<-- [end:athletes-join]

# --8<-- [start:pokemon-mean]
# Mean "Speed" computed per "Type 1" window, selected next to the original
# "Name", "Type 1" and "Speed" columns.
result = pokemon.select(
    pl.col("Name", "Type 1", "Speed"),
    pl.col("Speed").mean().over(pl.col("Type 1")).alias("Mean speed in group"),
)

print(result)
# --8<-- [end:pokemon-mean]


# --8<-- [start:group_by]
# Three means in one select: "Attack" per "Type 1", "Defense" per
# ("Type 1", "Type 2") combination, and "Attack" with no window at all.
result = pokemon.select(
    "Type 1",
    "Type 2",
    pl.col("Attack").mean().over("Type 1").alias("avg_attack_by_type"),
    pl.col("Defense")
    .mean()
    .over(["Type 1", "Type 2"])
    .alias("avg_defense_by_type_combination"),
    pl.col("Attack").mean().alias("avg_attack"),
)
print(result)
# --8<-- [end:group_by]

# --8<-- [start:operations]
# Keep only the rows whose "Type 2" is "Psychic", then narrow to three columns.
filtered = pokemon.filter(pl.col("Type 2") == "Psychic").select(
    "Name",
    "Type 1",
    "Speed",
)
print(filtered)
# --8<-- [end:operations]

# --8<-- [start:sort]
# Reorder "Name" and "Speed" by descending "Speed" within each "Type 1" window.
result = filtered.with_columns(
    pl.col("Name", "Speed").sort_by("Speed", descending=True).over("Type 1"),
)
print(result)
# --8<-- [end:sort]

# --8<-- [start:examples]
# The frame is sorted by "Type 1" first. Each expression then takes the first
# three values per "Type 1" window (by speed, by attack, alphabetically) and
# uses mapping_strategy="explode" to turn them into rows.
result = pokemon.sort("Type 1").select(
    pl.col("Type 1").head(3).over("Type 1", mapping_strategy="explode"),
    pl.col("Name")
    .sort_by(pl.col("Speed"), descending=True)
    .head(3)
    .over("Type 1", mapping_strategy="explode")
    .alias("fastest/group"),
    pl.col("Name")
    .sort_by(pl.col("Attack"), descending=True)
    .head(3)
    .over("Type 1", mapping_strategy="explode")
    .alias("strongest/group"),
    pl.col("Name")
    .sort()
    .head(3)
    .over("Type 1", mapping_strategy="explode")
    .alias("sorted_by_alphabet"),
)
print(result)
# --8<-- [end:examples]