Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/python/user-guide/expressions/structs.py
7890 views
# Build the example DataFrame of per-theatre movie ratings that the later
# snippets in this file query.
# --8<-- [start:ratings_df]
import polars as pl

ratings = pl.DataFrame(
    {
        "Movie": ["Cars", "IT", "ET", "Cars", "Up", "IT", "Cars", "ET", "Up", "Cars"],
        "Theatre": ["NE", "ME", "IL", "ND", "NE", "SD", "NE", "IL", "IL", "NE"],
        "Avg_Rating": [4.5, 4.4, 4.6, 4.3, 4.8, 4.7, 4.5, 4.9, 4.7, 4.6],
        "Count": [30, 27, 26, 29, 31, 28, 28, 26, 33, 28],
    }
)
print(ratings)
# --8<-- [end:ratings_df]

# Count how often each "Theatre" value occurs; the counts come back as a
# single struct column named "Theatre".
# --8<-- [start:state_value_counts]
result = ratings.select(pl.col("Theatre").value_counts(sort=True))
print(result)
# --8<-- [end:state_value_counts]

# Same query, but unnest the struct column so its fields become ordinary
# top-level columns.
# --8<-- [start:struct_unnest]
result = ratings.select(pl.col("Theatre").value_counts(sort=True)).unnest("Theatre")
print(result)
# --8<-- [end:struct_unnest]

# Build a Series named "ratings" from a list of dicts that all share the same
# keys; the `.struct` accessor is used on it further down.
# --8<-- [start:series_struct]
rating_series = pl.Series(
    "ratings",
    [
        {"Movie": "Cars", "Theatre": "NE", "Avg_Rating": 4.5},
        {"Movie": "Toy Story", "Theatre": "ME", "Avg_Rating": 4.9},
    ],
)
print(rating_series)
# --8<-- [end:series_struct]

# Deliberately inconsistent input: the second dict uses the key "Mov" instead
# of "Movie", and the third stores "Avg_Rating" as a string. `strict=False`
# is passed so the Series is still built and the printed structs show `null`s.
# --8<-- [start:series_struct_error]
null_rating_series = pl.Series(
    "ratings",
    [
        {"Movie": "Cars", "Theatre": "NE", "Avg_Rating": 4.5},
        {"Mov": "Toy Story", "Theatre": "ME", "Avg_Rating": 4.9},
        {"Movie": "Snow White", "Theatre": "IL", "Avg_Rating": "4.7"},
    ],
    strict=False,  # To show the final structs with `null` values.
)
print(null_rating_series)
# --8<-- [end:series_struct_error]

# Pull the single field "Movie" out of the struct Series `rating_series`.
# --8<-- [start:series_struct_extract]
result = rating_series.struct.field("Movie")
print(result)
# --8<-- [end:series_struct_extract]

# Rename the three struct fields of `rating_series` to "Film", "State" and
# "Value" (names are given positionally).
# --8<-- [start:series_struct_rename]
result = rating_series.struct.rename_fields(["Film", "State", "Value"])
print(result)
# --8<-- [end:series_struct_rename]

# Unnest the renamed struct into a DataFrame so the new field names show up
# as column names.
# --8<-- [start:struct-rename-check]
print(
    result.to_frame().unnest("ratings"),
)
# --8<-- [end:struct-rename-check]

# Keep only the rows of `ratings` whose ("Movie", "Theatre") pair occurs more
# than once, by packing both columns into a struct and testing it for
# duplicates.
# --8<-- [start:struct_duplicates]
result = ratings.filter(pl.struct("Movie", "Theatre").is_duplicated())
print(result)
# --8<-- [end:struct_duplicates]

# Add a "Rank" column: a dense, descending rank of the ("Count", "Avg_Rating")
# struct computed within each ("Movie", "Theatre") group, then keep only the
# rows whose ("Movie", "Theatre") pair is duplicated.
# --8<-- [start:struct_ranking]
result = ratings.with_columns(
    pl.struct("Count", "Avg_Rating")
    .rank("dense", descending=True)
    .over("Movie", "Theatre")
    .alias("Rank")
).filter(pl.struct("Movie", "Theatre").is_duplicated())

print(result)
# --8<-- [end:struct_ranking]

# Compute "length of key + value" per row in two ways: with a Python lambda
# applied to a struct of both columns, and with native expressions.
# NOTE: `str.len_bytes()` counts bytes while Python's `len()` counts
# characters; the two columns agree here because the keys are ASCII.
# --8<-- [start:multi_column_apply]
df = pl.DataFrame({"keys": ["a", "a", "b"], "values": [10, 7, 1]})

result = df.select(
    pl.struct(["keys", "values"])
    .map_elements(lambda x: len(x["keys"]) + x["values"], return_dtype=pl.Int64)
    .alias("solution_map_elements"),
    (pl.col("keys").str.len_bytes() + pl.col("values")).alias("solution_expr"),
)
print(result)
# --8<-- [end:multi_column_apply]


# --8<-- [start:ack]
def ack(m, n):
    """Return the Ackermann function A(m, n) for non-negative integers.

    Defined recursively as:
      * A(0, n) = n + 1
      * A(m, 0) = A(m - 1, 1)
      * A(m, n) = A(m - 1, A(m, n - 1))

    The recursion gets deep very quickly, so only small arguments are
    practical.
    """
    if not m:
        return n + 1
    if not n:
        return ack(m - 1, 1)
    return ack(m - 1, ack(m, n - 1))


# --8<-- [end:ack]

# Evaluate `ack` row by row: columns "m" and "n" are packed into a struct so
# the lambda receives both values of a row at once, and the result is stored
# in a new Int64 column named "ack".
# --8<-- [start:struct-ack]
values = pl.DataFrame(
    {
        "m": [0, 0, 0, 1, 1, 1, 2],
        "n": [2, 3, 4, 1, 2, 3, 1],
    }
)
result = values.with_columns(
    pl.struct(["m", "n"])
    .map_elements(lambda s: ack(s["m"], s["n"]), return_dtype=pl.Int64)
    .alias("ack")
)

print(result)
# --8<-- [end:struct-ack]
