# Path: blob/main/docs/source/src/python/user-guide/expressions/structs.py
# 7890 views
# Example script exercising Polars struct columns. Each example is wrapped in
# a pair of `# --8<-- [start:NAME]` / `# --8<-- [end:NAME]` marker comments
# (presumably consumed by the documentation's snippet tooling -- confirm
# before renaming or removing any marker).
#
# NOTE: later examples reuse names bound by earlier ones (`pl`, `ratings`,
# `rating_series`, `result`), so the sections must stay in this order.

# --8<-- [start:ratings_df]
import polars as pl

ratings = pl.DataFrame(
    {
        "Movie": ["Cars", "IT", "ET", "Cars", "Up", "IT", "Cars", "ET", "Up", "Cars"],
        "Theatre": ["NE", "ME", "IL", "ND", "NE", "SD", "NE", "IL", "IL", "NE"],
        "Avg_Rating": [4.5, 4.4, 4.6, 4.3, 4.8, 4.7, 4.5, 4.9, 4.7, 4.6],
        "Count": [30, 27, 26, 29, 31, 28, 28, 26, 33, 28],
    }
)
print(ratings)
# --8<-- [end:ratings_df]

# --8<-- [start:state_value_counts]
result = ratings.select(pl.col("Theatre").value_counts(sort=True))
print(result)
# --8<-- [end:state_value_counts]

# --8<-- [start:struct_unnest]
result = ratings.select(pl.col("Theatre").value_counts(sort=True)).unnest("Theatre")
print(result)
# --8<-- [end:struct_unnest]

# --8<-- [start:series_struct]
rating_series = pl.Series(
    "ratings",
    [
        {"Movie": "Cars", "Theatre": "NE", "Avg_Rating": 4.5},
        {"Movie": "Toy Story", "Theatre": "ME", "Avg_Rating": 4.9},
    ],
)
print(rating_series)
# --8<-- [end:series_struct]

# --8<-- [start:series_struct_error]
# The second dict uses the key "Mov" instead of "Movie" and the third gives
# "Avg_Rating" as a string; these irregular inputs are kept exactly as-is
# because they are what this example demonstrates.
null_rating_series = pl.Series(
    "ratings",
    [
        {"Movie": "Cars", "Theatre": "NE", "Avg_Rating": 4.5},
        {"Mov": "Toy Story", "Theatre": "ME", "Avg_Rating": 4.9},
        {"Movie": "Snow White", "Theatre": "IL", "Avg_Rating": "4.7"},
    ],
    strict=False,  # To show the final structs with `null` values.
)
print(null_rating_series)
# --8<-- [end:series_struct_error]

# --8<-- [start:series_struct_extract]
result = rating_series.struct.field("Movie")
print(result)
# --8<-- [end:series_struct_extract]

# --8<-- [start:series_struct_rename]
result = rating_series.struct.rename_fields(["Film", "State", "Value"])
print(result)
# --8<-- [end:series_struct_rename]

# --8<-- [start:struct-rename-check]
# `result` is still the renamed series bound in the previous section.
print(
    result.to_frame().unnest("ratings"),
)
# --8<-- [end:struct-rename-check]

# --8<-- [start:struct_duplicates]
result = ratings.filter(pl.struct("Movie", "Theatre").is_duplicated())
print(result)
# --8<-- [end:struct_duplicates]

# --8<-- [start:struct_ranking]
result = ratings.with_columns(
    pl.struct("Count", "Avg_Rating")
    .rank("dense", descending=True)
    .over("Movie", "Theatre")
    .alias("Rank")
).filter(pl.struct("Movie", "Theatre").is_duplicated())

print(result)
# --8<-- [end:struct_ranking]

# --8<-- [start:multi_column_apply]
df = pl.DataFrame({"keys": ["a", "a", "b"], "values": [10, 7, 1]})

# The same quantity is computed twice: once through a Python callback applied
# to a struct of both columns, and once with column expressions only.
result = df.select(
    pl.struct(["keys", "values"])
    .map_elements(lambda x: len(x["keys"]) + x["values"], return_dtype=pl.Int64)
    .alias("solution_map_elements"),
    (pl.col("keys").str.len_bytes() + pl.col("values")).alias("solution_expr"),
)
print(result)
# --8<-- [end:multi_column_apply]


# --8<-- [start:ack]
def ack(m, n):
    """Return the Ackermann function value for non-negative integers m and n.

    The recursion nests very deeply as m grows, so callers should pass only
    small arguments (the example below stays at m <= 2).
    """
    if not m:
        return n + 1
    if not n:
        return ack(m - 1, 1)
    return ack(m - 1, ack(m, n - 1))


# --8<-- [end:ack]

# --8<-- [start:struct-ack]
values = pl.DataFrame(
    {
        "m": [0, 0, 0, 1, 1, 1, 2],
        "n": [2, 3, 4, 1, 2, 3, 1],
    }
)
# Pack both argument columns into one struct so the callback receives `m` and
# `n` together for each row.
result = values.with_columns(
    pl.struct(["m", "n"])
    .map_elements(lambda s: ack(s["m"], s["n"]), return_dtype=pl.Int64)
    .alias("ack")
)

print(result)
# --8<-- [end:struct-ack]