# Path: blob/main/docs/source/src/python/user-guide/expressions/operations.py
# 7890 views
# Example script for expression operations on a Polars DataFrame.
#
# The `# --8<-- [start:NAME]` / `# --8<-- [end:NAME]` comments delimit named
# sections of this file (they look like pymdownx "snippets" section markers --
# TODO confirm against the docs build). Keep the markers and the code between
# them intact; explanatory comments like this one are placed outside the
# markers on purpose.
#
# Sections share module-level state: every section reads `df`, and the
# `operator-overloading` section compares against the `result` produced by the
# `arithmetic` section, so the sections must run top to bottom.

# Build the example frame: an integer column containing one None entry, a
# string column, five values drawn from NumPy's seeded global RNG, and a
# two-valued grouping column.
# --8<-- [start:dataframe]
import polars as pl
import numpy as np

np.random.seed(42)  # For reproducibility.

df = pl.DataFrame(
    {
        "nrs": [1, 2, 3, None, 5],
        "names": ["foo", "ham", "spam", "egg", "spam"],
        "random": np.random.rand(5),
        "groups": ["A", "A", "B", "A", "B"],
    }
)
print(df)
# --8<-- [end:dataframe]

# Arithmetic written with Python operators; each output column is aliased to
# the text of the operation that produced it.
# --8<-- [start:arithmetic]
result = df.select(
    (pl.col("nrs") + 5).alias("nrs + 5"),
    (pl.col("nrs") - 5).alias("nrs - 5"),
    (pl.col("nrs") * pl.col("random")).alias("nrs * random"),
    (pl.col("nrs") / pl.col("random")).alias("nrs / random"),
    (pl.col("nrs") ** 2).alias("nrs ** 2"),
    (pl.col("nrs") % 3).alias("nrs % 3"),
)

print(result)
# --8<-- [end:arithmetic]

# The same six operations, in the same order and with the same aliases, written
# with named expression methods. The printed value is whether this frame equals
# the `result` frame from the arithmetic section above.
# --8<-- [start:operator-overloading]
# Python only:
result_named_operators = df.select(
    (pl.col("nrs").add(5)).alias("nrs + 5"),
    (pl.col("nrs").sub(5)).alias("nrs - 5"),
    (pl.col("nrs").mul(pl.col("random"))).alias("nrs * random"),
    (pl.col("nrs").truediv(pl.col("random"))).alias("nrs / random"),
    (pl.col("nrs").pow(2)).alias("nrs ** 2"),
    (pl.col("nrs").mod(3)).alias("nrs % 3"),
)

print(result.equals(result_named_operators))
# --8<-- [end:operator-overloading]

# Comparison operators; the trailing comment on each line names the matching
# expression method.
# --8<-- [start:comparison]
result = df.select(
    (pl.col("nrs") > 1).alias("nrs > 1"),  # .gt
    (pl.col("nrs") >= 3).alias("nrs >= 3"),  # .ge
    (pl.col("random") < 0.2).alias("random < .2"),  # .lt
    (pl.col("random") <= 0.5).alias("random <= .5"),  # .le
    (pl.col("nrs") != 1).alias("nrs != 1"),  # .ne
    (pl.col("nrs") == 1).alias("nrs == 1"),  # .eq
)
print(result)
# --8<-- [end:comparison]

# Boolean combinations, first with the `&`, `|`, `~` operators and then with
# the named methods; the final print reports whether both frames are equal.
# --8<-- [start:boolean]
# Boolean operators & | ~
result = df.select(
    ((~pl.col("nrs").is_null()) & (pl.col("groups") == "A")).alias(
        "number not null and group A"
    ),
    ((pl.col("random") < 0.5) | (pl.col("groups") == "B")).alias(
        "random < 0.5 or group B"
    ),
)

print(result)

# Corresponding named functions `and_`, `or_`, and `not_`.
result2 = df.select(
    (pl.col("nrs").is_null().not_().and_(pl.col("groups") == "A")).alias(
        "number not null and group A"
    ),
    ((pl.col("random") < 0.5).or_(pl.col("groups") == "B")).alias(
        "random < 0.5 or group B"
    ),
)
print(result.equals(result2))
# --8<-- [end:boolean]

# The same `&`, `|`, `~` operators plus `^`, applied to the integer column
# `nrs` with the integer literal 6 instead of to boolean expressions.
# --8<-- [start:bitwise]
result = df.select(
    pl.col("nrs"),
    (pl.col("nrs") & 6).alias("nrs & 6"),
    (pl.col("nrs") | 6).alias("nrs | 6"),
    (~pl.col("nrs")).alias("not nrs"),
    (pl.col("nrs") ^ 6).alias("nrs ^ 6"),
)

print(result)
# --8<-- [end:bitwise]

# Unique-value counting on a larger frame: 100_000 integers drawn from
# [0, 100_000) with the same seeded global RNG used above (so the values depend
# on the earlier `np.random.rand(5)` call having run first).
# --8<-- [start:count]
long_df = pl.DataFrame({"numbers": np.random.randint(0, 100_000, 100_000)})

result = long_df.select(
    pl.col("numbers").n_unique().alias("n_unique"),
    pl.col("numbers").approx_n_unique().alias("approx_n_unique"),
)

print(result)
# --8<-- [end:count]

# `value_counts` on the string column of the small frame.
# --8<-- [start:value_counts]
result = df.select(
    pl.col("names").value_counts().alias("value_counts"),
)

print(result)
# --8<-- [end:value_counts]

# Order-preserving `unique` selected alongside `unique_counts` for the same
# column.
# --8<-- [start:unique_counts]
result = df.select(
    pl.col("names").unique(maintain_order=True).alias("unique"),
    pl.col("names").unique_counts().alias("unique_counts"),
)

print(result)
# --8<-- [end:unique_counts]

# Conditional expression built with `when` / `then` / `otherwise`.
# --8<-- [start:collatz]
result = df.select(
    pl.col("nrs"),
    pl.when(pl.col("nrs") % 2 == 1)  # Is the number odd?
    .then(3 * pl.col("nrs") + 1)  # If so, multiply by 3 and add 1.
    .otherwise(pl.col("nrs") // 2)  # If not, divide by 2.
    .alias("Collatz"),
)

print(result)
# --8<-- [end:collatz]