Path: blob/main/docs/source/src/python/user-guide/expressions/expression-expansion.py
7890 views
# --8<-- [start:df]
import polars as pl

df = pl.DataFrame(
    {  # As of 14th October 2024, ~3pm UTC
        "ticker": ["AAPL", "NVDA", "MSFT", "GOOG", "AMZN"],
        "company_name": ["Apple", "NVIDIA", "Microsoft", "Alphabet (Google)", "Amazon"],
        "price": [229.9, 138.93, 420.56, 166.41, 188.4],
        "day_high": [231.31, 139.6, 424.04, 167.62, 189.83],
        "day_low": [228.6, 136.3, 417.52, 164.78, 188.44],
        "year_high": [237.23, 140.76, 468.35, 193.31, 201.2],
        "year_low": [164.08, 39.23, 324.39, 121.46, 118.35],
    }
)

print(df)
# --8<-- [end:df]

# --8<-- [start:col-with-names]
eur_usd_rate = 1.09  # As of 14th October 2024

result = df.with_columns(
    (
        pl.col(
            "price",
            "day_high",
            "day_low",
            "year_high",
            "year_low",
        )
        / eur_usd_rate
    ).round(2)
)
print(result)
# --8<-- [end:col-with-names]

# --8<-- [start:expression-list]
exprs = [
    (pl.col("price") / eur_usd_rate).round(2),
    (pl.col("day_high") / eur_usd_rate).round(2),
    (pl.col("day_low") / eur_usd_rate).round(2),
    (pl.col("year_high") / eur_usd_rate).round(2),
    (pl.col("year_low") / eur_usd_rate).round(2),
]

result2 = df.with_columns(exprs)
print(result.equals(result2))
# --8<-- [end:expression-list]

# --8<-- [start:col-with-dtype]
result = df.with_columns((pl.col(pl.Float64) / eur_usd_rate).round(2))
print(result)
# --8<-- [end:col-with-dtype]

# --8<-- [start:col-with-dtypes]
result2 = df.with_columns(
    (
        pl.col(
            pl.Float32,
            pl.Float64,
        )
        / eur_usd_rate
    ).round(2)
)
print(result.equals(result2))
# --8<-- [end:col-with-dtypes]

# --8<-- [start:col-with-regex]
result = df.select(pl.col("ticker", "^.*_high$", "^.*_low$"))
print(result)
# --8<-- [end:col-with-regex]

# --8<-- [start:col-error]
try:
    df.select(pl.col("ticker", pl.Float64))
except TypeError as err:
    print("TypeError:", err)
# --8<-- [end:col-error]

# --8<-- [start:all]
result = df.select(pl.all())
print(result.equals(df))
# --8<-- [end:all]

# --8<-- [start:all-exclude]
result = df.select(pl.all().exclude("^day_.*$"))
print(result)
# --8<-- [end:all-exclude]

# --8<-- [start:col-exclude]
result = df.select(pl.col(pl.Float64).exclude("^day_.*$"))
print(result)
# --8<-- [end:col-exclude]

# --8<-- [start:duplicate-error]
from polars.exceptions import DuplicateError

gbp_usd_rate = 1.31  # As of 14th October 2024

try:
    df.select(
        pl.col("price") / gbp_usd_rate,  # This would be named "price"...
        pl.col("price") / eur_usd_rate,  # And so would this.
    )
except DuplicateError as err:
    print("DuplicateError:", err)
# --8<-- [end:duplicate-error]

# --8<-- [start:alias]
result = df.select(
    (pl.col("price") / gbp_usd_rate).alias("price (GBP)"),
    (pl.col("price") / eur_usd_rate).alias("price (EUR)"),
)
print(result)
# --8<-- [end:alias]

# --8<-- [start:prefix-suffix]
result = df.select(
    (pl.col("^year_.*$") / eur_usd_rate).name.prefix("in_eur_"),
    (pl.col("day_high", "day_low") / gbp_usd_rate).name.suffix("_gbp"),
)
print(result)
# --8<-- [end:prefix-suffix]

# --8<-- [start:name-map]
# There is also `.name.to_uppercase`, so this usage of `.map` is moot.
result = df.select(pl.all().name.map(str.upper))
print(result)
# --8<-- [end:name-map]

# --8<-- [start:for-with_columns]
result = df
for tp in ["day", "year"]:
    result = result.with_columns(
        (pl.col(f"{tp}_high") - pl.col(f"{tp}_low")).alias(f"{tp}_amplitude")
    )
print(result)
# --8<-- [end:for-with_columns]


# --8<-- [start:yield-expressions]
def amplitude_expressions(time_periods):
    """Yield one "<period>_amplitude" expression per entry of `time_periods`.

    Each expression subtracts the "<period>_low" column from the
    "<period>_high" column and aliases the difference "<period>_amplitude".
    """
    for tp in time_periods:
        yield (pl.col(f"{tp}_high") - pl.col(f"{tp}_low")).alias(f"{tp}_amplitude")


result = df.with_columns(amplitude_expressions(["day", "year"]))
print(result)
# --8<-- [end:yield-expressions]

# --8<-- [start:selectors]
import polars.selectors as cs

result = df.select(cs.string() | cs.ends_with("_high"))
print(result)
# --8<-- [end:selectors]

# --8<-- [start:selectors-set-operations]
result = df.select(cs.contains("_") - cs.string())
print(result)
# --8<-- [end:selectors-set-operations]

# --8<-- [start:selectors-expressions]
result = df.select((cs.contains("_") - cs.string()) / eur_usd_rate)
print(result)
# --8<-- [end:selectors-expressions]

# --8<-- [start:selector-ambiguity]
people = pl.DataFrame(
    {
        "name": ["Anna", "Bob"],
        "has_partner": [True, False],
        "has_kids": [False, False],
        "has_tattoos": [True, False],
        "is_alive": [True, True],
    }
)

wrong_result = people.select((~cs.starts_with("has_")).name.prefix("not_"))
print(wrong_result)
# --8<-- [end:selector-ambiguity]

# --8<-- [start:as_expr]
result = people.select((~cs.starts_with("has_").as_expr()).name.prefix("not_"))
print(result)
# --8<-- [end:as_expr]

# --8<-- [start:is_selector]
print(cs.is_selector(~cs.starts_with("has_").as_expr()))
# --8<-- [end:is_selector]

# --8<-- [start:expand_selector]
print(
    cs.expand_selector(
        people,
        cs.starts_with("has_"),
    )
)
# --8<-- [end:expand_selector]