Path: blob/main/docs/source/src/python/user-guide/transformations/joins.py
7890 views
# --8<-- [start:prep-data]1import pathlib2import requests345DATA = [6(7"https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/data/monopoly_props_groups.csv",8"docs/assets/data/monopoly_props_groups.csv",9),10(11"https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/data/monopoly_props_prices.csv",12"docs/assets/data/monopoly_props_prices.csv",13),14]151617for url, dest in DATA:18if pathlib.Path(dest).exists():19continue20with open(dest, "wb") as f:21f.write(requests.get(url, timeout=10).content)22# --8<-- [end:prep-data]2324# --8<-- [start:props_groups]25import polars as pl2627props_groups = pl.read_csv("docs/assets/data/monopoly_props_groups.csv").head(5)28print(props_groups)29# --8<-- [end:props_groups]3031# --8<-- [start:props_prices]32props_prices = pl.read_csv("docs/assets/data/monopoly_props_prices.csv").head(5)33print(props_prices)34# --8<-- [end:props_prices]3536# --8<-- [start:equi-join]37result = props_groups.join(props_prices, on="property_name")38print(result)39# --8<-- [end:equi-join]4041# --8<-- [start:props_groups2]42props_groups2 = props_groups.with_columns(43pl.col("property_name").str.to_lowercase(),44)45print(props_groups2)46# --8<-- [end:props_groups2]4748# --8<-- [start:props_prices2]49props_prices2 = props_prices.select(50pl.col("property_name").alias("name"), pl.col("cost")51)52print(props_prices2)53# --8<-- [end:props_prices2]5455# --8<-- [start:join-key-expression]56result = props_groups2.join(57props_prices2,58left_on="property_name",59right_on=pl.col("name").str.to_lowercase(),60)61print(result)62# --8<-- [end:join-key-expression]6364# --8<-- [start:inner-join]65result = props_groups.join(props_prices, on="property_name", how="inner")66print(result)67# --8<-- [end:inner-join]6869# --8<-- [start:left-join]70result = props_groups.join(props_prices, on="property_name", how="left")71print(result)72# --8<-- [end:left-join]7374# --8<-- [start:right-join]75result = props_groups.join(props_prices, on="property_name", how="right")76print(result)77# --8<-- [end:right-join]7879# --8<-- [start:left-right-join-equals]80print(81result.equals(82props_prices.join(83props_groups,84on="property_name",85how="left",86# Reorder the columns to match the order from above.87).select(pl.col("group"), pl.col("property_name"), pl.col("cost"))88)89)90# --8<-- [end:left-right-join-equals]9192# --8<-- [start:full-join]93result = props_groups.join(props_prices, on="property_name", how="full")94print(result)95# --8<-- [end:full-join]9697# --8<-- [start:full-join-coalesce]98result = props_groups.join(99props_prices,100on="property_name",101how="full",102coalesce=True,103)104print(result)105# --8<-- [end:full-join-coalesce]106107# --8<-- [start:semi-join]108result = props_groups.join(props_prices, on="property_name", how="semi")109print(result)110# --8<-- [end:semi-join]111112# --8<-- [start:anti-join]113result = props_groups.join(props_prices, on="property_name", how="anti")114print(result)115# --8<-- [end:anti-join]116117# --8<-- [start:players]118players = pl.DataFrame(119{120"name": ["Alice", "Bob"],121"cash": [78, 135],122}123)124print(players)125# --8<-- [end:players]126127# --8<-- [start:non-equi]128result = players.join_where(props_prices, pl.col("cash") > pl.col("cost"))129print(result)130# --8<-- [end:non-equi]131132# --8<-- [start:df_trades]133from datetime import datetime134135df_trades = pl.DataFrame(136{137"time": [138datetime(2020, 1, 1, 9, 1, 0),139datetime(2020, 1, 1, 9, 1, 0),140datetime(2020, 1, 1, 9, 3, 0),141datetime(2020, 1, 1, 9, 6, 0),142],143"stock": ["A", "B", "B", "C"],144"trade": [101, 299, 301, 500],145}146)147print(df_trades)148# --8<-- [end:df_trades]149150# --8<-- [start:df_quotes]151df_quotes = pl.DataFrame(152{153"time": [154datetime(2020, 1, 1, 9, 0, 0),155datetime(2020, 1, 1, 9, 2, 0),156datetime(2020, 1, 1, 9, 4, 0),157datetime(2020, 1, 1, 9, 6, 0),158],159"stock": ["A", "B", "C", "A"],160"quote": [100, 300, 501, 102],161}162)163164print(df_quotes)165# --8<-- [end:df_quotes]166167# --8<-- [start:asof]168df_asof_join = df_trades.join_asof(df_quotes, on="time", by="stock")169print(df_asof_join)170# --8<-- [end:asof]171172# --8<-- [start:asof-tolerance]173df_asof_tolerance_join = df_trades.join_asof(174df_quotes, on="time", by="stock", tolerance="1m"175)176print(df_asof_tolerance_join)177# --8<-- [end:asof-tolerance]178179# --8<-- [start:cartesian-product]180tokens = pl.DataFrame({"monopoly_token": ["hat", "shoe", "boat"]})181182result = players.select(pl.col("name")).join(tokens, how="cross")183print(result)184# --8<-- [end:cartesian-product]185186187