Path: blob/main/docs/source/src/rust/user-guide/transformations/joins.rs
7889 views
// --8<-- [start:setup]1use polars::prelude::*;2// --8<-- [end:setup]34fn main() -> Result<(), Box<dyn std::error::Error>> {5// NOTE: This assumes the data has been downloaded and is available.6// See the corresponding Python script for the remote location of the data.78// --8<-- [start:props_groups]9let props_groups = CsvReadOptions::default()10.with_has_header(true)11.try_into_reader_with_file_path(Some("docs/assets/data/monopoly_props_groups.csv".into()))?12.finish()?13.head(Some(5));14println!("{props_groups}");15// --8<-- [end:props_groups]1617// --8<-- [start:props_prices]18let props_prices = CsvReadOptions::default()19.with_has_header(true)20.try_into_reader_with_file_path(Some("docs/assets/data/monopoly_props_prices.csv".into()))?21.finish()?22.head(Some(5));23println!("{props_prices}");24// --8<-- [end:props_prices]2526// --8<-- [start:equi-join]27// In Rust, we cannot use the shorthand of specifying a common28// column name just once.29let result = props_groups30.clone()31.lazy()32.join(33props_prices.clone().lazy(),34[col("property_name")],35[col("property_name")],36JoinArgs::default(),37)38.collect()?;39println!("{result}");40// --8<-- [end:equi-join]4142// --8<-- [start:props_groups2]43let props_groups2 = props_groups44.clone()45.lazy()46.with_column(col("property_name").str().to_lowercase())47.collect()?;48println!("{props_groups2}");49// --8<-- [end:props_groups2]5051// --8<-- [start:props_prices2]52let props_prices2 = props_prices53.clone()54.lazy()55.select([col("property_name").alias("name"), col("cost")])56.collect()?;57println!("{props_prices2}");58// --8<-- [end:props_prices2]5960// --8<-- [start:join-key-expression]61let result = props_groups262.lazy()63.join(64props_prices2.lazy(),65[col("property_name")],66[col("name").str().to_lowercase()],67JoinArgs::default(),68)69.collect()?;70println!("{result}");71// --8<-- [end:join-key-expression]7273// --8<-- [start:inner-join]74let result = props_groups75.clone()76.lazy()77.join(78props_prices.clone().lazy(),79[col("property_name")],80[col("property_name")],81JoinArgs::new(JoinType::Inner),82)83.collect()?;84println!("{result}");85// --8<-- [end:inner-join]8687// --8<-- [start:left-join]88let result = props_groups89.clone()90.lazy()91.join(92props_prices.clone().lazy(),93[col("property_name")],94[col("property_name")],95JoinArgs::new(JoinType::Left),96)97.collect()?;98println!("{result}");99// --8<-- [end:left-join]100101// --8<-- [start:right-join]102let result = props_groups103.clone()104.lazy()105.join(106props_prices.clone().lazy(),107[col("property_name")],108[col("property_name")],109JoinArgs::new(JoinType::Right),110)111.collect()?;112println!("{result}");113// --8<-- [end:right-join]114115// --8<-- [start:left-right-join-equals]116// `equals_missing` is needed instead of `equals`117// so that missing values compare as equal.118let dfs_match = result.equals_missing(119&props_prices120.clone()121.lazy()122.join(123props_groups.clone().lazy(),124[col("property_name")],125[col("property_name")],126JoinArgs::new(JoinType::Left),127)128.select([129// Reorder the columns to match the order of `result`.130col("group"),131col("property_name"),132col("cost"),133])134.collect()?,135);136println!("{dfs_match}");137// --8<-- [end:left-right-join-equals]138139// --8<-- [start:full-join]140let result = props_groups141.clone()142.lazy()143.join(144props_prices.clone().lazy(),145[col("property_name")],146[col("property_name")],147JoinArgs::new(JoinType::Full),148)149.collect()?;150println!("{result}");151// --8<-- [end:full-join]152153// --8<-- [start:full-join-coalesce]154let result = props_groups155.clone()156.lazy()157.join(158props_prices.clone().lazy(),159[col("property_name")],160[col("property_name")],161JoinArgs::new(JoinType::Full).with_coalesce(JoinCoalesce::CoalesceColumns),162)163.collect()?;164println!("{result}");165// --8<-- [end:full-join-coalesce]166167// --8<-- [start:semi-join]168let result = props_groups169.clone()170.lazy()171.join(172props_prices.clone().lazy(),173[col("property_name")],174[col("property_name")],175JoinArgs::new(JoinType::Semi),176)177.collect()?;178println!("{result}");179// --8<-- [end:semi-join]180181// --8<-- [start:anti-join]182let result = props_groups183.lazy()184.join(185props_prices.clone().lazy(),186[col("property_name")],187[col("property_name")],188JoinArgs::new(JoinType::Anti),189)190.collect()?;191println!("{result}");192// --8<-- [end:anti-join]193194// --8<-- [start:players]195let players = df!(196"name" => ["Alice", "Bob"],197"cash" => [78, 135],198)?;199println!("{players}");200// --8<-- [end:players]201202// --8<-- [start:non-equi]203let result = players204.clone()205.lazy()206.join_builder()207.with(props_prices.lazy())208.join_where(vec![col("cash").cast(DataType::Int64).gt(col("cost"))])209.collect()?;210println!("{result}");211// --8<-- [end:non-equi]212213// --8<-- [start:df_trades]214use chrono::prelude::*;215216let df_trades = df!(217"time" => [218NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 1, 0).unwrap(),219NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 1, 0).unwrap(),220NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 3, 0).unwrap(),221NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 6, 0).unwrap(),222],223"stock" => ["A", "B", "B", "C"],224"trade" => [101, 299, 301, 500],225)?;226println!("{df_trades}");227// --8<-- [end:df_trades]228229// --8<-- [start:df_quotes]230let df_quotes = df!(231"time" => [232NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 1, 0).unwrap(),233NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 2, 0).unwrap(),234NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 4, 0).unwrap(),235NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_opt(9, 6, 0).unwrap(),236],237"stock" => ["A", "B", "C", "A"],238"quote" => [100, 300, 501, 102],239)?;240println!("{df_quotes}");241// --8<-- [end:df_quotes]242243// --8<-- [start:asof]244let result = df_trades.join_asof_by(245&df_quotes,246"time",247"time",248["stock"],249["stock"],250AsofStrategy::Backward,251None,252true,253true,254)?;255println!("{result}");256// --8<-- [end:asof]257258// --8<-- [start:asof-tolerance]259let result = df_trades.join_asof_by(260&df_quotes,261"time",262"time",263["stock"],264["stock"],265AsofStrategy::Backward,266Some(AnyValue::Duration(60000, TimeUnit::Milliseconds)),267true,268true,269)?;270println!("{result}");271// --8<-- [end:asof-tolerance]272273// --8<-- [start:cartesian-product]274let tokens = df!(275"monopoly_token" => ["hat", "shoe", "boat"],276)?;277278let result = players279.lazy()280.select([col("name")])281.cross_join(tokens.lazy(), None)282.collect()?;283println!("{result}");284// --8<-- [end:cartesian-product]285286Ok(())287}288289290