Path: blob/main/crates/polars-ops/src/frame/join/dispatch_left_right.rs
8424 views
use polars_core::utils::Container;12use super::*;3use crate::prelude::*;45pub(super) fn left_join_from_series(6left: DataFrame,7right: &DataFrame,8s_left: &Series,9s_right: &Series,10args: JoinArgs,11verbose: bool,12drop_names: Option<Vec<PlSmallStr>>,13) -> PolarsResult<DataFrame> {14let (df_left, df_right) = materialize_left_join_from_series(15left, right, s_left, s_right, &args, verbose, drop_names,16)?;17_finish_join(df_left, df_right, args.suffix)18}1920pub(super) fn right_join_from_series(21left: &DataFrame,22right: DataFrame,23s_left: &Series,24s_right: &Series,25mut args: JoinArgs,26verbose: bool,27drop_names: Option<Vec<PlSmallStr>>,28) -> PolarsResult<DataFrame> {29// Swap the order of tables to do a right join.30args.maintain_order = args.maintain_order.flip();31let (df_right, df_left) = materialize_left_join_from_series(32right, left, s_right, s_left, &args, verbose, drop_names,33)?;34_finish_join(df_left, df_right, args.suffix)35}3637pub fn materialize_left_join_from_series(38mut left: DataFrame,39right_: &DataFrame,40s_left: &Series,41s_right: &Series,42args: &JoinArgs,43verbose: bool,44drop_names: Option<Vec<PlSmallStr>>,45) -> PolarsResult<(DataFrame, DataFrame)> {46let mut s_left = s_left.clone();47// Eagerly limit left if possible.48if let Some((offset, len)) = args.slice {49if offset == 0 {50left = left.slice(0, len);51s_left = s_left.slice(0, len);52}53}5455// Ensure that the chunks are aligned otherwise we go OOB.56let requires_ordering = matches!(57args.maintain_order,58MaintainOrderJoin::Right | MaintainOrderJoin::RightLeft59);6061let mut right = Cow::Borrowed(right_);62let mut s_right = s_right.clone();63if left.should_rechunk() || requires_ordering || left.n_chunks() != s_left.n_chunks() {64left.rechunk_mut_par();65s_left = s_left.rechunk();66}67if right.should_rechunk() || requires_ordering || right.n_chunks() != s_right.n_chunks() {68let mut other = right_.clone();69other.rechunk_mut_par();70right = Cow::Owned(other);71s_right = s_right.rechunk();72}7374let (left_idx, right_idx) = sort_or_hash_left(75&s_left,76&s_right,77verbose,78args.validation,79args.nulls_equal,80)?;8182let right = if let Some(drop_names) = drop_names {83right.drop_many(drop_names)84} else {85right.drop(s_right.name()).unwrap()86};87try_raise_keyboard_interrupt();8889#[cfg(feature = "chunked_ids")]90match (left_idx, right_idx) {91(ChunkJoinIds::Left(left_idx), ChunkJoinOptIds::Left(right_idx)) => {92if requires_ordering {93Ok(maintain_order_idx(94&left,95&right,96left_idx.as_slice(),97right_idx.as_slice(),98args,99))100} else {101Ok(POOL.join(102|| materialize_left_join_idx_left(&left, left_idx.as_slice(), args),103|| materialize_left_join_idx_right(&right, right_idx.as_slice(), args),104))105}106},107(ChunkJoinIds::Left(left_idx), ChunkJoinOptIds::Right(right_idx)) => Ok(POOL.join(108|| materialize_left_join_idx_left(&left, left_idx.as_slice(), args),109|| materialize_left_join_chunked_right(&right, right_idx.as_slice(), args),110)),111(ChunkJoinIds::Right(left_idx), ChunkJoinOptIds::Right(right_idx)) => Ok(POOL.join(112|| materialize_left_join_chunked_left(&left, left_idx.as_slice(), args),113|| materialize_left_join_chunked_right(&right, right_idx.as_slice(), args),114)),115(ChunkJoinIds::Right(left_idx), ChunkJoinOptIds::Left(right_idx)) => Ok(POOL.join(116|| materialize_left_join_chunked_left(&left, left_idx.as_slice(), args),117|| materialize_left_join_idx_right(&right, right_idx.as_slice(), args),118)),119}120121#[cfg(not(feature = "chunked_ids"))]122if requires_ordering {123Ok(maintain_order_idx(124&left,125&right,126left_idx.as_slice(),127right_idx.as_slice(),128args,129))130} else {131Ok(POOL.join(132|| materialize_left_join_idx_left(&left, left_idx.as_slice(), args),133|| materialize_left_join_idx_right(&right, right_idx.as_slice(), args),134))135}136}137138fn maintain_order_idx(139left: &DataFrame,140other: &DataFrame,141left_idx: &[IdxSize],142right_idx: &[NullableIdxSize],143args: &JoinArgs,144) -> (DataFrame, DataFrame) {145let mut df = {146// SAFETY: left_idx and right_idx are continuous memory that outlive the memory mapped slices147let left = unsafe { IdxCa::mmap_slice("a".into(), left_idx) };148let right = unsafe { IdxCa::mmap_slice("b".into(), bytemuck::cast_slice(right_idx)) };149unsafe {150DataFrame::new_unchecked(151left_idx.len(),152vec![left.into_series().into(), right.into_series().into()],153)154}155};156157let options = SortMultipleOptions::new()158.with_order_descending(false)159.with_maintain_order(true);160161let columns = match args.maintain_order {162// If the left order is preserved then there are no unsorted right rows163// So Left and LeftRight are equal164MaintainOrderJoin::Left | MaintainOrderJoin::LeftRight => vec!["a"],165MaintainOrderJoin::Right => vec!["b"],166MaintainOrderJoin::RightLeft => vec!["b", "a"],167_ => unreachable!(),168};169170df.sort_in_place(columns, options).unwrap();171df.rechunk_mut();172173let join_tuples_left = df174.column("a")175.unwrap()176.as_materialized_series()177.idx()178.unwrap()179.cont_slice()180.unwrap();181182let join_tuples_right = df183.column("b")184.unwrap()185.as_materialized_series()186.idx()187.unwrap()188.cont_slice()189.unwrap();190191POOL.join(192|| materialize_left_join_idx_left(left, join_tuples_left, args),193|| materialize_left_join_idx_right(other, bytemuck::cast_slice(join_tuples_right), args),194)195}196197fn materialize_left_join_idx_left(198left: &DataFrame,199left_idx: &[IdxSize],200args: &JoinArgs,201) -> DataFrame {202let left_idx = if let Some((offset, len)) = args.slice {203slice_slice(left_idx, offset, len)204} else {205left_idx206};207208unsafe {209left._create_left_df_from_slice(210left_idx,211true,212args.slice.is_some(),213matches!(214args.maintain_order,215MaintainOrderJoin::Left | MaintainOrderJoin::LeftRight216) || args.how == JoinType::Left217&& !matches!(218args.maintain_order,219MaintainOrderJoin::Right | MaintainOrderJoin::RightLeft,220),221)222}223}224225fn materialize_left_join_idx_right(226right: &DataFrame,227right_idx: &[NullableIdxSize],228args: &JoinArgs,229) -> DataFrame {230let right_idx = if let Some((offset, len)) = args.slice {231slice_slice(right_idx, offset, len)232} else {233right_idx234};235unsafe { IdxCa::with_nullable_idx(right_idx, |idx| right.take_unchecked(idx)) }236}237#[cfg(feature = "chunked_ids")]238fn materialize_left_join_chunked_left(239left: &DataFrame,240left_idx: &[ChunkId],241args: &JoinArgs,242) -> DataFrame {243let left_idx = if let Some((offset, len)) = args.slice {244slice_slice(left_idx, offset, len)245} else {246left_idx247};248unsafe { left.create_left_df_chunked(left_idx, true, args.slice.is_some()) }249}250251#[cfg(feature = "chunked_ids")]252fn materialize_left_join_chunked_right(253right: &DataFrame,254right_idx: &[ChunkId],255args: &JoinArgs,256) -> DataFrame {257let right_idx = if let Some((offset, len)) = args.slice {258slice_slice(right_idx, offset, len)259} else {260right_idx261};262unsafe { right._take_opt_chunked_unchecked_hor_par(right_idx) }263}264265266