Path: blob/main/crates/polars-ops/src/frame/join/dispatch_left_right.rs
6940 views
use super::*;1use crate::prelude::*;23pub(super) fn left_join_from_series(4left: DataFrame,5right: &DataFrame,6s_left: &Series,7s_right: &Series,8args: JoinArgs,9verbose: bool,10drop_names: Option<Vec<PlSmallStr>>,11) -> PolarsResult<DataFrame> {12let (df_left, df_right) = materialize_left_join_from_series(13left, right, s_left, s_right, &args, verbose, drop_names,14)?;15_finish_join(df_left, df_right, args.suffix)16}1718pub(super) fn right_join_from_series(19left: &DataFrame,20right: DataFrame,21s_left: &Series,22s_right: &Series,23mut args: JoinArgs,24verbose: bool,25drop_names: Option<Vec<PlSmallStr>>,26) -> PolarsResult<DataFrame> {27// Swap the order of tables to do a right join.28args.maintain_order = args.maintain_order.flip();29let (df_right, df_left) = materialize_left_join_from_series(30right, left, s_right, s_left, &args, verbose, drop_names,31)?;32_finish_join(df_left, df_right, args.suffix)33}3435pub fn materialize_left_join_from_series(36mut left: DataFrame,37right_: &DataFrame,38s_left: &Series,39s_right: &Series,40args: &JoinArgs,41verbose: bool,42drop_names: Option<Vec<PlSmallStr>>,43) -> PolarsResult<(DataFrame, DataFrame)> {44let mut s_left = s_left.clone();45// Eagerly limit left if possible.46if let Some((offset, len)) = args.slice {47if offset == 0 {48left = left.slice(0, len);49s_left = s_left.slice(0, len);50}51}5253// Ensure that the chunks are aligned otherwise we go OOB.54let mut right = Cow::Borrowed(right_);55let mut s_right = s_right.clone();56if left.should_rechunk() {57left.as_single_chunk_par();58s_left = s_left.rechunk();59}60if right.should_rechunk() {61let mut other = right_.clone();62other.as_single_chunk_par();63right = Cow::Owned(other);64s_right = s_right.rechunk();65}6667// The current sort_or_hash_left implementation preserves the Left DataFrame order so skip left for now.68let requires_ordering = matches!(69args.maintain_order,70MaintainOrderJoin::Right | MaintainOrderJoin::RightLeft71);72if requires_ordering {73// When ordering we rechunk the series so we don't get ChunkIds as output74s_left = s_left.rechunk();75s_right = s_right.rechunk();76}7778let (left_idx, right_idx) = sort_or_hash_left(79&s_left,80&s_right,81verbose,82args.validation,83args.nulls_equal,84)?;8586let right = if let Some(drop_names) = drop_names {87right.drop_many(drop_names)88} else {89right.drop(s_right.name()).unwrap()90};91try_raise_keyboard_interrupt();9293#[cfg(feature = "chunked_ids")]94match (left_idx, right_idx) {95(ChunkJoinIds::Left(left_idx), ChunkJoinOptIds::Left(right_idx)) => {96if requires_ordering {97Ok(maintain_order_idx(98&left,99&right,100left_idx.as_slice(),101right_idx.as_slice(),102args,103))104} else {105Ok(POOL.join(106|| materialize_left_join_idx_left(&left, left_idx.as_slice(), args),107|| materialize_left_join_idx_right(&right, right_idx.as_slice(), args),108))109}110},111(ChunkJoinIds::Left(left_idx), ChunkJoinOptIds::Right(right_idx)) => Ok(POOL.join(112|| materialize_left_join_idx_left(&left, left_idx.as_slice(), args),113|| materialize_left_join_chunked_right(&right, right_idx.as_slice(), args),114)),115(ChunkJoinIds::Right(left_idx), ChunkJoinOptIds::Right(right_idx)) => Ok(POOL.join(116|| materialize_left_join_chunked_left(&left, left_idx.as_slice(), args),117|| materialize_left_join_chunked_right(&right, right_idx.as_slice(), args),118)),119(ChunkJoinIds::Right(left_idx), ChunkJoinOptIds::Left(right_idx)) => Ok(POOL.join(120|| materialize_left_join_chunked_left(&left, left_idx.as_slice(), args),121|| materialize_left_join_idx_right(&right, right_idx.as_slice(), args),122)),123}124125#[cfg(not(feature = "chunked_ids"))]126if requires_ordering {127Ok(maintain_order_idx(128&left,129&right,130left_idx.as_slice(),131right_idx.as_slice(),132args,133))134} else {135Ok(POOL.join(136|| materialize_left_join_idx_left(&left, left_idx.as_slice(), args),137|| materialize_left_join_idx_right(&right, right_idx.as_slice(), args),138))139}140}141142fn maintain_order_idx(143left: &DataFrame,144other: &DataFrame,145left_idx: &[IdxSize],146right_idx: &[NullableIdxSize],147args: &JoinArgs,148) -> (DataFrame, DataFrame) {149let mut df = {150// SAFETY: left_idx and right_idx are continuous memory that outlive the memory mapped slices151let left = unsafe { IdxCa::mmap_slice("a".into(), left_idx) };152let right = unsafe { IdxCa::mmap_slice("b".into(), bytemuck::cast_slice(right_idx)) };153DataFrame::new(vec![left.into_series().into(), right.into_series().into()]).unwrap()154};155156let options = SortMultipleOptions::new()157.with_order_descending(false)158.with_maintain_order(true);159160let columns = match args.maintain_order {161// If the left order is preserved then there are no unsorted right rows162// So Left and LeftRight are equal163MaintainOrderJoin::Left | MaintainOrderJoin::LeftRight => vec!["a"],164MaintainOrderJoin::Right => vec!["b"],165MaintainOrderJoin::RightLeft => vec!["b", "a"],166_ => unreachable!(),167};168169df.sort_in_place(columns, options).unwrap();170df.rechunk_mut();171172let join_tuples_left = df173.column("a")174.unwrap()175.as_materialized_series()176.idx()177.unwrap()178.cont_slice()179.unwrap();180181let join_tuples_right = df182.column("b")183.unwrap()184.as_materialized_series()185.idx()186.unwrap()187.cont_slice()188.unwrap();189190POOL.join(191|| materialize_left_join_idx_left(left, join_tuples_left, args),192|| materialize_left_join_idx_right(other, bytemuck::cast_slice(join_tuples_right), args),193)194}195196fn materialize_left_join_idx_left(197left: &DataFrame,198left_idx: &[IdxSize],199args: &JoinArgs,200) -> DataFrame {201let left_idx = if let Some((offset, len)) = args.slice {202slice_slice(left_idx, offset, len)203} else {204left_idx205};206207unsafe {208left._create_left_df_from_slice(209left_idx,210true,211args.slice.is_some(),212matches!(213args.maintain_order,214MaintainOrderJoin::Left | MaintainOrderJoin::LeftRight215) || args.how == JoinType::Left216&& !matches!(217args.maintain_order,218MaintainOrderJoin::Right | MaintainOrderJoin::RightLeft,219),220)221}222}223224fn materialize_left_join_idx_right(225right: &DataFrame,226right_idx: &[NullableIdxSize],227args: &JoinArgs,228) -> DataFrame {229let right_idx = if let Some((offset, len)) = args.slice {230slice_slice(right_idx, offset, len)231} else {232right_idx233};234unsafe { IdxCa::with_nullable_idx(right_idx, |idx| right.take_unchecked(idx)) }235}236#[cfg(feature = "chunked_ids")]237fn materialize_left_join_chunked_left(238left: &DataFrame,239left_idx: &[ChunkId],240args: &JoinArgs,241) -> DataFrame {242let left_idx = if let Some((offset, len)) = args.slice {243slice_slice(left_idx, offset, len)244} else {245left_idx246};247unsafe { left.create_left_df_chunked(left_idx, true, args.slice.is_some()) }248}249250#[cfg(feature = "chunked_ids")]251fn materialize_left_join_chunked_right(252right: &DataFrame,253right_idx: &[ChunkId],254args: &JoinArgs,255) -> DataFrame {256let right_idx = if let Some((offset, len)) = args.slice {257slice_slice(right_idx, offset, len)258} else {259right_idx260};261unsafe { right._take_opt_chunked_unchecked_hor_par(right_idx) }262}263264265