Path: blob/main/py-polars/tests/unit/operations/test_join_right.py
8424 views
import polars as pl
from polars.testing import assert_frame_equal


def test_right_join_schemas() -> None:
    """Check column order and naming of single-key right joins.

    Covers both join directions (``a.join(b)`` and ``b.join(a)``), with and
    without key coalescing, on the eager and the lazy API.
    """
    a = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})

    b = pl.DataFrame({"a": [1, 3], "b": [1, 3], "c": [1, 3]})

    # coalesces the join key, so the key of the right table remains
    assert a.join(
        b, on="a", how="right", coalesce=True, maintain_order="right"
    ).to_dict(as_series=False) == {
        "b": [1, 3],
        "a": [1, 3],
        "b_right": [1, 3],
        "c": [1, 3],
    }
    # doesn't coalesce the join key, so all columns remain
    assert a.join(b, on="a", how="right", coalesce=False).columns == [
        "a",
        "b",
        "a_right",
        "b_right",
        "c",
    ]

    # coalesces the join key, so the key of the right table remains
    assert_frame_equal(
        b.join(a, on="a", how="right", coalesce=True),
        pl.DataFrame(
            {
                "b": [1, None, 3],
                "c": [1, None, 3],
                "a": [1, 2, 3],
                "b_right": [1, 2, 3],
            }
        ),
        # No maintain_order is requested here, so compare as unordered rows.
        check_row_order=False,
    )
    assert b.join(a, on="a", how="right", coalesce=False).columns == [
        "a",
        "b",
        "c",
        "a_right",
        "b_right",
    ]

    # The lazy schema must agree with the eager column order checked above.
    a_ = a.lazy()
    b_ = b.lazy()
    assert list(
        a_.join(b_, on="a", how="right", coalesce=True).collect_schema().keys()
    ) == ["b", "a", "b_right", "c"]
    assert list(
        a_.join(b_, on="a", how="right", coalesce=False).collect_schema().keys()
    ) == ["a", "b", "a_right", "b_right", "c"]
    assert list(
        b_.join(a_, on="a", how="right", coalesce=True).collect_schema().keys()
    ) == ["b", "c", "a", "b_right"]
    assert list(
        b_.join(a_, on="a", how="right", coalesce=False).collect_schema().keys()
    ) == ["a", "b", "c", "a_right", "b_right"]


def test_right_join_schemas_multikey() -> None:
    """Check column order and values of right joins on two key columns."""
    a = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})

    b = pl.DataFrame({"a": [1, 3], "b": [1, 3], "c": [1, 3]})
    # Without coalescing, both copies of each key column are kept.
    assert a.join(b, on=["a", "b"], how="right", coalesce=False).columns == [
        "a",
        "b",
        "c",
        "a_right",
        "b_right",
        "c_right",
    ]
    assert_frame_equal(
        a.join(b, on=["a", "b"], how="right", coalesce=True),
        pl.DataFrame({"c": [1, 3], "a": [1, 3], "b": [1, 3], "c_right": [1, 3]}),
        check_row_order=False,
    )
    # Swapped direction: the unmatched right row (a=2, b=2) gets a null "c".
    assert_frame_equal(
        b.join(a, on=["a", "b"], how="right", coalesce=True),
        pl.DataFrame(
            {"c": [1, None, 3], "a": [1, 2, 3], "b": [1, 2, 3], "c_right": [1, 2, 3]}
        ),
        check_row_order=False,
    )


def test_join_right_different_key() -> None:
    """Check a right join whose key columns have different names per side.

    The expected result keeps the right key (``ham2``) and drops the left key
    (``ham1``); the unmatched right row gets nulls in the left columns.
    """
    df = pl.DataFrame(
        {
            "foo": [1, 2, 3],
            "bar": [6.0, 7.0, 8.0],
            "ham1": ["a", "b", "c"],
        }
    )
    other_df = pl.DataFrame(
        {
            "apple": ["x", "y", "z"],
            "ham2": ["a", "b", "d"],
        }
    )
    assert df.join(
        other_df, left_on="ham1", right_on="ham2", how="right", maintain_order="right"
    ).to_dict(as_series=False) == {
        "foo": [1, 2, None],
        "bar": [6.0, 7.0, None],
        "apple": ["x", "y", "z"],
        "ham2": ["a", "b", "d"],
    }


def test_join_right_different_multikey() -> None:
    """Check a lazy right join on two differently named key pairs.

    Every left column is a join key, so only the right columns are expected;
    the lazily resolved schema must match the collected frame's schema.
    """
    left = pl.LazyFrame({"a": [1, 2], "b": [1, 2]})
    right = pl.LazyFrame({"c": [1, 2], "d": [1, 2]})
    result = left.join(right, left_on=["a", "b"], right_on=["c", "d"], how="right")
    expected = pl.DataFrame({"c": [1, 2], "d": [1, 2]})
    assert_frame_equal(result.collect(), expected, check_row_order=False)
    assert result.collect_schema() == expected.schema


def test_join_right_partial_rechunk_25971() -> None:
    """Check a right join against a concatenated and aggregated right table.

    NOTE(review): the name suggests a regression test for issue 25971 about
    partial rechunking -- confirm against the issue tracker.
    """
    lhs = pl.DataFrame({"x": [0, 0]})
    # Two stacked copies of 0..1999; presumably this leaves the frame in
    # more than one chunk -- TODO confirm.
    rhs = pl.concat([pl.select(x=pl.int_range(2000))] * 2)
    # "y" is 0 only on the row with index 1 and null everywhere else.
    rhs = rhs.with_columns(y=pl.when(pl.row_index() == 1).then(pl.lit(0, pl.Int64)))
    rhs = rhs.group_by("x").min()
    out = lhs.join(rhs, left_on="x", right_on="y", how="right")
    # Only x == 1 has y == 0; it matches both lhs rows and so appears twice.
    # Every other right row is unmatched and appears once with a null "y".
    ret = pl.DataFrame(
        {
            "x": [0, 1, 1] + list(range(2, 2000)),
            "y": [None, 0, 0] + [None] * 1998,
        }
    )
    assert_frame_equal(out, ret, check_row_order=False)