Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/test_join_right.py
6939 views
1
import polars as pl
2
from polars.testing import assert_frame_equal
3
4
5
def test_right_join_schemas() -> None:
    """Check output columns of single-key right joins.

    Covers eager and lazy frames, coalesced and non-coalesced join keys,
    and both orderings of the two input frames.
    """
    a = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    b = pl.DataFrame({"a": [1, 3], "b": [1, 3], "c": [1, 3]})

    # With coalesce=True a single "a" column is present in the output.
    coalesced_ab = a.join(
        b, on="a", how="right", coalesce=True, maintain_order="right"
    )
    assert coalesced_ab.to_dict(as_series=False) == {
        "b": [1, 3],
        "a": [1, 3],
        "b_right": [1, 3],
        "c": [1, 3],
    }

    # With coalesce=False both key columns are kept ("a" and "a_right").
    uncoalesced_ab = a.join(b, on="a", how="right", coalesce=False)
    assert uncoalesced_ab.columns == ["a", "b", "a_right", "b_right", "c"]

    # Same checks with the frames swapped: `a` is now the right-hand side,
    # so its unmatched key (2) shows up with nulls in the columns from `b`.
    coalesced_ba = b.join(a, on="a", how="right", coalesce=True)
    expected_ba = pl.DataFrame(
        {
            "b": [1, None, 3],
            "c": [1, None, 3],
            "a": [1, 2, 3],
            "b_right": [1, 2, 3],
        }
    )
    assert_frame_equal(coalesced_ba, expected_ba, check_row_order=False)

    uncoalesced_ba = b.join(a, on="a", how="right", coalesce=False)
    assert uncoalesced_ba.columns == ["a", "b", "c", "a_right", "b_right"]

    # The lazy schema must report the same column order as the eager joins.
    lazy_a = a.lazy()
    lazy_b = b.lazy()
    lazy_cases = [
        (lazy_a, lazy_b, True, ["b", "a", "b_right", "c"]),
        (lazy_a, lazy_b, False, ["a", "b", "a_right", "b_right", "c"]),
        (lazy_b, lazy_a, True, ["b", "c", "a", "b_right"]),
        (lazy_b, lazy_a, False, ["a", "b", "c", "a_right", "b_right"]),
    ]
    for lhs, rhs, coalesce, expected_names in lazy_cases:
        joined = lhs.join(rhs, on="a", how="right", coalesce=coalesce)
        assert list(joined.collect_schema().keys()) == expected_names
63
64
65
def test_right_join_schemas_multikey() -> None:
    """Check output columns and values of right joins on two key columns."""
    a = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
    b = pl.DataFrame({"a": [1, 3], "b": [1, 3], "c": [1, 3]})
    keys = ["a", "b"]

    # Without coalescing, every column of both frames is present.
    uncoalesced = a.join(b, on=keys, how="right", coalesce=False)
    assert uncoalesced.columns == [
        "a",
        "b",
        "c",
        "a_right",
        "b_right",
        "c_right",
    ]

    # Coalesced, with `b` as the right-hand frame.
    result_ab = a.join(b, on=keys, how="right", coalesce=True)
    expected_ab = pl.DataFrame(
        {"c": [1, 3], "a": [1, 3], "b": [1, 3], "c_right": [1, 3]}
    )
    assert_frame_equal(result_ab, expected_ab, check_row_order=False)

    # Coalesced, with `a` as the right-hand frame: the key pair (2, 2) has
    # no match in `b`, so the "c" column coming from `b` is null there.
    result_ba = b.join(a, on=keys, how="right", coalesce=True)
    expected_ba = pl.DataFrame(
        {"c": [1, None, 3], "a": [1, 2, 3], "b": [1, 2, 3], "c_right": [1, 2, 3]}
    )
    assert_frame_equal(result_ba, expected_ba, check_row_order=False)
89
90
91
def test_join_right_different_key() -> None:
    """Check a right join whose key column is named differently per frame."""
    df = pl.DataFrame(
        {
            "foo": [1, 2, 3],
            "bar": [6.0, 7.0, 8.0],
            "ham1": ["a", "b", "c"],
        }
    )
    other_df = pl.DataFrame(
        {
            "apple": ["x", "y", "z"],
            "ham2": ["a", "b", "d"],
        }
    )

    joined = df.join(
        other_df,
        left_on="ham1",
        right_on="ham2",
        how="right",
        maintain_order="right",
    )

    # Only "ham2" appears as the key column; the right row with key "d"
    # has no counterpart in `df`, so "foo" and "bar" are None there.
    expected = {
        "foo": [1, 2, None],
        "bar": [6.0, 7.0, None],
        "apple": ["x", "y", "z"],
        "ham2": ["a", "b", "d"],
    }
    assert joined.to_dict(as_series=False) == expected
113
114
115
def test_join_right_different_multikey() -> None:
    """Check a lazy right join on two differently named key columns.

    Every column of the left frame is a join key here, so the expected
    output holds only the right frame's columns.
    """
    lf_left = pl.LazyFrame({"a": [1, 2], "b": [1, 2]})
    lf_right = pl.LazyFrame({"c": [1, 2], "d": [1, 2]})

    result = lf_left.join(
        lf_right,
        left_on=["a", "b"],
        right_on=["c", "d"],
        how="right",
    )
    expected = pl.DataFrame({"c": [1, 2], "d": [1, 2]})

    assert_frame_equal(result.collect(), expected, check_row_order=False)
    # The schema resolved on the lazy plan must match the expected frame too.
    assert result.collect_schema() == expected.schema
122
123