Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/functions/test_union.py
7884 views
1
import pytest
2
3
import polars as pl
4
from polars.testing import assert_frame_equal
5
6
7
def test_union_single_element() -> None:
    """A one-item union returns the very same object for frames and series."""
    frame = pl.DataFrame({"a": [1, 2, 3]})
    assert pl.union([frame]) is frame

    series = pl.Series("test", [1, 2, 3])
    assert pl.union([series]) is series
15
16
17
def test_union_group_by() -> None:
    """Union two column expressions inside a ``group_by`` aggregation."""
    frame = pl.DataFrame(
        {
            "g": [0, 0, 0, 0, 1, 1, 1, 1],
            "a": [0, 1, 2, 3, 4, 5, 6, 7],
            "b": [8, 9, 10, 11, 12, 13, 14, 15],
        }
    )
    # Per group, the values of "a" followed by the values of "b".
    expected = pl.DataFrame(
        {
            "g": [0, 1],
            "a": [[0, 1, 2, 3, 8, 9, 10, 11], [4, 5, 6, 7, 12, 13, 14, 15]],
        }
    )

    unioned = pl.union([pl.col.a, pl.col.b])
    out = frame.group_by("g").agg(unioned)

    # The order in which the groups come back is not asserted.
    assert_frame_equal(out, expected, check_row_order=False)
37
38
39
def test_union_basic() -> None:
    """Two frames with identical columns are stacked by the default union."""
    top = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    bottom = pl.DataFrame({"a": [5, 6], "b": [7, 8]})
    expected = pl.DataFrame({"a": [1, 2, 5, 6], "b": [3, 4, 7, 8]})

    # Row order is not part of the assertion.
    assert_frame_equal(pl.union([top, bottom]), expected, check_row_order=False)
47
48
49
def test_union_vertical_relaxed() -> None:
    """``vertical_relaxed`` reconciles an Int32 and a Float64 column to Float64."""
    int_schema = {"a": pl.Int32, "b": pl.Int32}
    float_schema = {"a": pl.Float64, "b": pl.Int32}

    ints = pl.DataFrame({"a": [1, 2], "b": [3, 4]}, schema=int_schema)
    floats = pl.DataFrame({"a": [5.0, 6.0], "b": [7, 8]}, schema=float_schema)

    result = pl.union([ints, floats], how="vertical_relaxed")

    # Column "a" is expected as Float64; "b" stays Int32 in both inputs.
    expected = pl.DataFrame(
        {"a": [1.0, 2.0, 5.0, 6.0], "b": [3, 4, 7, 8]},
        schema={"a": pl.Float64, "b": pl.Int32},
    )
    assert_frame_equal(result, expected, check_row_order=False)
63
64
65
def test_union_diagonal() -> None:
    """``diagonal`` keeps every column and null-fills those a frame lacks."""
    frames = [
        pl.DataFrame({"a": [1, 2], "b": [3, 4]}),
        pl.DataFrame({"a": [5, 6], "c": [7, 8]}),
        pl.DataFrame({"b": [9, 10], "c": [11, 12]}),
    ]

    result = pl.union(frames, how="diagonal")

    # Each input contributes two rows; its missing column is None there.
    expected = pl.DataFrame(
        {
            "a": [1, 2, 5, 6, None, None],
            "b": [3, 4, None, None, 9, 10],
            "c": [None, None, 7, 8, 11, 12],
        }
    )
    assert_frame_equal(result, expected, check_row_order=False)
79
80
81
def test_union_diagonal_relaxed() -> None:
    """``diagonal_relaxed`` null-fills missing columns and widens dtypes."""
    first = pl.DataFrame(
        {"a": [1, 2], "c": [10, 20]}, schema={"a": pl.Int32, "c": pl.Int64}
    )
    second = pl.DataFrame(
        {"a": [3.5, 4.5], "b": [30.1, 40.2]}, schema={"a": pl.Float64, "b": pl.Float32}
    )
    third = pl.DataFrame({"b": [5, 6], "c": [50, 60]})

    result = pl.union([first, second, third], how="diagonal_relaxed")

    # Check the resulting dtype of every column before comparing values.
    schema = result.schema
    assert schema["a"] == pl.Float64
    assert schema["b"] == pl.Float64
    assert schema["c"] == pl.Int64

    expected = pl.DataFrame(
        {
            "a": [1.0, 2.0, 3.5, 4.5, None, None],
            "c": [10, 20, None, None, 50, 60],
            "b": [None, None, 30.1, 40.2, 5.0, 6.0],
        }
    )
    assert_frame_equal(result, expected, check_row_order=False)
105
106
107
def test_union_horizontal() -> None:
    """``horizontal`` places frames side by side, null-padding shorter ones."""
    three_rows = pl.DataFrame({"a": [1, 2, 3]})
    two_rows = pl.DataFrame({"b": [4, 5]})
    four_rows = pl.DataFrame({"c": [6, 7, 8, 9]})

    result = pl.union([three_rows, two_rows, four_rows], how="horizontal")

    # Shorter columns are padded with None up to the longest input (4 rows).
    expected = pl.DataFrame(
        {"a": [1, 2, 3, None], "b": [4, 5, None, None], "c": [6, 7, 8, 9]}
    )
    assert_frame_equal(result, expected)
117
118
119
def test_union_align_no_common_columns() -> None:
    """``align`` raises when the frames share no column."""
    only_a = pl.DataFrame({"a": [1, 2]})
    only_b = pl.DataFrame({"b": [3, 4]})
    expected_error = pl.exceptions.InvalidOperationError

    with pytest.raises(expected_error, match="requires at least one common column"):
        pl.union([only_a, only_b], how="align")
127
128
129
def test_union_align_lazy_frames() -> None:
    """``align`` on lazy inputs stays lazy and lines rows up on shared ``id``."""
    left = pl.DataFrame({"id": [1, 2], "x": [3, 4]}).lazy()
    right = pl.DataFrame({"id": [2, 3], "y": [5, 6]}).lazy()

    aligned = pl.union([left, right], how="align")
    assert isinstance(aligned, pl.LazyFrame)

    # ids present in only one input get None in the other input's column.
    expected = pl.DataFrame({"id": [1, 2, 3], "x": [3, 4, None], "y": [None, 5, 6]})
    assert_frame_equal(aligned.collect(), expected, check_row_order=False)
139
140
141
def test_union_lazyframe_horizontal() -> None:
    """``horizontal`` on lazy inputs stays lazy and null-pads the shorter one."""
    short = pl.DataFrame({"a": [1, 2]}).lazy()
    long = pl.DataFrame({"b": [3, 4, 5]}).lazy()

    combined = pl.union([short, long], how="horizontal")
    assert isinstance(combined, pl.LazyFrame)

    expected = pl.DataFrame({"a": [1, 2, None], "b": [3, 4, 5]})
    assert_frame_equal(combined.collect(), expected)
151
152
153
def test_union_lazyframe_diagonal() -> None:
    """``diagonal`` on lazy inputs stays lazy and null-fills missing columns."""
    with_b = pl.DataFrame({"a": [1, 2], "b": [3, 4]}).lazy()
    with_c = pl.DataFrame({"a": [5, 6], "c": [7, 8]}).lazy()

    combined = pl.union([with_b, with_c], how="diagonal")
    assert isinstance(combined, pl.LazyFrame)

    expected = pl.DataFrame(
        {"a": [1, 2, 5, 6], "b": [3, 4, None, None], "c": [None, None, 7, 8]}
    )
    assert_frame_equal(combined.collect(), expected, check_row_order=False)
165
166
167
def test_union_series_invalid_strategy() -> None:
    """Non-vertical strategies are rejected for Series with a ValueError."""
    series_a = pl.Series("a", [1, 2, 3])
    series_b = pl.Series("b", [4, 5, 6])
    # The same message is matched for both rejected strategies.
    message = "Series only supports 'vertical' concat strategy"

    with pytest.raises(ValueError, match=message):
        pl.union([series_a, series_b], how="horizontal")

    with pytest.raises(ValueError, match=message):
        pl.union([series_a, series_b], how="diagonal")
180
181
182
def test_concat_invalid_how_parameter() -> None:
    """An unknown ``how`` value for DataFrames raises a ValueError."""
    # NOTE(review): this calls pl.concat, not pl.union, unlike most tests in
    # this file - confirm that is intentional.
    frames = [pl.DataFrame({"a": [1, 2]}), pl.DataFrame({"a": [3, 4]})]

    with pytest.raises(ValueError, match="DataFrame `how` must be one of"):
        pl.concat(frames, how="invalid_strategy")  # type: ignore[arg-type]
188
189
190
def test_concat_unsupported_type() -> None:
    """Passing plain integers raises a TypeError."""
    # NOTE(review): this calls pl.concat, not pl.union, unlike most tests in
    # this file - confirm that is intentional.
    plain_ints = [1, 2, 3]

    with pytest.raises(TypeError, match="did not expect type"):
        pl.concat(plain_ints)  # type: ignore[type-var]
193
194
195
def test_union_expressions() -> None:
    """A union of two column expressions selects as a single column."""
    source = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    combined = pl.union([pl.col("a"), pl.col("b")]).alias("unioned")

    selected = source.select(combined)

    # Default comparison: row order is checked here.
    assert_frame_equal(selected, pl.DataFrame({"unioned": [1, 2, 3, 4]}))
205
206
207
def test_union_with_empty_dataframes() -> None:
    """An empty frame with a matching schema adds nothing, in either position."""
    empty = pl.DataFrame(schema={"a": pl.Int64, "b": pl.String})
    populated = pl.DataFrame({"a": [1, 2], "b": ["x", "y"]})

    # Empty frame first.
    empty_first = pl.union([empty, populated])
    assert_frame_equal(empty_first, populated, check_row_order=False)

    # Empty frame last.
    empty_last = pl.union([populated, empty])
    assert_frame_equal(empty_last, populated, check_row_order=False)
216
217