Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/functions/test_concat.py
6939 views
1
import pytest
2
3
import polars as pl
4
from polars.testing import assert_frame_equal
5
6
7
@pytest.mark.may_fail_cloud  # reason: @serialize-stack-overflow
@pytest.mark.slow
def test_concat_expressions_stack_overflow() -> None:
    """Concatenate a large number of literal expressions and select them.

    The test name indicates this guards against a stack overflow; the only
    explicit check is that the result has one row per literal in one column.
    """
    literal_count = 10000
    literals = [pl.lit(value) for value in range(literal_count)]
    combined = pl.concat(literals)

    result = pl.select(combined)
    assert result.shape == (literal_count, 1)
15
16
17
@pytest.mark.may_fail_cloud  # reason: @serialize-stack-overflow
@pytest.mark.slow
def test_concat_lf_stack_overflow() -> None:
    """Build a deeply nested chain of lazy concats and collect it.

    Each iteration wraps the previous LazyFrame in a new ``pl.concat`` together
    with a fresh one-row frame, so the plan nests ``n`` levels deep. The test
    name indicates this guards against a stack overflow; the explicit check is
    the shape of the collected result.
    """
    n = 1000
    lf = pl.DataFrame({"a": 0}).lazy()

    for i in range(n):
        lf = pl.concat([lf, pl.DataFrame({"a": i}).lazy()])

    # One seed row plus one row per iteration; derived from n so the
    # assertion stays correct if the depth is ever tuned.
    assert lf.collect().shape == (n + 1, 1)
26
27
28
def test_concat_vertically_relaxed() -> None:
    """Check ``how="vertical_relaxed"`` for mismatched column dtypes.

    Two pairs of frames are stacked in both orders; in every case the result
    schema and the row values (in input order) are asserted.
    """
    relaxed = "vertical_relaxed"

    # Pair 1: Int8/Boolean columns against Int16/Int64 columns.
    small_ints = pl.DataFrame(
        data={"a": [1, 2, 3], "b": [True, False, None]},
        schema={"a": pl.Int8, "b": pl.Boolean},
    )
    large_ints = pl.DataFrame(
        data={"a": [43, 2, 3], "b": [32, 1, None]},
        schema={"a": pl.Int16, "b": pl.Int64},
    )
    expected_int_schema = {"a": pl.Int16, "b": pl.Int64}

    stacked = pl.concat([small_ints, large_ints], how=relaxed)
    assert stacked.schema == expected_int_schema
    # Booleans show up as 1/0 in the Int64 result column.
    assert stacked.to_dict(as_series=False) == {
        "a": [1, 2, 3, 43, 2, 3],
        "b": [1, 0, None, 32, 1, None],
    }

    stacked = pl.concat([large_ints, small_ints], how=relaxed)
    assert stacked.schema == expected_int_schema
    assert stacked.to_dict(as_series=False) == {
        "a": [43, 2, 3, 1, 2, 3],
        "b": [32, 1, None, 1, 0, None],
    }

    # Pair 2: integer-valued columns against float-valued columns.
    int_frame = pl.DataFrame({"a": [1, 2], "b": [2, 1]})
    float_frame = pl.DataFrame({"a": [1.0, 0.2], "b": [None, 0.1]})
    expected_float_schema = {"a": pl.Float64, "b": pl.Float64}

    stacked = pl.concat([int_frame, float_frame], how=relaxed)
    assert stacked.schema == expected_float_schema
    assert stacked.to_dict(as_series=False) == {
        "a": [1.0, 2.0, 1.0, 0.2],
        "b": [2.0, 1.0, None, 0.1],
    }

    stacked = pl.concat([float_frame, int_frame], how=relaxed)
    assert stacked.schema == expected_float_schema
    assert stacked.to_dict(as_series=False) == {
        "a": [1.0, 0.2, 1.0, 2.0],
        "b": [None, 0.1, 2.0, 1.0],
    }
65
66
67
def test_concat_group_by() -> None:
    """Check ``pl.concat`` of two column expressions inside ``group_by().agg()``.

    For each group the aggregated list is expected to hold the group's ``a``
    values followed by its ``b`` values, under the name ``a``.
    """
    frame = pl.DataFrame(
        {
            "g": [0, 0, 0, 0, 1, 1, 1, 1],
            "a": [0, 1, 2, 3, 4, 5, 6, 7],
            "b": [8, 9, 10, 11, 12, 13, 14, 15],
        }
    )
    a_then_b = pl.concat([pl.col.a, pl.col.b])
    aggregated = frame.group_by("g").agg(a_then_b)

    expected = pl.DataFrame(
        {
            "g": [0, 1],
            "a": [[0, 1, 2, 3, 8, 9, 10, 11], [4, 5, 6, 7, 12, 13, 14, 15]],
        }
    )
    # The order in which the groups come back is not part of the comparison.
    assert_frame_equal(aggregated, expected, check_row_order=False)
87
88
89
def test_concat_19877() -> None:
    """Check that selecting ``pl.concat`` of columns ``a`` and ``b`` stacks them.

    The expected result is a single column named ``a`` holding the values of
    ``a`` followed by the values of ``b``.
    """
    frame = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    stacked = pl.concat([pl.col("a"), pl.col("b")])
    result = frame.select(stacked)

    expected = pl.DataFrame({"a": [1, 2, 3, 4]})
    assert_frame_equal(result, expected)
93
94
95
def test_concat_zip_series_21980() -> None:
    """Check selecting a concat expression next to a literal Series.

    The concat of the one-row columns ``x`` and ``y`` is expected to line up
    with the two-element unnamed Series, giving a two-row frame whose second
    column has the empty string as its name.
    """
    frame = pl.DataFrame({"x": 1, "y": 2})
    x_then_y = pl.concat([pl.col.x, pl.col.y])
    unnamed_series = pl.Series([3, 4])
    result = frame.select(x_then_y, unnamed_series)

    expected = pl.DataFrame({"x": [1, 2], "": [3, 4]})
    assert_frame_equal(result, expected)
99
100