Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/test_concat.py
6939 views
1
import io
2
from typing import IO
3
4
import pytest
5
6
import polars as pl
7
from polars.testing import assert_frame_equal
8
9
10
def test_concat_invalid_schema_err_20355() -> None:
    """Collecting a concat of lazy frames with mismatched columns must raise.

    One input has columns ``x`` and ``y``; the other has only ``y``. The
    concatenation is collected with the streaming engine and is expected to
    fail with ``InvalidOperationError``.
    (The ``20355`` suffix presumably refers to an issue number -- unverified.)
    """
    wide = pl.LazyFrame({"x": [1], "y": [None]})
    narrow = pl.LazyFrame({"y": [1]})
    mismatched = [wide, narrow]

    # Build and collect inside the context manager so the expected error is
    # caught regardless of which of the two calls raises it.
    with pytest.raises(pl.exceptions.InvalidOperationError):
        pl.concat(mismatched).collect(engine="streaming")
15
16
17
def test_concat_df() -> None:
    """Exercise ``pl.concat`` on DataFrames.

    Covers rechunking, generator input, non-mutation of the inputs, and the
    ``ValueError`` raised for an empty input list or an unknown ``how``.
    """
    base = pl.DataFrame({"a": [2, 1, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
    base_rows = base.rows()

    # With rechunk=True the six rows end up in a single chunk.
    rechunked = pl.concat([base, base], rechunk=True)
    assert rechunked.shape == (6, 3)
    assert rechunked.n_chunks() == 1
    assert rechunked.rows() == base_rows + base_rows

    # Without rechunking, each input stays its own chunk.
    not_rechunked = pl.concat([base, base], rechunk=False)
    assert not_rechunked.n_chunks() == 2

    # A generator of frames is accepted as well as a list.
    frame_gen = (base for _ in range(2))
    from_generator = pl.concat(items=frame_gen)
    assert_frame_equal(rechunked, from_generator)

    # Concatenating a frame with itself must leave that frame untouched.
    small = pl.from_records(((1, 2), (1, 2)))
    pl.concat([small, small, small])
    assert small.shape == (2, 2)
    assert small.rows() == [(1, 1), (2, 2)]

    # Error conditions: nothing to concatenate, and an unknown strategy.
    with pytest.raises(ValueError):
        pl.concat([])

    with pytest.raises(ValueError):
        pl.concat([base, base], how="rubbish")  # type: ignore[arg-type]
43
44
45
def test_concat_to_empty() -> None:
    """Concatenating an empty DataFrame with a populated one keeps the data."""
    empty = pl.DataFrame([])
    populated = pl.DataFrame({"a": [1]})

    merged = pl.concat([empty, populated])

    assert merged.to_dict(as_series=False) == {"a": [1]}
49
50
51
def test_concat_multiple_parquet_inmem() -> None:
    """Read several in-memory parquet sources at once and compare to a concat.

    Two frames are written to separate ``BytesIO`` buffers. Reading both
    buffers (and then their raw bytes) through ``pl.read_parquet`` -- with and
    without ``use_pyarrow=True`` -- must equal ``pl.concat`` of the two frames.
    """
    first = pl.DataFrame(
        {
            "a": [1, 2, 3],
            "b": ["xyz", "abc", "wow"],
        }
    )
    second = pl.DataFrame(
        {
            "a": [5, 6, 7],
            "b": ["a", "few", "entries"],
        }
    )
    expected = pl.concat([first, second])

    first_buf = io.BytesIO()
    second_buf = io.BytesIO()
    buffers: list[IO[bytes]] = [first_buf, second_buf]

    def rewind() -> None:
        # Writing and reading both move the stream position, so every
        # consumer needs the buffers reset to the start first.
        for buffer in buffers:
            buffer.seek(0)

    first.write_parquet(first_buf)
    second.write_parquet(second_buf)

    # File-like sources, default reader.
    rewind()
    assert_frame_equal(pl.read_parquet(buffers), expected)

    # File-like sources, pyarrow reader.
    rewind()
    assert_frame_equal(pl.read_parquet(buffers, use_pyarrow=True), expected)

    # Raw bytes sources, both readers.
    rewind()
    payloads = [buffer.read() for buffer in buffers]

    assert_frame_equal(pl.read_parquet(payloads), expected)
    assert_frame_equal(pl.read_parquet(payloads, use_pyarrow=True), expected)
92
93
94
def test_concat_series() -> None:
    """Concatenating a Series with itself doubles the length, input untouched."""
    values = pl.Series("a", [2, 1, 3])

    doubled = pl.concat([values, values])
    assert doubled.len() == 6

    # The input series must still have its original length afterwards.
    assert values.len() == 3
100
101
102
def test_concat_null_20501() -> None:
    """Lazy concat where one frame's ``value`` column holds only ``None``.

    The collected result must contain both rows, with the string and the null
    in the ``value`` column.
    (The ``20501`` suffix presumably refers to an issue number -- unverified.)
    """
    with_string = pl.DataFrame({"id": [1], "value": ["foo"]})
    with_null = pl.DataFrame({"id": [2], "value": [None]})

    lazy_inputs = [with_string.lazy(), with_null.lazy()]
    collected = pl.concat(lazy_inputs).collect()

    expected = {
        "id": [1, 2],
        "value": ["foo", None],
    }
    assert collected.to_dict(as_series=False) == expected
110
111