# Path: blob/main/py-polars/tests/unit/operations/test_concat.py
# 6939 views
import io
from typing import IO

import pytest

import polars as pl
from polars.testing import assert_frame_equal


def test_concat_invalid_schema_err_20355() -> None:
    """Collecting a concat of lazy frames with differing columns must raise.

    NOTE(review): the numeric suffix presumably refers to an upstream issue
    number -- confirm against the tracker.
    """
    lf1 = pl.LazyFrame({"x": [1], "y": [None]})
    lf2 = pl.LazyFrame({"y": [1]})
    with pytest.raises(pl.exceptions.InvalidOperationError):
        pl.concat([lf1, lf2]).collect(engine="streaming")


def test_concat_df() -> None:
    """Exercise eager ``pl.concat`` on DataFrames.

    Covers the resulting shape and rows, chunk counts with and without
    ``rechunk``, a generator as input, non-mutation of the inputs, and the
    ``ValueError`` raised for an empty input list or an unknown ``how``.
    """
    df1 = pl.DataFrame({"a": [2, 1, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
    df2 = pl.concat([df1, df1], rechunk=True)

    assert df2.shape == (6, 3)
    assert df2.n_chunks() == 1
    assert df2.rows() == df1.rows() + df1.rows()
    assert pl.concat([df1, df1], rechunk=False).n_chunks() == 2

    # concat from generator of frames
    df3 = pl.concat(items=(df1 for _ in range(2)))
    assert_frame_equal(df2, df3)

    # check that df4 is not modified following concat of itself
    df4 = pl.from_records(((1, 2), (1, 2)))
    _ = pl.concat([df4, df4, df4])

    assert df4.shape == (2, 2)
    assert df4.rows() == [(1, 1), (2, 2)]

    # misc error conditions
    with pytest.raises(ValueError):
        _ = pl.concat([])

    with pytest.raises(ValueError):
        pl.concat([df1, df1], how="rubbish")  # type: ignore[arg-type]


def test_concat_to_empty() -> None:
    """Concatenating an empty frame with a populated one keeps the data."""
    assert pl.concat([pl.DataFrame([]), pl.DataFrame({"a": [1]})]).to_dict(
        as_series=False
    ) == {"a": [1]}


def test_concat_multiple_parquet_inmem() -> None:
    """Reading several in-memory parquet sources equals concatenating them.

    The same two frames are read back from file-like objects and from raw
    bytes, each with and without ``use_pyarrow=True``.
    """
    f = io.BytesIO()
    g = io.BytesIO()

    df1 = pl.DataFrame(
        {
            "a": [1, 2, 3],
            "b": ["xyz", "abc", "wow"],
        }
    )
    df2 = pl.DataFrame(
        {
            "a": [5, 6, 7],
            "b": ["a", "few", "entries"],
        }
    )

    dfs = pl.concat([df1, df2])

    df1.write_parquet(f)
    df2.write_parquet(g)

    # Rewind both buffers before every read: writing and reading each
    # advance the stream position.
    f.seek(0)
    g.seek(0)

    items: list[IO[bytes]] = [f, g]
    assert_frame_equal(pl.read_parquet(items), dfs)

    f.seek(0)
    g.seek(0)

    assert_frame_equal(pl.read_parquet(items, use_pyarrow=True), dfs)

    f.seek(0)
    g.seek(0)

    # Raw bytes have no stream position, so no rewinding is needed below.
    fb = f.read()
    gb = g.read()

    assert_frame_equal(pl.read_parquet([fb, gb]), dfs)
    assert_frame_equal(pl.read_parquet([fb, gb], use_pyarrow=True), dfs)


def test_concat_series() -> None:
    """Concatenating a Series with itself doubles its length, input intact."""
    s = pl.Series("a", [2, 1, 3])

    assert pl.concat([s, s]).len() == 6
    # check if s remains unchanged
    assert s.len() == 3


def test_concat_null_20501() -> None:
    """Lazy concat of a string column with an all-``None`` column.

    NOTE(review): the numeric suffix presumably refers to an upstream issue
    number -- confirm against the tracker.
    """
    a = pl.DataFrame({"id": [1], "value": ["foo"]})
    b = pl.DataFrame({"id": [2], "value": [None]})

    assert pl.concat([a.lazy(), b.lazy()]).collect().to_dict(as_series=False) == {
        "id": [1, 2],
        "value": ["foo", None],
    }