# Path: blob/main/py-polars/tests/unit/functions/test_union.py
# (web-viewer metadata: 7884 views)
import pytest

import polars as pl
from polars.testing import assert_frame_equal


def test_union_single_element() -> None:
    """A one-element union hands back the very same object (frame or series)."""
    df = pl.DataFrame({"a": [1, 2, 3]})
    result = pl.union([df])
    assert result is df

    s = pl.Series("test", [1, 2, 3])
    result_s = pl.union([s])
    assert result_s is s


def test_union_group_by() -> None:
    """Union of two column expressions inside a ``group_by(...).agg(...)``."""
    df = pl.DataFrame(
        {
            "g": [0, 0, 0, 0, 1, 1, 1, 1],
            "a": [0, 1, 2, 3, 4, 5, 6, 7],
            "b": [8, 9, 10, 11, 12, 13, 14, 15],
        }
    )
    out = df.group_by("g").agg(pl.union([pl.col.a, pl.col.b]))

    # Group output order is not pinned, hence check_row_order=False.
    assert_frame_equal(
        out,
        pl.DataFrame(
            {
                "g": [0, 1],
                "a": [[0, 1, 2, 3, 8, 9, 10, 11], [4, 5, 6, 7, 12, 13, 14, 15]],
            }
        ),
        check_row_order=False,
    )


def test_union_basic() -> None:
    """Default union of two frames with identical schemas stacks their rows."""
    df1 = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    df2 = pl.DataFrame({"a": [5, 6], "b": [7, 8]})

    result = pl.union([df1, df2])
    expected = pl.DataFrame({"a": [1, 2, 5, 6], "b": [3, 4, 7, 8]})

    assert_frame_equal(result, expected, check_row_order=False)


def test_union_vertical_relaxed() -> None:
    """``how="vertical_relaxed"`` yields Float64 for a mixed Int32/Float64 column."""
    df1 = pl.DataFrame(
        {"a": [1, 2], "b": [3, 4]}, schema={"a": pl.Int32, "b": pl.Int32}
    )
    df2 = pl.DataFrame(
        {"a": [5.0, 6.0], "b": [7, 8]}, schema={"a": pl.Float64, "b": pl.Int32}
    )

    result = pl.union([df1, df2], how="vertical_relaxed")
    expected = pl.DataFrame(
        {"a": [1.0, 2.0, 5.0, 6.0], "b": [3, 4, 7, 8]},
        schema={"a": pl.Float64, "b": pl.Int32},
    )
    assert_frame_equal(result, expected, check_row_order=False)


def test_union_diagonal() -> None:
    """``how="diagonal"`` fills columns missing from an input with nulls."""
    df1 = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    df2 = pl.DataFrame({"a": [5, 6], "c": [7, 8]})
    df3 = pl.DataFrame({"b": [9, 10], "c": [11, 12]})

    result = pl.union([df1, df2, df3], how="diagonal")
    expected = pl.DataFrame(
        {
            "a": [1, 2, 5, 6, None, None],
            "b": [3, 4, None, None, 9, 10],
            "c": [None, None, 7, 8, 11, 12],
        }
    )
    assert_frame_equal(result, expected, check_row_order=False)


def test_union_diagonal_relaxed() -> None:
    """``how="diagonal_relaxed"`` null-fills missing columns and widens dtypes."""
    df1 = pl.DataFrame(
        {"a": [1, 2], "c": [10, 20]}, schema={"a": pl.Int32, "c": pl.Int64}
    )
    df2 = pl.DataFrame(
        {"a": [3.5, 4.5], "b": [30.1, 40.2]}, schema={"a": pl.Float64, "b": pl.Float32}
    )
    df3 = pl.DataFrame({"b": [5, 6], "c": [50, 60]})

    result = pl.union([df1, df2, df3], how="diagonal_relaxed")

    # Resulting dtype of each column after combining the three inputs.
    assert result.schema["a"] == pl.Float64
    assert result.schema["b"] == pl.Float64
    assert result.schema["c"] == pl.Int64

    expected = pl.DataFrame(
        {
            "a": [1.0, 2.0, 3.5, 4.5, None, None],
            "c": [10, 20, None, None, 50, 60],
            "b": [None, None, 30.1, 40.2, 5.0, 6.0],
        }
    )

    assert_frame_equal(result, expected, check_row_order=False)


def test_union_horizontal() -> None:
    """``how="horizontal"`` pads shorter frames with nulls to the longest height."""
    df1 = pl.DataFrame({"a": [1, 2, 3]})
    df2 = pl.DataFrame({"b": [4, 5]})
    df3 = pl.DataFrame({"c": [6, 7, 8, 9]})

    result = pl.union([df1, df2, df3], how="horizontal")
    expected = pl.DataFrame(
        {"a": [1, 2, 3, None], "b": [4, 5, None, None], "c": [6, 7, 8, 9]}
    )
    assert_frame_equal(result, expected)


def test_union_align_no_common_columns() -> None:
    """``how="align"`` raises when the inputs share no column."""
    df1 = pl.DataFrame({"a": [1, 2]})
    df2 = pl.DataFrame({"b": [3, 4]})

    with pytest.raises(
        pl.exceptions.InvalidOperationError, match="requires at least one common column"
    ):
        pl.union([df1, df2], how="align")


def test_union_align_lazy_frames() -> None:
    """``how="align"`` on LazyFrames stays lazy and aligns rows on the shared column."""
    lf1 = pl.DataFrame({"id": [1, 2], "x": [3, 4]}).lazy()
    lf2 = pl.DataFrame({"id": [2, 3], "y": [5, 6]}).lazy()

    result = pl.union([lf1, lf2], how="align")
    assert isinstance(result, pl.LazyFrame)

    collected = result.collect()
    expected = pl.DataFrame({"id": [1, 2, 3], "x": [3, 4, None], "y": [None, 5, 6]})
    assert_frame_equal(collected, expected, check_row_order=False)


def test_union_lazyframe_horizontal() -> None:
    """``how="horizontal"`` on LazyFrames stays lazy and null-pads the shorter input."""
    lf1 = pl.DataFrame({"a": [1, 2]}).lazy()
    lf2 = pl.DataFrame({"b": [3, 4, 5]}).lazy()

    result = pl.union([lf1, lf2], how="horizontal")
    assert isinstance(result, pl.LazyFrame)

    collected = result.collect()
    expected = pl.DataFrame({"a": [1, 2, None], "b": [3, 4, 5]})
    assert_frame_equal(collected, expected)


def test_union_lazyframe_diagonal() -> None:
    """``how="diagonal"`` on LazyFrames stays lazy and null-fills missing columns."""
    lf1 = pl.DataFrame({"a": [1, 2], "b": [3, 4]}).lazy()
    lf2 = pl.DataFrame({"a": [5, 6], "c": [7, 8]}).lazy()

    result = pl.union([lf1, lf2], how="diagonal")
    assert isinstance(result, pl.LazyFrame)

    collected = result.collect()
    expected = pl.DataFrame(
        {"a": [1, 2, 5, 6], "b": [3, 4, None, None], "c": [None, None, 7, 8]}
    )
    assert_frame_equal(collected, expected, check_row_order=False)


def test_union_series_invalid_strategy() -> None:
    """Series inputs reject the ``horizontal`` and ``diagonal`` strategies."""
    s1 = pl.Series("a", [1, 2, 3])
    s2 = pl.Series("b", [4, 5, 6])

    with pytest.raises(
        ValueError, match="Series only supports 'vertical' concat strategy"
    ):
        pl.union([s1, s2], how="horizontal")

    with pytest.raises(
        ValueError, match="Series only supports 'vertical' concat strategy"
    ):
        pl.union([s1, s2], how="diagonal")


def test_concat_invalid_how_parameter() -> None:
    """An unknown ``how`` value passed to ``pl.concat`` raises ``ValueError``."""
    # NOTE(review): this exercises pl.concat, not pl.union, although it lives in
    # the union test module -- confirm that is intentional.
    df1 = pl.DataFrame({"a": [1, 2]})
    df2 = pl.DataFrame({"a": [3, 4]})

    with pytest.raises(ValueError, match="DataFrame `how` must be one of"):
        pl.concat([df1, df2], how="invalid_strategy")  # type: ignore[arg-type]


def test_concat_unsupported_type() -> None:
    """``pl.concat`` on plain integers raises ``TypeError``."""
    # NOTE(review): this exercises pl.concat, not pl.union -- confirm intent.
    with pytest.raises(TypeError, match="did not expect type"):
        pl.concat([1, 2, 3])  # type: ignore[type-var]


def test_union_expressions() -> None:
    """Union of two column expressions produces one combined column."""
    expr1 = pl.col("a")
    expr2 = pl.col("b")
    union_expr = pl.union([expr1, expr2])

    df_input = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    result = df_input.select(union_expr.alias("unioned"))

    expected = pl.DataFrame({"unioned": [1, 2, 3, 4]})
    assert_frame_equal(result, expected)


def test_union_with_empty_dataframes() -> None:
    """An empty frame with a matching schema adds no rows, in either position."""
    empty_df = pl.DataFrame(schema={"a": pl.Int64, "b": pl.String})
    df_with_data = pl.DataFrame({"a": [1, 2], "b": ["x", "y"]})

    result = pl.union([empty_df, df_with_data])
    assert_frame_equal(result, df_with_data, check_row_order=False)

    result2 = pl.union([df_with_data, empty_df])
    assert_frame_equal(result2, df_with_data, check_row_order=False)