# Path: blob/main/py-polars/tests/unit/operations/test_empty.py
# 8412 views
import pytest

import polars as pl
from polars.testing import assert_frame_equal, assert_series_equal


def test_empty_str_concat_lit() -> None:
    """Literal + column string concat on a zero-row frame keeps String dtypes."""
    df = pl.DataFrame({"a": [], "b": []}, schema=[("a", pl.String), ("b", pl.String)])
    assert df.with_columns(pl.lit("asd") + pl.col("a")).schema == {
        "a": pl.String,
        "b": pl.String,
        "literal": pl.String,
    }


def test_empty_cross_join() -> None:
    """Cross-joining two schema-only LazyFrames yields the combined schema."""
    a = pl.LazyFrame(schema={"a": pl.Int32})
    b = pl.LazyFrame(schema={"b": pl.Int32})

    assert (a.join(b, how="cross").collect()).schema == {"a": pl.Int32, "b": pl.Int32}


def test_empty_string_replace() -> None:
    """`str.replace` on an empty String series returns an equal empty series.

    Covers single- and multi-character patterns, with and without
    `literal=True`.
    """
    s = pl.Series("", [], dtype=pl.String)
    assert_series_equal(s.str.replace("a", "b", literal=True), s)
    assert_series_equal(s.str.replace("a", "b"), s)
    assert_series_equal(s.str.replace("ab", "b", literal=True), s)
    assert_series_equal(s.str.replace("ab", "b"), s)


def test_empty_window_function() -> None:
    """A window expression over a schema-only frame evaluates to an empty column."""
    expr = (pl.col("VAL") / pl.col("VAL").sum()).over("KEY")

    df = pl.DataFrame(schema={"KEY": pl.String, "VAL": pl.Float64})
    # Eager path: executed only as a smoke check, nothing is asserted on the
    # result. The original note here read "ComputeError" -- presumably the
    # error this call used to raise; TODO confirm against the issue tracker.
    df.select(expr)

    lf = pl.DataFrame(schema={"KEY": pl.String, "VAL": pl.Float64}).lazy()
    expected = pl.DataFrame(schema={"VAL": pl.Float64})
    assert_frame_equal(lf.select(expr).collect(), expected)


def test_empty_count_window() -> None:
    """`count().over(...)` on a zero-row frame gives an empty index-typed column."""
    df = pl.DataFrame(
        {"ID": [], "DESC": [], "dataset": []},
        schema={"ID": pl.String, "DESC": pl.String, "dataset": pl.String},
    )

    out = df.select(pl.col("ID").count().over(["ID", "DESC"]))
    assert out.schema == {"ID": pl.get_index_type()}
    assert out.height == 0


def test_empty_sort_by_args() -> None:
    """Sorting by an empty list of keys leaves the frame unchanged."""
    df = pl.DataFrame({"x": [2, 1, 3]})
    assert_frame_equal(df, df.select(pl.col.x.sort_by([])))
    assert_frame_equal(df, df.sort([]))


def test_empty_9137() -> None:
    """Group-by aggregation on a zero-row Float32 frame keeps shape and dtypes."""
    out = (
        pl.DataFrame(
            {"id": [], "value": []},
            schema={"id": pl.Float32, "value": pl.Float32},
        )
        .group_by("id")
        .agg(pl.col("value").pow(2).mean())
    )
    assert out.shape == (0, 2)
    assert out.dtypes == [pl.Float32, pl.Float32]


@pytest.mark.parametrize("dtype", [pl.String, pl.Binary, pl.UInt32])
@pytest.mark.parametrize(
    "set_operation",
    ["set_intersection", "set_union", "set_difference", "set_symmetric_difference"],
)
def test_empty_df_set_operations(set_operation: str, dtype: pl.DataType) -> None:
    """Each list set operation on a zero-row frame returns an empty frame."""
    # Look the method up by name so one test body covers all four operations.
    expr = getattr(pl.col("list1").list, set_operation)(pl.col("list2"))
    df = pl.DataFrame([], {"list1": pl.List(dtype), "list2": pl.List(dtype)})
    assert df.select(expr).is_empty()


def test_empty_set_intersection() -> None:
    """Intersecting with an empty list yields an empty list, in either order."""
    full = pl.Series("full", [[1, 2, 3]], pl.List(pl.UInt32))
    empty = pl.Series("empty", [[]], pl.List(pl.UInt32))

    # The result carries the left operand's name, hence the rename.
    assert_series_equal(empty.rename("full"), full.list.set_intersection(empty))
    assert_series_equal(empty, empty.list.set_intersection(full))


def test_empty_set_difference() -> None:
    """`full - empty` is `full`; `empty - full` is `empty`."""
    full = pl.Series("full", [[1, 2, 3]], pl.List(pl.UInt32))
    empty = pl.Series("empty", [[]], pl.List(pl.UInt32))

    assert_series_equal(full, full.list.set_difference(empty))
    assert_series_equal(empty, empty.list.set_difference(full))


def test_empty_set_union() -> None:
    """The union with an empty list equals the non-empty list, in either order."""
    full = pl.Series("full", [[1, 2, 3]], pl.List(pl.UInt32))
    empty = pl.Series("empty", [[]], pl.List(pl.UInt32))

    assert_series_equal(full, full.list.set_union(empty))
    # The result carries the left operand's name, hence the rename.
    assert_series_equal(full.rename("empty"), empty.list.set_union(full))


def test_empty_set_symmetric_difference() -> None:
    """The symmetric difference with an empty list equals the non-empty list."""
    full = pl.Series("full", [[1, 2, 3]], pl.List(pl.UInt32))
    empty = pl.Series("empty", [[]], pl.List(pl.UInt32))

    assert_series_equal(full, full.list.set_symmetric_difference(empty))
    # The result carries the left operand's name, hence the rename.
    assert_series_equal(full.rename("empty"), empty.list.set_symmetric_difference(full))


@pytest.mark.parametrize("name", ["sort", "unique", "head", "tail", "shift", "reverse"])
def test_empty_list_namespace_output_9585(name: str) -> None:
    """List-namespace methods keep the `List(String)` dtype on a null-only column."""
    dtype = pl.List(pl.String)
    df = pl.DataFrame([[None]], schema={"A": dtype})

    # Each parametrized method is called with its default arguments.
    expr = getattr(pl.col("A").list, name)()
    result = df.select(expr)

    assert result.dtypes == df.dtypes


def test_empty_is_in() -> None:
    """`is_in([])` is False for every element."""
    assert_series_equal(
        pl.Series("a", [1, 2, 3]).is_in([]), pl.Series("a", [False] * 3)
    )


@pytest.mark.parametrize("method", ["drop_nulls", "unique"])
def test_empty_to_empty(method: str) -> None:
    """The given method on a column-less DataFrame returns a (0, 0) frame."""
    assert getattr(pl.DataFrame(), method)().shape == (0, 0)


def test_empty_shift_over_16676() -> None:
    """A windowed `shift` with a fill value on a zero-row frame keeps shape (0, 2)."""
    df = pl.DataFrame({"a": [], "b": []})
    assert df.with_columns(pl.col("a").shift(fill_value=0).over("b")).shape == (0, 2)


def test_empty_list_cat_16405() -> None:
    """Comparing an empty `List(Categorical)` column with itself must not raise."""
    df = pl.DataFrame(schema={"cat": pl.List(pl.Categorical)})
    # Smoke check only: the result is intentionally not asserted.
    df.select(pl.col("cat") == pl.col("cat"))


def test_empty_list_concat_16924() -> None:
    """`list.concat` with a cast scalar column on a zero-row frame must not raise."""
    df = pl.DataFrame(schema={"a": pl.Int16, "b": pl.List(pl.String)})
    # Smoke check only: the result is intentionally not asserted.
    df.with_columns(pl.col("b").list.concat([pl.col("a").cast(pl.String)]))


def test_empty_input_expansion() -> None:
    """Sorting by a struct of `exclude("A", "B")` on an A/B frame raises.

    The frame only has columns "A" and "B", so the exclusion leaves the
    struct with no input columns; the test expects `InvalidOperationError`.
    """
    df = pl.DataFrame({"A": [1], "B": [2]})

    with pytest.raises(pl.exceptions.InvalidOperationError):
        (
            df.select("A", "B").with_columns(
                pl.col("B").sort_by(pl.struct(pl.exclude("A", "B")))
            )
        )


def test_empty_list_15523() -> None:
    """An untyped `pl.List` infers `List(String)` wherever the empty list sits."""
    s = pl.Series("", [["a"], []], dtype=pl.List)
    assert s.dtype == pl.List(pl.String)
    s = pl.Series("", [[], ["a"]], dtype=pl.List)
    assert s.dtype == pl.List(pl.String)