Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/test_empty.py
6939 views
1
import pytest
2
3
import polars as pl
4
from polars.testing import assert_frame_equal, assert_series_equal
5
6
7
def test_empty_str_concat_lit() -> None:
    """Prepending a string literal to an empty String column adds a String column."""
    frame = pl.DataFrame(
        {"a": [], "b": []},
        schema=[("a", pl.String), ("b", pl.String)],
    )
    result = frame.with_columns(pl.lit("asd") + pl.col("a"))

    # The new column takes its name from the literal on the left-hand side.
    expected_schema = {"a": pl.String, "b": pl.String, "literal": pl.String}
    assert result.schema == expected_schema
14
15
16
def test_empty_cross_join() -> None:
    """Cross-joining two empty LazyFrames keeps both columns and their dtypes."""
    left = pl.LazyFrame(schema={"a": pl.Int32})
    right = pl.LazyFrame(schema={"b": pl.Int32})

    joined = left.join(right, how="cross").collect()
    assert joined.schema == {"a": pl.Int32, "b": pl.Int32}
21
22
23
def test_empty_string_replace() -> None:
    """`str.replace` on an empty String series returns the series unchanged."""
    empty = pl.Series("", [], dtype=pl.String)

    # Cover single- and multi-character patterns, each with and without
    # `literal=True`.
    for pattern in ("a", "ab"):
        assert_series_equal(empty.str.replace(pattern, "b", literal=True), empty)
        assert_series_equal(empty.str.replace(pattern, "b"), empty)
29
30
31
def test_empty_window_function() -> None:
    """A window expression over an empty frame works in eager and lazy mode.

    Both code paths must produce an empty ``VAL`` column of dtype Float64.
    """
    expr = (pl.col("VAL") / pl.col("VAL").sum()).over("KEY")
    schema = {"KEY": pl.String, "VAL": pl.Float64}
    expected = pl.DataFrame(schema={"VAL": pl.Float64})

    # Eager path (presumably this once raised a ComputeError): verify the
    # result itself, not merely that the call completes.
    df = pl.DataFrame(schema=schema)
    assert_frame_equal(df.select(expr), expected)

    # Lazy path.
    lf = pl.DataFrame(schema=schema).lazy()
    assert_frame_equal(lf.select(expr).collect(), expected)
40
41
42
def test_empty_count_window() -> None:
    """A windowed `count` over an empty frame yields an empty UInt32 column."""
    names = ("ID", "DESC", "dataset")
    df = pl.DataFrame(
        {name: [] for name in names},
        schema=dict.fromkeys(names, pl.String),
    )

    counted = df.select(pl.col("ID").count().over(["ID", "DESC"]))
    assert counted.schema == {"ID": pl.UInt32}
    assert counted.height == 0
51
52
53
def test_empty_sort_by_args() -> None:
    """Sorting by an empty list of keys leaves the frame as it was."""
    df = pl.DataFrame({"x": [2, 1, 3]})

    via_expression = df.select(pl.col.x.sort_by([]))
    assert_frame_equal(df, via_expression)

    via_frame_sort = df.sort([])
    assert_frame_equal(df, via_frame_sort)
57
58
59
def test_empty_9137() -> None:
    """Aggregating an empty Float32 frame keeps two Float32 columns and no rows."""
    empty = pl.DataFrame(
        {"id": [], "value": []},
        schema={"id": pl.Float32, "value": pl.Float32},
    )
    mean_of_squares = pl.col("value").pow(2).mean()

    out = empty.group_by("id").agg(mean_of_squares)
    assert out.shape == (0, 2)
    assert out.dtypes == [pl.Float32, pl.Float32]
70
71
72
@pytest.mark.parametrize("dtype", [pl.String, pl.Binary, pl.UInt32])
@pytest.mark.parametrize(
    "set_operation",
    ["set_intersection", "set_union", "set_difference", "set_symmetric_difference"],
)
def test_empty_df_set_operations(set_operation: str, dtype: pl.DataType) -> None:
    """Each list set operation on an empty frame produces an empty result."""
    # Look the method up by name on the list namespace of the first column.
    operation = getattr(pl.col("list1").list, set_operation)
    combined = operation(pl.col("list2"))

    list_dtype = pl.List(dtype)
    df = pl.DataFrame([], {"list1": list_dtype, "list2": list_dtype})
    assert df.select(combined).is_empty()
81
82
83
def test_empty_set_intersection() -> None:
    """Intersecting with an empty list gives an empty list named after the caller."""
    list_dtype = pl.List(pl.UInt32)
    filled = pl.Series("full", [[1, 2, 3]], list_dtype)
    blank = pl.Series("empty", [[]], list_dtype)

    # The result carries the name of the series the method was called on.
    assert_series_equal(blank.rename("full"), filled.list.set_intersection(blank))
    assert_series_equal(blank, blank.list.set_intersection(filled))
89
90
91
def test_empty_set_difference() -> None:
    """Set difference involving an empty list returns the left operand unchanged."""
    list_dtype = pl.List(pl.UInt32)
    filled = pl.Series("full", [[1, 2, 3]], list_dtype)
    blank = pl.Series("empty", [[]], list_dtype)

    assert_series_equal(filled, filled.list.set_difference(blank))
    assert_series_equal(blank, blank.list.set_difference(filled))
97
98
99
def test_empty_set_union() -> None:
    """The union with an empty list equals the non-empty list, in either order."""
    list_dtype = pl.List(pl.UInt32)
    filled = pl.Series("full", [[1, 2, 3]], list_dtype)
    blank = pl.Series("empty", [[]], list_dtype)

    assert_series_equal(filled, filled.list.set_union(blank))
    # The result carries the name of the series the method was called on.
    assert_series_equal(filled.rename("empty"), blank.list.set_union(filled))
105
106
107
def test_empty_set_symmetric_difference() -> None:
    """Symmetric difference with an empty list equals the non-empty list."""
    list_dtype = pl.List(pl.UInt32)
    filled = pl.Series("full", [[1, 2, 3]], list_dtype)
    blank = pl.Series("empty", [[]], list_dtype)

    assert_series_equal(filled, filled.list.set_symmetric_difference(blank))
    # The result carries the name of the series the method was called on.
    assert_series_equal(
        filled.rename("empty"), blank.list.set_symmetric_difference(filled)
    )
113
114
115
@pytest.mark.parametrize("name", ["sort", "unique", "head", "tail", "shift", "reverse"])
def test_empty_list_namespace_output_9585(name: str) -> None:
    """List-namespace methods must not change the dtype of the column."""
    df = pl.DataFrame([[None]], schema={"A": pl.List(pl.String)})

    # Resolve the method by name and call it with its default arguments.
    list_method = getattr(pl.col("A").list, name)
    output = df.select(list_method())

    assert output.dtypes == df.dtypes
124
125
126
def test_empty_is_in() -> None:
    """`is_in` with an empty collection is False for every element."""
    membership = pl.Series("a", [1, 2, 3]).is_in([])
    all_false = pl.Series("a", [False] * 3)
    assert_series_equal(membership, all_false)
130
131
132
@pytest.mark.parametrize("method", ["drop_nulls", "unique"])
def test_empty_to_empty(method: str) -> None:
    """The method applied to a frame without rows or columns gives shape (0, 0)."""
    operation = getattr(pl.DataFrame(), method)
    assert operation().shape == (0, 0)
135
136
137
def test_empty_shift_over_16676() -> None:
    """A windowed `shift` with a fill value on an empty frame keeps shape (0, 2)."""
    df = pl.DataFrame({"a": [], "b": []})
    shifted = pl.col("a").shift(fill_value=0).over("b")
    assert df.with_columns(shifted).shape == (0, 2)
140
141
142
def test_empty_list_cat_16405() -> None:
    """Comparing an empty List(Categorical) column with itself must not raise."""
    df = pl.DataFrame(schema={"cat": pl.List(pl.Categorical)})
    lhs, rhs = pl.col("cat"), pl.col("cat")
    # Smoke test: only successful evaluation is checked, not the output.
    df.select(lhs == rhs)
145
146
147
def test_empty_list_concat_16924() -> None:
    """`list.concat` with a cast column on an empty frame must not raise."""
    df = pl.DataFrame(schema={"a": pl.Int16, "b": pl.List(pl.String)})
    a_as_string = pl.col("a").cast(pl.String)
    # Smoke test: only successful evaluation is checked, not the output.
    df.with_columns(pl.col("b").list.concat([a_as_string]))
150
151
152
def test_empty_input_expansion() -> None:
    """Sorting by a struct built from an exclusion of every column is rejected."""
    df = pl.DataFrame({"A": [1], "B": [2]})

    with pytest.raises(pl.exceptions.InvalidOperationError):
        # The frame only has columns A and B, and both are excluded here.
        sort_key = pl.struct(pl.exclude("A", "B"))
        df.select("A", "B").with_columns(pl.col("B").sort_by(sort_key))
161
162
163
def test_empty_list_15523() -> None:
    """An empty sub-list does not break inner dtype inference in either position."""
    for rows in ([["a"], []], [[], ["a"]]):
        series = pl.Series("", rows, dtype=pl.List)
        assert series.dtype == pl.List(pl.String)
168
169