Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/dataframe/test_null_count.py
6939 views
1
from __future__ import annotations
2
3
from hypothesis import example, given
4
5
import polars as pl
6
from polars.testing.asserts.frame import assert_frame_equal
7
from polars.testing.parametric import dataframes
8
9
10
@given(
    df=dataframes(
        min_size=1,
        min_cols=1,
        allow_null=True,
        excluded_dtypes=[
            pl.String,
            pl.List,
            pl.Struct,  # See: https://github.com/pola-rs/polars/issues/3462
        ],
    )
)
@example(df=pl.DataFrame(schema=["x", "y", "z"]))
@example(df=pl.DataFrame())
def test_null_count(df: pl.DataFrame) -> None:
    """Check `DataFrame.null_count` against a per-column count of `None` values.

    The result must be a single-row frame with one column per input column,
    and each reported count must equal the number of `None` entries found
    when the matching column is materialised as a Python list.
    """
    # The generated frames always have at least one row and one column, so
    # the zero-row and zero-column frames are supplied via explicit examples.
    counts = df.null_count()
    width = df.width
    assert counts.shape == (1, width)

    reported = counts.rows()[0]
    for position, n_nulls in enumerate(reported):
        values = df.to_series(position).to_list()
        n_none = sum(1 for value in values if value is None)
        assert n_nulls == n_none
30
31
32
def test_null_count_optimization_23031() -> None:
    """Check `count` and a null-guarded `sum` on a column that contains nulls.

    The same selection is evaluated eagerly and through the lazy engine; both
    must yield a count of 3 and a sum of 12 for the six-element input.
    NOTE(review): the numeric suffix presumably refers to polars issue
    #23031 -- confirm against the tracker.
    """
    frame = pl.DataFrame(
        data=[None, 2, None, 4, None, 6],
        schema={"col": pl.Int64},
    )

    count_series = pl.Series("count_all", [3], pl.UInt32())
    sum_series = pl.Series("sum_all", [12], pl.Int64())
    expected = pl.DataFrame([count_series, sum_series])

    # Build the named output expressions once; the identical selection is
    # applied to both the eager and the lazy frame below.
    column = pl.col("col")
    has_any_value = column.is_not_null().any()
    selection = {
        "count_all": column.count(),
        "sum_all": pl.when(has_any_value).then(column.sum()),
    }

    eager_result = frame.select(**selection)
    assert_frame_equal(eager_result, expected)

    lazy_result = frame.lazy().select(**selection).collect()
    assert_frame_equal(lazy_result, expected)
63
64