Path: blob/main/py-polars/tests/unit/dataframe/test_null_count.py
6939 views
from __future__ import annotations

from hypothesis import example, given

import polars as pl
from polars.testing.asserts.frame import assert_frame_equal
from polars.testing.parametric import dataframes


@given(
    df=dataframes(
        min_size=1,
        min_cols=1,
        allow_null=True,
        excluded_dtypes=[
            pl.String,
            pl.List,
            pl.Struct,  # See: https://github.com/pola-rs/polars/issues/3462
        ],
    )
)
@example(df=pl.DataFrame(schema=["x", "y", "z"]))
@example(df=pl.DataFrame())
def test_null_count(df: pl.DataFrame) -> None:
    """Check `DataFrame.null_count` against a pure-Python count of `None` values."""
    # note: the zero-row and zero-col cases are always passed as explicit examples
    null_count, ncols = df.null_count(), df.width

    # The result is a single row holding one count per input column.
    assert null_count.shape == (1, ncols)

    for idx, count in enumerate(null_count.rows()[0]):
        # Recount from the materialised Python values so the expected number
        # is computed independently of the method under test.
        assert count == sum(v is None for v in df.to_series(idx).to_list())


def test_null_count_optimization_23031() -> None:
    """Run the same count/conditional-sum query eagerly and lazily; results must match."""
    df = pl.DataFrame(data=[None, 2, None, 4, None, 6], schema={"col": pl.Int64})

    # Three of the six values are non-null (2, 4, 6) and they sum to 12.
    expected = pl.DataFrame(
        [
            pl.Series("count_all", [3], pl.UInt32()),
            pl.Series("sum_all", [12], pl.Int64()),
        ]
    )

    # Build the named expressions once so the eager and the lazy path are
    # guaranteed to execute the identical query.
    query = {
        "count_all": pl.col("col").count(),
        "sum_all": pl.when(pl.col("col").is_not_null().any()).then(
            pl.col("col").sum()
        ),
    }

    assert_frame_equal(df.select(**query), expected)
    assert_frame_equal(df.lazy().select(**query).collect(), expected)