# Path: blob/main/py-polars/tests/unit/operations/aggregation/test_vertical.py
# (page metadata from the code viewer: 6940 views)
from __future__ import annotations

import numpy as np
import pytest

import polars as pl
from polars.testing import assert_frame_equal


def assert_expr_equal(
    left: pl.Expr,
    right: pl.Expr,
    context: pl.DataFrame | pl.LazyFrame | None = None,
) -> None:
    """
    Evaluate expressions in a context to determine equality.

    Both expressions are selected from the same context and the two resulting
    frames are compared with `assert_frame_equal`.

    Parameters
    ----------
    left
        The expression to compare.
    right
        The other expression to compare.
    context
        The context in which the expressions will be evaluated. Defaults to an empty
        context.
    """
    if context is None:
        context = pl.DataFrame()
    assert_frame_equal(context.select(left), context.select(right))


def test_all_expr() -> None:
    """Selecting `pl.all()` should give back a frame equal to the original."""
    df = pl.DataFrame({"nrs": [1, 2, 3, 4, 5, None]})
    assert_frame_equal(df.select(pl.all()), df)


def test_any_expr(fruits_cars: pl.DataFrame) -> None:
    """`pl.any("A")` should be truthy once column "A" is cast to boolean."""
    # `fruits_cars` is a fixture supplied outside this module.
    assert fruits_cars.with_columns(pl.col("A").cast(bool)).select(pl.any("A")).item()


@pytest.mark.parametrize("function", ["all", "any"])
@pytest.mark.parametrize("input", ["a", "^a|b$"])
def test_alias_for_col_agg_bool(function: str, input: str) -> None:
    """`pl.<function>(input)` should match `pl.col(input).<function>()`."""
    result = getattr(pl, function)(input)  # e.g. pl.all(input)
    expected = getattr(pl.col(input), function)()  # e.g. pl.col(input).all()
    context = pl.DataFrame({"a": [True, False], "b": [True, True]})
    assert_expr_equal(result, expected, context)


@pytest.mark.parametrize("function", ["min", "max", "sum", "cum_sum"])
@pytest.mark.parametrize("input", ["a", "^a|b$"])
def test_alias_for_col_agg(function: str, input: str) -> None:
    """`pl.<function>(input)` should match `pl.col(input).<function>()`."""
    result = getattr(pl, function)(input)  # e.g. pl.min(input)
    expected = getattr(pl.col(input), function)()  # e.g. pl.col(input).min()
    context = pl.DataFrame({"a": [1, 4], "b": [3, 2]})
    assert_expr_equal(result, expected, context)


@pytest.mark.release
def test_mean_overflow() -> None:
    """The mean of a large integer column should match the expected value."""
    # Fixed seed so the sampled data, and therefore `expected`, is reproducible.
    np.random.seed(1)
    expected = 769.5607652

    # 5,000,000 values of at least 500 sum to more than 2.5e9, which exceeds
    # the Int32 maximum (2_147_483_647); the Int32 casts below exercise that.
    df = pl.DataFrame(np.random.randint(500, 1040, 5000000), schema=["value"])

    # Column as constructed from the NumPy array.
    result = df.with_columns(pl.mean("value"))[0, 0]
    assert np.isclose(result, expected)

    # Expression-level mean on an Int32 column.
    result = df.with_columns(pl.col("value").cast(pl.Int32)).with_columns(
        pl.mean("value")
    )[0, 0]
    assert np.isclose(result, expected)

    # Series-level mean on an Int32 column.
    result = df.with_columns(pl.col("value").cast(pl.Int32)).get_column("value").mean()
    assert np.isclose(result, expected)


def test_deep_subexpression_f32_schema_7129() -> None:
    """Float32 should be kept when combining a column with its own aggregations."""
    # NOTE(review): "7129" is presumably an issue number - confirm.
    df = pl.DataFrame({"a": [1.1, 2.3, 3.4, 4.5]}, schema={"a": pl.Float32()})
    assert df.with_columns(pl.col("a") - pl.col("a").median()).dtypes == [pl.Float32]
    assert df.with_columns(
        (pl.col("a") - pl.col("a").mean()) / (pl.col("a").std() + 0.001)
    ).dtypes == [pl.Float32]