CoCalc -- test_vertical.py

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/aggregation/test_vertical.py
⁶⁹⁴⁰ views
1
from __future__ import annotations
2

3
import numpy as np
4
import pytest
5

6
import polars as pl
7
from polars.testing import assert_frame_equal
8

9

10
def assert_expr_equal(
    left: pl.Expr,
    right: pl.Expr,
    context: pl.DataFrame | pl.LazyFrame | None = None,
) -> None:
    """
    Assert that two expressions yield equal frames when selected.

    Parameters
    ----------
    left
        The first expression to evaluate.
    right
        The second expression, whose result is compared against that of `left`.
    context
        The frame on which both expressions are selected. When omitted, an
        empty `pl.DataFrame` is used.
    """
    frame = pl.DataFrame() if context is None else context
    left_result = frame.select(left)
    right_result = frame.select(right)
    assert_frame_equal(left_result, right_result)
31

32

33
def test_all_expr() -> None:
    """Selecting `pl.all()` yields a frame equal to the original."""
    frame = pl.DataFrame({"nrs": [1, 2, 3, 4, 5, None]})
    selected = frame.select(pl.all())
    assert_frame_equal(selected, frame)
36

37

38
def test_any_expr(fruits_cars: pl.DataFrame) -> None:
    """`pl.any("A")` is truthy once column `A` has been cast to boolean."""
    as_bool = fruits_cars.with_columns(pl.col("A").cast(bool))
    any_of_a = as_bool.select(pl.any("A"))
    assert any_of_a.item()
40

41

42
@pytest.mark.parametrize("function", ["all", "any"])
@pytest.mark.parametrize("input", ["a", "^a|b$"])
def test_alias_for_col_agg_bool(function: str, input: str) -> None:
    """Check `pl.all`/`pl.any` against the equivalent `pl.col` method."""
    top_level = getattr(pl, function)  # e.g. pl.all
    via_column = getattr(pl.col(input), function)  # e.g. pl.col(input).all
    frame = pl.DataFrame({"a": [True, False], "b": [True, True]})
    assert_expr_equal(top_level(input), via_column(), frame)
49

50

51
@pytest.mark.parametrize("function", ["min", "max", "sum", "cum_sum"])
@pytest.mark.parametrize("input", ["a", "^a|b$"])
def test_alias_for_col_agg(function: str, input: str) -> None:
    """Check top-level aggregation aliases against the `pl.col` method."""
    top_level = getattr(pl, function)  # e.g. pl.min
    via_column = getattr(pl.col(input), function)  # e.g. pl.col(input).min
    frame = pl.DataFrame({"a": [1, 4], "b": [3, 2]})
    assert_expr_equal(top_level(input), via_column(), frame)
58

59

60
@pytest.mark.release
def test_mean_overflow() -> None:
    """
    Mean of five million seeded random integers matches a fixed value.

    The mean is checked three ways: as an expression on the frame as
    constructed, as an expression after casting to Int32, and as a Series
    method after casting to Int32.
    """
    # NOTE(review): presumably guards against overflow while accumulating the
    # Int32 column -- confirm against the originating issue.
    np.random.seed(1)
    sample = np.random.randint(500, 1040, 5_000_000)
    expected = 769.5607652

    df = pl.DataFrame(sample, schema=["value"])

    frame_mean = df.with_columns(pl.mean("value"))[0, 0]
    assert np.isclose(frame_mean, expected)

    df_int32 = df.with_columns(pl.col("value").cast(pl.Int32))

    int32_frame_mean = df_int32.with_columns(pl.mean("value"))[0, 0]
    assert np.isclose(int32_frame_mean, expected)

    int32_series_mean = df_int32.get_column("value").mean()
    assert np.isclose(int32_series_mean, expected)
77

78

79
def test_deep_subexpression_f32_schema_7129() -> None:
    """A Float32 column keeps its dtype through arithmetic with aggregations."""
    df = pl.DataFrame({"a": [1.1, 2.3, 3.4, 4.5]}, schema={"a": pl.Float32()})
    a = pl.col("a")

    centered = df.with_columns(a - a.median())
    assert centered.dtypes == [pl.Float32]

    standardized = df.with_columns((a - a.mean()) / (a.std() + 0.001))
    assert standardized.dtypes == [pl.Float32]
85

86
Product

Resources

Company