Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/aggregation/test_vertical.py
6940 views
1
from __future__ import annotations
2
3
import numpy as np
4
import pytest
5
6
import polars as pl
7
from polars.testing import assert_frame_equal
8
9
10
def assert_expr_equal(
    left: pl.Expr,
    right: pl.Expr,
    context: pl.DataFrame | pl.LazyFrame | None = None,
) -> None:
    """
    Assert that two expressions give equal results when selected in a context.

    Parameters
    ----------
    left
        The first expression.
    right
        The expression to compare it against.
    context
        The frame in which both expressions are evaluated. When omitted, an
        empty DataFrame is used.
    """
    frame = pl.DataFrame() if context is None else context
    left_result = frame.select(left)
    right_result = frame.select(right)
    assert_frame_equal(left_result, right_result)
31
32
33
def test_all_expr() -> None:
    """Check that selecting ``pl.all()`` yields a frame equal to the original."""
    frame = pl.DataFrame({"nrs": [1, 2, 3, 4, 5, None]})
    selected = frame.select(pl.all())
    assert_frame_equal(selected, frame)
36
37
38
def test_any_expr(fruits_cars: pl.DataFrame) -> None:
    """Check that ``pl.any("A")`` is truthy once column "A" is cast to bool."""
    as_bool = fruits_cars.with_columns(pl.col("A").cast(bool))
    outcome = as_bool.select(pl.any("A")).item()
    assert outcome
40
41
42
@pytest.mark.parametrize("function", ["all", "any"])
@pytest.mark.parametrize("input", ["a", "^a|b$"])
def test_alias_for_col_agg_bool(function: str, input: str) -> None:
    """Check that ``pl.<function>(input)`` equals ``pl.col(input).<function>()``."""
    # Top-level shortcut, e.g. pl.all(input)
    top_level_fn = getattr(pl, function)
    result = top_level_fn(input)

    # Method on the column expression, e.g. pl.col(input).all()
    column_method = getattr(pl.col(input), function)
    expected = column_method()

    frame = pl.DataFrame({"a": [True, False], "b": [True, True]})
    assert_expr_equal(result, expected, frame)
49
50
51
@pytest.mark.parametrize("function", ["min", "max", "sum", "cum_sum"])
@pytest.mark.parametrize("input", ["a", "^a|b$"])
def test_alias_for_col_agg(function: str, input: str) -> None:
    """Check that ``pl.<function>(input)`` equals ``pl.col(input).<function>()``."""
    shortcut = getattr(pl, function)
    result = shortcut(input)  # e.g. pl.min(input)

    method = getattr(pl.col(input), function)
    expected = method()  # e.g. pl.col(input).min()

    numbers = pl.DataFrame({"a": [1, 4], "b": [3, 2]})
    assert_expr_equal(result, expected, numbers)
58
59
60
@pytest.mark.release
def test_mean_overflow() -> None:
    """
    Check the mean of 5,000,000 seeded random integers against a fixed value.

    The mean is computed three ways: via ``pl.mean`` on the frame as built,
    via ``pl.mean`` after casting to Int32, and via ``Series.mean`` after
    casting to Int32. Going by the test name, this presumably guards against
    overflow while accumulating the Int32 values.
    """
    np.random.seed(1)
    expected = 769.5607652

    values = np.random.randint(500, 1040, 5000000)
    df = pl.DataFrame(values, schema=["value"])

    # Expression API on the frame as constructed.
    with_mean = df.with_columns(pl.mean("value"))
    result = with_mean[0, 0]
    assert np.isclose(result, expected)

    # Expression API after casting the column to Int32.
    as_int32 = df.with_columns(pl.col("value").cast(pl.Int32))
    with_mean = as_int32.with_columns(pl.mean("value"))
    result = with_mean[0, 0]
    assert np.isclose(result, expected)

    # Series API after casting the column to Int32.
    as_int32 = df.with_columns(pl.col("value").cast(pl.Int32))
    result = as_int32.get_column("value").mean()
    assert np.isclose(result, expected)
77
78
79
def test_deep_subexpression_f32_schema_7129() -> None:
    """Check that a Float32 column stays Float32 through nested aggregations."""
    df = pl.DataFrame({"a": [1.1, 2.3, 3.4, 4.5]}, schema={"a": pl.Float32()})
    a = pl.col("a")

    # Column minus its own median.
    centered = df.with_columns(a - a.median())
    assert centered.dtypes == [pl.Float32]

    # Mean-centered column divided by (std + small constant).
    standardized = df.with_columns((a - a.mean()) / (a.std() + 0.001))
    assert standardized.dtypes == [pl.Float32]
85
86