Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/series/test_describe.py
6939 views
1
from datetime import date
2
3
import polars as pl
4
from polars.testing.asserts.frame import assert_frame_equal
5
6
7
def test_series_describe_int() -> None:
    """Compare `describe()` on the integer Series [1, 2, 3] with a hand-written table."""
    described = pl.Series([1, 2, 3]).describe()

    # (statistic, value) pairs; dict insertion order fixes the expected row order.
    stats = dict(
        [
            ("count", 3.0),
            ("null_count", 0.0),
            ("mean", 2.0),
            ("std", 1.0),
            ("min", 1.0),
            ("25%", 2.0),
            ("50%", 2.0),
            ("75%", 3.0),
            ("max", 3.0),
        ]
    )
    statistic_column = stats.keys()
    value_column = stats.values()
    expected = pl.DataFrame({"statistic": statistic_column, "value": value_column})

    assert_frame_equal(expected, described)
24
25
26
def test_series_describe_float() -> None:
    """Compare `describe()` on the float Series [1.3, 4.6, 8.9] with a hand-written table."""
    described = pl.Series([1.3, 4.6, 8.9]).describe()

    # (statistic, value) pairs; dict insertion order fixes the expected row order.
    stats = dict(
        [
            ("count", 3.0),
            ("null_count", 0.0),
            ("mean", 4.933333333333334),
            ("std", 3.8109491381194442),
            ("min", 1.3),
            ("25%", 4.6),
            ("50%", 4.6),
            ("75%", 8.9),
            ("max", 8.9),
        ]
    )
    statistic_column = stats.keys()
    value_column = stats.values()
    expected = pl.DataFrame({"statistic": statistic_column, "value": value_column})

    assert_frame_equal(expected, described)
43
44
45
def test_series_describe_string() -> None:
    """Compare `describe()` on a string Series with a hand-written table.

    Every expected value is written as a string, including the counts.
    """
    described = pl.Series(["abc", "pqr", "xyz"]).describe()

    # (statistic, value) pairs; dict insertion order fixes the expected row order.
    stats = dict(
        [
            ("count", "3"),
            ("null_count", "0"),
            ("min", "abc"),
            ("max", "xyz"),
        ]
    )
    statistic_column = stats.keys()
    value_column = stats.values()
    expected = pl.DataFrame({"statistic": statistic_column, "value": value_column})

    assert_frame_equal(expected, described)
57
58
59
def test_series_describe_boolean() -> None:
    """Compare `describe()` on a boolean Series containing one null with a hand-written table.

    The expected values mix ints, a float and bools; the "value" column is
    forced to Float64 through `schema_overrides` when the frame is built.
    """
    described = pl.Series([True, False, None, True, True]).describe()

    # (statistic, value) pairs; dict insertion order fixes the expected row order.
    stats = dict(
        [
            ("count", 4),
            ("null_count", 1),
            ("mean", 0.75),
            ("min", False),
            ("max", True),
        ]
    )
    columns = {"statistic": stats.keys(), "value": stats.values()}
    expected = pl.DataFrame(
        data=columns,
        schema_overrides={"value": pl.Float64},
    )

    assert_frame_equal(expected, described)
75
76
77
def test_series_describe_date() -> None:
    """Compare `describe(interpolation="linear")` on a date Series with a hand-written table.

    Every expected value is written as a string.
    """
    dates = [date(1999, 12, 31), date(2011, 3, 11), date(2021, 1, 18)]
    described = pl.Series(dates).describe(interpolation="linear")

    # (statistic, value) pairs; dict insertion order fixes the expected row order.
    stats = dict(
        [
            ("count", "3"),
            ("null_count", "0"),
            ("mean", "2010-09-29 16:00:00"),
            ("min", "1999-12-31"),
            ("25%", "2005-08-05"),
            ("50%", "2011-03-11"),
            ("75%", "2016-02-13"),
            ("max", "2021-01-18"),
        ]
    )
    statistic_column = stats.keys()
    value_column = stats.values()
    expected = pl.DataFrame({"statistic": statistic_column, "value": value_column})

    assert_frame_equal(expected, described)
93
94
95
def test_series_describe_empty() -> None:
    """Compare `describe()` on an empty Float64 Series with a two-row table of counts."""
    described = pl.Series(dtype=pl.Float64).describe()

    # Only the two count rows are expected.
    stats = dict([("count", 0.0), ("null_count", 0.0)])
    statistic_column = stats.keys()
    value_column = stats.values()
    expected = pl.DataFrame({"statistic": statistic_column, "value": value_column})

    assert_frame_equal(expected, described)
104
105
106
def test_series_describe_null() -> None:
    """Compare `describe()` on a two-element Null-dtype Series with a two-row table of counts."""
    described = pl.Series([None, None], dtype=pl.Null).describe()

    # Only the two count rows are expected; both elements count as nulls.
    stats = dict([("count", 0.0), ("null_count", 2.0)])
    statistic_column = stats.keys()
    value_column = stats.values()
    expected = pl.DataFrame({"statistic": statistic_column, "value": value_column})

    assert_frame_equal(expected, described)
115
116
117
def test_series_describe_nested_list() -> None:
    """Compare `describe()` on a List(Float64) Series with a two-row table of counts."""
    nested_values = [[10e10, 10e15], [10e12, 10e13], [10e10, 10e15]]
    described = pl.Series(
        values=nested_values,
        dtype=pl.List(pl.Float64),
    ).describe()

    # Only the two count rows are expected.
    stats = dict([("count", 3.0), ("null_count", 0.0)])
    statistic_column = stats.keys()
    value_column = stats.values()
    expected = pl.DataFrame({"statistic": statistic_column, "value": value_column})

    assert_frame_equal(expected, described)
129
130