Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/dataframe/test_glimpse.py
8412 views
1
from __future__ import annotations
2
3
import textwrap
4
from datetime import datetime
5
from typing import Any
6
7
import pytest
8
9
import polars as pl
10
11
# Shared three-row fixture frame used by the glimpse tests in this module.
TEST_DF = pl.DataFrame(
    {
        # scalar columns; "b", "d" and "e" each contain one null
        "a": [1.0, 2.8, 3.0],
        "b": [4, 5, None],
        "c": [True, False, True],
        "d": [None, "b", "c"],
        "e": ["usd", "eur", None],
        # the same daily range (2023-01-01 .. 2023-01-03) at three time units
        "f": pl.datetime_range(
            datetime(2023, 1, 1), datetime(2023, 1, 3), "1d", time_unit="us", eager=True
        ),
        "g": pl.datetime_range(
            datetime(2023, 1, 1), datetime(2023, 1, 3), "1d", time_unit="ms", eager=True
        ),
        "h": pl.datetime_range(
            datetime(2023, 1, 1), datetime(2023, 1, 3), "1d", time_unit="ns", eager=True
        ),
        # nested list columns (ints, floats, strings)
        "i": [[5, 6], [3, 4], [9, 8]],
        "j": [[5.0, 6.0], [3.0, 4.0], [9.0, 8.0]],
        "k": [["A", "a"], ["B", "b"], ["C", "c"]],
    }
)
44
45
# Expected plain-text glimpse of TEST_DF: a two-line header, then one line per
# column; every line (including the last) is newline-terminated.
# NOTE(review): this text was taken from a copy in which runs of spaces may have
# been collapsed -- confirm the spacing around the dtype against real output.
TEST_EXPECTED = (
    "Rows: 3\n"
    "Columns: 11\n"
    "$ a <f64> 1.0, 2.8, 3.0\n"
    "$ b <i64> 4, 5, null\n"
    "$ c <bool> True, False, True\n"
    "$ d <str> null, 'b', 'c'\n"
    "$ e <str> 'usd', 'eur', null\n"
    "$ f <datetime[μs]> 2023-01-01 00:00:00, 2023-01-02 00:00:00, 2023-01-03 00:00:00\n"
    "$ g <datetime[ms]> 2023-01-01 00:00:00, 2023-01-02 00:00:00, 2023-01-03 00:00:00\n"
    "$ h <datetime[ns]> 2023-01-01 00:00:00, 2023-01-02 00:00:00, 2023-01-03 00:00:00\n"
    "$ i <list[i64]> [5, 6], [3, 4], [9, 8]\n"
    "$ j <list[f64]> [5.0, 6.0], [3.0, 4.0], [9.0, 8.0]\n"
    "$ k <list[str]> ['A', 'a'], ['B', 'b'], ['C', 'c']\n"
)
62
63
64
def test_glimpse(capsys: Any) -> None:
    """Check that both string-returning call styles produce TEST_EXPECTED."""
    # the deprecated `return_as_string` parameter must still work
    legacy = TEST_DF.glimpse(return_as_string=True)  # type: ignore[call-overload]
    current = TEST_DF.glimpse(return_type="string")

    assert legacy == TEST_EXPECTED
    assert current == TEST_EXPECTED
71
72
73
@pytest.mark.parametrize("return_type", [None, "self"])
def test_glimpse_print_return(return_type: str | None, capsys: Any) -> None:
    """Check the return value and printed output for non-string return types."""
    returned = TEST_DF.glimpse(return_type=return_type)  # type: ignore[arg-type]

    # default behaviour returns nothing; otherwise the very same frame comes back
    expected_return = None if return_type is None else TEST_DF
    assert returned is expected_return

    # drop the last newline of the captured stdout before comparing
    printed = capsys.readouterr().out
    assert printed[:-1] == TEST_EXPECTED
85
86
87
def test_glimpse_as_frame() -> None:
    """Check the schema and contents of the ``return_type="frame"`` result."""
    frame = TEST_DF.glimpse(return_type="frame")
    assert isinstance(frame, pl.DataFrame)

    expected_schema = pl.Schema(
        {
            "column": pl.String(),
            "dtype": pl.String(),
            "values": pl.List(pl.String),
        }
    )
    assert frame.schema == expected_schema

    # all three datetime columns render identically, whatever their time unit
    days = ["2023-01-01 00:00:00", "2023-01-02 00:00:00", "2023-01-03 00:00:00"]

    # one (column, dtype, values) entry per TEST_DF column, in column order
    expected_rows: list[tuple[str, str, list[str | None]]] = [
        ("a", "f64", ["1.0", "2.8", "3.0"]),
        ("b", "i64", ["4", "5", None]),
        ("c", "bool", ["True", "False", "True"]),
        ("d", "str", [None, "'b'", "'c'"]),
        ("e", "str", ["'usd'", "'eur'", None]),
        ("f", "datetime[μs]", list(days)),
        ("g", "datetime[ms]", list(days)),
        ("h", "datetime[ns]", list(days)),
        ("i", "list[i64]", ["[5, 6]", "[3, 4]", "[9, 8]"]),
        ("j", "list[f64]", ["[5.0, 6.0]", "[3.0, 4.0]", "[9.0, 8.0]"]),
        ("k", "list[str]", ["['A', 'a']", "['B', 'b']", "['C', 'c']"]),
    ]
    assert frame.to_dict(as_series=False) == {
        "column": [name for name, _, _ in expected_rows],
        "dtype": [dtype for _, dtype, _ in expected_rows],
        "values": [values for _, _, values in expected_rows],
    }
139
140
141
def test_glimpse_colname_length() -> None:
    """Check that a column name longer than ``max_colname_length`` is cut short."""
    long_name = "a" * 30
    frame = pl.DataFrame({long_name: [11, 22, 33, 44, 55, 66]})

    rendered = frame.glimpse(max_colname_length=20, return_type="string")

    # the over-long name is shown shortened, ending in an ellipsis character
    assert rendered == (
        "Rows: 6\n"
        "Columns: 1\n"
        "$ aaaaaaaaaaaaaaaaaaa… <i64> 11, 22, 33, 44, 55, 66\n"
    )
153
154
155
def test_glimpse_items_length() -> None:
    """Check the default and a custom number of values shown per column."""
    frame = pl.DataFrame({"n": range(50)}, schema={"n": pl.UInt8})

    # both renderings share the same two header lines
    header = "Rows: 50\nColumns: 1\n"

    # default max_items is 10
    default_view = frame.glimpse(return_type="string")
    assert default_view == header + "$ n <u8> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9\n"

    # test with custom max_items
    limited_view = frame.glimpse(max_items_per_column=5, return_type="string")
    assert limited_view == header + "$ n <u8> 0, 1, 2, 3, 4\n"
179
180