# Path: blob/main/py-polars/tests/unit/dataframe/test_glimpse.py
# 8412 views
from __future__ import annotations12import textwrap3from datetime import datetime4from typing import Any56import pytest78import polars as pl910TEST_DF = pl.DataFrame(11{12"a": [1.0, 2.8, 3.0],13"b": [4, 5, None],14"c": [True, False, True],15"d": [None, "b", "c"],16"e": ["usd", "eur", None],17"f": pl.datetime_range(18datetime(2023, 1, 1),19datetime(2023, 1, 3),20"1d",21time_unit="us",22eager=True,23),24"g": pl.datetime_range(25datetime(2023, 1, 1),26datetime(2023, 1, 3),27"1d",28time_unit="ms",29eager=True,30),31"h": pl.datetime_range(32datetime(2023, 1, 1),33datetime(2023, 1, 3),34"1d",35time_unit="ns",36eager=True,37),38"i": [[5, 6], [3, 4], [9, 8]],39"j": [[5.0, 6.0], [3.0, 4.0], [9.0, 8.0]],40"k": [["A", "a"], ["B", "b"], ["C", "c"]],41}42)4344TEST_EXPECTED = textwrap.dedent(45"""\46Rows: 347Columns: 1148$ a <f64> 1.0, 2.8, 3.049$ b <i64> 4, 5, null50$ c <bool> True, False, True51$ d <str> null, 'b', 'c'52$ e <str> 'usd', 'eur', null53$ f <datetime[μs]> 2023-01-01 00:00:00, 2023-01-02 00:00:00, 2023-01-03 00:00:0054$ g <datetime[ms]> 2023-01-01 00:00:00, 2023-01-02 00:00:00, 2023-01-03 00:00:0055$ h <datetime[ns]> 2023-01-01 00:00:00, 2023-01-02 00:00:00, 2023-01-03 00:00:0056$ i <list[i64]> [5, 6], [3, 4], [9, 8]57$ j <list[f64]> [5.0, 6.0], [3.0, 4.0], [9.0, 8.0]58$ k <list[str]> ['A', 'a'], ['B', 'b'], ['C', 'c']59"""60)616263def test_glimpse(capsys: Any) -> None:64for result in (65# check deprecated parameter still works66TEST_DF.glimpse(return_as_string=True), # type: ignore[call-overload]67TEST_DF.glimpse(return_type="string"),68):69assert result == TEST_EXPECTED707172@pytest.mark.parametrize("return_type", [None, "self"])73def test_glimpse_print_return(return_type: str | None, capsys: Any) -> None:74# default behaviour prints to stdout, returning nothing75res = TEST_DF.glimpse(return_type=return_type) # type: ignore[arg-type]7677if return_type is None:78assert res is None79else:80assert res is TEST_DF8182# note: remove the last newline on the 
capsys83assert capsys.readouterr().out[:-1] == TEST_EXPECTED848586def test_glimpse_as_frame() -> None:87result = TEST_DF.glimpse(return_type="frame")8889assert isinstance(result, pl.DataFrame)90assert result.schema == pl.Schema(91{92"column": pl.String(),93"dtype": pl.String(),94"values": pl.List(pl.String),95}96)97assert result.to_dict(as_series=False) == {98"column": [99"a",100"b",101"c",102"d",103"e",104"f",105"g",106"h",107"i",108"j",109"k",110],111"dtype": [112"f64",113"i64",114"bool",115"str",116"str",117"datetime[μs]",118"datetime[ms]",119"datetime[ns]",120"list[i64]",121"list[f64]",122"list[str]",123],124"values": [125["1.0", "2.8", "3.0"],126["4", "5", None],127["True", "False", "True"],128[None, "'b'", "'c'"],129["'usd'", "'eur'", None],130["2023-01-01 00:00:00", "2023-01-02 00:00:00", "2023-01-03 00:00:00"],131["2023-01-01 00:00:00", "2023-01-02 00:00:00", "2023-01-03 00:00:00"],132["2023-01-01 00:00:00", "2023-01-02 00:00:00", "2023-01-03 00:00:00"],133["[5, 6]", "[3, 4]", "[9, 8]"],134["[5.0, 6.0]", "[3.0, 4.0]", "[9.0, 8.0]"],135["['A', 'a']", "['B', 'b']", "['C', 'c']"],136],137}138139140def test_glimpse_colname_length() -> None:141df = pl.DataFrame({"a" * 30: [11, 22, 33, 44, 55, 66]})142result = df.glimpse(max_colname_length=20, return_type="string")143144expected = textwrap.dedent(145"""\146Rows: 6147Columns: 1148$ aaaaaaaaaaaaaaaaaaa… <i64> 11, 22, 33, 44, 55, 66149"""150)151assert result == expected152153154def test_glimpse_items_length() -> None:155df = pl.DataFrame({"n": range(50)}, schema={"n": pl.UInt8})156157# default max_items is 10158result = df.glimpse(return_type="string")159expected = textwrap.dedent(160"""\161Rows: 50162Columns: 1163$ n <u8> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9164"""165)166assert result == expected167168# test with custom max_items169result = df.glimpse(max_items_per_column=5, return_type="string")170expected = textwrap.dedent(171"""\172Rows: 50173Columns: 1174$ n <u8> 0, 1, 2, 3, 4175"""176)177assert result == 
expected178179180