# Path: blob/main/py-polars/tests/unit/utils/test_utils.py
# 8410 views
"""Unit tests for helpers in ``polars._utils.convert`` and ``polars._utils.various``."""

from __future__ import annotations

from datetime import date, datetime, time, timedelta
from typing import TYPE_CHECKING, Any
from zoneinfo import ZoneInfo

import numpy as np
import pytest

import polars as pl
from polars._utils.convert import (
    date_to_int,
    datetime_to_int,
    parse_as_duration_string,
    time_to_int,
    timedelta_to_int,
)
from polars._utils.various import (
    _in_notebook,
    is_bool_sequence,
    is_int_sequence,
    is_sequence,
    is_str_sequence,
    parse_percentiles,
    parse_version,
)

if TYPE_CHECKING:
    from collections.abc import Sequence

    from polars._typing import TimeUnit


@pytest.mark.parametrize(
    ("td", "expected"),
    [
        (timedelta(), ""),
        (timedelta(days=1), "1d"),
        (timedelta(days=-1), "-1d"),
        (timedelta(seconds=1), "1s"),
        (timedelta(seconds=-1), "-1s"),
        (timedelta(microseconds=1), "1us"),
        (timedelta(microseconds=-1), "-1us"),
        (timedelta(days=1, seconds=1), "1d1s"),
        (timedelta(minutes=-1, seconds=1), "-59s"),
        (timedelta(days=-1, seconds=-1), "-1d1s"),
        (timedelta(days=1, microseconds=1), "1d1us"),
        (timedelta(days=-1, microseconds=-1), "-1d1us"),
        (None, None),
        ("1d2s", "1d2s"),
    ],
)
def test_parse_as_duration_string(
    td: timedelta | str | None, expected: str | None
) -> None:
    """Timedeltas become duration strings; ``None`` and strings pass through as-is."""
    assert parse_as_duration_string(td) == expected


@pytest.mark.parametrize(
    ("d", "expected"),
    [
        (date(1999, 9, 9), 10_843),
        (date(1969, 12, 31), -1),
        (date.min, -719_162),
        (date.max, 2_932_896),
    ],
)
def test_date_to_int(d: date, expected: int) -> None:
    """Dates map to a signed day count in which 1969-12-31 is ``-1``."""
    assert date_to_int(d) == expected


@pytest.mark.parametrize(
    ("t", "expected"),
    [
        (time(0, 0, 1), 1_000_000_000),
        (time(20, 52, 10), 75_130_000_000_000),
        (time(20, 52, 10, 200), 75_130_000_200_000),
        (time.min, 0),
        (time.max, 86_399_999_999_000),
        # The attached time zone does not affect the expected value.
        (time(12, 0, tzinfo=None), 43_200_000_000_000),
        (time(12, 0, tzinfo=ZoneInfo("UTC")), 43_200_000_000_000),
        (time(12, 0, tzinfo=ZoneInfo("Asia/Shanghai")), 43_200_000_000_000),
        (time(12, 0, tzinfo=ZoneInfo("America/Chicago")), 43_200_000_000_000),
    ],
)
def test_time_to_int(t: time, expected: int) -> None:
    """Times map to nanoseconds elapsed since midnight (one second is 1e9)."""
    assert time_to_int(t) == expected


@pytest.mark.parametrize(
    "tzinfo",
    [None, ZoneInfo("UTC"), ZoneInfo("Asia/Shanghai"), ZoneInfo("America/Chicago")],
)
def test_time_to_int_with_time_zone(tzinfo: Any) -> None:
    """Noon yields the same integer whichever ``tzinfo`` is attached."""
    t = time(12, 0, tzinfo=tzinfo)
    assert time_to_int(t) == 43_200_000_000_000


@pytest.mark.parametrize(
    ("dt", "time_unit", "expected"),
    [
        (datetime(2121, 1, 1), "ns", 4_765_132_800_000_000_000),
        (datetime(2121, 1, 1), "us", 4_765_132_800_000_000),
        (datetime(2121, 1, 1), "ms", 4_765_132_800_000),
        (datetime(1969, 12, 31, 23, 59, 59, 999999), "us", -1),
        (datetime(1969, 12, 30, 23, 59, 59, 999999), "us", -86_400_000_001),
        (datetime.min, "ns", -62_135_596_800_000_000_000),
        (datetime.max, "ns", 253_402_300_799_999_999_000),
        (datetime.min, "ms", -62_135_596_800_000),
        (datetime.max, "ms", 253_402_300_799_999),
    ],
)
def test_datetime_to_int(dt: datetime, time_unit: TimeUnit, expected: int) -> None:
    """Naive datetimes map to a signed count of ``time_unit`` around 1970-01-01."""
    assert datetime_to_int(dt, time_unit) == expected


@pytest.mark.parametrize(
    ("dt", "expected"),
    [
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=None),
            946_728_000_000_000,
        ),
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("UTC")),
            946_728_000_000_000,
        ),
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
            946_699_200_000_000,
        ),
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("America/Chicago")),
            946_749_600_000_000,
        ),
    ],
)
def test_datetime_to_int_with_time_zone(dt: datetime, expected: int) -> None:
    """Aware datetimes are shifted by their UTC offset; naive ones match UTC."""
    assert datetime_to_int(dt, "us") == expected


@pytest.mark.parametrize(
    ("td", "time_unit", "expected"),
    [
        (timedelta(days=1), "ns", 86_400_000_000_000),
        (timedelta(days=1), "us", 86_400_000_000),
        (timedelta(days=1), "ms", 86_400_000),
        (timedelta.min, "ns", -86_399_999_913_600_000_000_000),
        (timedelta.max, "ns", 86_399_999_999_999_999_999_000),
        (timedelta.min, "ms", -86_399_999_913_600_000),
        (timedelta.max, "ms", 86_399_999_999_999_999),
    ],
)
def test_timedelta_to_int(td: timedelta, time_unit: TimeUnit, expected: int) -> None:
    """Timedeltas map to an integer count of ``time_unit``, including the extremes."""
    assert timedelta_to_int(td, time_unit) == expected


def test_estimated_size() -> None:
    """Series and frame size estimates agree, and each unit step divides by 1024."""
    s = pl.Series("n", list(range(100)))
    df = s.to_frame()

    # The default unit and both spellings of "bytes" must give the same figure.
    for sz in (s.estimated_size(), s.estimated_size("b"), s.estimated_size("bytes")):
        assert sz == df.estimated_size()

    assert s.estimated_size("kb") == (df.estimated_size("b") / 1024)
    assert s.estimated_size("mb") == (df.estimated_size("kb") / 1024)
    assert s.estimated_size("gb") == (df.estimated_size("mb") / 1024)
    assert s.estimated_size("tb") == (df.estimated_size("gb") / 1024)

    with pytest.raises(ValueError):
        s.estimated_size("milkshake")  # type: ignore[arg-type]


def test_estimated_size_sliced_list_25068() -> None:
    """Slicing off half of a list column at most halves its estimated size."""
    df = pl.select(pl.int_range(10000).cast(pl.List(pl.Int64)))

    assert df.slice(5000).estimated_size() / df.estimated_size() <= 0.5


@pytest.mark.parametrize(
    ("v1", "v2"),
    [
        ("0.16.8", "0.16.7"),
        ("23.0.0", (3, 1000)),
        ((23, 0, 0), "3.1000"),
        (("0", "0", "2beta"), "0.0.1"),
        (("2", "5", "0", "1"), (2, 5, 0)),
    ],
)
def test_parse_version(v1: Any, v2: Any) -> None:
    """Parsed versions order correctly for string and tuple inputs alike."""
    assert parse_version(v1) > parse_version(v2)
    assert parse_version(v2) < parse_version(v1)


@pytest.mark.slow
def test_in_notebook() -> None:
    """The test run itself must not be detected as a notebook session."""
    # private function, but easier to test this separately and mock it in the callers
    assert not _in_notebook()


@pytest.mark.parametrize(
    ("percentiles", "expected", "inject_median"),
    [
        (None, [0.5], True),
        (0.2, [0.2, 0.5], True),
        (0.5, [0.5], True),
        ((0.25, 0.75), [0.25, 0.5, 0.75], True),
        # Undocumented effect - percentiles get sorted.
        # Can be changed, this serves as documentation of current behaviour.
        ((0.6, 0.3), [0.3, 0.5, 0.6], True),
        (None, [], False),
        (0.2, [0.2], False),
        (0.5, [0.5], False),
        ((0.25, 0.75), [0.25, 0.75], False),
        ((0.6, 0.3), [0.3, 0.6], False),
    ],
)
def test_parse_percentiles(
    percentiles: Sequence[float] | float | None,
    expected: Sequence[float],
    inject_median: bool,
) -> None:
    """Percentiles come back as a sorted list, with 0.5 added only when requested."""
    assert parse_percentiles(percentiles, inject_median=inject_median) == expected


# One scalar above 1 and one sequence holding a value below 0.
@pytest.mark.parametrize("percentiles", [1.1, [-0.1]])
def test_parse_percentiles_errors(percentiles: Sequence[float] | float | None) -> None:
    """Percentiles outside the accepted range raise ``ValueError``."""
    with pytest.raises(ValueError):
        parse_percentiles(percentiles)


@pytest.mark.parametrize(
    ("sequence", "include_series", "expected"),
    [
        (pl.Series(["xx", "yy"]), True, False),
        (pl.Series([True, False]), False, False),
        (pl.Series([True, False]), True, True),
        (np.array([False, True]), False, True),
        (np.array([False, True]), True, True),
        ([True, False], False, True),
        (["xx", "yy"], False, False),
        (True, False, False),
    ],
)
def test_is_bool_sequence_check(
    sequence: Any,
    include_series: bool,
    expected: bool,
) -> None:
    """Boolean sequences are recognised; a Series counts only with ``include_series``."""
    assert is_bool_sequence(sequence, include_series=include_series) == expected
    # Anything accepted as a typed sequence must also pass the generic check.
    if expected:
        assert is_sequence(sequence, include_series=include_series)


@pytest.mark.parametrize(
    ("sequence", "include_series", "expected"),
    [
        (pl.Series(["xx", "yy"]), True, False),
        (pl.Series([123, 345]), False, False),
        (pl.Series([123, 345]), True, True),
        (np.array([123, 345]), False, True),
        (np.array([123, 345]), True, True),
        (["xx", "yy"], False, False),
        ([123, 456], False, True),
        (123, False, False),
    ],
)
def test_is_int_sequence_check(
    sequence: Any,
    include_series: bool,
    expected: bool,
) -> None:
    """Integer sequences are recognised; a Series counts only with ``include_series``."""
    assert is_int_sequence(sequence, include_series=include_series) == expected
    # Anything accepted as a typed sequence must also pass the generic check.
    if expected:
        assert is_sequence(sequence, include_series=include_series)


@pytest.mark.parametrize(
    ("sequence", "include_series", "expected"),
    [
        (pl.Series(["xx", "yy"]), False, False),
        (pl.Series(["xx", "yy"]), True, True),
        (pl.Series([123, 345]), True, False),
        (np.array(["xx", "yy"]), False, True),
        (np.array(["xx", "yy"]), True, True),
        (["xx", "yy"], False, True),
        ([123, 456], False, False),
        ("xx", False, False),
    ],
)
def test_is_str_sequence_check(
    sequence: Any,
    include_series: bool,
    expected: bool,
) -> None:
    """String sequences are recognised; a bare ``str`` is not treated as one."""
    assert is_str_sequence(sequence, include_series=include_series) == expected
    # Anything accepted as a typed sequence must also pass the generic check.
    if expected:
        assert is_sequence(sequence, include_series=include_series)