CoCalc -- test

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/utils/test_utils.py
8410 views
1
from __future__ import annotations
2

3
from datetime import date, datetime, time, timedelta
4
from typing import TYPE_CHECKING, Any
5
from zoneinfo import ZoneInfo
6

7
import numpy as np
8
import pytest
9

10
import polars as pl
11
from polars._utils.convert import (
12
    date_to_int,
13
    datetime_to_int,
14
    parse_as_duration_string,
15
    time_to_int,
16
    timedelta_to_int,
17
)
18
from polars._utils.various import (
19
    _in_notebook,
20
    is_bool_sequence,
21
    is_int_sequence,
22
    is_sequence,
23
    is_str_sequence,
24
    parse_percentiles,
25
    parse_version,
26
)
27

28
if TYPE_CHECKING:
29
    from collections.abc import Sequence
30

31
    from polars._typing import TimeUnit
32

33

34
@pytest.mark.parametrize(
    ("td", "expected"),
    [
        # An all-zero timedelta maps to the empty string.
        (timedelta(), ""),
        (timedelta(days=1), "1d"),
        (timedelta(days=-1), "-1d"),
        (timedelta(seconds=1), "1s"),
        (timedelta(seconds=-1), "-1s"),
        (timedelta(microseconds=1), "1us"),
        (timedelta(microseconds=-1), "-1us"),
        (timedelta(days=1, seconds=1), "1d1s"),
        # -1 minute + 1 second is a net -59 seconds.
        (timedelta(minutes=-1, seconds=1), "-59s"),
        (timedelta(days=-1, seconds=-1), "-1d1s"),
        (timedelta(days=1, microseconds=1), "1d1us"),
        (timedelta(days=-1, microseconds=-1), "-1d1us"),
        # None and ready-made strings are expected back unchanged.
        (None, None),
        ("1d2s", "1d2s"),
    ],
)
def test_parse_as_duration_string(
    td: timedelta | str | None, expected: str | None
) -> None:
    """Check the duration string produced for timedelta, str and None inputs."""
    result = parse_as_duration_string(td)
    assert result == expected
57

58

59
@pytest.mark.parametrize(
    ("d", "expected"),
    [
        (date(1999, 9, 9), 10_843),
        # The day before 1970-01-01 maps to -1.
        (date(1969, 12, 31), -1),
        # Extremes of the stdlib date range.
        (date.min, -719_162),
        (date.max, 2_932_896),
    ],
)
def test_date_to_int(d: date, expected: int) -> None:
    """Check the integer produced for dates, including the stdlib extremes."""
    result = date_to_int(d)
    assert result == expected
70

71

72
@pytest.mark.parametrize(
    ("t", "expected"),
    [
        # One second after midnight is 1e9, i.e. the result is in nanoseconds.
        (time(0, 0, 1), 1_000_000_000),
        (time(20, 52, 10), 75_130_000_000_000),
        (time(20, 52, 10, 200), 75_130_000_200_000),
        (time.min, 0),
        (time.max, 86_399_999_999_000),
        # Noon yields the same value whatever tzinfo is attached.
        (time(12, 0, tzinfo=None), 43_200_000_000_000),
        (time(12, 0, tzinfo=ZoneInfo("UTC")), 43_200_000_000_000),
        (time(12, 0, tzinfo=ZoneInfo("Asia/Shanghai")), 43_200_000_000_000),
        (time(12, 0, tzinfo=ZoneInfo("America/Chicago")), 43_200_000_000_000),
    ],
)
def test_time_to_int(t: time, expected: int) -> None:
    """Check the integer produced for naive and time-zone-aware times."""
    result = time_to_int(t)
    assert result == expected
88

89

90
@pytest.mark.parametrize(
    "tzinfo",
    [None, ZoneInfo("UTC"), ZoneInfo("Asia/Shanghai"), ZoneInfo("America/Chicago")],
)
def test_time_to_int_with_time_zone(tzinfo: Any) -> None:
    """Check that noon converts to the same integer for every tzinfo."""
    # 12 hours expressed in nanoseconds (43_200_000_000_000).
    noon_ns = 12 * 3_600 * 1_000_000_000
    noon = time(12, 0, tzinfo=tzinfo)
    assert time_to_int(noon) == noon_ns
97

98

99
@pytest.mark.parametrize(
    ("dt", "time_unit", "expected"),
    [
        # Same instant in each supported time unit.
        (datetime(2121, 1, 1), "ns", 4_765_132_800_000_000_000),
        (datetime(2121, 1, 1), "us", 4_765_132_800_000_000),
        (datetime(2121, 1, 1), "ms", 4_765_132_800_000),
        # One microsecond before 1970-01-01, and exactly one day earlier still.
        (datetime(1969, 12, 31, 23, 59, 59, 999999), "us", -1),
        (datetime(1969, 12, 30, 23, 59, 59, 999999), "us", -86_400_000_001),
        # Extremes of the stdlib datetime range.
        (datetime.min, "ns", -62_135_596_800_000_000_000),
        (datetime.max, "ns", 253_402_300_799_999_999_000),
        (datetime.min, "ms", -62_135_596_800_000),
        (datetime.max, "ms", 253_402_300_799_999),
    ],
)
def test_datetime_to_int(dt: datetime, time_unit: TimeUnit, expected: int) -> None:
    """Check the integer produced for naive datetimes in each time unit."""
    result = datetime_to_int(dt, time_unit)
    assert result == expected
115

116

117
@pytest.mark.parametrize(
    ("dt", "expected"),
    [
        # Naive and UTC noon give the same value.
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=None),
            946_728_000_000_000,
        ),
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("UTC")),
            946_728_000_000_000,
        ),
        # Expected value is 8 hours before the UTC-noon value.
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
            946_699_200_000_000,
        ),
        # Expected value is 6 hours after the UTC-noon value.
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("America/Chicago")),
            946_749_600_000_000,
        ),
    ],
)
def test_datetime_to_int_with_time_zone(dt: datetime, expected: int) -> None:
    """Check that the attached time zone shifts the microsecond result."""
    result = datetime_to_int(dt, "us")
    assert result == expected
140

141

142
@pytest.mark.parametrize(
    ("td", "time_unit", "expected"),
    [
        # One day in each supported time unit.
        (timedelta(days=1), "ns", 86_400_000_000_000),
        (timedelta(days=1), "us", 86_400_000_000),
        (timedelta(days=1), "ms", 86_400_000),
        # Extremes of the stdlib timedelta range.
        (timedelta.min, "ns", -86_399_999_913_600_000_000_000),
        (timedelta.max, "ns", 86_399_999_999_999_999_999_000),
        (timedelta.min, "ms", -86_399_999_913_600_000),
        (timedelta.max, "ms", 86_399_999_999_999_999),
    ],
)
def test_timedelta_to_int(td: timedelta, time_unit: TimeUnit, expected: int) -> None:
    """Check the integer produced for timedeltas in each time unit."""
    result = timedelta_to_int(td, time_unit)
    assert result == expected
156

157

158
def test_estimated_size() -> None:
    """Check that Series and DataFrame size estimates agree in every unit."""
    series = pl.Series("n", list(range(100)))
    frame = series.to_frame()

    # No unit, "b" and "bytes" must all match the frame's default estimate.
    sizes_in_bytes = (
        series.estimated_size(),
        series.estimated_size("b"),
        series.estimated_size("bytes"),
    )
    for size in sizes_in_bytes:
        assert size == frame.estimated_size()

    # Each larger unit equals the next smaller unit divided by 1024.
    assert series.estimated_size("kb") == frame.estimated_size("b") / 1024
    assert series.estimated_size("mb") == frame.estimated_size("kb") / 1024
    assert series.estimated_size("gb") == frame.estimated_size("mb") / 1024
    assert series.estimated_size("tb") == frame.estimated_size("gb") / 1024

    # An unrecognised unit must be rejected.
    with pytest.raises(ValueError):
        series.estimated_size("milkshake")  # type: ignore[arg-type]
172

173

174
def test_estimated_size_sliced_list_25068() -> None:
    """Check that the last half of a list column is at most half the full size."""
    # NOTE(review): 25068 is presumably the issue number this guards against.
    frame = pl.select(pl.int_range(10000).cast(pl.List(pl.Int64)))

    tail_size = frame.slice(5000).estimated_size()
    full_size = frame.estimated_size()
    assert tail_size / full_size <= 0.5
178

179

180
@pytest.mark.parametrize(
    ("v1", "v2"),
    [
        # In every pair, v1 is the newer version; strings and tuples are mixed.
        ("0.16.8", "0.16.7"),
        ("23.0.0", (3, 1000)),
        ((23, 0, 0), "3.1000"),
        (("0", "0", "2beta"), "0.0.1"),
        (("2", "5", "0", "1"), (2, 5, 0)),
    ],
)
def test_parse_version(v1: Any, v2: Any) -> None:
    """Check that parsed versions order correctly in both directions."""
    newer = parse_version(v1)
    older = parse_version(v2)
    assert newer > older
    assert older < newer
193

194

195
@pytest.mark.slow
def test_in_notebook() -> None:
    """Check that the test run is not detected as a notebook session."""
    # private function, but easier to test this separately and mock it in the callers
    detected = _in_notebook()
    assert not detected
199

200

201
@pytest.mark.parametrize(
    ("percentiles", "expected", "inject_median"),
    [
        # With inject_median=True, 0.5 is present in every expected result.
        (None, [0.5], True),
        (0.2, [0.2, 0.5], True),
        (0.5, [0.5], True),
        ((0.25, 0.75), [0.25, 0.5, 0.75], True),
        # Undocumented effect - percentiles get sorted.
        # Can be changed, this serves as documentation of current behaviour.
        ((0.6, 0.3), [0.3, 0.5, 0.6], True),
        # With inject_median=False, only the requested values come back.
        (None, [], False),
        (0.2, [0.2], False),
        (0.5, [0.5], False),
        ((0.25, 0.75), [0.25, 0.75], False),
        ((0.6, 0.3), [0.3, 0.6], False),
    ],
)
def test_parse_percentiles(
    percentiles: Sequence[float] | float | None,
    expected: Sequence[float],
    inject_median: bool,
) -> None:
    """Check percentile parsing with and without median injection."""
    result = parse_percentiles(percentiles, inject_median=inject_median)
    assert result == expected
224

225

226
@pytest.mark.parametrize("percentiles", [1.1, [-0.1]])
def test_parse_percentiles_errors(percentiles: Sequence[float] | float | None) -> None:
    """Check that the parametrized bad percentiles raise ValueError."""
    with pytest.raises(ValueError):
        parse_percentiles(percentiles)
230

231

232
@pytest.mark.parametrize(
    ("sequence", "include_series", "expected"),
    [
        (pl.Series(["xx", "yy"]), True, False),
        # A boolean Series only counts when include_series is set.
        (pl.Series([True, False]), False, False),
        (pl.Series([True, False]), True, True),
        (np.array([False, True]), False, True),
        (np.array([False, True]), True, True),
        ([True, False], False, True),
        (["xx", "yy"], False, False),
        # A bare bool is not a sequence.
        (True, False, False),
    ],
)
def test_is_bool_sequence_check(
    sequence: Any,
    include_series: bool,
    expected: bool,
) -> None:
    """Check bool-sequence detection for Series, arrays, lists and scalars."""
    detected = is_bool_sequence(sequence, include_series=include_series)
    assert detected == expected

    if not expected:
        return
    # Anything accepted as a bool sequence must also pass the generic check.
    assert is_sequence(sequence, include_series=include_series)
253

254

255
@pytest.mark.parametrize(
    ("sequence", "include_series", "expected"),
    [
        (pl.Series(["xx", "yy"]), True, False),
        # An integer Series only counts when include_series is set.
        (pl.Series([123, 345]), False, False),
        (pl.Series([123, 345]), True, True),
        (np.array([123, 345]), False, True),
        (np.array([123, 345]), True, True),
        (["xx", "yy"], False, False),
        ([123, 456], False, True),
        # A bare int is not a sequence.
        (123, False, False),
    ],
)
def test_is_int_sequence_check(
    sequence: Any,
    include_series: bool,
    expected: bool,
) -> None:
    """Check int-sequence detection for Series, arrays, lists and scalars."""
    detected = is_int_sequence(sequence, include_series=include_series)
    assert detected == expected

    if not expected:
        return
    # Anything accepted as an int sequence must also pass the generic check.
    assert is_sequence(sequence, include_series=include_series)
276

277

278
@pytest.mark.parametrize(
    ("sequence", "include_series", "expected"),
    [
        # A string Series only counts when include_series is set.
        (pl.Series(["xx", "yy"]), False, False),
        (pl.Series(["xx", "yy"]), True, True),
        (pl.Series([123, 345]), True, False),
        (np.array(["xx", "yy"]), False, True),
        (np.array(["xx", "yy"]), True, True),
        (["xx", "yy"], False, True),
        ([123, 456], False, False),
        # A bare string is not treated as a sequence of strings.
        ("xx", False, False),
    ],
)
def test_is_str_sequence_check(
    sequence: Any,
    include_series: bool,
    expected: bool,
) -> None:
    """Check str-sequence detection for Series, arrays, lists and scalars."""
    detected = is_str_sequence(sequence, include_series=include_series)
    assert detected == expected

    if not expected:
        return
    # Anything accepted as a str sequence must also pass the generic check.
    assert is_sequence(sequence, include_series=include_series)
299

300
Product

Resources

Company