Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/functions/range/test_date_range.py
6939 views
1
from __future__ import annotations
2
3
from datetime import date, datetime
4
from typing import TYPE_CHECKING
5
6
import pandas as pd
7
import pytest
8
9
import polars as pl
10
from polars.exceptions import ComputeError, InvalidOperationError, ShapeError
11
from polars.testing import assert_frame_equal, assert_series_equal
12
13
if TYPE_CHECKING:
14
from polars._typing import ClosedInterval
15
16
17
def test_date_range() -> None:
    """Check a monthly `date_range` between two `date` bounds."""
    # If low/high are both dates, the range is also a date range _iff_ the
    # granularity is >= 1d.
    first, last = date(2022, 1, 1), date(2022, 3, 1)
    months = pl.date_range(first, last, "1mo", eager=True)
    assert months.to_list() == [first, date(2022, 2, 1), last]
21
22
23
def test_date_range_invalid_time_unit() -> None:
    """Assert that the interval ``"1X"`` raises `InvalidOperationError`."""
    bounds = {"start": date(2021, 12, 16), "end": date(2021, 12, 18)}
    with pytest.raises(InvalidOperationError, match="'x' not supported"):
        pl.date_range(**bounds, interval="1X", eager=True)
31
32
33
def test_date_range_lazy_with_literals() -> None:
    """Build a 987-day `date_ranges` column from literal bounds and verify it.

    The resulting list column is compared against hard-coded dates and against
    `pandas.date_range` evaluated on the same bounds.
    """
    # Shared by polars and pandas so both libraries are evaluated on
    # identical bounds.
    start, end = date(2000, 1, 1), date(2023, 8, 31)

    df = pl.DataFrame({"misc": ["x"]}).with_columns(
        pl.date_ranges(start, end, interval="987d", eager=False).alias("dts")
    )

    expected_dates = [
        date(2000, 1, 1),
        date(2002, 9, 14),
        date(2005, 5, 28),
        date(2008, 2, 9),
        date(2010, 10, 23),
        date(2013, 7, 6),
        date(2016, 3, 19),
        date(2018, 12, 1),
        date(2021, 8, 14),
    ]
    assert df.rows() == [("x", expected_dates)]

    # Cross-check the polars output against pandas for the same range.
    pandas_dates = pd.date_range(start, end, freq="987d").date.tolist()
    assert df.rows()[0][1] == pandas_dates
64
65
66
@pytest.mark.parametrize("low", ["start", pl.col("start")])
@pytest.mark.parametrize("high", ["stop", pl.col("stop")])
def test_date_range_lazy_with_expressions(
    low: str | pl.Expr, high: str | pl.Expr
) -> None:
    """Run `date_ranges` with bounds given as column names or expressions.

    Covers a single-row LazyFrame with 678-day steps and a two-row DataFrame
    with daily steps.
    """
    # Part 1: single-row LazyFrame, 678-day interval.
    first, last = date(2015, 6, 30), date(2022, 12, 31)
    lazy_ranges = pl.date_ranges(low, high, interval="678d", eager=False)
    lf = pl.LazyFrame({"start": [first], "stop": [last]})
    result = lf.with_columns(lazy_ranges.alias("dts"))

    expected_steps = [
        date(2015, 6, 30),
        date(2017, 5, 8),
        date(2019, 3, 17),
        date(2021, 1, 23),
        date(2022, 12, 2),
    ]
    assert result.collect().rows() == [(first, last, expected_steps)]

    # Part 2: two-row DataFrame, daily interval.
    starts = [date(2000, 1, 1), date(2022, 6, 1)]
    stops = [date(2000, 1, 2), date(2022, 6, 2)]
    df = pl.DataFrame({"start": starts, "stop": stops})

    result_df = df.with_columns(pl.date_ranges(low, high, interval="1d").alias("dts"))

    assert result_df.to_dict(as_series=False) == {
        "start": [date(2000, 1, 1), date(2022, 6, 1)],
        "stop": [date(2000, 1, 2), date(2022, 6, 2)],
        "dts": [
            [date(2000, 1, 1), date(2000, 1, 2)],
            [date(2022, 6, 1), date(2022, 6, 2)],
        ],
    }
113
114
115
def test_date_ranges_single_row_lazy_7110() -> None:
    """Check `date_ranges` on a one-row frame with ``from``/``to`` columns."""
    base_columns = {
        "name": ["A"],
        "from": [date(2020, 1, 1)],
        "to": [date(2020, 1, 2)],
    }
    daily = pl.date_ranges(
        start=pl.col("from"),
        end=pl.col("to"),
        interval="1d",
        eager=False,
    )
    result = pl.DataFrame(base_columns).with_columns(daily.alias("date_range"))

    # Expected frame: the input columns plus the generated list column.
    expected = pl.DataFrame(
        {
            **base_columns,
            "date_range": [[date(2020, 1, 1), date(2020, 1, 2)]],
        }
    )
    assert_frame_equal(result, expected)
140
141
142
@pytest.mark.parametrize(
    ("closed", "expected_values"),
    [
        ("right", [date(2020, 2, 29), date(2020, 3, 31)]),
        ("left", [date(2020, 1, 31), date(2020, 2, 29)]),
        ("none", [date(2020, 2, 29)]),
        ("both", [date(2020, 1, 31), date(2020, 2, 29), date(2020, 3, 31)]),
    ],
)
def test_date_range_end_of_month_5441(
    closed: ClosedInterval, expected_values: list[date]
) -> None:
    """Check monthly steps from 2020-01-31 to 2020-03-31 for each `closed` mode."""
    result = pl.date_range(
        date(2020, 1, 31),
        date(2020, 3, 31),
        interval="1mo",
        closed=closed,
        eager=True,
    )
    assert_series_equal(result, pl.Series("literal", expected_values))
159
160
161
def test_date_range_name() -> None:
    """Assert the output names of eager and lazy `date_range` calls.

    The eager call on literals must be named ``"literal"``; the lazy call whose
    start comes from a Series named ``"left"`` must be named ``"left"``.
    """
    eager_series = pl.date_range(date(2020, 1, 1), date(2020, 1, 3), eager=True)
    assert eager_series.name == "literal"

    named_start = pl.Series("left", [date(2020, 1, 1)])
    lazy_expr = pl.date_range(
        pl.lit(named_start).first(), date(2020, 1, 3), eager=False
    )
    lazy_series = pl.select(lazy_expr).to_series()
    assert lazy_series.name == "left"
170
171
172
def test_date_ranges_eager() -> None:
    """Check eager `date_ranges` on two-element start/end Series."""
    starts = pl.Series("start", [date(2022, 1, 1), date(2022, 1, 2)])
    ends = pl.Series("end", [date(2022, 1, 4), date(2022, 1, 3)])

    # One list of daily dates per (start, end) pair, named after the start Series.
    first_row = [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3), date(2022, 1, 4)]
    second_row = [date(2022, 1, 2), date(2022, 1, 3)]
    expected = pl.Series("start", [first_row, second_row])

    assert_series_equal(pl.date_ranges(starts, ends, eager=True), expected)
186
187
188
def test_date_range_eager() -> None:
    """Check eager `date_range` over three consecutive days."""
    days = [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)]
    result = pl.date_range(days[0], days[-1], eager=True)
    assert_series_equal(result, pl.Series("literal", days))
194
195
196
def test_date_range_input_shape_empty() -> None:
    """Assert `ShapeError` when either `date_range` bound is an empty Series."""
    empty = pl.Series(dtype=pl.Datetime)
    single = pl.Series([datetime(2022, 1, 2)])

    # Every combination involving at least one empty bound must fail.
    for start, end in ((empty, single), (single, empty), (empty, empty)):
        with pytest.raises(ShapeError):
            pl.date_range(start, end, eager=True)
206
207
208
def test_date_range_input_shape_multiple_values() -> None:
    """Assert `ShapeError` when either `date_range` bound holds two values."""
    single = pl.Series([datetime(2022, 1, 2)])
    multiple = pl.Series([datetime(2022, 1, 3), datetime(2022, 1, 4)])

    # Every combination involving at least one multi-value bound must fail.
    failing_pairs = (
        (multiple, single),
        (single, multiple),
        (multiple, multiple),
    )
    for start, end in failing_pairs:
        with pytest.raises(ShapeError):
            pl.date_range(start, end, eager=True)
218
219
220
def test_date_range_start_later_than_end() -> None:
    """Expect an empty `Date` Series when the start lies after the end."""
    late_start, early_end = date(2000, 3, 20), date(2000, 3, 5)
    empty_dates = pl.Series("literal", dtype=pl.Date)
    assert_series_equal(
        pl.date_range(late_start, early_end, eager=True),
        empty_dates,
    )
224
225
226
def test_date_range_24h_interval_raises() -> None:
    """Assert that a ``"24h"`` interval raises `ComputeError` in `date_range`."""
    expected_message = "`interval` input for `date_range` must consist of full days"
    with pytest.raises(ComputeError, match=expected_message):
        pl.date_range(date(2022, 1, 1), date(2022, 1, 3), interval="24h", eager=True)
232
233
234
def test_long_date_range_12461() -> None:
    """Check a daily range from 1900 to 2300: both endpoints and one-day gaps."""
    first, last = date(1900, 1, 1), date(2300, 1, 1)
    result = pl.date_range(first, last, "1d", eager=True)

    assert result[0] == first
    assert result[-1] == last

    # Skip the leading null produced by `diff` before checking the gaps.
    day_gaps = result.diff()[1:].dt.total_days()
    assert (day_gaps == 1).all()
239
240
241
def test_date_ranges_broadcasting() -> None:
    """Check `date_ranges` when one bound is a column and the other a literal."""
    jan1, jan2, jan3 = (date(2021, 1, day) for day in (1, 2, 3))
    df = pl.DataFrame({"dates": [jan1, jan2, jan3]})

    # Literal end with column start, then literal start with column end.
    to_fixed_end = pl.date_ranges(start="dates", end=jan3).alias("end")
    from_fixed_start = pl.date_ranges(start=jan1, end="dates").alias("start")
    result = df.select(to_fixed_end, from_fixed_start)

    expected = pl.DataFrame(
        {
            "end": [[jan1, jan2, jan3], [jan2, jan3], [jan3]],
            "start": [[jan1], [jan1, jan2], [jan1, jan2, jan3]],
        }
    )
    assert_frame_equal(result, expected)
262
263
264
def test_date_ranges_broadcasting_fail() -> None:
    """Assert `ComputeError` for start/end Series of lengths 3 and 2."""
    three_starts = pl.Series([date(2021, 1, 1), date(2021, 1, 2), date(2021, 1, 3)])
    two_ends = pl.Series([date(2021, 1, 2), date(2021, 1, 3)])

    length_mismatch = r"lengths of `start` \(3\) and `end` \(2\) do not match"
    with pytest.raises(ComputeError, match=length_mismatch):
        pl.date_ranges(three_starts, two_ends, eager=True)
272
273
274
def test_date_range_datetime_input() -> None:
    """Check that `date_range` with `datetime` bounds yields the expected dates."""
    noon_start = datetime(2022, 1, 1, 12)
    midnight_end = datetime(2022, 1, 3)
    result = pl.date_range(noon_start, midnight_end, interval="1d", eager=True)

    expected_days = [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)]
    assert_series_equal(result, pl.Series("literal", expected_days))
282
283
284
def test_date_ranges_datetime_input() -> None:
    """Check that `date_ranges` with `datetime` bounds yields one list of dates."""
    noon_start = datetime(2022, 1, 1, 12)
    midnight_end = datetime(2022, 1, 3)
    result = pl.date_ranges(noon_start, midnight_end, interval="1d", eager=True)

    expected_days = [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)]
    assert_series_equal(result, pl.Series("literal", [expected_days]))
292
293
294
def test_date_range_with_subclass_18470_18447() -> None:
    """Check `datetime_range` with `date`/`datetime` subclass instances as bounds."""

    class MyAmazingDate(date):
        """Trivial `date` subclass used as the range start."""

    class MyAmazingDatetime(datetime):
        """Trivial `datetime` subclass used as the range end."""

    range_start = MyAmazingDate(2020, 1, 1)
    range_end = MyAmazingDatetime(2020, 1, 2)
    result = pl.datetime_range(range_start, range_end, eager=True)

    expected = pl.Series("literal", [datetime(2020, 1, 1), datetime(2020, 1, 2)])
    assert_series_equal(result, expected)
306
307