Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/namespaces/temporal/test_replace.py
6940 views
1
from __future__ import annotations
2
3
from datetime import date, datetime
4
from typing import TYPE_CHECKING
5
6
import pytest
7
8
import polars as pl
9
from polars.exceptions import ComputeError
10
from polars.testing import assert_frame_equal, assert_series_equal
11
12
if TYPE_CHECKING:
13
from polars._typing import TimeUnit
14
15
16
def test_replace_expr_datetime() -> None:
    """Replace datetime components with values taken from sibling columns.

    Each component column is null in exactly one row; the expected output
    keeps the source datetime's own value (2088 or 8) for that component in
    that row. The last row has a null datetime and is expected to stay null.
    """
    source = datetime(2088, 8, 8, 8, 8, 8, 8)
    frame = pl.DataFrame(
        {
            "dates": [source] * 7 + [None],
            "year": [None, 2, 3, 4, 5, 6, 7, 8],
            "month": [1, None, 3, 4, 5, 6, 7, 8],
            "day": [1, 2, None, 4, 5, 6, 7, 8],
            "hour": [1, 2, 3, None, 5, 6, 7, 8],
            "minute": [1, 2, 3, 4, None, 6, 7, 8],
            "second": [1, 2, 3, 4, 5, None, 7, 8],
            "microsecond": [1, 2, 3, 4, 5, 6, None, 8],
        }
    )

    # The string arguments name the columns that supply each component.
    replaced = pl.col("dates").dt.replace(
        year="year",
        month="month",
        day="day",
        hour="hour",
        minute="minute",
        second="second",
        microsecond="microsecond",
    )
    actual = frame.select(replaced)

    expected_dates = [
        datetime(2088, 1, 1, 1, 1, 1, 1),
        datetime(2, 8, 2, 2, 2, 2, 2),
        datetime(3, 3, 8, 3, 3, 3, 3),
        datetime(4, 4, 4, 8, 4, 4, 4),
        datetime(5, 5, 5, 5, 8, 5, 5),
        datetime(6, 6, 6, 6, 6, 8, 6),
        datetime(7, 7, 7, 7, 7, 7, 8),
        None,
    ]
    assert_frame_equal(actual, pl.DataFrame({"dates": expected_dates}))
67
68
69
def test_replace_expr_date() -> None:
    """Replace date components with values taken from sibling columns.

    A null in a component column leaves that component of the source date
    untouched in the expected output; a null source date stays null.
    """
    source = date(2088, 8, 8)
    frame = pl.DataFrame(
        {
            "dates": [source] * 3 + [None],
            "year": [None, 2, 3, 4],
            "month": [1, None, 3, 4],
            "day": [1, 2, None, 4],
        }
    )

    # The string arguments name the columns that supply each component.
    replaced = pl.col("dates").dt.replace(year="year", month="month", day="day")
    actual = frame.select(replaced)

    expected_dates = [date(2088, 1, 1), date(2, 8, 2), date(3, 3, 8), None]
    assert_frame_equal(actual, pl.DataFrame({"dates": expected_dates}))
88
89
90
def test_replace_int_datetime() -> None:
    """Replace one datetime component at a time with the integer literal 9.

    Also checks that calling `replace()` with no arguments returns the input
    unchanged, and that null inputs stay null in every output column.
    """
    inputs = [
        datetime(1, 1, 1, 1, 1, 1, 1),
        datetime(2, 2, 2, 2, 2, 2, 2),
        datetime(3, 3, 3, 3, 3, 3, 3),
        None,
    ]
    frame = pl.DataFrame({"a": inputs})
    col = pl.col("a")

    actual = frame.select(
        col.dt.replace().alias("no_change"),
        col.dt.replace(year=9).alias("year"),
        col.dt.replace(month=9).alias("month"),
        col.dt.replace(day=9).alias("day"),
        col.dt.replace(hour=9).alias("hour"),
        col.dt.replace(minute=9).alias("minute"),
        col.dt.replace(second=9).alias("second"),
        col.dt.replace(microsecond=9).alias("microsecond"),
    )

    # One expected column per replaced component, in the order selected above.
    expected_columns = {
        "no_change": [
            datetime(1, 1, 1, 1, 1, 1, 1),
            datetime(2, 2, 2, 2, 2, 2, 2),
            datetime(3, 3, 3, 3, 3, 3, 3),
            None,
        ],
        "year": [
            datetime(9, 1, 1, 1, 1, 1, 1),
            datetime(9, 2, 2, 2, 2, 2, 2),
            datetime(9, 3, 3, 3, 3, 3, 3),
            None,
        ],
        "month": [
            datetime(1, 9, 1, 1, 1, 1, 1),
            datetime(2, 9, 2, 2, 2, 2, 2),
            datetime(3, 9, 3, 3, 3, 3, 3),
            None,
        ],
        "day": [
            datetime(1, 1, 9, 1, 1, 1, 1),
            datetime(2, 2, 9, 2, 2, 2, 2),
            datetime(3, 3, 9, 3, 3, 3, 3),
            None,
        ],
        "hour": [
            datetime(1, 1, 1, 9, 1, 1, 1),
            datetime(2, 2, 2, 9, 2, 2, 2),
            datetime(3, 3, 3, 9, 3, 3, 3),
            None,
        ],
        "minute": [
            datetime(1, 1, 1, 1, 9, 1, 1),
            datetime(2, 2, 2, 2, 9, 2, 2),
            datetime(3, 3, 3, 3, 9, 3, 3),
            None,
        ],
        "second": [
            datetime(1, 1, 1, 1, 1, 9, 1),
            datetime(2, 2, 2, 2, 2, 9, 2),
            datetime(3, 3, 3, 3, 3, 9, 3),
            None,
        ],
        "microsecond": [
            datetime(1, 1, 1, 1, 1, 1, 9),
            datetime(2, 2, 2, 2, 2, 2, 9),
            datetime(3, 3, 3, 3, 3, 3, 9),
            None,
        ],
    }
    assert_frame_equal(actual, pl.DataFrame(expected_columns))
164
165
166
def test_replace_int_date() -> None:
    """Replace one date component at a time with the integer literal 9.

    Also checks that calling `replace()` with no arguments returns the input
    unchanged, and that null inputs stay null in every output column.
    """
    inputs = [date(1, 1, 1), date(2, 2, 2), date(3, 3, 3), None]
    frame = pl.DataFrame({"a": inputs})
    col = pl.col("a")

    actual = frame.select(
        col.dt.replace().alias("no_change"),
        col.dt.replace(year=9).alias("year"),
        col.dt.replace(month=9).alias("month"),
        col.dt.replace(day=9).alias("day"),
    )

    # One expected column per replaced component, in the order selected above.
    expected_columns = {
        "no_change": [date(1, 1, 1), date(2, 2, 2), date(3, 3, 3), None],
        "year": [date(9, 1, 1), date(9, 2, 2), date(9, 3, 3), None],
        "month": [date(1, 9, 1), date(2, 9, 2), date(3, 9, 3), None],
        "day": [date(1, 1, 9), date(2, 2, 9), date(3, 3, 9), None],
    }
    assert_frame_equal(actual, pl.DataFrame(expected_columns))
212
213
214
def test_replace_ambiguous() -> None:
    """Check each `ambiguous` strategy when the replaced hour is ambiguous.

    Replacing the hour of 2020-10-25 05:00 (Europe/London) with 1 lands on a
    wall-clock time that the asserted error message describes as ambiguous.
    For "earliest", "latest" and "null" the result must equal a `pl.datetime`
    built with the same strategy; for "raise" a `ComputeError` is expected.
    """
    tz = "Europe/London"
    # Year, month, day, hour of the ambiguous target datetime.
    # (Named `components` rather than `input` to avoid shadowing the builtin.)
    components = [2020, 10, 25, 1]

    # Value to be replaced by an ambiguous hour.
    value = pl.select(pl.datetime(2020, 10, 25, 5, time_zone=tz)).to_series()

    # earliest
    expected = pl.select(
        pl.datetime(*components, time_zone=tz, ambiguous="earliest")
    ).to_series()
    result = value.dt.replace(hour=1, ambiguous="earliest")
    assert_series_equal(result, expected)

    # latest
    expected = pl.select(
        pl.datetime(*components, time_zone=tz, ambiguous="latest")
    ).to_series()
    result = value.dt.replace(hour=1, ambiguous="latest")
    assert_series_equal(result, expected)

    # null
    expected = pl.select(
        pl.datetime(*components, time_zone=tz, ambiguous="null")
    ).to_series()
    result = value.dt.replace(hour=1, ambiguous="null")
    assert_series_equal(result, expected)

    # raise
    with pytest.raises(
        ComputeError,
        match=(
            "datetime '2020-10-25 01:00:00' is ambiguous in time zone 'Europe/London'. "
            "Please use `ambiguous` to tell how it should be localized."
        ),
    ):
        value.dt.replace(hour=1, ambiguous="raise")
253
254
255
def test_replace_datetime_preserve_ns() -> None:
    """Check `dt.replace` results on a nanosecond-precision datetime column.

    Replacing the year keeps the full `.123456789` fractional second, and a
    null replacement value leaves the row unchanged. Where microsecond is
    replaced with 50, the expected fractional second is exactly `.000050`.
    """
    ns_dtype = pl.Datetime("ns")
    original = "2020-01-01T00:00:00.123456789"

    frame = pl.DataFrame(
        {
            "a": pl.Series([original] * 2).cast(ns_dtype),
            "year": [2021, None],
            "microsecond": [50, None],
        }
    )

    col = pl.col("a")
    actual = frame.select(
        year=col.dt.replace(year="year"),
        us=col.dt.replace(microsecond="microsecond"),
    )

    expected_year = pl.Series(
        [
            "2021-01-01T00:00:00.123456789",
            "2020-01-01T00:00:00.123456789",
        ]
    ).cast(ns_dtype)
    expected_us = pl.Series(
        [
            "2020-01-01T00:00:00.000050",
            "2020-01-01T00:00:00.123456789",
        ]
    ).cast(ns_dtype)

    assert_frame_equal(
        actual, pl.DataFrame({"year": expected_year, "us": expected_us})
    )
289
290
291
@pytest.mark.parametrize("tu", ["ms", "us", "ns"])
@pytest.mark.parametrize("tzinfo", [None, "Africa/Nairobi", "America/New_York"])
def test_replace_preserve_tu_and_tz(tu: TimeUnit, tzinfo: str | None) -> None:
    """Check that `dt.replace` keeps the input's time unit and time zone.

    Parameters
    ----------
    tu
        Time unit of the input Datetime dtype.
    tzinfo
        Time zone of the input Datetime dtype; `None` covers the
        time-zone-naive case, hence the `str | None` annotation.
    """
    s = pl.Series(
        [datetime(2024, 1, 1), datetime(2024, 1, 2)],
        dtype=pl.Datetime(time_unit=tu, time_zone=tzinfo),
    )
    result = s.dt.replace(year=2000)
    # Only the dtype metadata is asserted here, not the replaced values.
    assert result.dtype.time_unit == tu  # type: ignore[attr-defined]
    assert result.dtype.time_zone == tzinfo  # type: ignore[attr-defined]
301
302
303
def test_replace_date_invalid_components() -> None:
    """Out-of-range month or day on a Date column raises `ComputeError`.

    The asserted message lists the full (year, month, day) triple that was
    rejected.
    """
    frame = pl.DataFrame({"a": [date(2025, 1, 1)]})
    col = pl.col("a")

    # month
    with pytest.raises(
        ComputeError, match=r"Invalid date components \(2025, 13, 1\) supplied"
    ):
        frame.select(col.dt.replace(month=13))

    # day
    with pytest.raises(
        ComputeError, match=r"Invalid date components \(2025, 1, 32\) supplied"
    ):
        frame.select(col.dt.replace(day=32))
314
315
316
def test_replace_datetime_invalid_date_components() -> None:
    """Out-of-range month or day on a Datetime column raises `ComputeError`.

    The asserted message lists the full (year, month, day) triple that was
    rejected.
    """
    frame = pl.DataFrame({"a": [datetime(2025, 1, 1)]})
    col = pl.col("a")

    # month
    with pytest.raises(
        ComputeError, match=r"Invalid date components \(2025, 13, 1\) supplied"
    ):
        frame.select(col.dt.replace(month=13))

    # day
    with pytest.raises(
        ComputeError, match=r"Invalid date components \(2025, 1, 32\) supplied"
    ):
        frame.select(col.dt.replace(day=32))
327
328
329
def test_replace_datetime_invalid_time_components() -> None:
    """Out-of-range time components on a Datetime column raise `ComputeError`.

    The asserted message lists four time components. For the microsecond
    case the input 2_000_000 appears as 2000000000 in the message
    (presumably reported in nanoseconds -- confirm against the
    implementation).
    """
    frame = pl.DataFrame({"a": [datetime(2025, 1, 1)]})
    col = pl.col("a")

    # hour
    with pytest.raises(
        ComputeError, match=r"Invalid time components \(25, 0, 0, 0\) supplied"
    ):
        frame.select(col.dt.replace(hour=25))

    # minute
    with pytest.raises(
        ComputeError, match=r"Invalid time components \(0, 61, 0, 0\) supplied"
    ):
        frame.select(col.dt.replace(minute=61))

    # second
    with pytest.raises(
        ComputeError, match=r"Invalid time components \(0, 0, 61, 0\) supplied"
    ):
        frame.select(col.dt.replace(second=61))

    # microsecond
    with pytest.raises(
        ComputeError,
        match=r"Invalid time components \(0, 0, 0, 2000000000\) supplied",
    ):
        frame.select(col.dt.replace(microsecond=2_000_000))
356
357
358
def test_replace_unequal_length_22018() -> None:
    """A replacement Series of a different length raises `ShapeError`.

    The datetime Series has two rows and the `year` Series has three.
    """
    dates = pl.Series([datetime(2088, 8, 8, 8, 8, 8, 8)] * 2)
    years = pl.Series([2000, 2001, 2002])

    with pytest.raises(pl.exceptions.ShapeError):
        dates.dt.replace(year=years)
363
364
365
def test_replace_broadcast_self() -> None:
    """A single-row datetime literal is broadcast against component columns.

    The source is a one-element Series wrapped in `pl.lit`, while every
    component column has eight rows; the expected output has eight rows.
    A null component keeps the source's own value (2088 or 8) in that row.
    """
    frame = pl.DataFrame(
        {
            "year": [None, 2, 3, 4, 5, 6, 7, 8],
            "month": [1, None, 3, 4, 5, 6, 7, 8],
            "day": [1, 2, None, 4, 5, 6, 7, 8],
            "hour": [1, 2, 3, None, 5, 6, 7, 8],
            "minute": [1, 2, 3, 4, None, 6, 7, 8],
            "second": [1, 2, 3, 4, 5, None, 7, 8],
            "microsecond": [1, 2, 3, 4, 5, 6, None, 8],
        }
    )

    # One-row source; the result column takes its name, "dates".
    source = pl.lit(pl.Series("dates", [datetime(2088, 8, 8, 8, 8, 8, 8)]))
    replaced = source.dt.replace(
        year="year",
        month="month",
        day="day",
        hour="hour",
        minute="minute",
        second="second",
        microsecond="microsecond",
    )
    actual = frame.select(replaced)

    expected_dates = [
        datetime(2088, 1, 1, 1, 1, 1, 1),
        datetime(2, 8, 2, 2, 2, 2, 2),
        datetime(3, 3, 8, 3, 3, 3, 3),
        datetime(4, 4, 4, 8, 4, 4, 4),
        datetime(5, 5, 5, 5, 8, 5, 5),
        datetime(6, 6, 6, 6, 6, 8, 6),
        datetime(7, 7, 7, 7, 7, 7, 8),
        datetime(8, 8, 8, 8, 8, 8, 8),
    ]
    assert_frame_equal(actual, pl.DataFrame({"dates": expected_dates}))
406
407