Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_temporal.py
6939 views
1
from __future__ import annotations
2
3
from datetime import date, datetime, time
4
from typing import Any, Literal
5
6
import pytest
7
8
import polars as pl
9
from polars.exceptions import InvalidOperationError, SQLInterfaceError, SQLSyntaxError
10
from polars.testing import assert_frame_equal
11
12
13
def test_date_func() -> None:
14
df = pl.DataFrame(
15
{
16
"date": [
17
date(2021, 3, 15),
18
date(2021, 3, 28),
19
date(2021, 4, 4),
20
],
21
"version": ["0.0.1", "0.7.3", "0.7.4"],
22
}
23
)
24
with pl.SQLContext(df=df, eager=True) as ctx:
25
result = ctx.execute("SELECT date < DATE('2021-03-20') from df")
26
27
expected = pl.DataFrame({"date": [True, False, False]})
28
assert_frame_equal(result, expected)
29
30
result = pl.select(pl.sql_expr("CAST(DATE('2023-03', '%Y-%m') as STRING)"))
31
expected = pl.DataFrame({"literal": ["2023-03-01"]})
32
assert_frame_equal(result, expected)
33
34
with pytest.raises(
35
SQLSyntaxError,
36
match=r"DATE expects 1-2 arguments \(found 0\)",
37
):
38
df.sql("SELECT DATE() FROM self")
39
40
with pytest.raises(InvalidOperationError):
41
df.sql("SELECT DATE('2077-07-07','not_a_valid_strftime_format') FROM self")
42
43
44
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
45
def test_datetime_to_time(time_unit: Literal["ns", "us", "ms"]) -> None:
46
df = pl.DataFrame( # noqa: F841
47
{
48
"dtm": [
49
datetime(2099, 12, 31, 23, 59, 59),
50
datetime(1999, 12, 31, 12, 30, 30),
51
datetime(1969, 12, 31, 1, 1, 1),
52
datetime(1899, 12, 31, 0, 0, 0),
53
],
54
},
55
schema={"dtm": pl.Datetime(time_unit)},
56
)
57
58
res = pl.sql("SELECT dtm::time AS tm from df").collect()
59
assert res["tm"].to_list() == [
60
time(23, 59, 59),
61
time(12, 30, 30),
62
time(1, 1, 1),
63
time(0, 0, 0),
64
]
65
66
67
@pytest.mark.parametrize(
68
("parts", "dtype", "expected"),
69
[
70
(["decade", "decades"], pl.Int32, [202, 202, 200]),
71
(["isoyear"], pl.Int32, [2024, 2020, 2005]),
72
(["year", "y"], pl.Int32, [2024, 2020, 2006]),
73
(["quarter"], pl.Int8, [1, 4, 1]),
74
(["month", "months", "mon", "mons"], pl.Int8, [1, 12, 1]),
75
(["week", "weeks"], pl.Int8, [1, 53, 52]),
76
(["doy"], pl.Int16, [7, 365, 1]),
77
(["isodow"], pl.Int8, [7, 3, 7]),
78
(["dow"], pl.Int8, [0, 3, 0]),
79
(["day", "days", "d"], pl.Int8, [7, 30, 1]),
80
(["hour", "hours", "h"], pl.Int8, [1, 10, 23]),
81
(["minute", "min", "mins", "m"], pl.Int8, [2, 30, 59]),
82
(["second", "seconds", "secs", "sec"], pl.Int8, [3, 45, 59]),
83
(
84
["millisecond", "milliseconds", "ms"],
85
pl.Float64,
86
[3123.456, 45987.654, 59555.555],
87
),
88
(
89
["microsecond", "microseconds", "us"],
90
pl.Float64,
91
[3123456.0, 45987654.0, 59555555.0],
92
),
93
(
94
["nanosecond", "nanoseconds", "ns"],
95
pl.Float64,
96
[3123456000.0, 45987654000.0, 59555555000.0],
97
),
98
(
99
["time"],
100
pl.Time,
101
[time(1, 2, 3, 123456), time(10, 30, 45, 987654), time(23, 59, 59, 555555)],
102
),
103
(
104
["epoch"],
105
pl.Float64,
106
[1704589323.123456, 1609324245.987654, 1136159999.555555],
107
),
108
],
109
)
110
def test_extract(parts: list[str], dtype: pl.DataType, expected: list[Any]) -> None:
111
df = pl.DataFrame(
112
{
113
"dt": [
114
# note: these values test several edge-cases, such as isoyear,
115
# the mon/sun wrapping of dow vs isodow, epoch rounding, etc,
116
# and the results have been validated against postgresql.
117
datetime(2024, 1, 7, 1, 2, 3, 123456),
118
datetime(2020, 12, 30, 10, 30, 45, 987654),
119
datetime(2006, 1, 1, 23, 59, 59, 555555),
120
],
121
}
122
)
123
with pl.SQLContext(frame_data=df, eager=True) as ctx:
124
for part in parts:
125
for fn in (
126
f"EXTRACT({part} FROM dt)",
127
f"DATE_PART('{part}',dt)",
128
):
129
res = ctx.execute(f"SELECT {fn} AS {part} FROM frame_data").to_series()
130
assert res.dtype == dtype
131
assert res.to_list() == expected
132
133
134
def test_extract_errors() -> None:
135
df = pl.DataFrame({"dt": [datetime(2024, 1, 7, 1, 2, 3, 123456)]})
136
137
with pl.SQLContext(frame_data=df, eager=True) as ctx:
138
for part in ("femtosecond", "stroopwafel"):
139
with pytest.raises(
140
SQLSyntaxError,
141
match=f"EXTRACT/DATE_PART does not support '{part}' part",
142
):
143
ctx.execute(f"SELECT EXTRACT({part} FROM dt) FROM frame_data")
144
145
with pytest.raises(
146
SQLSyntaxError,
147
match=r"EXTRACT/DATE_PART does not support 'week\(tuesday\)' part",
148
):
149
ctx.execute("SELECT DATE_PART('week(tuesday)', dt) FROM frame_data")
150
151
152
@pytest.mark.parametrize(
153
("dt", "expected"),
154
[
155
(date(1, 1, 1), [1, 1]),
156
(date(100, 1, 1), [1, 1]),
157
(date(101, 1, 1), [1, 2]),
158
(date(1000, 1, 1), [1, 10]),
159
(date(1001, 1, 1), [2, 11]),
160
(date(1899, 12, 31), [2, 19]),
161
(date(1900, 12, 31), [2, 19]),
162
(date(1901, 1, 1), [2, 20]),
163
(date(2000, 12, 31), [2, 20]),
164
(date(2001, 1, 1), [3, 21]),
165
(date(5555, 5, 5), [6, 56]),
166
(date(9999, 12, 31), [10, 100]),
167
],
168
)
169
def test_extract_century_millennium(dt: date, expected: list[int]) -> None:
170
with pl.SQLContext(frame_data=pl.DataFrame({"dt": [dt]}), eager=True) as ctx:
171
res = ctx.execute(
172
"""
173
SELECT
174
EXTRACT(MILLENNIUM FROM dt) AS c1,
175
DATE_PART('century',dt) AS c2,
176
EXTRACT(millennium FROM dt) AS c3,
177
DATE_PART('CENTURY',dt) AS c4,
178
FROM frame_data
179
"""
180
)
181
assert_frame_equal(
182
left=res,
183
right=pl.DataFrame(
184
data=[expected + expected],
185
schema=["c1", "c2", "c3", "c4"],
186
orient="row",
187
).cast(pl.Int32),
188
)
189
190
191
@pytest.mark.parametrize(
192
("constraint", "expected"),
193
[
194
("dtm >= '2020-12-30T10:30:45.987'", [0, 2]),
195
("dtm::date > '2006-01-01'", [0, 2]),
196
("dtm > '2006-01-01'", [0, 1, 2]), # << implies '2006-01-01 00:00:00'
197
("dtm <= '2006-01-01'", []), # << implies '2006-01-01 00:00:00'
198
("dt != '1960-01-07'", [0, 1]),
199
("tm != '22:10:30'", [0, 2]),
200
("tm >= '11:00:00' AND tm < '22:00'", [0]),
201
("tm >= '11:00' AND tm < '22:00:00.000'", [0]),
202
("tm BETWEEN '12:00' AND '23:59:58'", [0, 1]),
203
("tm BETWEEN '12:00:00' AND '23:59:58'", [0, 1]),
204
("dt BETWEEN '2050-01-01' AND '2100-12-31'", [1]),
205
("dt::datetime = '1960-01-07'", [2]),
206
("dt::datetime = '1960-01-07 00:00'", [2]),
207
("dt::datetime = '1960-01-07 00:00:00'", [2]),
208
("dtm BETWEEN '2020-12-30 10:30:44' AND '2023-01-01 00:00'", [2]),
209
("dt IN ('1960-01-07','2077-01-01','2222-02-22')", [1, 2]),
210
(
211
"dtm = '2024-01-07 01:02:03.123456000' OR dtm = '2020-12-30 10:30:45.987654'",
212
[0, 2],
213
),
214
],
215
)
216
def test_implicit_temporal_strings(constraint: str, expected: list[int]) -> None:
217
df = pl.DataFrame(
218
{
219
"idx": [0, 1, 2],
220
"dtm": [
221
datetime(2024, 1, 7, 1, 2, 3, 123456),
222
datetime(2006, 1, 1, 23, 59, 59, 555555),
223
datetime(2020, 12, 30, 10, 30, 45, 987654),
224
],
225
"dt": [
226
date(2020, 12, 30),
227
date(2077, 1, 1),
228
date(1960, 1, 7),
229
],
230
"tm": [
231
time(17, 30, 45),
232
time(22, 10, 30),
233
time(10, 25, 15),
234
],
235
}
236
)
237
res = df.sql(f"SELECT idx FROM self WHERE {constraint}")
238
actual = sorted(res["idx"])
239
assert actual == expected
240
241
242
@pytest.mark.parametrize(
243
"dtval",
244
[
245
# none of these are valid dates
246
"2020-12-30T10:30:45",
247
"yyyy-mm-dd",
248
"2222-22-22",
249
"10:30:45",
250
"foo",
251
],
252
)
253
def test_implicit_temporal_string_errors(dtval: str) -> None:
254
df = pl.DataFrame({"dt": [date(2020, 12, 30)]})
255
256
with pytest.raises(
257
InvalidOperationError,
258
match="(conversion.*failed)|(cannot compare.*string.*temporal)",
259
):
260
df.sql(f"SELECT * FROM self WHERE dt = '{dtval}'")
261
262
263
def test_strftime() -> None:
264
df = pl.DataFrame(
265
{
266
"dtm": [
267
None,
268
datetime(1980, 9, 30, 1, 25, 50),
269
datetime(2077, 7, 17, 11, 30, 55),
270
],
271
"dt": [date(1978, 7, 5), date(1969, 12, 31), date(2020, 4, 10)],
272
"tm": [time(10, 10, 10), time(22, 33, 55), None],
273
}
274
)
275
res = df.sql(
276
"""
277
SELECT
278
STRFTIME(dtm,'%m.%d.%Y/%T') AS s_dtm,
279
STRFTIME(dt ,'%B %d, %Y') AS s_dt,
280
STRFTIME(tm ,'%S.%M.%H') AS s_tm,
281
FROM self
282
"""
283
)
284
assert res.to_dict(as_series=False) == {
285
"s_dtm": [None, "09.30.1980/01:25:50", "07.17.2077/11:30:55"],
286
"s_dt": ["July 05, 1978", "December 31, 1969", "April 10, 2020"],
287
"s_tm": ["10.10.10", "55.33.22", None],
288
}
289
290
with pytest.raises(
291
SQLSyntaxError,
292
match=r"STRFTIME expects 2 arguments \(found 4\)",
293
):
294
pl.sql_expr("STRFTIME(dtm,'%Y-%m-%d','[extra]','[param]')")
295
296
297
def test_strptime() -> None:
298
df = pl.DataFrame(
299
{
300
"s_dtm": [None, "09.30.1980/01:25:50", "07.17.2077/11:30:55"],
301
"s_dt": ["July 5, 1978", "December 31, 1969", "April 10, 2020"],
302
"s_tm": ["10.10.10", "55.33.22", None],
303
}
304
)
305
res = df.sql(
306
"""
307
SELECT
308
STRPTIME(s_dtm,'%m.%d.%Y/%T') AS dtm,
309
STRPTIME(s_dt ,'%B %d, %Y')::date AS dt,
310
STRPTIME(s_tm ,'%S.%M.%H')::time AS tm
311
FROM self
312
"""
313
)
314
assert res.to_dict(as_series=False) == {
315
"dtm": [
316
None,
317
datetime(1980, 9, 30, 1, 25, 50),
318
datetime(2077, 7, 17, 11, 30, 55),
319
],
320
"dt": [date(1978, 7, 5), date(1969, 12, 31), date(2020, 4, 10)],
321
"tm": [time(10, 10, 10), time(22, 33, 55), None],
322
}
323
with pytest.raises(
324
SQLSyntaxError,
325
match=r"STRPTIME expects 2 arguments \(found 3\)",
326
):
327
pl.sql_expr("STRPTIME(s,'%Y.%m.%d',false) AS dt")
328
329
330
def test_temporal_stings_to_datetime() -> None:
331
df = pl.DataFrame(
332
{
333
"s_dt": ["2077-10-10", "1942-01-08", "2000-07-05"],
334
"s_dtm1": [
335
"1999-12-31 10:30:45",
336
"2020-06-10",
337
"2022-08-07T00:01:02.654321",
338
],
339
"s_dtm2": ["31-12-1999 10:30", "10-06-2020 00:00", "07-08-2022 00:01"],
340
"s_tm": ["02:04:06", "12:30:45.999", "23:59:59.123456"],
341
}
342
)
343
res = df.sql(
344
"""
345
SELECT
346
DATE(s_dt) AS dt1,
347
DATETIME(s_dt) AS dt2,
348
DATETIME(s_dtm1) AS dtm1,
349
DATETIME(s_dtm2,'%d-%m-%Y %H:%M') AS dtm2,
350
TIME(s_tm) AS tm
351
FROM self
352
"""
353
)
354
assert res.schema == {
355
"dt1": pl.Date,
356
"dt2": pl.Datetime("us"),
357
"dtm1": pl.Datetime("us"),
358
"dtm2": pl.Datetime("us"),
359
"tm": pl.Time,
360
}
361
assert res.rows() == [
362
(
363
date(2077, 10, 10),
364
datetime(2077, 10, 10, 0, 0),
365
datetime(1999, 12, 31, 10, 30, 45),
366
datetime(1999, 12, 31, 10, 30),
367
time(2, 4, 6),
368
),
369
(
370
date(1942, 1, 8),
371
datetime(1942, 1, 8, 0, 0),
372
datetime(2020, 6, 10, 0, 0),
373
datetime(2020, 6, 10, 0, 0),
374
time(12, 30, 45, 999000),
375
),
376
(
377
date(2000, 7, 5),
378
datetime(2000, 7, 5, 0, 0),
379
datetime(2022, 8, 7, 0, 1, 2, 654321),
380
datetime(2022, 8, 7, 0, 1),
381
time(23, 59, 59, 123456),
382
),
383
]
384
385
for fn in ("DATE", "TIME", "DATETIME"):
386
with pytest.raises(
387
SQLSyntaxError,
388
match=rf"{fn} expects 1-2 arguments \(found 3\)",
389
):
390
pl.sql_expr(rf"{fn}(s,fmt,misc) AS xyz")
391
392
393
def test_temporal_typed_literals() -> None:
394
res = pl.sql(
395
"""
396
SELECT
397
DATE '2020-12-30' AS dt,
398
TIME '00:01:02' AS tm1,
399
TIME '23:59:59.123456' AS tm2,
400
TIMESTAMP '1930-01-01 12:30:00' AS dtm1,
401
TIMESTAMP '2077-04-27T23:45:30.123456' AS dtm2
402
FROM
403
(VALUES (0)) tbl (x)
404
""",
405
eager=True,
406
)
407
assert res.to_dict(as_series=False) == {
408
"dt": [date(2020, 12, 30)],
409
"tm1": [time(0, 1, 2)],
410
"tm2": [time(23, 59, 59, 123456)],
411
"dtm1": [datetime(1930, 1, 1, 12, 30)],
412
"dtm2": [datetime(2077, 4, 27, 23, 45, 30, 123456)],
413
}
414
415
416
@pytest.mark.parametrize("fn", ["DATE", "TIME", "TIMESTAMP"])
417
def test_typed_literals_errors(fn: str) -> None:
418
with pytest.raises(SQLSyntaxError, match=f"invalid {fn} literal '999'"):
419
pl.sql_expr(f"{fn} '999'")
420
421
422
@pytest.mark.parametrize(
423
("unit", "expected"),
424
[
425
("ms", [1704589323123, 1609324245987, 1136159999555]),
426
("us", [1704589323123456, 1609324245987654, 1136159999555555]),
427
("ns", [1704589323123456000, 1609324245987654000, 1136159999555555000]),
428
],
429
)
430
def test_timestamp_time_unit(unit: str | None, expected: list[int]) -> None:
431
df = pl.DataFrame(
432
{
433
"ts": [
434
datetime(2024, 1, 7, 1, 2, 3, 123456),
435
datetime(2020, 12, 30, 10, 30, 45, 987654),
436
datetime(2006, 1, 1, 23, 59, 59, 555555),
437
],
438
}
439
)
440
precision = {"ms": 3, "us": 6, "ns": 9}
441
442
with pl.SQLContext(frame_data=df, eager=True) as ctx:
443
prec = f"({precision[unit]})" if unit else ""
444
res = ctx.execute(f"SELECT ts::timestamp{prec} FROM frame_data").to_series()
445
446
assert res.dtype == pl.Datetime(time_unit=unit) # type: ignore[arg-type]
447
assert res.to_physical().to_list() == expected
448
449
450
def test_timestamp_time_unit_errors() -> None:
451
df = pl.DataFrame({"ts": [datetime(2024, 1, 7, 1, 2, 3, 123456)]})
452
453
with pl.SQLContext(frame_data=df, eager=True) as ctx:
454
for prec in (0, 15):
455
with pytest.raises(
456
SQLSyntaxError,
457
match=rf"invalid temporal type precision \(expected 1-9, found {prec}\)",
458
):
459
ctx.execute(f"SELECT ts::timestamp({prec}) FROM frame_data")
460
461
with pytest.raises(
462
SQLInterfaceError,
463
match="sql parser error: Expected: literal int, found: - ",
464
):
465
ctx.execute("SELECT ts::timestamp(-3) FROM frame_data")
466
467