Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_temporal.py
8406 views
1
from __future__ import annotations
2
3
from datetime import date, datetime, time
4
from typing import Any, Literal
5
6
import pytest
7
8
import polars as pl
9
from polars.exceptions import InvalidOperationError, SQLInterfaceError, SQLSyntaxError
10
from polars.testing import assert_frame_equal
11
12
13
def test_date_func() -> None:
14
df = pl.DataFrame(
15
{
16
"date": [
17
date(2021, 3, 15),
18
date(2021, 3, 28),
19
date(2021, 4, 4),
20
],
21
"version": ["0.0.1", "0.7.3", "0.7.4"],
22
}
23
)
24
with pl.SQLContext(df=df, eager=True) as ctx:
25
result = ctx.execute("SELECT date < DATE('2021-03-20') FROM df")
26
27
expected = pl.DataFrame({"date": [True, False, False]})
28
assert_frame_equal(result, expected)
29
30
result = pl.select(pl.sql_expr("CAST(DATE('2023-03', '%Y-%m') as STRING)"))
31
expected = pl.DataFrame({"literal": ["2023-03-01"]})
32
assert_frame_equal(result, expected)
33
34
with pytest.raises(
35
SQLSyntaxError,
36
match=r"DATE expects 1-2 arguments \(found 0\)",
37
):
38
df.sql("SELECT DATE() FROM self")
39
40
with pytest.raises(InvalidOperationError):
41
df.sql("SELECT DATE('2077-07-07','not_a_valid_strftime_format') FROM self")
42
43
44
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
45
def test_datetime_to_time(time_unit: Literal["ns", "us", "ms"]) -> None:
46
s = pl.Series(
47
name="dtm",
48
values=[
49
datetime(2099, 12, 31, 23, 59, 59),
50
datetime(1999, 12, 31, 12, 30, 30),
51
datetime(1969, 12, 31, 1, 1, 1),
52
datetime(1899, 12, 31, 0, 0, 0),
53
],
54
dtype=pl.Datetime(time_unit),
55
)
56
df = s.to_frame()
57
lf = df.lazy()
58
59
select = "SELECT dtm::time AS tm"
60
for res in (
61
pl.sql(f"{select} FROM df").collect(),
62
pl.sql(f"{select} FROM s").collect(),
63
lf.sql(f"{select} FROM self").collect(),
64
df.sql(f"{select} FROM self"),
65
s.sql(f"{select} FROM self"),
66
):
67
assert isinstance(res, pl.DataFrame)
68
assert res["tm"].to_list() == [
69
time(23, 59, 59),
70
time(12, 30, 30),
71
time(1, 1, 1),
72
time(0, 0, 0),
73
]
74
75
76
@pytest.mark.parametrize(
77
("parts", "dtype", "expected"),
78
[
79
(["decade", "decades"], pl.Int32, [202, 202, 200]),
80
(["isoyear"], pl.Int32, [2024, 2020, 2005]),
81
(["year", "y"], pl.Int32, [2024, 2020, 2006]),
82
(["quarter"], pl.Int8, [1, 4, 1]),
83
(["month", "months", "mon", "mons"], pl.Int8, [1, 12, 1]),
84
(["week", "weeks"], pl.Int8, [1, 53, 52]),
85
(["doy"], pl.Int16, [7, 365, 1]),
86
(["isodow"], pl.Int8, [7, 3, 7]),
87
(["dow"], pl.Int8, [0, 3, 0]),
88
(["day", "days", "d"], pl.Int8, [7, 30, 1]),
89
(["hour", "hours", "h"], pl.Int8, [1, 10, 23]),
90
(["minute", "min", "mins", "m"], pl.Int8, [2, 30, 59]),
91
(["second", "seconds", "secs", "sec"], pl.Int8, [3, 45, 59]),
92
(
93
["millisecond", "milliseconds", "ms"],
94
pl.Float64,
95
[3123.456, 45987.654, 59555.555],
96
),
97
(
98
["microsecond", "microseconds", "us"],
99
pl.Float64,
100
[3123456.0, 45987654.0, 59555555.0],
101
),
102
(
103
["nanosecond", "nanoseconds", "ns"],
104
pl.Float64,
105
[3123456000.0, 45987654000.0, 59555555000.0],
106
),
107
(
108
["time"],
109
pl.Time,
110
[time(1, 2, 3, 123456), time(10, 30, 45, 987654), time(23, 59, 59, 555555)],
111
),
112
(
113
["epoch"],
114
pl.Float64,
115
[1704589323.123456, 1609324245.987654, 1136159999.555555],
116
),
117
],
118
)
119
def test_extract(parts: list[str], dtype: pl.DataType, expected: list[Any]) -> None:
120
df = pl.DataFrame(
121
{
122
"dt": [
123
# note: these values test several edge-cases, such as isoyear,
124
# the mon/sun wrapping of dow vs isodow, epoch rounding, etc,
125
# and the results have been validated against postgresql.
126
datetime(2024, 1, 7, 1, 2, 3, 123456),
127
datetime(2020, 12, 30, 10, 30, 45, 987654),
128
datetime(2006, 1, 1, 23, 59, 59, 555555),
129
],
130
}
131
)
132
with pl.SQLContext(frame_data=df, eager=True) as ctx:
133
for part in parts:
134
for fn in (
135
f"EXTRACT({part} FROM dt)",
136
f"DATE_PART('{part}',dt)",
137
):
138
res = ctx.execute(f"SELECT {fn} AS {part} FROM frame_data").to_series()
139
assert res.dtype == dtype
140
assert res.to_list() == expected
141
142
143
def test_extract_errors() -> None:
144
df = pl.DataFrame({"dt": [datetime(2024, 1, 7, 1, 2, 3, 123456)]})
145
146
with pl.SQLContext(frame_data=df, eager=True) as ctx:
147
for part in ("femtosecond", "stroopwafel"):
148
with pytest.raises(
149
SQLSyntaxError,
150
match=f"EXTRACT/DATE_PART does not support '{part}' part",
151
):
152
ctx.execute(f"SELECT EXTRACT({part} FROM dt) FROM frame_data")
153
154
with pytest.raises(
155
SQLSyntaxError,
156
match=r"EXTRACT/DATE_PART does not support 'week\(tuesday\)' part",
157
):
158
ctx.execute("SELECT DATE_PART('week(tuesday)', dt) FROM frame_data")
159
160
161
@pytest.mark.parametrize(
162
("dt", "expected"),
163
[
164
(date(1, 1, 1), [1, 1]),
165
(date(100, 1, 1), [1, 1]),
166
(date(101, 1, 1), [1, 2]),
167
(date(1000, 1, 1), [1, 10]),
168
(date(1001, 1, 1), [2, 11]),
169
(date(1899, 12, 31), [2, 19]),
170
(date(1900, 12, 31), [2, 19]),
171
(date(1901, 1, 1), [2, 20]),
172
(date(2000, 12, 31), [2, 20]),
173
(date(2001, 1, 1), [3, 21]),
174
(date(5555, 5, 5), [6, 56]),
175
(date(9999, 12, 31), [10, 100]),
176
],
177
)
178
def test_extract_century_millennium(dt: date, expected: list[int]) -> None:
179
with pl.SQLContext(frame_data=pl.DataFrame({"dt": [dt]}), eager=True) as ctx:
180
res = ctx.execute(
181
"""
182
SELECT
183
EXTRACT(MILLENNIUM FROM dt) AS c1,
184
DATE_PART('century',dt) AS c2,
185
EXTRACT(millennium FROM dt) AS c3,
186
DATE_PART('CENTURY',dt) AS c4,
187
FROM frame_data
188
"""
189
)
190
assert_frame_equal(
191
left=res,
192
right=pl.DataFrame(
193
data=[expected + expected],
194
schema=["c1", "c2", "c3", "c4"],
195
orient="row",
196
).cast(pl.Int32),
197
)
198
199
200
@pytest.mark.parametrize(
201
("constraint", "expected"),
202
[
203
("dtm >= '2020-12-30T10:30:45.987'", [0, 2]),
204
("dtm::date > '2006-01-01'", [0, 2]),
205
("dtm > '2006-01-01'", [0, 1, 2]), # << implies '2006-01-01 00:00:00'
206
("dtm <= '2006-01-01'", []), # << implies '2006-01-01 00:00:00'
207
("dt != '1960-01-07'", [0, 1]),
208
("tm != '22:10:30'", [0, 2]),
209
("tm >= '11:00:00' AND tm < '22:00'", [0]),
210
("tm >= '11:00' AND tm < '22:00:00.000'", [0]),
211
("tm BETWEEN '12:00' AND '23:59:58'", [0, 1]),
212
("tm BETWEEN '12:00:00' AND '23:59:58'", [0, 1]),
213
("dt BETWEEN '2050-01-01' AND '2100-12-31'", [1]),
214
("dt::datetime = '1960-01-07'", [2]),
215
("dt::datetime = '1960-01-07 00:00'", [2]),
216
("dt::datetime = '1960-01-07 00:00:00'", [2]),
217
("dtm BETWEEN '2020-12-30 10:30:44' AND '2023-01-01 00:00'", [2]),
218
("dt IN ('1960-01-07','2077-01-01','2222-02-22')", [1, 2]),
219
(
220
"dtm = '2024-01-07 01:02:03.123456000' OR dtm = '2020-12-30 10:30:45.987654'",
221
[0, 2],
222
),
223
],
224
)
225
def test_implicit_temporal_strings(constraint: str, expected: list[int]) -> None:
226
df = pl.DataFrame(
227
{
228
"idx": [0, 1, 2],
229
"dtm": [
230
datetime(2024, 1, 7, 1, 2, 3, 123456),
231
datetime(2006, 1, 1, 23, 59, 59, 555555),
232
datetime(2020, 12, 30, 10, 30, 45, 987654),
233
],
234
"dt": [
235
date(2020, 12, 30),
236
date(2077, 1, 1),
237
date(1960, 1, 7),
238
],
239
"tm": [
240
time(17, 30, 45),
241
time(22, 10, 30),
242
time(10, 25, 15),
243
],
244
}
245
)
246
res = df.sql(f"SELECT idx FROM self WHERE {constraint}")
247
actual = sorted(res["idx"])
248
assert actual == expected
249
250
251
@pytest.mark.parametrize(
252
"dtval",
253
[
254
# none of these are valid dates
255
"2020-12-30T10:30:45",
256
"yyyy-mm-dd",
257
"2222-22-22",
258
"10:30:45",
259
"foo",
260
],
261
)
262
def test_implicit_temporal_string_errors(dtval: str) -> None:
263
df = pl.DataFrame({"dt": [date(2020, 12, 30)]})
264
265
with pytest.raises(
266
InvalidOperationError,
267
match=r"(conversion.*failed)|(cannot compare.*string.*temporal)",
268
):
269
df.sql(f"SELECT * FROM self WHERE dt = '{dtval}'")
270
271
272
def test_strftime() -> None:
273
df = pl.DataFrame(
274
{
275
"dtm": [
276
None,
277
datetime(1980, 9, 30, 1, 25, 50),
278
datetime(2077, 7, 17, 11, 30, 55),
279
],
280
"dt": [date(1978, 7, 5), date(1969, 12, 31), date(2020, 4, 10)],
281
"tm": [time(10, 10, 10), time(22, 33, 55), None],
282
}
283
)
284
res = df.sql(
285
"""
286
SELECT
287
STRFTIME(dtm,'%m.%d.%Y/%T') AS s_dtm,
288
STRFTIME(dt ,'%B %d, %Y') AS s_dt,
289
STRFTIME(tm ,'%S.%M.%H') AS s_tm,
290
FROM self
291
"""
292
)
293
assert res.to_dict(as_series=False) == {
294
"s_dtm": [None, "09.30.1980/01:25:50", "07.17.2077/11:30:55"],
295
"s_dt": ["July 05, 1978", "December 31, 1969", "April 10, 2020"],
296
"s_tm": ["10.10.10", "55.33.22", None],
297
}
298
299
with pytest.raises(
300
SQLSyntaxError,
301
match=r"STRFTIME expects 2 arguments \(found 4\)",
302
):
303
pl.sql_expr("STRFTIME(dtm,'%Y-%m-%d','[extra]','[param]')")
304
305
306
def test_strptime() -> None:
307
df = pl.DataFrame(
308
{
309
"s_dtm": [None, "09.30.1980/01:25:50", "07.17.2077/11:30:55"],
310
"s_dt": ["July 5, 1978", "December 31, 1969", "April 10, 2020"],
311
"s_tm": ["10.10.10", "55.33.22", None],
312
}
313
)
314
res = df.sql(
315
"""
316
SELECT
317
STRPTIME(s_dtm,'%m.%d.%Y/%T') AS dtm,
318
STRPTIME(s_dt ,'%B %d, %Y')::date AS dt,
319
STRPTIME(s_tm ,'%S.%M.%H')::time AS tm
320
FROM self
321
"""
322
)
323
assert res.to_dict(as_series=False) == {
324
"dtm": [
325
None,
326
datetime(1980, 9, 30, 1, 25, 50),
327
datetime(2077, 7, 17, 11, 30, 55),
328
],
329
"dt": [date(1978, 7, 5), date(1969, 12, 31), date(2020, 4, 10)],
330
"tm": [time(10, 10, 10), time(22, 33, 55), None],
331
}
332
with pytest.raises(
333
SQLSyntaxError,
334
match=r"STRPTIME expects 2 arguments \(found 3\)",
335
):
336
pl.sql_expr("STRPTIME(s,'%Y.%m.%d',false) AS dt")
337
338
339
def test_temporal_stings_to_datetime() -> None:
340
df = pl.DataFrame(
341
{
342
"s_dt": ["2077-10-10", "1942-01-08", "2000-07-05"],
343
"s_dtm1": [
344
"1999-12-31 10:30:45",
345
"2020-06-10",
346
"2022-08-07T00:01:02.654321",
347
],
348
"s_dtm2": ["31-12-1999 10:30", "10-06-2020 00:00", "07-08-2022 00:01"],
349
"s_tm": ["02:04:06", "12:30:45.999", "23:59:59.123456"],
350
}
351
)
352
res = df.sql(
353
"""
354
SELECT
355
DATE(s_dt) AS dt1,
356
DATETIME(s_dt) AS dt2,
357
DATETIME(s_dtm1) AS dtm1,
358
DATETIME(s_dtm2,'%d-%m-%Y %H:%M') AS dtm2,
359
TIME(s_tm) AS tm
360
FROM self
361
"""
362
)
363
assert res.schema == {
364
"dt1": pl.Date,
365
"dt2": pl.Datetime("us"),
366
"dtm1": pl.Datetime("us"),
367
"dtm2": pl.Datetime("us"),
368
"tm": pl.Time,
369
}
370
assert res.rows() == [
371
(
372
date(2077, 10, 10),
373
datetime(2077, 10, 10, 0, 0),
374
datetime(1999, 12, 31, 10, 30, 45),
375
datetime(1999, 12, 31, 10, 30),
376
time(2, 4, 6),
377
),
378
(
379
date(1942, 1, 8),
380
datetime(1942, 1, 8, 0, 0),
381
datetime(2020, 6, 10, 0, 0),
382
datetime(2020, 6, 10, 0, 0),
383
time(12, 30, 45, 999000),
384
),
385
(
386
date(2000, 7, 5),
387
datetime(2000, 7, 5, 0, 0),
388
datetime(2022, 8, 7, 0, 1, 2, 654321),
389
datetime(2022, 8, 7, 0, 1),
390
time(23, 59, 59, 123456),
391
),
392
]
393
394
for fn in ("DATE", "TIME", "DATETIME"):
395
with pytest.raises(
396
SQLSyntaxError,
397
match=rf"{fn} expects 1-2 arguments \(found 3\)",
398
):
399
pl.sql_expr(rf"{fn}(s,fmt,misc) AS xyz")
400
401
402
def test_temporal_typed_literals() -> None:
403
res = pl.sql(
404
"""
405
SELECT
406
DATE '2020-12-30' AS dt,
407
TIME '00:01:02' AS tm1,
408
TIME '23:59:59.123456' AS tm2,
409
TIMESTAMP '1930-01-01 12:30:00' AS dtm1,
410
TIMESTAMP '2077-04-27T23:45:30.123456' AS dtm2
411
FROM
412
(VALUES (0)) tbl (x)
413
""",
414
eager=True,
415
)
416
assert res.to_dict(as_series=False) == {
417
"dt": [date(2020, 12, 30)],
418
"tm1": [time(0, 1, 2)],
419
"tm2": [time(23, 59, 59, 123456)],
420
"dtm1": [datetime(1930, 1, 1, 12, 30)],
421
"dtm2": [datetime(2077, 4, 27, 23, 45, 30, 123456)],
422
}
423
424
425
@pytest.mark.parametrize("fn", ["DATE", "TIME", "TIMESTAMP"])
426
def test_typed_literals_errors(fn: str) -> None:
427
with pytest.raises(SQLSyntaxError, match=f"invalid {fn} literal '999'"):
428
pl.sql_expr(f"{fn} '999'")
429
430
431
@pytest.mark.parametrize(
432
("unit", "expected"),
433
[
434
("ms", [1704589323123, 1609324245987, 1136159999555]),
435
("us", [1704589323123456, 1609324245987654, 1136159999555555]),
436
("ns", [1704589323123456000, 1609324245987654000, 1136159999555555000]),
437
],
438
)
439
def test_timestamp_time_unit(unit: str | None, expected: list[int]) -> None:
440
df = pl.DataFrame(
441
{
442
"ts": [
443
datetime(2024, 1, 7, 1, 2, 3, 123456),
444
datetime(2020, 12, 30, 10, 30, 45, 987654),
445
datetime(2006, 1, 1, 23, 59, 59, 555555),
446
],
447
}
448
)
449
precision = {"ms": 3, "us": 6, "ns": 9}
450
451
with pl.SQLContext(frame_data=df, eager=True) as ctx:
452
prec = f"({precision[unit]})" if unit else ""
453
res = ctx.execute(f"SELECT ts::timestamp{prec} FROM frame_data").to_series()
454
455
assert res.dtype == pl.Datetime(time_unit=unit) # type: ignore[arg-type]
456
assert res.to_physical().to_list() == expected
457
458
459
def test_timestamp_time_unit_errors() -> None:
460
df = pl.DataFrame({"ts": [datetime(2024, 1, 7, 1, 2, 3, 123456)]})
461
462
with pl.SQLContext(frame_data=df, eager=True) as ctx:
463
for prec in (0, 15):
464
with pytest.raises(
465
SQLSyntaxError,
466
match=rf"invalid temporal type precision \(expected 1-9, found {prec}\)",
467
):
468
ctx.execute(f"SELECT ts::timestamp({prec}) FROM frame_data")
469
470
with pytest.raises(
471
SQLInterfaceError,
472
match="sql parser error: Expected: literal int, found: - ",
473
):
474
ctx.execute("SELECT ts::timestamp(-3) FROM frame_data")
475
476