Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_operators.py
8364 views
1
from __future__ import annotations
2
3
from datetime import date
4
from pathlib import Path
5
6
import pytest
7
8
import polars as pl
9
import polars.selectors as cs
10
from polars.exceptions import InvalidOperationError
11
from polars.testing import assert_frame_equal
12
13
14
@pytest.fixture
15
def foods_ipc_path() -> Path:
16
return Path(__file__).parent.parent / "io" / "files" / "foods1.ipc"
17
18
19
def test_div() -> None:
20
df = pl.LazyFrame(
21
{
22
"a": [10.0, 20.0, 30.0, 40.0, 50.0],
23
"b": [-100.5, 7.0, 2.5, None, -3.14],
24
}
25
)
26
with pl.SQLContext(df=df, eager=True) as ctx:
27
res = ctx.execute(
28
"""
29
SELECT
30
a / b AS a_div_b,
31
a // b AS a_floordiv_b,
32
SIGN(b) AS b_sign,
33
FROM df
34
"""
35
)
36
37
assert_frame_equal(
38
pl.DataFrame(
39
[
40
[-0.0995024875621891, 2.85714285714286, 12.0, None, -15.92356687898089],
41
[-1, 2, 12, None, -16],
42
[-1.0, 1.0, 1.0, None, -1.0],
43
],
44
schema=["a_div_b", "a_floordiv_b", "b_sign"],
45
),
46
res,
47
)
48
49
50
def test_equal_not_equal() -> None:
51
# validate null-aware/unaware equality operators
52
df = pl.DataFrame({"a": [1, None, 3, 6, 5], "b": [1, None, 3, 4, None]})
53
54
with pl.SQLContext(frame_data=df) as ctx:
55
res = ctx.execute(
56
"""
57
SELECT
58
-- not null-aware
59
(a = b) as "1_eq_unaware",
60
(a <> b) as "2_neq_unaware",
61
(a != b) as "3_neq_unaware",
62
-- null-aware
63
(a <=> b) as "4_eq_aware",
64
(a IS NOT DISTINCT FROM b) as "5_eq_aware",
65
(a IS DISTINCT FROM b) as "6_neq_aware",
66
FROM frame_data
67
"""
68
).collect()
69
70
assert res.select(cs.contains("_aware").null_count().sum()).row(0) == (0, 0, 0)
71
assert res.select(cs.contains("_unaware").null_count().sum()).row(0) == (2, 2, 2)
72
73
assert res.to_dict(as_series=False) == {
74
"1_eq_unaware": [True, None, True, False, None],
75
"2_neq_unaware": [False, None, False, True, None],
76
"3_neq_unaware": [False, None, False, True, None],
77
"4_eq_aware": [True, True, True, False, False],
78
"5_eq_aware": [True, True, True, False, False],
79
"6_neq_aware": [False, False, False, True, True],
80
}
81
82
83
@pytest.mark.parametrize(
84
"in_clause",
85
[
86
"values NOT IN ([0], [3,4], [7,8], [6,6,6])",
87
"values IN ([0], [5,6], [1,2], [8,8,8,8])",
88
"dt NOT IN ('1950-12-24', '1997-07-05')",
89
"dt IN ('2020-10-10', '2077-03-18')",
90
"rowid NOT IN (1, 3)",
91
"rowid IN (4, 2)",
92
],
93
)
94
def test_in_not_in(in_clause: str) -> None:
95
df = pl.DataFrame(
96
{
97
"rowid": [4, 3, 2, 1],
98
"values": [[1, 2], [3, 4], [5, 6], [7, 8]],
99
"dt": [
100
date(2020, 10, 10),
101
date(1997, 7, 5),
102
date(2077, 3, 18),
103
date(1950, 12, 24),
104
],
105
}
106
)
107
res = df.sql(
108
f"""
109
SELECT "values"
110
FROM self
111
WHERE {in_clause}
112
ORDER BY "rowid" DESC
113
"""
114
)
115
assert res.to_dict(as_series=False) == {
116
"values": [[1, 2], [5, 6]],
117
}
118
119
120
def test_is_between(foods_ipc_path: Path) -> None:
121
lf = pl.scan_ipc(foods_ipc_path)
122
123
ctx = pl.SQLContext(foods1=lf, eager=True)
124
res = ctx.execute(
125
"""
126
SELECT *
127
FROM foods1
128
WHERE foods1.calories BETWEEN 22 AND 30
129
ORDER BY "calories" DESC, "sugars_g" DESC
130
"""
131
)
132
assert res.rows() == [
133
("fruit", 30, 0.0, 5),
134
("vegetables", 30, 0.0, 5),
135
("fruit", 30, 0.0, 3),
136
("vegetables", 25, 0.0, 4),
137
("vegetables", 25, 0.0, 3),
138
("vegetables", 25, 0.0, 2),
139
("vegetables", 22, 0.0, 3),
140
]
141
res = ctx.execute(
142
"""
143
SELECT *
144
FROM foods1
145
WHERE calories NOT BETWEEN 22 AND 30
146
ORDER BY "calories" ASC
147
"""
148
)
149
assert not any((22 <= cal <= 30) for cal in res["calories"])
150
151
152
def test_logical_not() -> None:
153
lf = pl.LazyFrame(
154
{
155
"valid": [True, False, None, False, True],
156
"int_code": [1, 0, 2, None, -1],
157
},
158
)
159
res = lf.sql(
160
"""
161
SELECT
162
valid,
163
NOT valid AS not_valid,
164
int_code,
165
NOT int_code AS int_code_zero
166
FROM self
167
ORDER BY int_code NULLS FIRST
168
"""
169
).collect()
170
# ┌───────┬───────────┬──────────┬───────────────┐
171
# │ valid ┆ not_valid ┆ int_code ┆ int_code_zero │
172
# │ --- ┆ --- ┆ --- ┆ --- │
173
# │ bool ┆ bool ┆ i64 ┆ bool │
174
# ╞═══════╪═══════════╪══════════╪═══════════════╡
175
# │ false ┆ true ┆ null ┆ null │
176
# │ true ┆ false ┆ -1 ┆ false │
177
# │ false ┆ true ┆ 0 ┆ true │
178
# │ true ┆ false ┆ 1 ┆ false │
179
# │ null ┆ null ┆ 2 ┆ false │
180
# └───────┴───────────┴──────────┴───────────────┘
181
assert res.to_dict(as_series=False) == {
182
"valid": [False, True, False, True, None],
183
"not_valid": [True, False, True, False, None],
184
"int_code": [None, -1, 0, 1, 2],
185
"int_code_zero": [None, False, True, False, False],
186
}
187
188
# expect failure when applying logical 'NOT' to an incompatible dtype
189
for invalid_literal in ("'foo'", "'2026-12-31'::date"):
190
with pytest.raises(
191
InvalidOperationError,
192
match=r"cast.* to Boolean not supported",
193
):
194
pl.sql(f"SELECT NOT {invalid_literal}", eager=True)
195
196
197
def test_starts_with() -> None:
198
lf = pl.LazyFrame(
199
{
200
"x": ["aaa", "bbb", "a"],
201
"y": ["abc", "b", "aa"],
202
},
203
)
204
assert lf.sql("SELECT x ^@ 'a' AS x_starts_with_a FROM self").collect().rows() == [
205
(True,),
206
(False,),
207
(True,),
208
]
209
assert lf.sql("SELECT x ^@ y AS x_starts_with_y FROM self").collect().rows() == [
210
(False,),
211
(True,),
212
(False,),
213
]
214
215
216
@pytest.mark.parametrize("match_float", [False, True])
217
def test_unary_ops_8890(match_float: bool) -> None:
218
with pl.SQLContext(
219
df=pl.DataFrame({"a": [-2, -1, 1, 2], "b": ["w", "x", "y", "z"]}),
220
) as ctx:
221
in_values = "(-3.0, -1.0, +2.0, +4.0)" if match_float else "(-3, -1, +2, +4)"
222
res = ctx.execute(
223
f"""
224
SELECT *, -(3) as c, (+4) as d
225
FROM df WHERE a IN {in_values}
226
"""
227
)
228
assert res.collect().to_dict(as_series=False) == {
229
"a": [-1, 2],
230
"b": ["x", "z"],
231
"c": [-3, -3],
232
"d": [4, 4],
233
}
234
235