Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_operators.py
6939 views
1
from __future__ import annotations
2
3
from datetime import date
4
from pathlib import Path
5
6
import pytest
7
8
import polars as pl
9
import polars.selectors as cs
10
from polars.testing import assert_frame_equal
11
12
13
@pytest.fixture
14
def foods_ipc_path() -> Path:
15
return Path(__file__).parent.parent / "io" / "files" / "foods1.ipc"
16
17
18
def test_div() -> None:
19
df = pl.LazyFrame(
20
{
21
"a": [10.0, 20.0, 30.0, 40.0, 50.0],
22
"b": [-100.5, 7.0, 2.5, None, -3.14],
23
}
24
)
25
with pl.SQLContext(df=df, eager=True) as ctx:
26
res = ctx.execute(
27
"""
28
SELECT
29
a / b AS a_div_b,
30
a // b AS a_floordiv_b,
31
SIGN(b) AS b_sign,
32
FROM df
33
"""
34
)
35
36
assert_frame_equal(
37
pl.DataFrame(
38
[
39
[-0.0995024875621891, 2.85714285714286, 12.0, None, -15.92356687898089],
40
[-1, 2, 12, None, -16],
41
[-1.0, 1.0, 1.0, None, -1.0],
42
],
43
schema=["a_div_b", "a_floordiv_b", "b_sign"],
44
),
45
res,
46
)
47
48
49
def test_equal_not_equal() -> None:
50
# validate null-aware/unaware equality operators
51
df = pl.DataFrame({"a": [1, None, 3, 6, 5], "b": [1, None, 3, 4, None]})
52
53
with pl.SQLContext(frame_data=df) as ctx:
54
out = ctx.execute(
55
"""
56
SELECT
57
-- not null-aware
58
(a = b) as "1_eq_unaware",
59
(a <> b) as "2_neq_unaware",
60
(a != b) as "3_neq_unaware",
61
-- null-aware
62
(a <=> b) as "4_eq_aware",
63
(a IS NOT DISTINCT FROM b) as "5_eq_aware",
64
(a IS DISTINCT FROM b) as "6_neq_aware",
65
FROM frame_data
66
"""
67
).collect()
68
69
assert out.select(cs.contains("_aware").null_count().sum()).row(0) == (0, 0, 0)
70
assert out.select(cs.contains("_unaware").null_count().sum()).row(0) == (2, 2, 2)
71
72
assert out.to_dict(as_series=False) == {
73
"1_eq_unaware": [True, None, True, False, None],
74
"2_neq_unaware": [False, None, False, True, None],
75
"3_neq_unaware": [False, None, False, True, None],
76
"4_eq_aware": [True, True, True, False, False],
77
"5_eq_aware": [True, True, True, False, False],
78
"6_neq_aware": [False, False, False, True, True],
79
}
80
81
82
@pytest.mark.parametrize(
83
"in_clause",
84
[
85
"values NOT IN ([0], [3,4], [7,8], [6,6,6])",
86
"values IN ([0], [5,6], [1,2], [8,8,8,8])",
87
"dt NOT IN ('1950-12-24', '1997-07-05')",
88
"dt IN ('2020-10-10', '2077-03-18')",
89
"rowid NOT IN (1, 3)",
90
"rowid IN (4, 2)",
91
],
92
)
93
def test_in_not_in(in_clause: str) -> None:
94
df = pl.DataFrame(
95
{
96
"rowid": [4, 3, 2, 1],
97
"values": [[1, 2], [3, 4], [5, 6], [7, 8]],
98
"dt": [
99
date(2020, 10, 10),
100
date(1997, 7, 5),
101
date(2077, 3, 18),
102
date(1950, 12, 24),
103
],
104
}
105
)
106
res = df.sql(
107
f"""
108
SELECT "values"
109
FROM self
110
WHERE {in_clause}
111
ORDER BY "rowid" DESC
112
"""
113
)
114
assert res.to_dict(as_series=False) == {
115
"values": [[1, 2], [5, 6]],
116
}
117
118
119
def test_is_between(foods_ipc_path: Path) -> None:
120
lf = pl.scan_ipc(foods_ipc_path)
121
122
ctx = pl.SQLContext(foods1=lf, eager=True)
123
out = ctx.execute(
124
"""
125
SELECT *
126
FROM foods1
127
WHERE foods1.calories BETWEEN 22 AND 30
128
ORDER BY "calories" DESC, "sugars_g" DESC
129
"""
130
)
131
assert out.rows() == [
132
("fruit", 30, 0.0, 5),
133
("vegetables", 30, 0.0, 5),
134
("fruit", 30, 0.0, 3),
135
("vegetables", 25, 0.0, 4),
136
("vegetables", 25, 0.0, 3),
137
("vegetables", 25, 0.0, 2),
138
("vegetables", 22, 0.0, 3),
139
]
140
out = ctx.execute(
141
"""
142
SELECT *
143
FROM foods1
144
WHERE calories NOT BETWEEN 22 AND 30
145
ORDER BY "calories" ASC
146
"""
147
)
148
assert not any((22 <= cal <= 30) for cal in out["calories"])
149
150
151
def test_starts_with() -> None:
152
lf = pl.LazyFrame(
153
{
154
"x": ["aaa", "bbb", "a"],
155
"y": ["abc", "b", "aa"],
156
},
157
)
158
assert lf.sql("SELECT x ^@ 'a' AS x_starts_with_a FROM self").collect().rows() == [
159
(True,),
160
(False,),
161
(True,),
162
]
163
assert lf.sql("SELECT x ^@ y AS x_starts_with_y FROM self").collect().rows() == [
164
(False,),
165
(True,),
166
(False,),
167
]
168
169
170
@pytest.mark.parametrize("match_float", [False, True])
171
def test_unary_ops_8890(match_float: bool) -> None:
172
with pl.SQLContext(
173
df=pl.DataFrame({"a": [-2, -1, 1, 2], "b": ["w", "x", "y", "z"]}),
174
) as ctx:
175
in_values = "(-3.0, -1.0, +2.0, +4.0)" if match_float else "(-3, -1, +2, +4)"
176
res = ctx.execute(
177
f"""
178
SELECT *, -(3) as c, (+4) as d
179
FROM df WHERE a IN {in_values}
180
"""
181
)
182
assert res.collect().to_dict(as_series=False) == {
183
"a": [-1, 2],
184
"b": ["x", "z"],
185
"c": [-3, -3],
186
"d": [4, 4],
187
}
188
189