Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_wildcard_opts.py
6939 views
1
from __future__ import annotations
2
3
from typing import Any
4
5
import pytest
6
7
import polars as pl
8
from polars.exceptions import DuplicateError, SQLInterfaceError
9
from polars.testing import assert_frame_equal
10
11
12
@pytest.fixture
13
def df() -> pl.DataFrame:
14
return pl.DataFrame(
15
{
16
"ID": [333, 666, 999],
17
"FirstName": ["Bruce", "Diana", "Clark"],
18
"LastName": ["Wayne", "Prince", "Kent"],
19
"Address": ["Batcave", "Paradise Island", "Fortress of Solitude"],
20
"City": ["Gotham", "Themyscira", "Metropolis"],
21
}
22
)
23
24
25
@pytest.mark.parametrize(
26
("excluded", "order_by", "expected"),
27
[
28
("ID", "ORDER BY 2, 1", ["FirstName", "LastName", "Address", "City"]),
29
("(ID)", "ORDER BY City", ["FirstName", "LastName", "Address", "City"]),
30
("(Address, LastName, FirstName)", "", ["ID", "City"]),
31
('("ID", "FirstName", "LastName", "Address", "City")', "", []),
32
],
33
)
34
def test_select_exclude(
35
excluded: str,
36
order_by: str,
37
expected: list[str],
38
df: pl.DataFrame,
39
) -> None:
40
for exclude_keyword in ("EXCLUDE", "EXCEPT"):
41
assert (
42
df.sql(f"SELECT * {exclude_keyword} {excluded} FROM self").columns
43
== expected
44
)
45
46
47
def test_select_exclude_order_by(
48
df: pl.DataFrame,
49
) -> None:
50
expected = pl.DataFrame(
51
{
52
"FirstName": ["Diana", "Clark", "Bruce"],
53
"Address": ["Paradise Island", "Fortress of Solitude", "Batcave"],
54
}
55
)
56
for order_by in ("", "ORDER BY 1 DESC", "ORDER BY 2 DESC", "ORDER BY Address DESC"):
57
actual = df.sql(f"SELECT * EXCLUDE (ID,LastName,City) FROM self {order_by}")
58
if not order_by:
59
actual = actual.sort("FirstName", descending=True)
60
assert_frame_equal(actual, expected)
61
62
63
def test_ilike(df: pl.DataFrame) -> None:
64
assert df.sql("SELECT * ILIKE 'a%e' FROM self").columns == []
65
assert df.sql("SELECT * ILIKE '%nam_' FROM self").columns == [
66
"FirstName",
67
"LastName",
68
]
69
assert df.sql("SELECT * ILIKE '%a%e%' FROM self").columns == [
70
"FirstName",
71
"LastName",
72
"Address",
73
]
74
assert df.sql(
75
"""SELECT * ILIKE '%I%' RENAME (FirstName AS Name) FROM self"""
76
).columns == [
77
"ID",
78
"Name",
79
"City",
80
]
81
82
83
@pytest.mark.parametrize(
84
("renames", "expected"),
85
[
86
(
87
"Address AS Location",
88
["ID", "FirstName", "LastName", "Location", "City"],
89
),
90
(
91
'(Address AS "Location")',
92
["ID", "FirstName", "LastName", "Location", "City"],
93
),
94
(
95
'("Address" AS Location, "ID" AS PersonID)',
96
["PersonID", "FirstName", "LastName", "Location", "City"],
97
),
98
],
99
)
100
def test_select_rename(
101
renames: str,
102
expected: list[str],
103
df: pl.DataFrame,
104
) -> None:
105
assert df.sql(f"SELECT * RENAME {renames} FROM self").columns == expected
106
107
108
@pytest.mark.parametrize("order_by", ["1 DESC", "Name DESC", "FirstName DESC"])
109
def test_select_rename_exclude_sort(order_by: str, df: pl.DataFrame) -> None:
110
actual = df.sql(
111
f"""
112
SELECT * EXCLUDE (ID, City, LastName) RENAME FirstName AS Name
113
FROM self
114
ORDER BY {order_by}
115
"""
116
)
117
expected = pl.DataFrame(
118
{
119
"Name": ["Diana", "Clark", "Bruce"],
120
"Address": ["Paradise Island", "Fortress of Solitude", "Batcave"],
121
}
122
)
123
assert_frame_equal(expected, actual)
124
125
126
@pytest.mark.parametrize(
127
("replacements", "check_cols", "expected"),
128
[
129
(
130
"(ID // 3 AS ID)",
131
["ID"],
132
[(333,), (222,), (111,)],
133
),
134
(
135
"(ID // 3 AS ID) RENAME (ID AS Identifier)",
136
["Identifier"],
137
[(333,), (222,), (111,)],
138
),
139
(
140
"((City || ':' || City) AS City, ID // -3 AS ID)",
141
["City", "ID"],
142
[
143
("Gotham:Gotham", -111),
144
("Themyscira:Themyscira", -222),
145
("Metropolis:Metropolis", -333),
146
],
147
),
148
],
149
)
150
def test_select_replace(
151
replacements: str,
152
check_cols: list[str],
153
expected: list[tuple[Any]],
154
df: pl.DataFrame,
155
) -> None:
156
for order_by in ("", "ORDER BY ID DESC", "ORDER BY -ID ASC"):
157
res = df.sql(f"SELECT * REPLACE {replacements} FROM self {order_by}")
158
if not order_by:
159
res = res.sort(check_cols[-1], descending=True)
160
161
assert res.select(check_cols).rows() == expected
162
expected_columns = (
163
check_cols + df.columns[1:] if check_cols == ["Identifier"] else df.columns
164
)
165
assert res.columns == expected_columns
166
167
168
def test_select_wildcard_errors(df: pl.DataFrame) -> None:
169
# EXCLUDE and ILIKE are not allowed together
170
with pytest.raises(SQLInterfaceError, match="ILIKE"):
171
assert df.sql("SELECT * EXCLUDE Address ILIKE '%o%' FROM self")
172
173
# these two options are aliases, with EXCLUDE being preferred
174
with pytest.raises(
175
SQLInterfaceError,
176
match="EXCLUDE and EXCEPT wildcard options cannot be used together",
177
):
178
assert df.sql("SELECT * EXCLUDE Address EXCEPT City FROM self")
179
180
# note: missing "()" around the exclude option results in dupe col
181
with pytest.raises(
182
DuplicateError,
183
match="City",
184
):
185
assert df.sql("SELECT * EXCLUDE Address, City FROM self")
186
187