CoCalc -- test_wildcard

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_wildcard_opts.py
⁶⁹³⁹ views
1
from __future__ import annotations
2

3
from typing import Any
4

5
import pytest
6

7
import polars as pl
8
from polars.exceptions import DuplicateError, SQLInterfaceError
9
from polars.testing import assert_frame_equal
10

11

12
@pytest.fixture
13
def df() -> pl.DataFrame:
14
    return pl.DataFrame(
15
        {
16
            "ID": [333, 666, 999],
17
            "FirstName": ["Bruce", "Diana", "Clark"],
18
            "LastName": ["Wayne", "Prince", "Kent"],
19
            "Address": ["Batcave", "Paradise Island", "Fortress of Solitude"],
20
            "City": ["Gotham", "Themyscira", "Metropolis"],
21
        }
22
    )
23

24

25
@pytest.mark.parametrize(
26
    ("excluded", "order_by", "expected"),
27
    [
28
        ("ID", "ORDER BY 2, 1", ["FirstName", "LastName", "Address", "City"]),
29
        ("(ID)", "ORDER BY City", ["FirstName", "LastName", "Address", "City"]),
30
        ("(Address, LastName, FirstName)", "", ["ID", "City"]),
31
        ('("ID", "FirstName", "LastName", "Address", "City")', "", []),
32
    ],
33
)
34
def test_select_exclude(
35
    excluded: str,
36
    order_by: str,
37
    expected: list[str],
38
    df: pl.DataFrame,
39
) -> None:
40
    for exclude_keyword in ("EXCLUDE", "EXCEPT"):
41
        assert (
42
            df.sql(f"SELECT * {exclude_keyword} {excluded} FROM self").columns
43
            == expected
44
        )
45

46

47
def test_select_exclude_order_by(
48
    df: pl.DataFrame,
49
) -> None:
50
    expected = pl.DataFrame(
51
        {
52
            "FirstName": ["Diana", "Clark", "Bruce"],
53
            "Address": ["Paradise Island", "Fortress of Solitude", "Batcave"],
54
        }
55
    )
56
    for order_by in ("", "ORDER BY 1 DESC", "ORDER BY 2 DESC", "ORDER BY Address DESC"):
57
        actual = df.sql(f"SELECT * EXCLUDE (ID,LastName,City) FROM self {order_by}")
58
        if not order_by:
59
            actual = actual.sort("FirstName", descending=True)
60
        assert_frame_equal(actual, expected)
61

62

63
def test_ilike(df: pl.DataFrame) -> None:
64
    assert df.sql("SELECT * ILIKE 'a%e' FROM self").columns == []
65
    assert df.sql("SELECT * ILIKE '%nam_' FROM self").columns == [
66
        "FirstName",
67
        "LastName",
68
    ]
69
    assert df.sql("SELECT * ILIKE '%a%e%' FROM self").columns == [
70
        "FirstName",
71
        "LastName",
72
        "Address",
73
    ]
74
    assert df.sql(
75
        """SELECT * ILIKE '%I%' RENAME (FirstName AS Name) FROM self"""
76
    ).columns == [
77
        "ID",
78
        "Name",
79
        "City",
80
    ]
81

82

83
@pytest.mark.parametrize(
84
    ("renames", "expected"),
85
    [
86
        (
87
            "Address AS Location",
88
            ["ID", "FirstName", "LastName", "Location", "City"],
89
        ),
90
        (
91
            '(Address AS "Location")',
92
            ["ID", "FirstName", "LastName", "Location", "City"],
93
        ),
94
        (
95
            '("Address" AS Location, "ID" AS PersonID)',
96
            ["PersonID", "FirstName", "LastName", "Location", "City"],
97
        ),
98
    ],
99
)
100
def test_select_rename(
101
    renames: str,
102
    expected: list[str],
103
    df: pl.DataFrame,
104
) -> None:
105
    assert df.sql(f"SELECT * RENAME {renames} FROM self").columns == expected
106

107

108
@pytest.mark.parametrize("order_by", ["1 DESC", "Name DESC", "FirstName DESC"])
109
def test_select_rename_exclude_sort(order_by: str, df: pl.DataFrame) -> None:
110
    actual = df.sql(
111
        f"""
112
        SELECT * EXCLUDE (ID, City, LastName) RENAME FirstName AS Name
113
        FROM self
114
        ORDER BY {order_by}
115
        """
116
    )
117
    expected = pl.DataFrame(
118
        {
119
            "Name": ["Diana", "Clark", "Bruce"],
120
            "Address": ["Paradise Island", "Fortress of Solitude", "Batcave"],
121
        }
122
    )
123
    assert_frame_equal(expected, actual)
124

125

126
@pytest.mark.parametrize(
127
    ("replacements", "check_cols", "expected"),
128
    [
129
        (
130
            "(ID // 3 AS ID)",
131
            ["ID"],
132
            [(333,), (222,), (111,)],
133
        ),
134
        (
135
            "(ID // 3 AS ID) RENAME (ID AS Identifier)",
136
            ["Identifier"],
137
            [(333,), (222,), (111,)],
138
        ),
139
        (
140
            "((City || ':' || City) AS City, ID // -3 AS ID)",
141
            ["City", "ID"],
142
            [
143
                ("Gotham:Gotham", -111),
144
                ("Themyscira:Themyscira", -222),
145
                ("Metropolis:Metropolis", -333),
146
            ],
147
        ),
148
    ],
149
)
150
def test_select_replace(
151
    replacements: str,
152
    check_cols: list[str],
153
    expected: list[tuple[Any]],
154
    df: pl.DataFrame,
155
) -> None:
156
    for order_by in ("", "ORDER BY ID DESC", "ORDER BY -ID ASC"):
157
        res = df.sql(f"SELECT * REPLACE {replacements} FROM self {order_by}")
158
        if not order_by:
159
            res = res.sort(check_cols[-1], descending=True)
160

161
        assert res.select(check_cols).rows() == expected
162
        expected_columns = (
163
            check_cols + df.columns[1:] if check_cols == ["Identifier"] else df.columns
164
        )
165
        assert res.columns == expected_columns
166

167

168
def test_select_wildcard_errors(df: pl.DataFrame) -> None:
169
    # EXCLUDE and ILIKE are not allowed together
170
    with pytest.raises(SQLInterfaceError, match="ILIKE"):
171
        assert df.sql("SELECT * EXCLUDE Address ILIKE '%o%' FROM self")
172

173
    # these two options are aliases, with EXCLUDE being preferred
174
    with pytest.raises(
175
        SQLInterfaceError,
176
        match="EXCLUDE and EXCEPT wildcard options cannot be used together",
177
    ):
178
        assert df.sql("SELECT * EXCLUDE Address EXCEPT City FROM self")
179

180
    # note: missing "()" around the exclude option results in dupe col
181
    with pytest.raises(
182
        DuplicateError,
183
        match="City",
184
    ):
185
        assert df.sql("SELECT * EXCLUDE Address, City FROM self")
186

187
Product

Resources

Company