Path: blob/main/py-polars/tests/unit/sql/test_wildcard_opts.py
6939 views
from __future__ import annotations12from typing import Any34import pytest56import polars as pl7from polars.exceptions import DuplicateError, SQLInterfaceError8from polars.testing import assert_frame_equal91011@pytest.fixture12def df() -> pl.DataFrame:13return pl.DataFrame(14{15"ID": [333, 666, 999],16"FirstName": ["Bruce", "Diana", "Clark"],17"LastName": ["Wayne", "Prince", "Kent"],18"Address": ["Batcave", "Paradise Island", "Fortress of Solitude"],19"City": ["Gotham", "Themyscira", "Metropolis"],20}21)222324@pytest.mark.parametrize(25("excluded", "order_by", "expected"),26[27("ID", "ORDER BY 2, 1", ["FirstName", "LastName", "Address", "City"]),28("(ID)", "ORDER BY City", ["FirstName", "LastName", "Address", "City"]),29("(Address, LastName, FirstName)", "", ["ID", "City"]),30('("ID", "FirstName", "LastName", "Address", "City")', "", []),31],32)33def test_select_exclude(34excluded: str,35order_by: str,36expected: list[str],37df: pl.DataFrame,38) -> None:39for exclude_keyword in ("EXCLUDE", "EXCEPT"):40assert (41df.sql(f"SELECT * {exclude_keyword} {excluded} FROM self").columns42== expected43)444546def test_select_exclude_order_by(47df: pl.DataFrame,48) -> None:49expected = pl.DataFrame(50{51"FirstName": ["Diana", "Clark", "Bruce"],52"Address": ["Paradise Island", "Fortress of Solitude", "Batcave"],53}54)55for order_by in ("", "ORDER BY 1 DESC", "ORDER BY 2 DESC", "ORDER BY Address DESC"):56actual = df.sql(f"SELECT * EXCLUDE (ID,LastName,City) FROM self {order_by}")57if not order_by:58actual = actual.sort("FirstName", descending=True)59assert_frame_equal(actual, expected)606162def test_ilike(df: pl.DataFrame) -> None:63assert df.sql("SELECT * ILIKE 'a%e' FROM self").columns == []64assert df.sql("SELECT * ILIKE '%nam_' FROM self").columns == [65"FirstName",66"LastName",67]68assert df.sql("SELECT * ILIKE '%a%e%' FROM self").columns == [69"FirstName",70"LastName",71"Address",72]73assert df.sql(74"""SELECT * ILIKE '%I%' RENAME (FirstName AS Name) FROM self"""75).columns == [76"ID",77"Name",78"City",79]808182@pytest.mark.parametrize(83("renames", "expected"),84[85(86"Address AS Location",87["ID", "FirstName", "LastName", "Location", "City"],88),89(90'(Address AS "Location")',91["ID", "FirstName", "LastName", "Location", "City"],92),93(94'("Address" AS Location, "ID" AS PersonID)',95["PersonID", "FirstName", "LastName", "Location", "City"],96),97],98)99def test_select_rename(100renames: str,101expected: list[str],102df: pl.DataFrame,103) -> None:104assert df.sql(f"SELECT * RENAME {renames} FROM self").columns == expected105106107@pytest.mark.parametrize("order_by", ["1 DESC", "Name DESC", "FirstName DESC"])108def test_select_rename_exclude_sort(order_by: str, df: pl.DataFrame) -> None:109actual = df.sql(110f"""111SELECT * EXCLUDE (ID, City, LastName) RENAME FirstName AS Name112FROM self113ORDER BY {order_by}114"""115)116expected = pl.DataFrame(117{118"Name": ["Diana", "Clark", "Bruce"],119"Address": ["Paradise Island", "Fortress of Solitude", "Batcave"],120}121)122assert_frame_equal(expected, actual)123124125@pytest.mark.parametrize(126("replacements", "check_cols", "expected"),127[128(129"(ID // 3 AS ID)",130["ID"],131[(333,), (222,), (111,)],132),133(134"(ID // 3 AS ID) RENAME (ID AS Identifier)",135["Identifier"],136[(333,), (222,), (111,)],137),138(139"((City || ':' || City) AS City, ID // -3 AS ID)",140["City", "ID"],141[142("Gotham:Gotham", -111),143("Themyscira:Themyscira", -222),144("Metropolis:Metropolis", -333),145],146),147],148)149def test_select_replace(150replacements: str,151check_cols: list[str],152expected: list[tuple[Any]],153df: pl.DataFrame,154) -> None:155for order_by in ("", "ORDER BY ID DESC", "ORDER BY -ID ASC"):156res = df.sql(f"SELECT * REPLACE {replacements} FROM self {order_by}")157if not order_by:158res = res.sort(check_cols[-1], descending=True)159160assert res.select(check_cols).rows() == expected161expected_columns = (162check_cols + df.columns[1:] if check_cols == ["Identifier"] else df.columns163)164assert res.columns == expected_columns165166167def test_select_wildcard_errors(df: pl.DataFrame) -> None:168# EXCLUDE and ILIKE are not allowed together169with pytest.raises(SQLInterfaceError, match="ILIKE"):170assert df.sql("SELECT * EXCLUDE Address ILIKE '%o%' FROM self")171172# these two options are aliases, with EXCLUDE being preferred173with pytest.raises(174SQLInterfaceError,175match="EXCLUDE and EXCEPT wildcard options cannot be used together",176):177assert df.sql("SELECT * EXCLUDE Address EXCEPT City FROM self")178179# note: missing "()" around the exclude option results in dupe col180with pytest.raises(181DuplicateError,182match="City",183):184assert df.sql("SELECT * EXCLUDE Address, City FROM self")185186187