Path: blob/main/py-polars/tests/unit/sql/test_operators.py
8364 views
from __future__ import annotations12from datetime import date3from pathlib import Path45import pytest67import polars as pl8import polars.selectors as cs9from polars.exceptions import InvalidOperationError10from polars.testing import assert_frame_equal111213@pytest.fixture14def foods_ipc_path() -> Path:15return Path(__file__).parent.parent / "io" / "files" / "foods1.ipc"161718def test_div() -> None:19df = pl.LazyFrame(20{21"a": [10.0, 20.0, 30.0, 40.0, 50.0],22"b": [-100.5, 7.0, 2.5, None, -3.14],23}24)25with pl.SQLContext(df=df, eager=True) as ctx:26res = ctx.execute(27"""28SELECT29a / b AS a_div_b,30a // b AS a_floordiv_b,31SIGN(b) AS b_sign,32FROM df33"""34)3536assert_frame_equal(37pl.DataFrame(38[39[-0.0995024875621891, 2.85714285714286, 12.0, None, -15.92356687898089],40[-1, 2, 12, None, -16],41[-1.0, 1.0, 1.0, None, -1.0],42],43schema=["a_div_b", "a_floordiv_b", "b_sign"],44),45res,46)474849def test_equal_not_equal() -> None:50# validate null-aware/unaware equality operators51df = pl.DataFrame({"a": [1, None, 3, 6, 5], "b": [1, None, 3, 4, None]})5253with pl.SQLContext(frame_data=df) as ctx:54res = ctx.execute(55"""56SELECT57-- not null-aware58(a = b) as "1_eq_unaware",59(a <> b) as "2_neq_unaware",60(a != b) as "3_neq_unaware",61-- null-aware62(a <=> b) as "4_eq_aware",63(a IS NOT DISTINCT FROM b) as "5_eq_aware",64(a IS DISTINCT FROM b) as "6_neq_aware",65FROM frame_data66"""67).collect()6869assert res.select(cs.contains("_aware").null_count().sum()).row(0) == (0, 0, 0)70assert res.select(cs.contains("_unaware").null_count().sum()).row(0) == (2, 2, 2)7172assert res.to_dict(as_series=False) == {73"1_eq_unaware": [True, None, True, False, None],74"2_neq_unaware": [False, None, False, True, None],75"3_neq_unaware": [False, None, False, True, None],76"4_eq_aware": [True, True, True, False, False],77"5_eq_aware": [True, True, True, False, False],78"6_neq_aware": [False, False, False, True, True],79}808182@pytest.mark.parametrize(83"in_clause",84[85"values NOT IN ([0], [3,4], [7,8], [6,6,6])",86"values IN ([0], [5,6], [1,2], [8,8,8,8])",87"dt NOT IN ('1950-12-24', '1997-07-05')",88"dt IN ('2020-10-10', '2077-03-18')",89"rowid NOT IN (1, 3)",90"rowid IN (4, 2)",91],92)93def test_in_not_in(in_clause: str) -> None:94df = pl.DataFrame(95{96"rowid": [4, 3, 2, 1],97"values": [[1, 2], [3, 4], [5, 6], [7, 8]],98"dt": [99date(2020, 10, 10),100date(1997, 7, 5),101date(2077, 3, 18),102date(1950, 12, 24),103],104}105)106res = df.sql(107f"""108SELECT "values"109FROM self110WHERE {in_clause}111ORDER BY "rowid" DESC112"""113)114assert res.to_dict(as_series=False) == {115"values": [[1, 2], [5, 6]],116}117118119def test_is_between(foods_ipc_path: Path) -> None:120lf = pl.scan_ipc(foods_ipc_path)121122ctx = pl.SQLContext(foods1=lf, eager=True)123res = ctx.execute(124"""125SELECT *126FROM foods1127WHERE foods1.calories BETWEEN 22 AND 30128ORDER BY "calories" DESC, "sugars_g" DESC129"""130)131assert res.rows() == [132("fruit", 30, 0.0, 5),133("vegetables", 30, 0.0, 5),134("fruit", 30, 0.0, 3),135("vegetables", 25, 0.0, 4),136("vegetables", 25, 0.0, 3),137("vegetables", 25, 0.0, 2),138("vegetables", 22, 0.0, 3),139]140res = ctx.execute(141"""142SELECT *143FROM foods1144WHERE calories NOT BETWEEN 22 AND 30145ORDER BY "calories" ASC146"""147)148assert not any((22 <= cal <= 30) for cal in res["calories"])149150151def test_logical_not() -> None:152lf = pl.LazyFrame(153{154"valid": [True, False, None, False, True],155"int_code": [1, 0, 2, None, -1],156},157)158res = lf.sql(159"""160SELECT161valid,162NOT valid AS not_valid,163int_code,164NOT int_code AS int_code_zero165FROM self166ORDER BY int_code NULLS FIRST167"""168).collect()169# ┌───────┬───────────┬──────────┬───────────────┐170# │ valid ┆ not_valid ┆ int_code ┆ int_code_zero │171# │ --- ┆ --- ┆ --- ┆ --- │172# │ bool ┆ bool ┆ i64 ┆ bool │173# ╞═══════╪═══════════╪══════════╪═══════════════╡174# │ false ┆ true ┆ null ┆ null │175# │ true ┆ false ┆ -1 ┆ false │176# │ false ┆ true ┆ 0 ┆ true │177# │ true ┆ false ┆ 1 ┆ false │178# │ null ┆ null ┆ 2 ┆ false │179# └───────┴───────────┴──────────┴───────────────┘180assert res.to_dict(as_series=False) == {181"valid": [False, True, False, True, None],182"not_valid": [True, False, True, False, None],183"int_code": [None, -1, 0, 1, 2],184"int_code_zero": [None, False, True, False, False],185}186187# expect failure when applying logical 'NOT' to an incompatible dtype188for invalid_literal in ("'foo'", "'2026-12-31'::date"):189with pytest.raises(190InvalidOperationError,191match=r"cast.* to Boolean not supported",192):193pl.sql(f"SELECT NOT {invalid_literal}", eager=True)194195196def test_starts_with() -> None:197lf = pl.LazyFrame(198{199"x": ["aaa", "bbb", "a"],200"y": ["abc", "b", "aa"],201},202)203assert lf.sql("SELECT x ^@ 'a' AS x_starts_with_a FROM self").collect().rows() == [204(True,),205(False,),206(True,),207]208assert lf.sql("SELECT x ^@ y AS x_starts_with_y FROM self").collect().rows() == [209(False,),210(True,),211(False,),212]213214215@pytest.mark.parametrize("match_float", [False, True])216def test_unary_ops_8890(match_float: bool) -> None:217with pl.SQLContext(218df=pl.DataFrame({"a": [-2, -1, 1, 2], "b": ["w", "x", "y", "z"]}),219) as ctx:220in_values = "(-3.0, -1.0, +2.0, +4.0)" if match_float else "(-3, -1, +2, +4)"221res = ctx.execute(222f"""223SELECT *, -(3) as c, (+4) as d224FROM df WHERE a IN {in_values}225"""226)227assert res.collect().to_dict(as_series=False) == {228"a": [-1, 2],229"b": ["x", "z"],230"c": [-3, -3],231"d": [4, 4],232}233234235