Path: blob/main/py-polars/tests/unit/sql/test_operators.py
6939 views
from __future__ import annotations12from datetime import date3from pathlib import Path45import pytest67import polars as pl8import polars.selectors as cs9from polars.testing import assert_frame_equal101112@pytest.fixture13def foods_ipc_path() -> Path:14return Path(__file__).parent.parent / "io" / "files" / "foods1.ipc"151617def test_div() -> None:18df = pl.LazyFrame(19{20"a": [10.0, 20.0, 30.0, 40.0, 50.0],21"b": [-100.5, 7.0, 2.5, None, -3.14],22}23)24with pl.SQLContext(df=df, eager=True) as ctx:25res = ctx.execute(26"""27SELECT28a / b AS a_div_b,29a // b AS a_floordiv_b,30SIGN(b) AS b_sign,31FROM df32"""33)3435assert_frame_equal(36pl.DataFrame(37[38[-0.0995024875621891, 2.85714285714286, 12.0, None, -15.92356687898089],39[-1, 2, 12, None, -16],40[-1.0, 1.0, 1.0, None, -1.0],41],42schema=["a_div_b", "a_floordiv_b", "b_sign"],43),44res,45)464748def test_equal_not_equal() -> None:49# validate null-aware/unaware equality operators50df = pl.DataFrame({"a": [1, None, 3, 6, 5], "b": [1, None, 3, 4, None]})5152with pl.SQLContext(frame_data=df) as ctx:53out = ctx.execute(54"""55SELECT56-- not null-aware57(a = b) as "1_eq_unaware",58(a <> b) as "2_neq_unaware",59(a != b) as "3_neq_unaware",60-- null-aware61(a <=> b) as "4_eq_aware",62(a IS NOT DISTINCT FROM b) as "5_eq_aware",63(a IS DISTINCT FROM b) as "6_neq_aware",64FROM frame_data65"""66).collect()6768assert out.select(cs.contains("_aware").null_count().sum()).row(0) == (0, 0, 0)69assert out.select(cs.contains("_unaware").null_count().sum()).row(0) == (2, 2, 2)7071assert out.to_dict(as_series=False) == {72"1_eq_unaware": [True, None, True, False, None],73"2_neq_unaware": [False, None, False, True, None],74"3_neq_unaware": [False, None, False, True, None],75"4_eq_aware": [True, True, True, False, False],76"5_eq_aware": [True, True, True, False, False],77"6_neq_aware": [False, False, False, True, True],78}798081@pytest.mark.parametrize(82"in_clause",83[84"values NOT IN ([0], [3,4], [7,8], [6,6,6])",85"values IN ([0], [5,6], [1,2], [8,8,8,8])",86"dt NOT IN ('1950-12-24', '1997-07-05')",87"dt IN ('2020-10-10', '2077-03-18')",88"rowid NOT IN (1, 3)",89"rowid IN (4, 2)",90],91)92def test_in_not_in(in_clause: str) -> None:93df = pl.DataFrame(94{95"rowid": [4, 3, 2, 1],96"values": [[1, 2], [3, 4], [5, 6], [7, 8]],97"dt": [98date(2020, 10, 10),99date(1997, 7, 5),100date(2077, 3, 18),101date(1950, 12, 24),102],103}104)105res = df.sql(106f"""107SELECT "values"108FROM self109WHERE {in_clause}110ORDER BY "rowid" DESC111"""112)113assert res.to_dict(as_series=False) == {114"values": [[1, 2], [5, 6]],115}116117118def test_is_between(foods_ipc_path: Path) -> None:119lf = pl.scan_ipc(foods_ipc_path)120121ctx = pl.SQLContext(foods1=lf, eager=True)122out = ctx.execute(123"""124SELECT *125FROM foods1126WHERE foods1.calories BETWEEN 22 AND 30127ORDER BY "calories" DESC, "sugars_g" DESC128"""129)130assert out.rows() == [131("fruit", 30, 0.0, 5),132("vegetables", 30, 0.0, 5),133("fruit", 30, 0.0, 3),134("vegetables", 25, 0.0, 4),135("vegetables", 25, 0.0, 3),136("vegetables", 25, 0.0, 2),137("vegetables", 22, 0.0, 3),138]139out = ctx.execute(140"""141SELECT *142FROM foods1143WHERE calories NOT BETWEEN 22 AND 30144ORDER BY "calories" ASC145"""146)147assert not any((22 <= cal <= 30) for cal in out["calories"])148149150def test_starts_with() -> None:151lf = pl.LazyFrame(152{153"x": ["aaa", "bbb", "a"],154"y": ["abc", "b", "aa"],155},156)157assert lf.sql("SELECT x ^@ 'a' AS x_starts_with_a FROM self").collect().rows() == [158(True,),159(False,),160(True,),161]162assert lf.sql("SELECT x ^@ y AS x_starts_with_y FROM self").collect().rows() == [163(False,),164(True,),165(False,),166]167168169@pytest.mark.parametrize("match_float", [False, True])170def test_unary_ops_8890(match_float: bool) -> None:171with pl.SQLContext(172df=pl.DataFrame({"a": [-2, -1, 1, 2], "b": ["w", "x", "y", "z"]}),173) as ctx:174in_values = "(-3.0, -1.0, +2.0, +4.0)" if match_float else "(-3, -1, +2, +4)"175res = ctx.execute(176f"""177SELECT *, -(3) as c, (+4) as d178FROM df WHERE a IN {in_values}179"""180)181assert res.collect().to_dict(as_series=False) == {182"a": [-1, 2],183"b": ["x", "z"],184"c": [-3, -3],185"d": [4, 4],186}187188189