Path: blob/main/py-polars/tests/unit/sql/test_order_by.py
6939 views
from __future__ import annotations12from pathlib import Path34import pytest56import polars as pl7from polars.exceptions import SQLInterfaceError, SQLSyntaxError8910@pytest.fixture11def foods_ipc_path() -> Path:12return Path(__file__).parent.parent / "io" / "files" / "foods1.ipc"131415def test_order_by_basic(foods_ipc_path: Path) -> None:16foods = pl.scan_ipc(foods_ipc_path)1718order_by_distinct_res = foods.sql(19"""20SELECT DISTINCT category21FROM self22ORDER BY category DESC23"""24).collect()25assert order_by_distinct_res.to_dict(as_series=False) == {26"category": ["vegetables", "seafood", "meat", "fruit"]27}2829for category in ("category", "category AS cat"):30category_col = category.split(" ")[-1]31order_by_group_by_res = foods.sql(32f"""33SELECT {category}34FROM self35GROUP BY category36ORDER BY {category_col} DESC37"""38).collect()39assert order_by_group_by_res.to_dict(as_series=False) == {40category_col: ["vegetables", "seafood", "meat", "fruit"]41}4243order_by_constructed_group_by_res = foods.sql(44"""45SELECT category, SUM(calories) as summed_calories46FROM self47GROUP BY category48ORDER BY summed_calories DESC49"""50).collect()51assert order_by_constructed_group_by_res.to_dict(as_series=False) == {52"category": ["seafood", "meat", "fruit", "vegetables"],53"summed_calories": [1250, 540, 410, 192],54}5556order_by_unselected_res = foods.sql(57"""58SELECT SUM(calories) as summed_calories59FROM self60GROUP BY category61ORDER BY summed_calories DESC62"""63).collect()64assert order_by_unselected_res.to_dict(as_series=False) == {65"summed_calories": [1250, 540, 410, 192],66}676869def test_order_by_misc_selection() -> None:70df = pl.DataFrame({"x": [None, 1, 2, 3], "y": [4, 2, None, 8]})7172# order by aliased col73res = df.sql("SELECT x, y AS y2 FROM self ORDER BY y2")74assert res.to_dict(as_series=False) == {"x": [1, None, 3, 2], "y2": [2, 4, 8, None]}7576res = df.sql("SELECT x, y AS y2 FROM self ORDER BY y2 DESC")77assert res.to_dict(as_series=False) == {"x": [2, 3, None, 1], "y2": [None, 8, 4, 2]}7879# order by col found in wildcard80res = df.sql("SELECT *, y AS y2 FROM self ORDER BY y")81assert res.to_dict(as_series=False) == {82"x": [1, None, 3, 2],83"y": [2, 4, 8, None],84"y2": [2, 4, 8, None],85}86res = df.sql("SELECT *, y AS y2 FROM self ORDER BY y NULLS FIRST")87assert res.to_dict(as_series=False) == {88"x": [2, 1, None, 3],89"y": [None, 2, 4, 8],90"y2": [None, 2, 4, 8],91}9293# order by col found in qualified wildcard94res = df.sql("SELECT self.* FROM self ORDER BY x NULLS FIRST")95assert res.to_dict(as_series=False) == {"x": [None, 1, 2, 3], "y": [4, 2, None, 8]}9697res = df.sql("SELECT self.* FROM self ORDER BY y NULLS FIRST")98assert res.to_dict(as_series=False) == {"x": [2, 1, None, 3], "y": [None, 2, 4, 8]}99100# order by col excluded from wildcard101res = df.sql("SELECT * EXCLUDE y FROM self ORDER BY y")102assert res.to_dict(as_series=False) == {"x": [1, None, 3, 2]}103104res = df.sql("SELECT * EXCLUDE y FROM self ORDER BY y NULLS FIRST")105assert res.to_dict(as_series=False) == {"x": [2, 1, None, 3]}106107# order by col excluded from qualified wildcard108res = df.sql("SELECT self.* EXCLUDE y FROM self ORDER BY y")109assert res.to_dict(as_series=False) == {"x": [1, None, 3, 2]}110111# order by expression112res = df.sql("SELECT (x % y) AS xmy FROM self ORDER BY -(x % y)")113assert res.to_dict(as_series=False) == {"xmy": [3, 1, None, None]}114115res = df.sql("SELECT (x % y) AS xmy FROM self ORDER BY x % y NULLS FIRST")116assert res.to_dict(as_series=False) == {"xmy": [None, None, 1, 3]}117118# confirm that 'order by all' syntax prioritises cols119df = pl.DataFrame({"SOME": [0, 1], "ALL": [1, 0]})120res = df.sql("SELECT * FROM self ORDER BY ALL")121assert res.to_dict(as_series=False) == {"SOME": [1, 0], "ALL": [0, 1]}122123res = df.sql("SELECT * FROM self ORDER BY ALL DESC")124assert res.to_dict(as_series=False) == {"SOME": [0, 1], "ALL": [1, 0]}125126127def test_order_by_misc_16579() -> None:128res = pl.DataFrame(129{130"x": ["apple", "orange"],131"y": ["sheep", "alligator"],132"z": ["hello", "world"],133}134).sql(135"""136SELECT z, y, x137FROM self ORDER BY y DESC138"""139)140assert res.columns == ["z", "y", "x"]141assert res.to_dict(as_series=False) == {142"z": ["hello", "world"],143"y": ["sheep", "alligator"],144"x": ["apple", "orange"],145}146147148def test_order_by_multi_nulls_first_last() -> None:149df = pl.DataFrame({"x": [None, 1, None, 3], "y": [3, 2, None, 1]})150# ┌──────┬──────┐151# │ x ┆ y │152# │ --- ┆ --- │153# │ i64 ┆ i64 │154# ╞══════╪══════╡155# │ null ┆ 3 │156# │ 1 ┆ 2 │157# │ null ┆ null │158# │ 3 ┆ 1 │159# └──────┴──────┘160161res1 = df.sql("SELECT * FROM self ORDER BY x, y")162res2 = df.sql("SELECT * FROM self ORDER BY ALL")163for res in (res1, res2):164assert res.to_dict(as_series=False) == {165"x": [1, 3, None, None],166"y": [2, 1, 3, None],167}168169res = df.sql("SELECT * FROM self ORDER BY x NULLS FIRST, y")170assert res.to_dict(as_series=False) == {171"x": [None, None, 1, 3],172"y": [3, None, 2, 1],173}174175res = df.sql("SELECT * FROM self ORDER BY x, y NULLS FIRST")176assert res.to_dict(as_series=False) == {177"x": [1, 3, None, None],178"y": [2, 1, None, 3],179}180181res1 = df.sql("SELECT * FROM self ORDER BY x NULLS FIRST, y NULLS FIRST")182res2 = df.sql("SELECT * FROM self ORDER BY All NULLS FIRST")183for res in (res1, res2):184assert res.to_dict(as_series=False) == {185"x": [None, None, 1, 3],186"y": [None, 3, 2, 1],187}188189res1 = df.sql("SELECT * FROM self ORDER BY x DESC NULLS FIRST, y DESC NULLS FIRST")190res2 = df.sql("SELECT * FROM self ORDER BY all DESC NULLS FIRST")191for res in (res1, res2):192assert res.to_dict(as_series=False) == {193"x": [None, None, 3, 1],194"y": [None, 3, 1, 2],195}196197res = df.sql("SELECT * FROM self ORDER BY x DESC NULLS FIRST, y DESC NULLS LAST")198assert res.to_dict(as_series=False) == {199"x": [None, None, 3, 1],200"y": [3, None, 1, 2],201}202203res = df.sql("SELECT * FROM self ORDER BY y DESC NULLS FIRST, x NULLS LAST")204assert res.to_dict(as_series=False) == {205"x": [None, None, 1, 3],206"y": [None, 3, 2, 1],207}208209210def test_order_by_ordinal() -> None:211df = pl.DataFrame({"x": [None, 1, None, 3], "y": [3, 2, None, 1]})212213res = df.sql("SELECT * FROM self ORDER BY 1, 2")214assert res.to_dict(as_series=False) == {215"x": [1, 3, None, None],216"y": [2, 1, 3, None],217}218219res = df.sql("SELECT * FROM self ORDER BY 1 DESC, 2")220assert res.to_dict(as_series=False) == {221"x": [None, None, 3, 1],222"y": [3, None, 1, 2],223}224225res = df.sql("SELECT * FROM self ORDER BY 1 DESC NULLS LAST, 2 ASC")226assert res.to_dict(as_series=False) == {227"x": [3, 1, None, None],228"y": [1, 2, 3, None],229}230231res = df.sql("SELECT * FROM self ORDER BY 1 DESC NULLS LAST, 2 ASC NULLS FIRST")232assert res.to_dict(as_series=False) == {233"x": [3, 1, None, None],234"y": [1, 2, None, 3],235}236237res = df.sql("SELECT * FROM self ORDER BY 1 DESC, 2 DESC NULLS FIRST")238assert res.to_dict(as_series=False) == {239"x": [None, None, 3, 1],240"y": [None, 3, 1, 2],241}242243244def test_order_by_errors() -> None:245df = pl.DataFrame({"a": ["w", "x", "y", "z"], "b": [1, 2, 3, 4]})246247with pytest.raises(248SQLInterfaceError,249match="ORDER BY ordinal value must refer to a valid column; found 99",250):251df.sql("SELECT * FROM self ORDER BY 99")252253with pytest.raises(254SQLSyntaxError,255match="negative ordinal values are invalid for ORDER BY; found -1",256):257df.sql("SELECT * FROM self ORDER BY -1")258259260