Path: blob/main/py-polars/tests/unit/sql/test_subqueries.py
6939 views
import pytest12import polars as pl3from polars.exceptions import SQLSyntaxError4from polars.testing import assert_frame_equal567@pytest.mark.parametrize(8("cols", "join_type", "constraint"),9[10("x", "INNER", ""),11("y", "INNER", ""),12("x", "LEFT", "WHERE y IN (0,1,2,3,4,5)"),13("y", "LEFT", "WHERE y >= 0"),14("df1.*", "FULL", "WHERE y >= 0"),15("df2.*", "FULL", "WHERE x >= 0"),16("* EXCLUDE y", "LEFT", "WHERE y >= 0"),17("* EXCLUDE x", "LEFT", "WHERE x >= 0"),18],19)20def test_from_subquery(cols: str, join_type: str, constraint: str) -> None:21df1 = pl.DataFrame({"x": [-1, 0, 3, 1, 2, -1]})22df2 = pl.DataFrame({"y": [0, 1, 2, 3]})2324sql = pl.SQLContext(df1=df1, df2=df2)25res = sql.execute(26f"""27SELECT {cols} FROM (SELECT * FROM df1) AS df128{join_type} JOIN (SELECT * FROM df2) AS df229ON df1.x = df2.y {constraint}30""",31eager=True,32)33assert sorted(res.to_series()) == [0, 1, 2, 3]343536@pytest.mark.may_fail_cloud # reason: with_context37def test_in_subquery() -> None:38df = pl.DataFrame(39{40"x": [1, 2, 3, 4, 5, 6],41"y": [2, 3, 4, 5, 6, 7],42}43)44df_other = pl.DataFrame(45{46"w": [1, 2, 3, 4, 5, 6],47"z": [2, 3, 4, 5, 6, 7],48}49)50df_chars = pl.DataFrame(51{52"one": ["a", "b", "c", "d", "e", "f"],53"two": ["b", "c", "d", "e", "f", "g"],54}55)5657sql = pl.SQLContext(df=df, df_other=df_other, df_chars=df_chars)58res_same = sql.execute(59"""60SELECT61df.x as x62FROM df63WHERE x IN (SELECT y FROM df)64""",65eager=True,66)67df_expected_same = pl.DataFrame({"x": [2, 3, 4, 5, 6]})68assert_frame_equal(69left=df_expected_same,70right=res_same,71)7273res_double = sql.execute(74"""75SELECT76df.x as x77FROM df78WHERE x IN (SELECT y FROM df)79AND y IN(SELECT w FROM df_other)80""",81eager=True,82)83df_expected_double = pl.DataFrame({"x": [2, 3, 4, 5]})84assert_frame_equal(85left=df_expected_double,86right=res_double,87)8889res_expressions = sql.execute(90"""91SELECT92df.x as x93FROM df94WHERE x+1 IN (SELECT y FROM df)95AND y IN(SELECT w-1 FROM df_other)96""",97eager=True,98)99df_expected_expressions = pl.DataFrame({"x": [1, 2, 3, 4]})100assert_frame_equal(101left=df_expected_expressions,102right=res_expressions,103)104105res_not_in = sql.execute(106"""107SELECT108df.x as x109FROM df110WHERE x NOT IN (SELECT y-5 FROM df)111AND y NOT IN(SELECT w+5 FROM df_other)112""",113eager=True,114)115df_not_in = pl.DataFrame({"x": [3, 4]})116assert_frame_equal(117left=df_not_in,118right=res_not_in,119)120121res_chars = sql.execute(122"""123SELECT124df_chars.one125FROM df_chars126WHERE one IN (SELECT two FROM df_chars)127""",128eager=True,129)130df_expected_chars = pl.DataFrame({"one": ["b", "c", "d", "e", "f"]})131assert_frame_equal(132left=res_chars,133right=df_expected_chars,134)135136with pytest.raises(137SQLSyntaxError,138match="SQL subquery returns more than one column",139):140sql.execute(141"""142SELECT143df_chars.one144FROM df_chars145WHERE one IN (SELECT one, two FROM df_chars)146""",147eager=True,148)149150151