Path: blob/main/py-polars/tests/unit/sql/test_literals.py
8406 views
from __future__ import annotations12from datetime import date, datetime, timedelta34import pytest56import polars as pl7from polars.exceptions import SQLInterfaceError, SQLSyntaxError8from polars.testing import assert_frame_equal9from tests.unit.sql import assert_sql_matches101112def test_bit_hex_literals() -> None:13with pl.SQLContext(df=None, eager=True) as ctx:14out = ctx.execute(15"""16SELECT *,17-- bit strings18b'' AS b0,19b'1001' AS b1,20b'11101011' AS b2,21b'1111110100110010' AS b3,22-- hex strings23x'' AS x0,24x'FF' AS x1,25x'4142' AS x2,26x'DeadBeef' AS x3,27FROM df28"""29)3031assert out.to_dict(as_series=False) == {32"b0": [b""],33"b1": [b"\t"],34"b2": [b"\xeb"],35"b3": [b"\xfd2"],36"x0": [b""],37"x1": [b"\xff"],38"x2": [b"AB"],39"x3": [b"\xde\xad\xbe\xef"],40}414243def test_bit_hex_filter() -> None:44df = pl.DataFrame(45{"bin": [b"\x01", b"\x02", b"\x03", b"\x04"], "val": [9, 8, 7, 6]}46)47with pl.SQLContext(test=df) as ctx:48for two in ("b'10'", "x'02'", "'\x02'", "b'0010'"):49out = ctx.execute(f"SELECT val FROM test WHERE bin > {two}", eager=True)50assert out.to_series().to_list() == [7, 6]515253def test_bit_hex_errors() -> None:54with pl.SQLContext(test=None) as ctx:55with pytest.raises(56SQLSyntaxError,57match="bit string literal should contain only 0s and 1s",58):59ctx.execute("SELECT b'007' FROM test", eager=True)6061with pytest.raises(62SQLSyntaxError,63match="hex string literal must have an even number of digits",64):65ctx.execute("SELECT x'00F' FROM test", eager=True)6667with pytest.raises(68SQLSyntaxError,69match="hex string literal must have an even number of digits",70):71pl.sql_expr("colx IN (x'FF',x'123')")7273with pytest.raises(74SQLInterfaceError,75match=r'NationalStringLiteral\("hmmm"\) is not a supported literal',76):77pl.sql_expr("N'hmmm'")787980def test_bit_hex_membership() -> None:81df = pl.DataFrame(82{83"x": [b"\x05", b"\xff", b"\xcc", b"\x0b"],84"y": [1, 2, 3, 4],85}86)87# this checks the internal `visit_any_value` codepath88for values in (89"b'0101', b'1011'",90"x'05', x'0b'",91):92dff = df.filter(pl.sql_expr(f"x IN ({values})"))93assert dff["y"].to_list() == [1, 4]949596def test_dollar_quoted_literals() -> None:97df = pl.sql(98"""99SELECT100$$xyz$$ AS dq1,101$q$xyz$q$ AS dq2,102$tag$xyz$tag$ AS dq3,103$QUOTE$xyz$QUOTE$ AS dq4,104"""105).collect()106assert df.to_dict(as_series=False) == {f"dq{n}": ["xyz"] for n in range(1, 5)}107108df = pl.sql("SELECT $$x$z$$ AS dq").collect()109assert df.item() == "x$z"110111112def test_fixed_intervals() -> None:113with pl.SQLContext(df=None, eager=True) as ctx:114out = ctx.execute(115"""116SELECT117-- short form with/without spaces118INTERVAL '1w2h3m4s' AS i1,119INTERVAL '100ms 100us' AS i2,120-- long form with/without commas (case-insensitive)121INTERVAL '1 week, 2 hours, 3 minutes, 4 seconds' AS i3122FROM df123"""124)125expected = pl.DataFrame(126{127"i1": [timedelta(weeks=1, hours=2, minutes=3, seconds=4)],128"i2": [timedelta(microseconds=100100)],129"i3": [timedelta(weeks=1, hours=2, minutes=3, seconds=4)],130},131).cast(pl.Duration("ns"))132133assert_frame_equal(expected, out)134135# TODO: negative intervals136with pytest.raises(137SQLInterfaceError,138match="minus signs are not yet supported in interval strings; found '-7d'",139):140ctx.execute("SELECT INTERVAL '-7d' AS one_week_ago FROM df")141142with pytest.raises(143SQLSyntaxError,144match="unary ops are not valid on interval strings; found -'7d'",145):146ctx.execute("SELECT INTERVAL -'7d' AS one_week_ago FROM df")147148with pytest.raises(149SQLSyntaxError,150match="fixed-duration interval cannot contain years, quarters, or months",151):152ctx.execute("SELECT INTERVAL '1 quarter 1 month' AS q FROM df")153154155def test_interval_offsets() -> None:156df = pl.DataFrame(157{158"dtm": [159datetime(1899, 12, 31, 8),160datetime(1999, 6, 8, 10, 30),161datetime(2010, 5, 7, 20, 20, 20),162],163"dt": [164date(1950, 4, 10),165date(2048, 1, 20),166date(2026, 8, 5),167],168}169)170171out = df.sql(172"""173SELECT174dtm + INTERVAL '2 months, 30 minutes' AS dtm_plus_2mo30m,175dt + INTERVAL '100 years' AS dt_plus_100y,176dt - INTERVAL '1 quarter' AS dt_minus_1q177FROM self178ORDER BY 1179"""180)181assert out.to_dict(as_series=False) == {182"dtm_plus_2mo30m": [183datetime(1900, 2, 28, 8, 30),184datetime(1999, 8, 8, 11, 0),185datetime(2010, 7, 7, 20, 50, 20),186],187"dt_plus_100y": [188date(2050, 4, 10),189date(2148, 1, 20),190date(2126, 8, 5),191],192"dt_minus_1q": [193date(1950, 1, 10),194date(2047, 10, 20),195date(2026, 5, 5),196],197}198199200@pytest.mark.parametrize(201("interval_comparison", "expected_result"),202[203("INTERVAL '3 days' <= INTERVAL '3 days, 1 microsecond'", True),204("INTERVAL '3 days, 1 microsecond' <= INTERVAL '3 days'", False),205("INTERVAL '3 months' >= INTERVAL '3 months'", True),206("INTERVAL '2 quarters' < INTERVAL '2 quarters'", False),207("INTERVAL '2 quarters' > INTERVAL '2 quarters'", False),208("INTERVAL '3 years' <=> INTERVAL '3 years'", True),209("INTERVAL '3 years' == INTERVAL '1008 weeks'", False),210("INTERVAL '8 weeks' != INTERVAL '2 months'", True),211("INTERVAL '8 weeks' = INTERVAL '2 months'", False),212("INTERVAL '1 year' != INTERVAL '365 days'", True),213("INTERVAL '1 year' = INTERVAL '1 year'", True),214],215)216def test_interval_comparisons(interval_comparison: str, expected_result: bool) -> None:217with pl.SQLContext() as ctx:218res = ctx.execute(f"SELECT {interval_comparison} AS res")219assert res.collect().to_dict(as_series=False) == {"res": [expected_result]}220221222def test_select_literals_no_table() -> None:223res = pl.sql("SELECT 1 AS one, '2' AS two, 3.0 AS three", eager=True)224assert res.to_dict(as_series=False) == {225"one": [1],226"two": ["2"],227"three": [3.0],228}229230231def test_literal_only_select() -> None:232"""Check that literal-only SELECT broadcasts to the source table's height."""233df = pl.DataFrame({"x": [1, 2, 3], "y": [4.0, 5.0, 6.0]})234235assert_sql_matches(236df,237query="SELECT 1 AS one, 2.5 AS two FROM self",238expected={"one": [1, 1, 1], "two": [2.5, 2.5, 2.5]},239compare_with="sqlite",240)241assert_sql_matches(242df,243query="SELECT 1 + 2 AS sum, 'abc' || 'def' AS concat FROM self",244expected={"sum": [3, 3, 3], "concat": ["abcdef", "abcdef", "abcdef"]},245compare_with="sqlite",246)247248# empty table should result in zero rows249df = df.clear()250251assert_sql_matches(252df,253query="SELECT 42 AS the_answer, 'test' AS str FROM self",254expected={"the_answer": [], "str": []},255compare_with="sqlite",256)257258259def test_literal_only_select_distinct() -> None:260"""Test literal-only SELECT with DISTINCT clause."""261df = pl.DataFrame({"x": [1, 2, 3, 4, 5]})262263# DISTINCT on broadcast literals should collapse to 1 row264assert_sql_matches(265df,266query="SELECT DISTINCT 42 AS val FROM self",267expected={"val": [42]},268compare_with="sqlite",269)270271272def test_literal_only_select_order_by() -> None:273"""Test literal-only SELECT with ORDER BY (edge case: no-op but shouldn't error)."""274df = pl.DataFrame({"x": [3, 1, 2]})275276# ORDER BY on literal column is a no-op but should still work277assert_sql_matches(278df,279query="SELECT 1 AS one FROM self ORDER BY one",280expected={"one": [1, 1, 1]},281compare_with="sqlite",282)283284285def test_literal_only_select_where() -> None:286"""Test literal-only SELECT respects WHERE filtering."""287df = pl.DataFrame({"x": [1, 2, 3, 4, 5]})288289# WHERE clause should filter, then literals broadcast to the filtered height290assert_sql_matches(291df,292query="SELECT 99 AS lit FROM self WHERE x > 3",293expected={"lit": [99, 99]},294compare_with="sqlite",295)296assert_sql_matches(297df,298query="SELECT 99 AS lit FROM self WHERE x > 100000000",299expected={"lit": []},300compare_with="sqlite",301)302303304def test_literal_only_select_limit() -> None:305"""Test literal-only SELECT with LIMIT clause."""306df = pl.DataFrame({"x": list(range(10))})307308assert_sql_matches(309df,310query="SELECT 'val' AS s FROM self LIMIT 3",311expected={"s": ["val", "val", "val"]},312compare_with="sqlite",313)314315316def test_literal_only_select_nested_expressions() -> None:317"""Test literal-only SELECT with complex nested expressions (no column refs)."""318df = pl.DataFrame({"x": [1, 2]})319320assert_sql_matches(321df,322query="""323SELECT324CASE WHEN 1 > 0 THEN 'yes' ELSE 'no' END AS cond,325COALESCE(NULL, 'fallback') AS coal,326ABS(-5) AS absval327FROM self328""",329expected={330"cond": ["yes", "yes"],331"coal": ["fallback", "fallback"],332"absval": [5, 5],333},334compare_with="sqlite",335)336337338def test_mixed_literal_and_column() -> None:339"""Test basic mixed literal/column SELECT."""340df = pl.DataFrame({"x": [10, 20, 30]})341342# When there's at least one column reference, normal behavior applies343assert_sql_matches(344df,345query="SELECT x, 99 AS lit FROM self",346expected={"x": [10, 20, 30], "lit": [99, 99, 99]},347compare_with="sqlite",348)349350351def test_select_from_table_with_reserved_names() -> None:352select = pl.DataFrame({"select": [1, 2, 3], "from": [4, 5, 6]})353out = pl.sql(354query="""355SELECT "from", "select"356FROM "select"357WHERE "from" >= 5 AND "select" % 2 != 1358""",359eager=True,360)361assert out.rows() == [(5, 2)]362363364