Path: blob/main/py-polars/tests/unit/sql/test_table_operations.py
6939 views
from __future__ import annotations12import re3from datetime import date45import pytest67import polars as pl8from polars.exceptions import SQLInterfaceError9from polars.testing import assert_frame_equal101112@pytest.fixture13def test_frame() -> pl.LazyFrame:14return pl.LazyFrame(15{16"x": [1, 2, 3],17"y": ["aaa", "bbb", "ccc"],18"z": [date(2000, 12, 31), date(1978, 11, 15), date(2077, 10, 20)],19},20schema_overrides={"x": pl.UInt8},21)222324@pytest.mark.parametrize(25("delete_constraint", "expected_ids"),26[27# basic constraints28("WHERE id = 200", {100, 300}),29("WHERE id = 200 OR id = 300", {100}),30("WHERE id IN (200, 300, 400)", {100}),31("WHERE id NOT IN (200, 300, 400)", {200, 300}),32# more involved constraints33("WHERE EXTRACT(year FROM dt) >= 2000", {200}),34# null-handling (in the data)35("WHERE v1 < 0", {100, 300}),36("WHERE v1 > 0", {200, 300}),37# null handling (in the constraint)38("WHERE v1 IS NULL", {100, 200}),39("WHERE v1 IS NOT NULL", {300}),40# boolean handling (delete all/none)41("WHERE FALSE", {100, 200, 300}),42("WHERE TRUE", set()),43# no constraint; equivalent to TRUNCATE (drop all rows)44("", set()),45],46)47def test_delete_clause(delete_constraint: str, expected_ids: set[int]) -> None:48df = pl.DataFrame(49{50"id": [100, 200, 300],51"dt": [date(2020, 10, 10), date(1999, 1, 2), date(2001, 7, 5)],52"v1": [3.5, -4.0, None],53"v2": [10.0, 2.5, -1.5],54}55)56res = df.sql(f"DELETE FROM self {delete_constraint}")57assert set(res["id"]) == expected_ids585960def test_drop_table(test_frame: pl.LazyFrame) -> None:61# 'drop' completely removes the table from sql context62expected = pl.DataFrame()6364with pl.SQLContext(frame=test_frame, eager=True) as ctx:65res = ctx.execute("DROP TABLE frame")66assert_frame_equal(res, expected)6768with pytest.raises(SQLInterfaceError, match="'frame' was not found"):69ctx.execute("SELECT * FROM frame")707172def test_explain_query(test_frame: pl.LazyFrame) -> None:73# 'explain' returns the query plan for the given sql74with pl.SQLContext(frame=test_frame) as ctx:75plan = (76ctx.execute("EXPLAIN SELECT * FROM frame")77.select(pl.col("Logical Plan").str.join())78.collect()79.item()80)81assert (82re.search(83pattern=r"PROJECT.+?COLUMNS",84string=plan,85flags=re.IGNORECASE,86)87is not None88)899091def test_show_tables(test_frame: pl.LazyFrame) -> None:92# 'show tables' lists all tables registered with the sql context in sorted order93with pl.SQLContext(94tbl3=test_frame,95tbl2=test_frame,96tbl1=test_frame,97) as ctx:98res = ctx.execute("SHOW TABLES").collect()99assert_frame_equal(res, pl.DataFrame({"name": ["tbl1", "tbl2", "tbl3"]}))100101102@pytest.mark.parametrize(103"truncate_sql",104[105"TRUNCATE TABLE frame",106"TRUNCATE frame",107],108)109def test_truncate_table(truncate_sql: str, test_frame: pl.LazyFrame) -> None:110# 'truncate' preserves the table, but optimally drops all rows within it111expected = pl.DataFrame(schema=test_frame.collect_schema())112113with pl.SQLContext(frame=test_frame, eager=True) as ctx:114res = ctx.execute(truncate_sql)115assert_frame_equal(res, expected)116117res = ctx.execute("SELECT * FROM frame")118assert_frame_equal(res, expected)119120121