Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_table_operations.py
6939 views
1
from __future__ import annotations
2
3
import re
4
from datetime import date
5
6
import pytest
7
8
import polars as pl
9
from polars.exceptions import SQLInterfaceError
10
from polars.testing import assert_frame_equal
11
12
13
@pytest.fixture
14
def test_frame() -> pl.LazyFrame:
15
return pl.LazyFrame(
16
{
17
"x": [1, 2, 3],
18
"y": ["aaa", "bbb", "ccc"],
19
"z": [date(2000, 12, 31), date(1978, 11, 15), date(2077, 10, 20)],
20
},
21
schema_overrides={"x": pl.UInt8},
22
)
23
24
25
@pytest.mark.parametrize(
26
("delete_constraint", "expected_ids"),
27
[
28
# basic constraints
29
("WHERE id = 200", {100, 300}),
30
("WHERE id = 200 OR id = 300", {100}),
31
("WHERE id IN (200, 300, 400)", {100}),
32
("WHERE id NOT IN (200, 300, 400)", {200, 300}),
33
# more involved constraints
34
("WHERE EXTRACT(year FROM dt) >= 2000", {200}),
35
# null-handling (in the data)
36
("WHERE v1 < 0", {100, 300}),
37
("WHERE v1 > 0", {200, 300}),
38
# null handling (in the constraint)
39
("WHERE v1 IS NULL", {100, 200}),
40
("WHERE v1 IS NOT NULL", {300}),
41
# boolean handling (delete all/none)
42
("WHERE FALSE", {100, 200, 300}),
43
("WHERE TRUE", set()),
44
# no constraint; equivalent to TRUNCATE (drop all rows)
45
("", set()),
46
],
47
)
48
def test_delete_clause(delete_constraint: str, expected_ids: set[int]) -> None:
49
df = pl.DataFrame(
50
{
51
"id": [100, 200, 300],
52
"dt": [date(2020, 10, 10), date(1999, 1, 2), date(2001, 7, 5)],
53
"v1": [3.5, -4.0, None],
54
"v2": [10.0, 2.5, -1.5],
55
}
56
)
57
res = df.sql(f"DELETE FROM self {delete_constraint}")
58
assert set(res["id"]) == expected_ids
59
60
61
def test_drop_table(test_frame: pl.LazyFrame) -> None:
62
# 'drop' completely removes the table from sql context
63
expected = pl.DataFrame()
64
65
with pl.SQLContext(frame=test_frame, eager=True) as ctx:
66
res = ctx.execute("DROP TABLE frame")
67
assert_frame_equal(res, expected)
68
69
with pytest.raises(SQLInterfaceError, match="'frame' was not found"):
70
ctx.execute("SELECT * FROM frame")
71
72
73
def test_explain_query(test_frame: pl.LazyFrame) -> None:
74
# 'explain' returns the query plan for the given sql
75
with pl.SQLContext(frame=test_frame) as ctx:
76
plan = (
77
ctx.execute("EXPLAIN SELECT * FROM frame")
78
.select(pl.col("Logical Plan").str.join())
79
.collect()
80
.item()
81
)
82
assert (
83
re.search(
84
pattern=r"PROJECT.+?COLUMNS",
85
string=plan,
86
flags=re.IGNORECASE,
87
)
88
is not None
89
)
90
91
92
def test_show_tables(test_frame: pl.LazyFrame) -> None:
93
# 'show tables' lists all tables registered with the sql context in sorted order
94
with pl.SQLContext(
95
tbl3=test_frame,
96
tbl2=test_frame,
97
tbl1=test_frame,
98
) as ctx:
99
res = ctx.execute("SHOW TABLES").collect()
100
assert_frame_equal(res, pl.DataFrame({"name": ["tbl1", "tbl2", "tbl3"]}))
101
102
103
@pytest.mark.parametrize(
104
"truncate_sql",
105
[
106
"TRUNCATE TABLE frame",
107
"TRUNCATE frame",
108
],
109
)
110
def test_truncate_table(truncate_sql: str, test_frame: pl.LazyFrame) -> None:
111
# 'truncate' preserves the table, but optimally drops all rows within it
112
expected = pl.DataFrame(schema=test_frame.collect_schema())
113
114
with pl.SQLContext(frame=test_frame, eager=True) as ctx:
115
res = ctx.execute(truncate_sql)
116
assert_frame_equal(res, expected)
117
118
res = ctx.execute("SELECT * FROM frame")
119
assert_frame_equal(res, expected)
120
121