Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_table_operations.py
8391 views
1
from __future__ import annotations
2
3
import re
4
from datetime import date
5
from typing import TYPE_CHECKING
6
7
import pytest
8
9
import polars as pl
10
from polars.exceptions import SQLInterfaceError
11
from polars.testing import assert_frame_equal
12
13
if TYPE_CHECKING:
14
from pathlib import Path
15
16
17
@pytest.fixture
18
def test_frame() -> pl.LazyFrame:
19
return pl.LazyFrame(
20
{
21
"x": [1, 2, 3],
22
"y": ["aaa", "bbb", "ccc"],
23
"z": [date(2000, 12, 31), date(1978, 11, 15), date(2077, 10, 20)],
24
},
25
schema_overrides={"x": pl.UInt8},
26
)
27
28
29
def test_create_table() -> None:
30
with pl.SQLContext() as ctx:
31
# test all three ways of creating a new table
32
ctx.execute("CREATE TABLE tbl1(colx VARCHAR, coly DATE, colz ARRAY<DOUBLE>)")
33
ctx.execute("CREATE TABLE tbl2 AS SELECT * FROM tbl1")
34
ctx.execute("CREATE TABLE tbl3 LIKE tbl2")
35
df = ctx.execute("SELECT * FROM tbl3", eager=True)
36
37
df_expected = pl.DataFrame(
38
schema={
39
"colx": pl.String,
40
"coly": pl.Date,
41
"colz": pl.List(pl.Float64),
42
}
43
)
44
assert_frame_equal(df_expected, df)
45
46
47
def test_create_table_from_file_io(io_files_path: Path) -> None:
48
foods_csv = io_files_path / "foods*.csv"
49
with pl.SQLContext() as ctx:
50
ctx.execute(
51
query=f"""
52
CREATE TABLE foods AS
53
SELECT * FROM READ_CSV('{foods_csv}')
54
""",
55
eager=True,
56
)
57
df = ctx.execute("SELECT * FROM foods", eager=True)
58
assert df.schema == {
59
"category": pl.String,
60
"calories": pl.Int64,
61
"fats_g": pl.Float64,
62
"sugars_g": pl.Int64,
63
}
64
assert df.shape == (135, 4)
65
66
67
@pytest.mark.parametrize(
68
("delete_constraint", "expected_ids"),
69
[
70
# basic constraints
71
("WHERE id = 200", {100, 300}),
72
("WHERE id = 200 OR id = 300", {100}),
73
("WHERE id IN (200, 300, 400)", {100}),
74
("WHERE id NOT IN (200, 300, 400)", {200, 300}),
75
# more involved constraints
76
("WHERE EXTRACT(year FROM dt) >= 2000", {200}),
77
# null-handling (in the data)
78
("WHERE v1 < 0", {100, 300}),
79
("WHERE v1 > 0", {200, 300}),
80
# null handling (in the constraint)
81
("WHERE v1 IS NULL", {100, 200}),
82
("WHERE v1 IS NOT NULL", {300}),
83
# boolean handling (delete all/none)
84
("WHERE FALSE", {100, 200, 300}),
85
("WHERE TRUE", set()),
86
# no constraint; equivalent to TRUNCATE (drop all rows)
87
("", set()),
88
],
89
)
90
def test_delete_clause(delete_constraint: str, expected_ids: set[int]) -> None:
91
df = pl.DataFrame(
92
{
93
"id": [100, 200, 300],
94
"dt": [date(2020, 10, 10), date(1999, 1, 2), date(2001, 7, 5)],
95
"v1": [3.5, -4.0, None],
96
"v2": [10.0, 2.5, -1.5],
97
}
98
)
99
res = df.sql(f"DELETE FROM self {delete_constraint}")
100
assert set(res["id"]) == expected_ids
101
102
103
def test_drop_table(test_frame: pl.LazyFrame) -> None:
104
# 'drop' completely removes the table from sql context
105
expected = pl.DataFrame()
106
107
with pl.SQLContext(frame=test_frame, eager=True) as ctx:
108
res = ctx.execute("DROP TABLE frame")
109
assert_frame_equal(res, expected)
110
111
with pytest.raises(SQLInterfaceError, match="'frame' was not found"):
112
ctx.execute("SELECT * FROM frame")
113
114
115
def test_explain_query(test_frame: pl.LazyFrame) -> None:
116
# 'explain' returns the query plan for the given sql
117
with pl.SQLContext(frame=test_frame) as ctx:
118
plan = (
119
ctx.execute("EXPLAIN SELECT * FROM frame")
120
.select(pl.col("Logical Plan").str.join())
121
.collect()
122
.item()
123
)
124
assert (
125
re.search(
126
pattern=r"PROJECT.+?COLUMNS",
127
string=plan,
128
flags=re.IGNORECASE,
129
)
130
is not None
131
)
132
133
134
def test_show_tables(test_frame: pl.LazyFrame) -> None:
135
# 'show tables' lists all tables registered with the sql context in sorted order
136
with pl.SQLContext(
137
tbl3=test_frame,
138
tbl2=test_frame,
139
tbl1=test_frame,
140
) as ctx:
141
res = ctx.execute("SHOW TABLES").collect()
142
assert_frame_equal(res, pl.DataFrame({"name": ["tbl1", "tbl2", "tbl3"]}))
143
144
145
@pytest.mark.parametrize(
146
"truncate_sql",
147
[
148
"TRUNCATE TABLE frame",
149
"TRUNCATE frame",
150
],
151
)
152
def test_truncate_table(truncate_sql: str, test_frame: pl.LazyFrame) -> None:
153
# 'truncate' preserves the table, but optimally drops all rows within it
154
expected = pl.DataFrame(schema=test_frame.collect_schema())
155
156
with pl.SQLContext(frame=test_frame, eager=True) as ctx:
157
res = ctx.execute(truncate_sql)
158
assert_frame_equal(res, expected)
159
160
res = ctx.execute("SELECT * FROM frame")
161
assert_frame_equal(res, expected)
162
163