Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_fetch.py
8353 views
1
from __future__ import annotations
2
3
from typing import TYPE_CHECKING, Any
4
5
import pytest
6
7
import polars as pl
8
from polars.exceptions import SQLInterfaceError, SQLSyntaxError
9
from tests.unit.sql import assert_sql_matches
10
11
if TYPE_CHECKING:
12
from collections.abc import Sequence
13
14
15
@pytest.fixture
16
def df() -> pl.DataFrame:
17
return pl.DataFrame(
18
{
19
"id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
20
"label": ["A", "B", "A", "B", "A", "B", "A", "B", "A", "B"],
21
"value": [100, 200, 150, 250, 120, 220, 180, 280, 130, 230],
22
}
23
)
24
25
26
@pytest.mark.parametrize(
27
("query", "expected"),
28
[
29
( # Basic FETCH FIRST / NEXT variants
30
"SELECT * FROM self ORDER BY id FETCH FIRST 3 ROWS ONLY",
31
{"id": [1, 2, 3], "label": ["A", "B", "A"], "value": [100, 200, 150]},
32
),
33
(
34
"SELECT * FROM self ORDER BY id FETCH NEXT 3 ROWS ONLY",
35
{"id": [1, 2, 3], "label": ["A", "B", "A"], "value": [100, 200, 150]},
36
),
37
(
38
"SELECT * FROM self ORDER BY id FETCH FIRST 1 ROW ONLY",
39
{"id": [1], "label": ["A"], "value": [100]},
40
),
41
( # OFFSET with FETCH
42
"SELECT * FROM self ORDER BY id OFFSET 8 ROWS FETCH NEXT 5 ROWS ONLY",
43
{"id": [9, 10], "label": ["A", "B"], "value": [130, 230]},
44
),
45
( # FETCH with WHERE
46
"SELECT * FROM self WHERE label = 'A' ORDER BY value FETCH FIRST 3 ROWS ONLY",
47
{"id": [1, 5, 9], "label": ["A", "A", "A"], "value": [100, 120, 130]},
48
),
49
( # FETCH with GROUP BY
50
"SELECT label, SUM(value) AS total FROM self GROUP BY label ORDER BY total DESC FETCH FIRST 1 ROW ONLY",
51
{"label": ["B"], "total": [1180]},
52
),
53
( # FETCH with DISTINCT
54
"SELECT DISTINCT label FROM self ORDER BY label FETCH FIRST 1 ROW ONLY",
55
{"label": ["A"]},
56
),
57
( # FETCH in subquery
58
"SELECT * FROM (SELECT * FROM self ORDER BY value DESC FETCH FIRST 3 ROWS ONLY) AS top3 ORDER BY id",
59
{"id": [4, 8, 10], "label": ["B", "B", "B"], "value": [250, 280, 230]},
60
),
61
( # Queries that should return no rows
62
"SELECT * FROM self FETCH FIRST 0 ROWS ONLY",
63
{"id": [], "label": [], "value": []},
64
),
65
(
66
"SELECT * FROM self ORDER BY id OFFSET 100 ROWS FETCH FIRST 5 ROWS ONLY",
67
{"id": [], "label": [], "value": []},
68
),
69
( # FETCH in CTE
70
"""
71
WITH top5 AS (
72
SELECT * FROM self ORDER BY value DESC FETCH FIRST 5 ROWS ONLY
73
)
74
SELECT label, COUNT(*) AS cnt
75
FROM top5
76
GROUP BY label
77
ORDER BY label
78
""",
79
{"label": ["B"], "cnt": [5]},
80
),
81
],
82
)
83
def test_fetch_clause(
84
df: pl.DataFrame, query: str, expected: dict[str, Sequence[Any]]
85
) -> None:
86
assert_sql_matches(
87
df,
88
query=query,
89
compare_with="duckdb",
90
expected=expected,
91
)
92
93
94
def test_fetch_with_join(df: pl.DataFrame) -> None:
95
categories = pl.DataFrame({"label": ["A", "B"], "description": ["Alpha", "Beta"]})
96
assert_sql_matches(
97
frames={
98
"test": df,
99
"categories": categories,
100
},
101
query="""
102
SELECT test.id, test.value, categories.description
103
FROM test
104
JOIN categories ON test.label = categories.label
105
ORDER BY test.value DESC
106
FETCH FIRST 3 ROWS ONLY
107
""",
108
compare_with="duckdb",
109
expected={
110
"id": [8, 4, 10],
111
"value": [280, 250, 230],
112
"description": ["Beta", "Beta", "Beta"],
113
},
114
)
115
116
117
def test_fetch_with_union(df: pl.DataFrame) -> None:
118
assert_sql_matches(
119
frames={"tbl": df},
120
query="""
121
(
122
SELECT id, value FROM tbl WHERE label = 'A'
123
UNION ALL
124
SELECT id, value FROM tbl WHERE label = 'B'
125
)
126
ORDER BY value
127
FETCH FIRST 5 ROWS ONLY
128
""",
129
expected={"id": [1, 5, 9, 3, 7], "value": [100, 120, 130, 150, 180]},
130
compare_with="duckdb",
131
)
132
133
134
@pytest.mark.parametrize(
135
("query", "error_type", "match"),
136
[
137
(
138
"SELECT * FROM self FETCH FIRST 50 PERCENT ROWS ONLY",
139
SQLInterfaceError,
140
r"`FETCH` with `PERCENT` is not supported",
141
),
142
(
143
"SELECT * FROM self ORDER BY value FETCH FIRST 5 ROWS WITH TIES",
144
SQLInterfaceError,
145
r"`FETCH` with `WITH TIES` is not supported",
146
),
147
(
148
"SELECT * FROM self LIMIT 5 FETCH FIRST 3 ROWS ONLY",
149
SQLSyntaxError,
150
r"cannot use both `LIMIT` and `FETCH`",
151
),
152
],
153
)
154
def test_fetch_errors(
155
df: pl.DataFrame, query: str, error_type: type[Exception], match: str
156
) -> None:
157
"""Test error conditions for unsupported FETCH features."""
158
with pytest.raises(error_type, match=match):
159
df.sql(query)
160
161