CoCalc -- test

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_fetch.py
⁸³⁵³ views
1
from __future__ import annotations
2

3
from typing import TYPE_CHECKING, Any
4

5
import pytest
6

7
import polars as pl
8
from polars.exceptions import SQLInterfaceError, SQLSyntaxError
9
from tests.unit.sql import assert_sql_matches
10

11
if TYPE_CHECKING:
12
    from collections.abc import Sequence
13

14

15
@pytest.fixture
16
def df() -> pl.DataFrame:
17
    return pl.DataFrame(
18
        {
19
            "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
20
            "label": ["A", "B", "A", "B", "A", "B", "A", "B", "A", "B"],
21
            "value": [100, 200, 150, 250, 120, 220, 180, 280, 130, 230],
22
        }
23
    )
24

25

26
@pytest.mark.parametrize(
27
    ("query", "expected"),
28
    [
29
        (  # Basic FETCH FIRST / NEXT variants
30
            "SELECT * FROM self ORDER BY id FETCH FIRST 3 ROWS ONLY",
31
            {"id": [1, 2, 3], "label": ["A", "B", "A"], "value": [100, 200, 150]},
32
        ),
33
        (
34
            "SELECT * FROM self ORDER BY id FETCH NEXT 3 ROWS ONLY",
35
            {"id": [1, 2, 3], "label": ["A", "B", "A"], "value": [100, 200, 150]},
36
        ),
37
        (
38
            "SELECT * FROM self ORDER BY id FETCH FIRST 1 ROW ONLY",
39
            {"id": [1], "label": ["A"], "value": [100]},
40
        ),
41
        (  # OFFSET with FETCH
42
            "SELECT * FROM self ORDER BY id OFFSET 8 ROWS FETCH NEXT 5 ROWS ONLY",
43
            {"id": [9, 10], "label": ["A", "B"], "value": [130, 230]},
44
        ),
45
        (  # FETCH with WHERE
46
            "SELECT * FROM self WHERE label = 'A' ORDER BY value FETCH FIRST 3 ROWS ONLY",
47
            {"id": [1, 5, 9], "label": ["A", "A", "A"], "value": [100, 120, 130]},
48
        ),
49
        (  # FETCH with GROUP BY
50
            "SELECT label, SUM(value) AS total FROM self GROUP BY label ORDER BY total DESC FETCH FIRST 1 ROW ONLY",
51
            {"label": ["B"], "total": [1180]},
52
        ),
53
        (  # FETCH with DISTINCT
54
            "SELECT DISTINCT label FROM self ORDER BY label FETCH FIRST 1 ROW ONLY",
55
            {"label": ["A"]},
56
        ),
57
        (  # FETCH in subquery
58
            "SELECT * FROM (SELECT * FROM self ORDER BY value DESC FETCH FIRST 3 ROWS ONLY) AS top3 ORDER BY id",
59
            {"id": [4, 8, 10], "label": ["B", "B", "B"], "value": [250, 280, 230]},
60
        ),
61
        (  # Queries that should return no rows
62
            "SELECT * FROM self FETCH FIRST 0 ROWS ONLY",
63
            {"id": [], "label": [], "value": []},
64
        ),
65
        (
66
            "SELECT * FROM self ORDER BY id OFFSET 100 ROWS FETCH FIRST 5 ROWS ONLY",
67
            {"id": [], "label": [], "value": []},
68
        ),
69
        (  # FETCH in CTE
70
            """
71
            WITH top5 AS (
72
                SELECT * FROM self ORDER BY value DESC FETCH FIRST 5 ROWS ONLY
73
            )
74
            SELECT label, COUNT(*) AS cnt
75
            FROM top5
76
            GROUP BY label
77
            ORDER BY label
78
            """,
79
            {"label": ["B"], "cnt": [5]},
80
        ),
81
    ],
82
)
83
def test_fetch_clause(
84
    df: pl.DataFrame, query: str, expected: dict[str, Sequence[Any]]
85
) -> None:
86
    assert_sql_matches(
87
        df,
88
        query=query,
89
        compare_with="duckdb",
90
        expected=expected,
91
    )
92

93

94
def test_fetch_with_join(df: pl.DataFrame) -> None:
95
    categories = pl.DataFrame({"label": ["A", "B"], "description": ["Alpha", "Beta"]})
96
    assert_sql_matches(
97
        frames={
98
            "test": df,
99
            "categories": categories,
100
        },
101
        query="""
102
            SELECT test.id, test.value, categories.description
103
            FROM test
104
            JOIN categories ON test.label = categories.label
105
            ORDER BY test.value DESC
106
            FETCH FIRST 3 ROWS ONLY
107
        """,
108
        compare_with="duckdb",
109
        expected={
110
            "id": [8, 4, 10],
111
            "value": [280, 250, 230],
112
            "description": ["Beta", "Beta", "Beta"],
113
        },
114
    )
115

116

117
def test_fetch_with_union(df: pl.DataFrame) -> None:
118
    assert_sql_matches(
119
        frames={"tbl": df},
120
        query="""
121
          (
122
            SELECT id, value FROM tbl WHERE label = 'A'
123
            UNION ALL
124
            SELECT id, value FROM tbl WHERE label = 'B'
125
          )
126
          ORDER BY value
127
          FETCH FIRST 5 ROWS ONLY
128
        """,
129
        expected={"id": [1, 5, 9, 3, 7], "value": [100, 120, 130, 150, 180]},
130
        compare_with="duckdb",
131
    )
132

133

134
@pytest.mark.parametrize(
135
    ("query", "error_type", "match"),
136
    [
137
        (
138
            "SELECT * FROM self FETCH FIRST 50 PERCENT ROWS ONLY",
139
            SQLInterfaceError,
140
            r"`FETCH` with `PERCENT` is not supported",
141
        ),
142
        (
143
            "SELECT * FROM self ORDER BY value FETCH FIRST 5 ROWS WITH TIES",
144
            SQLInterfaceError,
145
            r"`FETCH` with `WITH TIES` is not supported",
146
        ),
147
        (
148
            "SELECT * FROM self LIMIT 5 FETCH FIRST 3 ROWS ONLY",
149
            SQLSyntaxError,
150
            r"cannot use both `LIMIT` and `FETCH`",
151
        ),
152
    ],
153
)
154
def test_fetch_errors(
155
    df: pl.DataFrame, query: str, error_type: type[Exception], match: str
156
) -> None:
157
    """Test error conditions for unsupported FETCH features."""
158
    with pytest.raises(error_type, match=match):
159
        df.sql(query)
160

161
Product

Resources

Company