Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_subqueries.py
6939 views
1
import pytest
2
3
import polars as pl
4
from polars.exceptions import SQLSyntaxError
5
from polars.testing import assert_frame_equal
6
7
8
@pytest.mark.parametrize(
9
("cols", "join_type", "constraint"),
10
[
11
("x", "INNER", ""),
12
("y", "INNER", ""),
13
("x", "LEFT", "WHERE y IN (0,1,2,3,4,5)"),
14
("y", "LEFT", "WHERE y >= 0"),
15
("df1.*", "FULL", "WHERE y >= 0"),
16
("df2.*", "FULL", "WHERE x >= 0"),
17
("* EXCLUDE y", "LEFT", "WHERE y >= 0"),
18
("* EXCLUDE x", "LEFT", "WHERE x >= 0"),
19
],
20
)
21
def test_from_subquery(cols: str, join_type: str, constraint: str) -> None:
22
df1 = pl.DataFrame({"x": [-1, 0, 3, 1, 2, -1]})
23
df2 = pl.DataFrame({"y": [0, 1, 2, 3]})
24
25
sql = pl.SQLContext(df1=df1, df2=df2)
26
res = sql.execute(
27
f"""
28
SELECT {cols} FROM (SELECT * FROM df1) AS df1
29
{join_type} JOIN (SELECT * FROM df2) AS df2
30
ON df1.x = df2.y {constraint}
31
""",
32
eager=True,
33
)
34
assert sorted(res.to_series()) == [0, 1, 2, 3]
35
36
37
@pytest.mark.may_fail_cloud # reason: with_context
38
def test_in_subquery() -> None:
39
df = pl.DataFrame(
40
{
41
"x": [1, 2, 3, 4, 5, 6],
42
"y": [2, 3, 4, 5, 6, 7],
43
}
44
)
45
df_other = pl.DataFrame(
46
{
47
"w": [1, 2, 3, 4, 5, 6],
48
"z": [2, 3, 4, 5, 6, 7],
49
}
50
)
51
df_chars = pl.DataFrame(
52
{
53
"one": ["a", "b", "c", "d", "e", "f"],
54
"two": ["b", "c", "d", "e", "f", "g"],
55
}
56
)
57
58
sql = pl.SQLContext(df=df, df_other=df_other, df_chars=df_chars)
59
res_same = sql.execute(
60
"""
61
SELECT
62
df.x as x
63
FROM df
64
WHERE x IN (SELECT y FROM df)
65
""",
66
eager=True,
67
)
68
df_expected_same = pl.DataFrame({"x": [2, 3, 4, 5, 6]})
69
assert_frame_equal(
70
left=df_expected_same,
71
right=res_same,
72
)
73
74
res_double = sql.execute(
75
"""
76
SELECT
77
df.x as x
78
FROM df
79
WHERE x IN (SELECT y FROM df)
80
AND y IN(SELECT w FROM df_other)
81
""",
82
eager=True,
83
)
84
df_expected_double = pl.DataFrame({"x": [2, 3, 4, 5]})
85
assert_frame_equal(
86
left=df_expected_double,
87
right=res_double,
88
)
89
90
res_expressions = sql.execute(
91
"""
92
SELECT
93
df.x as x
94
FROM df
95
WHERE x+1 IN (SELECT y FROM df)
96
AND y IN(SELECT w-1 FROM df_other)
97
""",
98
eager=True,
99
)
100
df_expected_expressions = pl.DataFrame({"x": [1, 2, 3, 4]})
101
assert_frame_equal(
102
left=df_expected_expressions,
103
right=res_expressions,
104
)
105
106
res_not_in = sql.execute(
107
"""
108
SELECT
109
df.x as x
110
FROM df
111
WHERE x NOT IN (SELECT y-5 FROM df)
112
AND y NOT IN(SELECT w+5 FROM df_other)
113
""",
114
eager=True,
115
)
116
df_not_in = pl.DataFrame({"x": [3, 4]})
117
assert_frame_equal(
118
left=df_not_in,
119
right=res_not_in,
120
)
121
122
res_chars = sql.execute(
123
"""
124
SELECT
125
df_chars.one
126
FROM df_chars
127
WHERE one IN (SELECT two FROM df_chars)
128
""",
129
eager=True,
130
)
131
df_expected_chars = pl.DataFrame({"one": ["b", "c", "d", "e", "f"]})
132
assert_frame_equal(
133
left=res_chars,
134
right=df_expected_chars,
135
)
136
137
with pytest.raises(
138
SQLSyntaxError,
139
match="SQL subquery returns more than one column",
140
):
141
sql.execute(
142
"""
143
SELECT
144
df_chars.one
145
FROM df_chars
146
WHERE one IN (SELECT one, two FROM df_chars)
147
""",
148
eager=True,
149
)
150
151