Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/sql/test_cast.py
8396 views
1
from __future__ import annotations
2
3
from typing import Any
4
5
import pytest
6
7
import polars as pl
8
import polars.selectors as cs
9
from polars.exceptions import InvalidOperationError, SQLInterfaceError
10
from polars.testing import assert_frame_equal
11
12
13
def test_cast() -> None:
14
df = pl.DataFrame(
15
{
16
"a": [1, 2, 3, 4, 5],
17
"b": [1.1, 2.2, 3.3, 4.4, 5.5],
18
"c": ["a", "b", "c", "d", "e"],
19
"d": [True, False, True, False, True],
20
"e": [-1, 0, None, 1, 2],
21
}
22
)
23
24
# test various dtype casts, using standard ("CAST <col> AS <dtype>")
25
# and postgres-specific ("<col>::<dtype>") cast syntax
26
with pl.SQLContext(df=df, eager=True) as ctx:
27
res = ctx.execute(
28
"""
29
SELECT
30
-- float
31
CAST(a AS DOUBLE PRECISION) AS a_f64,
32
a::real AS a_f32,
33
b::float(24) AS b_f32,
34
b::float(25) AS b_f64,
35
e::float8 AS e_f64,
36
e::float4 AS e_f32,
37
38
-- integer
39
CAST(b AS TINYINT) AS b_i8,
40
CAST(b AS SMALLINT) AS b_i16,
41
b::bigint AS b_i64,
42
d::tinyint AS d_i8,
43
d::hugeint AS d_i128,
44
d::uhugeint as d_u128,
45
a::int1 AS a_i8,
46
a::int2 AS a_i16,
47
a::int4 AS a_i32,
48
a::int8 AS a_i64,
49
50
-- unsigned integer
51
CAST(a AS TINYINT UNSIGNED) AS a_u8,
52
d::uint1 AS d_u8,
53
a::uint2 AS a_u16,
54
b::uint4 AS b_u32,
55
b::uint8 AS b_u64,
56
CAST(a AS BIGINT UNSIGNED) AS a_u64,
57
b::utinyint AS b_u8,
58
b::usmallint AS b_u16,
59
a::uinteger AS a_u32,
60
d::ubigint AS d_u64,
61
62
-- string/binary
63
CAST(a AS CHAR) AS a_char,
64
CAST(b AS VARCHAR) AS b_varchar,
65
c::blob AS c_blob,
66
c::bytes AS c_bytes,
67
c::VARBINARY AS c_varbinary,
68
CAST(d AS CHARACTER VARYING) AS d_charvar,
69
70
-- boolean
71
e::bool AS e_bool,
72
e::boolean AS e_boolean
73
FROM df
74
"""
75
)
76
assert res.schema == {
77
"a_f64": pl.Float64,
78
"a_f32": pl.Float32,
79
"b_f32": pl.Float32,
80
"b_f64": pl.Float64,
81
"e_f64": pl.Float64,
82
"e_f32": pl.Float32,
83
"b_i8": pl.Int8,
84
"b_i16": pl.Int16,
85
"b_i64": pl.Int64,
86
"d_i8": pl.Int8,
87
"d_i128": pl.Int128,
88
"d_u128": pl.UInt128,
89
"a_i8": pl.Int8,
90
"a_i16": pl.Int16,
91
"a_i32": pl.Int32,
92
"a_i64": pl.Int64,
93
"a_u8": pl.UInt8,
94
"d_u8": pl.UInt8,
95
"a_u16": pl.UInt16,
96
"b_u32": pl.UInt32,
97
"b_u64": pl.UInt64,
98
"a_u64": pl.UInt64,
99
"b_u8": pl.UInt8,
100
"b_u16": pl.UInt16,
101
"a_u32": pl.UInt32,
102
"d_u64": pl.UInt64,
103
"a_char": pl.String,
104
"b_varchar": pl.String,
105
"c_blob": pl.Binary,
106
"c_bytes": pl.Binary,
107
"c_varbinary": pl.Binary,
108
"d_charvar": pl.String,
109
"e_bool": pl.Boolean,
110
"e_boolean": pl.Boolean,
111
}
112
assert res.select(cs.by_dtype(pl.Float32)).rows() == pytest.approx(
113
[
114
(1.0, 1.100000023841858, -1.0),
115
(2.0, 2.200000047683716, 0.0),
116
(3.0, 3.299999952316284, None),
117
(4.0, 4.400000095367432, 1.0),
118
(5.0, 5.5, 2.0),
119
]
120
)
121
assert res.select(cs.by_dtype(pl.Float64)).rows() == [
122
(1.0, 1.1, -1.0),
123
(2.0, 2.2, 0.0),
124
(3.0, 3.3, None),
125
(4.0, 4.4, 1.0),
126
(5.0, 5.5, 2.0),
127
]
128
assert res.select(cs.integer()).rows() == [
129
(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
130
(2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0),
131
(3, 3, 3, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 1),
132
(4, 4, 4, 0, 0, 0, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 0),
133
(5, 5, 5, 1, 1, 1, 5, 5, 5, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 1),
134
]
135
assert res.select(cs.string()).rows() == [
136
("1", "1.1", "true"),
137
("2", "2.2", "false"),
138
("3", "3.3", "true"),
139
("4", "4.4", "false"),
140
("5", "5.5", "true"),
141
]
142
assert res.select(cs.binary()).rows() == [
143
(b"a", b"a", b"a"),
144
(b"b", b"b", b"b"),
145
(b"c", b"c", b"c"),
146
(b"d", b"d", b"d"),
147
(b"e", b"e", b"e"),
148
]
149
assert res.select(cs.boolean()).rows() == [
150
(True, True),
151
(False, False),
152
(None, None),
153
(True, True),
154
(True, True),
155
]
156
157
with pytest.raises(
158
SQLInterfaceError,
159
match="use of FORMAT is not currently supported in CAST",
160
):
161
pl.SQLContext(df=df, eager=True).execute(
162
"SELECT CAST(a AS STRING FORMAT 'HEX') FROM df"
163
)
164
165
166
@pytest.mark.parametrize(
167
("values", "cast_op", "error"),
168
[
169
([1.0, -1.0], "values::uint8", "conversion from `f64` to `u64` failed"),
170
([10, 0, -1], "values::uint4", "conversion from `i64` to `u32` failed"),
171
([int(1e8)], "values::int1", "conversion from `i64` to `i8` failed"),
172
(["a", "b"], "values::date", "conversion from `str` to `date` failed"),
173
(["a", "b"], "values::time", "conversion from `str` to `time` failed"),
174
(["a", "b"], "values::int4", "conversion from `str` to `i32` failed"),
175
],
176
)
177
def test_cast_errors(values: Any, cast_op: str, error: str) -> None:
178
df = pl.DataFrame({"values": values})
179
180
# invalid CAST should raise an error...
181
with pytest.raises(InvalidOperationError, match=error):
182
df.sql(f"SELECT {cast_op} FROM self")
183
184
# ... or return `null` values if using TRY_CAST
185
target_type = cast_op.split("::")[1]
186
res = df.sql(f"SELECT TRY_CAST(values AS {target_type}) AS cast_values FROM self")
187
assert None in res.to_series()
188
189
190
@pytest.mark.may_fail_cloud # reason: eager construct to_struct
191
@pytest.mark.xfail # this is a construct we cannot deal with anymore
192
def test_cast_json() -> None:
193
df = pl.DataFrame({"txt": ['{"a":[1,2,3],"b":["x","y","z"],"c":5.0}']})
194
195
with pl.SQLContext(df=df, eager=True) as ctx:
196
for json_cast in ("txt::json", "CAST(txt AS JSON)"):
197
res = ctx.execute(f"SELECT {json_cast} AS j FROM df")
198
199
assert res.schema == {
200
"j": pl.Struct(
201
{
202
"a": pl.List(pl.Int64),
203
"b": pl.List(pl.String),
204
"c": pl.Float64,
205
},
206
)
207
}
208
assert_frame_equal(
209
res.unnest("j"),
210
pl.DataFrame(
211
{
212
"a": [[1, 2, 3]],
213
"b": [["x", "y", "z"]],
214
"c": [5.0],
215
}
216
),
217
)
218
219