Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/expr/test_literal.py
8424 views
1
from __future__ import annotations
2
3
from datetime import datetime, timezone
4
from typing import TYPE_CHECKING, Any
5
from zoneinfo import ZoneInfo
6
7
import pytest
8
from dateutil.tz import tzoffset
9
10
import polars as pl
11
from polars.testing import assert_frame_equal
12
13
if TYPE_CHECKING:
14
from polars._typing import PolarsDataType
15
16
17
def test_literal_scalar_list_18686() -> None:
    """Check empty-list literals added as columns to a two-row frame.

    The cast literal is expected to yield a ``List(String)`` column and the
    uncast one a ``List(Null)`` column, each holding one empty list per row.
    """
    frame = pl.DataFrame({"column1": [1, 2], "column2": ["A", "B"]})

    typed_empty = pl.lit([]).cast(pl.List(pl.String))
    untyped_empty = pl.lit([])
    out = frame.with_columns(lit1=typed_empty, lit2=untyped_empty)

    expected_data = {
        "column1": [1, 2],
        "column2": ["A", "B"],
        "lit1": [[], []],
        "lit2": [[], []],
    }
    expected_schema = pl.Schema(
        [
            ("column1", pl.Int64),
            ("column2", pl.String),
            ("lit1", pl.List(pl.String)),
            ("lit2", pl.List(pl.Null)),
        ]
    )

    assert out.to_dict(as_series=False) == expected_data
    assert out.schema == expected_schema
35
36
37
def test_literal_integer_20807() -> None:
    """Round-trip +/- 2**0 .. 2**99 through ``pl.lit``.

    Each power of two and its negation is checked twice: once with the dtype
    left to inference and once with an explicit ``Int128`` dtype.
    """
    for exponent in range(100):
        magnitude = 2**exponent
        signed_pair = (magnitude, -magnitude)

        # inferred dtype
        for number in signed_pair:
            assert pl.select(pl.lit(number)).item() == number

        # explicit 128-bit dtype
        for number in signed_pair:
            assert pl.select(pl.lit(number, dtype=pl.Int128)).item() == number
44
45
46
@pytest.mark.parametrize(
    ("tz", "lit_dtype"),
    [
        (ZoneInfo("Asia/Kabul"), None),
        (ZoneInfo("Asia/Kabul"), pl.Datetime("us", "Asia/Kabul")),
        (ZoneInfo("Europe/Paris"), pl.Datetime("us", "Europe/Paris")),
        (timezone.utc, pl.Datetime("us", "UTC")),
    ],
)
def test_literal_datetime_timezone(tz: Any, lit_dtype: pl.DataType | None) -> None:
    """Check that a tz-aware datetime keeps its time zone as a literal.

    Parameters
    ----------
    tz
        The ``tzinfo`` attached to the test value; ``str(tz)`` is used as the
        expected time zone of the resulting column.
    lit_dtype
        Optional dtype handed to ``pl.lit``; ``None`` leaves it to inference.
    """
    expected_dtype = pl.Datetime("us", time_zone=str(tz))
    value = datetime(2020, 1, 1, tzinfo=tz)

    # validate both frame and lit constructors
    df1 = pl.DataFrame({"dt": [value]})
    df2 = pl.select(dt=pl.lit(value, dtype=lit_dtype))

    assert_frame_equal(df1, df2)

    # Assert on the literal-built frame directly as well, rather than relying
    # only on its equality with the DataFrame-constructor result.
    for df in (df1, df2):
        assert df.schema["dt"] == expected_dtype
        assert df.item() == value
65
66
67
@pytest.mark.parametrize(
    ("tz", "lit_dtype", "expected_item"),
    [
        (
            # fixed offset from UTC
            tzoffset(None, 16200),
            None,
            datetime(2019, 12, 31, 19, 30, tzinfo=timezone.utc),
        ),
        (
            # fixed offset from UTC
            tzoffset("Kabul", 16200),
            None,
            datetime(2019, 12, 31, 19, 30, tzinfo=ZoneInfo("UTC")),
        ),
        (
            # fixed offset from UTC with matching timezone
            tzoffset(None, 16200),
            pl.Datetime("us", "Asia/Kabul"),
            datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kabul")),
        ),
        (
            # fixed offset from UTC with matching timezone
            tzoffset("Kabul", 16200),
            pl.Datetime("us", "Asia/Kabul"),
            datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kabul")),
        ),
    ],
)
def test_literal_datetime_timezone_utc_offset(
    tz: Any, lit_dtype: pl.DataType | None, expected_item: datetime
) -> None:
    """Check datetimes carrying a fixed UTC offset (16200 s, i.e. +04:30).

    Parameters
    ----------
    tz
        A ``dateutil`` fixed-offset ``tzinfo`` attached to the test value.
    lit_dtype
        Optional target dtype. When ``None`` the column is expected to be
        ``Datetime("us", "UTC")``; otherwise the dtype's own time zone is
        expected.
    expected_item
        The datetime expected back from ``DataFrame.item()``.
    """
    # Test for None explicitly instead of relying on the truthiness of a
    # dtype object; this matches the `lit_dtype is None` check further down.
    overrides = {"schema_overrides": {"dt": lit_dtype}} if lit_dtype is not None else {}
    value = datetime(2020, 1, 1, tzinfo=tz)

    # validate both frame and lit constructors
    df1 = pl.DataFrame({"dt": [value]}, **overrides)  # type: ignore[arg-type]
    df2 = pl.select(dt=pl.lit(value, dtype=lit_dtype))

    assert_frame_equal(df1, df2)

    expected_tz = "UTC" if lit_dtype is None else getattr(lit_dtype, "time_zone", None)
    expected_dtype = pl.Datetime("us", time_zone=expected_tz)

    for df in (df1, df2):
        assert df.schema["dt"] == expected_dtype
        assert df.item() == expected_item
114
115
116
def test_literal_datetime_timezone_utc_error() -> None:
    """Expect a ``TypeError`` when the value's offset and the dtype's zone disagree."""
    one_hour_ahead = tzoffset("Somewhere", offset=3600)
    value = datetime(2020, 1, 1, tzinfo=one_hour_ahead)

    expected_message = (
        r"time zone of dtype \('Pacific/Galapagos'\) differs from"
        r" time zone of value \(tzoffset\('Somewhere', 3600\)\)"
    )

    with pytest.raises(TypeError, match=expected_message):
        # the offset does not correspond to the offset of the declared timezone
        pl.select(dt=pl.lit(value, dtype=pl.Datetime(time_zone="Pacific/Galapagos")))
128
129
130
def test_literal_object_25679() -> None:
    """Fill nulls in an ``Object`` column using an ``Object``-typed literal.

    The result is expected to keep the ``Object`` dtype, with every null
    replaced by ``0``.
    """
    source = pl.DataFrame(
        data={"colx": [0, 0, 1, 2, None, 3, 3, None]},
        schema={"colx": pl.Object},
    )
    zero_as_object = pl.lit(0, dtype=pl.Object())

    filled = source.select(pl.col("colx").fill_null(zero_as_object))

    assert filled.schema == {"colx": pl.Object()}
    assert filled["colx"].to_list() == [0, 0, 1, 2, 0, 3, 3, 0]
140
141
142
@pytest.mark.parametrize(
    ("numerator", "divisor", "floordiv", "mod"),
    [
        (10, 3, 3, 1),
        (10, 2, 5, 0),
        (1, 2, 0, 1),
        (0, 10, 0, 0),
        (1, 0, None, None),
    ],
)
@pytest.mark.parametrize(
    ("numerator_dtype", "divisor_dtype"),
    [
        (lhs_dtype, rhs_dtype)
        for lhs_dtype in [pl.Int8, pl.UInt8, None]
        for rhs_dtype in [pl.Int8, pl.UInt8, None]
        if lhs_dtype == rhs_dtype or lhs_dtype is None or rhs_dtype is None
    ],
)
def test_floordiv_mod(
    numerator: int,
    divisor: int,
    floordiv: int | None,
    mod: int | None,
    numerator_dtype: PolarsDataType | None,
    divisor_dtype: PolarsDataType | None,
) -> None:
    """Check ``//`` and ``%`` between two literals.

    The dtype pairs are either identical or have at least one side left
    untyped (``None``). The expected result column is named ``"literal"`` and
    takes the first explicitly given dtype, falling back to ``Int32`` when
    neither side is typed. The zero-divisor case expects null for both
    operations.
    """
    dividend = pl.lit(numerator, dtype=numerator_dtype)
    divider = pl.lit(divisor, dtype=divisor_dtype)
    result_dtype = numerator_dtype or divisor_dtype or pl.Int32

    def expected_frame(value: int | None) -> pl.DataFrame:
        # single-row frame holding the expected result under the result dtype
        return pl.DataFrame({"literal": [value]}, schema={"literal": result_dtype})

    assert_frame_equal(pl.select(dividend // divider), expected_frame(floordiv))
    assert_frame_equal(pl.select(dividend % divider), expected_frame(mod))
189
190