# GitHub Repository: pola-rs/polars
# Path: blob/main/py-polars/tests/unit/operations/test_bitwise.py
1
from __future__ import annotations
2
3
import sys
4
import typing
5
6
import pytest
7
8
import polars as pl
9
from polars.testing import assert_frame_equal, assert_series_equal
10
from tests.unit.conftest import INTEGER_DTYPES
11
12
13
@pytest.mark.parametrize("op", ["and_", "or_"])
def test_bitwise_integral_schema(op: str) -> None:
    """Check that a lazy integer bitwise op reports the input column's dtype.

    ``op`` is the name of the expression method (``and_`` / ``or_``) that is
    looked up on column ``a`` and called with column ``b`` as its operand.
    """
    df = pl.LazyFrame({"a": [1, 2], "b": [3, 4]})
    q = df.select(getattr(pl.col("a"), op)(pl.col("b")))
    # Only the schema is resolved; the dtype of "a" must be unchanged.
    assert q.collect_schema()["a"] == df.collect_schema()["a"]
18
19
20
@pytest.mark.parametrize("op", ["and_", "or_", "xor"])
def test_bitwise_single_null_value_schema(op: str) -> None:
    """Check the schema of a Boolean column combined with a literal ``None``.

    The result must contain exactly one column and it must still be
    named ``a``.
    """
    df = pl.DataFrame({"a": [True, True]})
    q = df.select(getattr(pl.col("a"), op)(None))
    result_schema = q.collect_schema()
    assert result_schema.len() == 1
    assert "a" in result_schema
27
28
29
def leading_zeros(v: int | None, nb: int) -> int | None:
    """Return the number of leading zero bits of ``v`` in an ``nb``-bit word.

    ``None`` is passed through unchanged. ``v`` is expected to be
    non-negative: ``bin`` of a negative int carries a sign prefix that this
    helper does not handle.
    """
    if v is None:
        return None

    b = bin(v)[2:]
    # ``bin`` never zero-pads, so ``b`` is all zeros only when it is "0".
    blen = len(b) - len(b.lstrip("0"))
    if blen == len(b):
        return nb
    # Bits of the word that ``b`` does not occupy are the leading zeros.
    return nb - len(b) + blen
39
40
41
def leading_ones(v: int | None, nb: int) -> int | None:
    """Return the number of leading one bits of ``v`` in an ``nb``-bit word.

    ``None`` is passed through unchanged. ``v`` is expected to be
    non-negative: ``bin`` of a negative int carries a sign prefix that this
    helper does not handle.
    """
    if v is None:
        return None

    b = bin(v)[2:]
    if len(b) < nb:
        # The value does not reach the top bit, so the word starts with 0.
        return 0
    return len(b) - len(b.lstrip("1"))
50
51
52
def trailing_zeros(v: int | None, nb: int) -> int | None:
    """Return the number of trailing zero bits of ``v`` in an ``nb``-bit word.

    ``None`` is passed through unchanged. A zero value yields ``nb``, i.e.
    every bit of the word counts as a trailing zero. ``v`` is expected to be
    non-negative.
    """
    if v is None:
        return None

    b = bin(v)[2:]
    blen = len(b) - len(b.rstrip("0"))
    if blen == len(b):
        # ``b`` is all zeros only when it is "0", i.e. ``v`` is zero.
        return nb
    return blen
62
63
64
def trailing_ones(v: int | None) -> int | None:
    """Return the number of trailing one bits of ``v``.

    ``None`` is passed through unchanged. No word size is needed because the
    count is taken from the low end of the binary rendering. ``v`` is
    expected to be non-negative.
    """
    if v is None:
        return None

    b = bin(v)[2:]
    return len(b) - len(b.rstrip("1"))
70
71
72
@pytest.mark.parametrize(
    "value",
    [
        0x00,
        0x01,
        0xFCEF_0123,
        0xFFFF_FFFF,
        0xFFF0_FFE1_ABCD_EF01,
        0xAAAA_AAAA_AAAA_AAAA,
        None,
    ],
)
@pytest.mark.parametrize("dtype", [*INTEGER_DTYPES, pl.Boolean])
@pytest.mark.skipif(sys.version_info < (3, 10), reason="bit_count introduced in 3.10")
@typing.no_type_check
def test_bit_counts(value: int, dtype: pl.DataType) -> None:
    """Compare the per-element bit-counting ops against pure-Python references.

    ``value`` is first reduced to something representable in ``dtype``; the
    expected counts are then computed in Python and checked against
    ``bitwise_count_ones`` / ``bitwise_count_zeros`` and the
    leading/trailing ones/zeros variants. A ``None`` value must yield a null
    result for every operation.
    """
    # The bit width is derived from the dtype's string form; 8 is the fallback.
    bitsize = 8
    if "Boolean" in str(dtype):
        bitsize = 1
    if "16" in str(dtype):
        bitsize = 16
    elif "32" in str(dtype):
        bitsize = 32
    elif "64" in str(dtype):
        bitsize = 64
    elif "128" in str(dtype):
        bitsize = 128

    if bitsize == 1 and value is not None:
        # Boolean: keep only the lowest bit, as a bool.
        value = (value & 1) != 0

        co = 1 if value else 0
        cz = 0 if value else 1
    elif value is not None:
        # Truncate to the dtype's width.
        value = value & ((1 << bitsize) - 1)

        # For signed dtypes, clear the top bit when it is set so the value
        # stays non-negative and within the signed range.
        if dtype.is_signed_integer() and value >> (bitsize - 1) > 0:
            value = value - pow(2, bitsize - 1)

        co = value.bit_count()
        cz = bitsize - co
    else:
        co = None
        cz = None

    s = pl.Series("a", [value], dtype)

    assert_series_equal(
        s.bitwise_count_ones(),
        pl.Series("a", [co], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_count_zeros(),
        pl.Series("a", [cz], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_leading_ones(),
        pl.Series("a", [leading_ones(value, bitsize)], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_leading_zeros(),
        pl.Series("a", [leading_zeros(value, bitsize)], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_trailing_ones(),
        pl.Series("a", [trailing_ones(value)], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_trailing_zeros(),
        pl.Series("a", [trailing_zeros(value, bitsize)], pl.UInt32),
    )
141
142
143
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations(dtype: pl.DataType) -> None:
    """Reduce a three-element integer column with AND / OR / XOR (eager).

    The expected values are the bitwise reductions of 0x74, 0x1C and 0x05,
    and each result column must keep the input ``dtype``.
    """
    s = pl.Series("a", [0x74, 0x1C, 0x05], dtype)

    df = s.to_frame().select(
        AND=pl.col.a.bitwise_and(),
        OR=pl.col.a.bitwise_or(),
        XOR=pl.col.a.bitwise_xor(),
    )

    assert_frame_equal(
        df,
        pl.DataFrame(
            [
                pl.Series("AND", [0x04], dtype),
                pl.Series("OR", [0x7D], dtype),
                pl.Series("XOR", [0x6D], dtype),
            ]
        ),
    )
163
164
165
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations_lazy_no_nulls(dtype: pl.DataType) -> None:
    """Reduce a null-free integer column with AND / OR / XOR via a LazyFrame.

    Uses the same data and expected values as the eager aggregation test,
    but builds the query lazily and collects it.
    """
    s = pl.Series("a", [0x74, 0x1C, 0x05], dtype)

    lf = s.to_frame().lazy()

    out = lf.select(
        AND=pl.col.a.bitwise_and(),
        OR=pl.col.a.bitwise_or(),
        XOR=pl.col.a.bitwise_xor(),
    ).collect()

    assert_frame_equal(
        out,
        pl.DataFrame(
            [
                pl.Series("AND", [0x04], dtype),
                pl.Series("OR", [0x7D], dtype),
                pl.Series("XOR", [0x6D], dtype),
            ]
        ),
    )
187
188
189
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations_lazy_some_nulls(dtype: pl.DataType) -> None:
    """Reduce an integer column containing nulls with AND / OR / XOR (lazy).

    The expected values equal the reductions of the non-null elements
    (0x74, 0x1C, 0x05), so the interleaved nulls must not affect the result.
    """
    s = pl.Series("a", [0x74, None, 0x1C, None, 0x05], dtype)
    out = (
        s.to_frame()
        .lazy()
        .select(
            AND=pl.col.a.bitwise_and(),
            OR=pl.col.a.bitwise_or(),
            XOR=pl.col.a.bitwise_xor(),
        )
        .collect()
    )

    assert_frame_equal(
        out,
        pl.DataFrame(
            [
                pl.Series("AND", [0x04], dtype),
                pl.Series("OR", [0x7D], dtype),
                pl.Series("XOR", [0x6D], dtype),
            ]
        ),
    )
213
214
215
@pytest.mark.parametrize(
    "expr",
    [pl.col("a").bitwise_and(), pl.col("a").bitwise_or(), pl.col("a").bitwise_xor()],
)
def test_bit_aggregations_lazy_all_nulls(expr: pl.Expr) -> None:
    """Reduce an all-null Int64 column (lazy); the result must be one null."""
    dtype = pl.Int64
    s = pl.Series("a", [None, None, None], dtype)
    out = s.to_frame().lazy().select(OUT=expr).collect()

    assert_frame_equal(
        out,
        pl.DataFrame([pl.Series("OUT", [None], dtype)]),
    )
228
229
230
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_group_by(dtype: pl.DataType) -> None:
    """Check AND / OR / XOR aggregations per group, including nulls.

    Group 3 holds only a null and must aggregate to null. Group 4 holds
    0x03, 0x01 and a null, and must aggregate over the two non-null values.
    """
    df = pl.DataFrame(
        [
            pl.Series("g", [4, 1, 1, 2, 3, 2, 4, 4], pl.Int8),
            pl.Series("a", [0x03, 0x74, 0x1C, 0x05, None, 0x70, 0x01, None], dtype),
        ]
    )

    df = df.group_by("g").agg(
        AND=pl.col.a.bitwise_and(),
        OR=pl.col.a.bitwise_or(),
        XOR=pl.col.a.bitwise_xor(),
    )

    assert_frame_equal(
        df,
        pl.DataFrame(
            [
                pl.Series("g", [1, 2, 3, 4], pl.Int8),
                pl.Series("AND", [0x74 & 0x1C, 0x05 & 0x70, None, 0x01], dtype),
                pl.Series("OR", [0x74 | 0x1C, 0x05 | 0x70, None, 0x03], dtype),
                pl.Series("XOR", [0x74 ^ 0x1C, 0x05 ^ 0x70, None, 0x02], dtype),
            ]
        ),
        # The group_by output is not sorted here, so row order is ignored.
        check_row_order=False,
    )
257
258
259
def test_scalar_bitwise_xor() -> None:
    """XOR-reduce ``i`` repetitions of 0x80 for ``i`` in 0..4.

    Zero repetitions must give null; after that the result alternates
    between 0x80 (odd count) and 0x00 (even count).
    """
    df = pl.select(
        pl.repeat(pl.lit(0x80, pl.UInt8), i).bitwise_xor().alias(f"l{i}")
        for i in range(5)
    ).transpose()

    assert_series_equal(
        df.to_series(),
        pl.Series("x", [None, 0x80, 0x00, 0x80, 0x00], pl.UInt8),
        check_names=False,
    )
270
271
272
@pytest.mark.parametrize(
    ("expr", "result"),
    [
        (pl.all().bitwise_and(), [True, False, False, True, False, None]),
        (pl.all().bitwise_or(), [True, True, False, True, False, None]),
        (pl.all().bitwise_xor(), [False, True, False, True, False, None]),
    ],
)
def test_bool_bitwise_with_nulls_23314(expr: pl.Expr, result: list[bool]) -> None:
    """Reduce Boolean columns that contain nulls with AND / OR / XOR.

    ``result`` holds the expected value per column ``a``..``f``. Columns with
    a single non-null value are expected to reduce to that value, and the
    all-null column ``f`` to null.
    """
    df = pl.DataFrame(
        {
            "a": [True, True, None],
            "b": [True, False, None],
            "c": [False, False, None],
            "d": [True, None, None],
            "e": [False, None, None],
            "f": [None, None, None],
        },
        # An all-null column needs an explicit dtype to be Boolean.
        schema_overrides={"f": pl.Boolean},
    )
    columns = ["a", "b", "c", "d", "e", "f"]
    out = df.select(expr)
    expected = pl.DataFrame(
        [result], orient="row", schema=columns, schema_overrides={"f": pl.Boolean}
    )
    assert_frame_equal(out, expected)
298
299
300
@pytest.mark.parametrize(
    ("expr", "result"),
    [
        (pl.all().bitwise_and(), [True, False, False, False, False, None]),
        (pl.all().bitwise_or(), [True, True, True, False, True, None]),
        (pl.all().bitwise_xor(), [True, False, True, False, True, None]),
    ],
)
def test_bitwise_boolean(expr: pl.Expr, result: list[bool]) -> None:
    """Reduce Boolean columns with AND / OR / XOR through a LazyFrame.

    ``result`` holds the expected value per column ``a``, ``b``, ``c``,
    ``d``, ``x``, ``z``; the all-null column ``z`` is expected to reduce
    to null.
    """
    lf = pl.LazyFrame(
        {
            "a": [True, True, True],
            "b": [True, False, True],
            "c": [False, True, False],
            "d": [False, False, False],
            "x": [True, False, None],
            "z": [None, None, None],
        },
        # An all-null column needs an explicit dtype to be Boolean.
        schema_overrides={"z": pl.Boolean},
    )

    columns = ["a", "b", "c", "d", "x", "z"]
    expected = pl.DataFrame(
        [result], orient="row", schema=columns, schema_overrides={"z": pl.Boolean}
    )
    out = lf.select(expr).collect()
    assert_frame_equal(out, expected)
327
328
329
# Although there is no way to deterministically trigger the `evict` path
# in the code, the test below will do so with high likelihood.
# POLARS_MAX_THREADS is only honored when tested in isolation, see issue #22070.
def test_bitwise_boolean_evict_path(monkeypatch: pytest.MonkeyPatch) -> None:
    """Group-by Boolean AND / OR / XOR over many rows with a tiny hot table.

    Sets ``POLARS_MAX_THREADS`` and ``POLARS_HOT_TABLE_SIZE`` to small values
    before running a lazy group-by over ``group_size`` keys, each repeated
    ``n_groups`` times.
    """
    monkeypatch.setenv("POLARS_MAX_THREADS", "1")
    monkeypatch.setenv("POLARS_HOT_TABLE_SIZE", "2")
    n_groups = 100
    group_size_pairs = 10
    group_size = group_size_pairs * 2

    # Key k always pairs with the same Boolean: True for even k, False for odd.
    col_a = list(range(group_size)) * n_groups
    col_b = [True, False] * group_size_pairs * n_groups
    df = pl.DataFrame({"a": pl.Series(col_a), "b": pl.Series(col_b)}).sort("a")

    out = (
        df.lazy()
        .group_by("a")
        .agg(
            [
                pl.col("b").bitwise_and().alias("bitwise_and"),
                pl.col("b").bitwise_or().alias("bitwise_or"),
                pl.col("b").bitwise_xor().alias("bitwise_xor"),
            ]
        )
        .sort("a")
        .collect()
    )
    expected = pl.DataFrame(
        {
            "a": list(range(group_size)),
            "bitwise_and": [True, False] * group_size_pairs,
            "bitwise_or": [True, False] * group_size_pairs,
            # XOR of n_groups identical Trues is True only for an odd count.
            "bitwise_xor": [n_groups % 2 == 1, False] * group_size_pairs,
        }
    )
    assert_frame_equal(out, expected)
365
366