Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/test_bitwise.py
8420 views
1
from __future__ import annotations
2
3
import typing
4
from typing import TYPE_CHECKING
5
6
import pytest
7
8
import polars as pl
9
from polars.testing import assert_frame_equal, assert_series_equal
10
from tests.unit.conftest import INTEGER_DTYPES
11
12
if TYPE_CHECKING:
13
from tests.conftest import PlMonkeyPatch
14
15
16
@pytest.mark.parametrize("op", ["and_", "or_"])
def test_bitwise_integral_schema(op: str) -> None:
    """Check that ``and_``/``or_`` of two integer columns keeps the dtype of ``a``."""
    lf = pl.LazyFrame({"a": [1, 2], "b": [3, 4]})

    # Resolve the operator by name so one test body covers every parametrized op.
    bitwise_op = getattr(pl.col("a"), op)
    result_schema = lf.select(bitwise_op(pl.col("b"))).collect_schema()
    input_schema = lf.collect_schema()

    assert result_schema["a"] == input_schema["a"]
21
22
23
@pytest.mark.parametrize("op", ["and_", "or_", "xor"])
def test_bitwise_single_null_value_schema(op: str) -> None:
    """Check the schema when a boolean column is combined with a bare ``None``.

    The resulting schema must contain exactly one column, named ``a``.
    """
    frame = pl.DataFrame({"a": [True, True]})
    combined = getattr(pl.col("a"), op)(None)

    schema = frame.select(combined).collect_schema()

    assert schema.len() == 1
    assert "a" in schema
30
31
32
def leading_zeros(v: int | None, nb: int) -> int | None:
33
if v is None:
34
return None
35
36
b = bin(v)[2:]
37
blen = len(b) - len(b.lstrip("0"))
38
if blen == len(b):
39
return nb
40
else:
41
return nb - len(b) + blen
42
43
44
def leading_ones(v: int | None, nb: int) -> int | None:
45
if v is None:
46
return None
47
48
b = bin(v)[2:]
49
if len(b) < nb:
50
return 0
51
else:
52
return len(b) - len(b.lstrip("1"))
53
54
55
def trailing_zeros(v: int | None, nb: int) -> int | None:
56
if v is None:
57
return None
58
59
b = bin(v)[2:]
60
blen = len(b) - len(b.rstrip("0"))
61
if blen == len(b):
62
return nb
63
else:
64
return blen
65
66
67
def trailing_ones(v: int | None) -> int | None:
68
if v is None:
69
return None
70
71
b = bin(v)[2:]
72
return len(b) - len(b.rstrip("1"))
73
74
75
@pytest.mark.parametrize(
    "value",
    [
        0x00,
        0x01,
        0xFCEF_0123,
        0xFFFF_FFFF,
        0xFFF0_FFE1_ABCD_EF01,
        0xAAAA_AAAA_AAAA_AAAA,
        None,
    ],
)
@pytest.mark.parametrize("dtype", [*INTEGER_DTYPES, pl.Boolean])
@typing.no_type_check
def test_bit_counts(value: int, dtype: pl.DataType) -> None:
    """Compare the bit-counting Series methods against pure-Python references.

    The input is first reduced so that it fits the dtype under test; a null
    input is expected to yield a null for every method.
    """
    # Derive the bit width from the dtype's string form: Boolean is 1 bit,
    # 8 bits is the fallback, and the first matching width suffix wins.
    dtype_name = str(dtype)
    bitsize = 1 if "Boolean" in dtype_name else 8
    for width in (16, 32, 64, 128):
        if str(width) in dtype_name:
            bitsize = width
            break

    if value is None:
        co = cz = None
    elif bitsize == 1:
        # Booleans only keep the lowest bit of the parametrized value.
        value = (value & 1) != 0
        co = int(value)
        cz = 1 - co
    else:
        value &= (1 << bitsize) - 1

        # For signed dtypes, drop the top bit when it is set so the value
        # stays non-negative.
        if dtype.is_signed_integer() and value >> (bitsize - 1) > 0:
            value -= 1 << (bitsize - 1)

        co = value.bit_count()
        cz = bitsize - co

    expectations = [
        ("bitwise_count_ones", co),
        ("bitwise_count_zeros", cz),
        ("bitwise_leading_ones", leading_ones(value, bitsize)),
        ("bitwise_leading_zeros", leading_zeros(value, bitsize)),
        ("bitwise_trailing_ones", trailing_ones(value)),
        ("bitwise_trailing_zeros", trailing_zeros(value, bitsize)),
    ]
    for method_name, expected in expectations:
        actual = getattr(pl.Series("a", [value], dtype), method_name)()
        assert_series_equal(actual, pl.Series("a", [expected], pl.UInt32))
143
144
145
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations(dtype: pl.DataType) -> None:
    """Reduce one integer column with AND/OR/XOR in an eager ``select``."""
    values = pl.Series("a", [0x74, 0x1C, 0x05], dtype)
    col_a = pl.col("a")

    result = values.to_frame().select(
        AND=col_a.bitwise_and(),
        OR=col_a.bitwise_or(),
        XOR=col_a.bitwise_xor(),
    )

    expected = pl.DataFrame(
        [
            pl.Series(name, [reduced], dtype)
            for name, reduced in (("AND", 0x04), ("OR", 0x7D), ("XOR", 0x6D))
        ]
    )
    assert_frame_equal(result, expected)
165
166
167
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations_lazy_no_nulls(dtype: pl.DataType) -> None:
    """Reduce a null-free integer column with AND/OR/XOR through a LazyFrame."""
    values = pl.Series("a", [0x74, 0x1C, 0x05], dtype)
    col_a = pl.col("a")

    query = values.to_frame().lazy().select(
        AND=col_a.bitwise_and(),
        OR=col_a.bitwise_or(),
        XOR=col_a.bitwise_xor(),
    )
    result = query.collect()

    expected = pl.DataFrame(
        [
            pl.Series(name, [reduced], dtype)
            for name, reduced in (("AND", 0x04), ("OR", 0x7D), ("XOR", 0x6D))
        ]
    )
    assert_frame_equal(result, expected)
189
190
191
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations_lazy_some_nulls(dtype: pl.DataType) -> None:
    """Reduce an integer column containing nulls with AND/OR/XOR lazily.

    The expected results are the reductions over the non-null values only.
    """
    values = pl.Series("a", [0x74, None, 0x1C, None, 0x05], dtype)
    col_a = pl.col("a")

    query = values.to_frame().lazy().select(
        AND=col_a.bitwise_and(),
        OR=col_a.bitwise_or(),
        XOR=col_a.bitwise_xor(),
    )
    result = query.collect()

    expected = pl.DataFrame(
        [
            pl.Series(name, [reduced], dtype)
            for name, reduced in (("AND", 0x04), ("OR", 0x7D), ("XOR", 0x6D))
        ]
    )
    assert_frame_equal(result, expected)
215
216
217
@pytest.mark.parametrize(
    "expr",
    [pl.col("a").bitwise_and(), pl.col("a").bitwise_or(), pl.col("a").bitwise_xor()],
)
def test_bit_aggregations_lazy_all_nulls(expr: pl.Expr) -> None:
    """Each bitwise reduction of an all-null Int64 column yields a single null."""
    dtype = pl.Int64
    all_null = pl.Series("a", [None, None, None], dtype)

    result = all_null.to_frame().lazy().select(OUT=expr).collect()

    expected = pl.DataFrame([pl.Series("OUT", [None], dtype)])
    assert_frame_equal(result, expected)
230
231
232
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_group_by(dtype: pl.DataType) -> None:
    """Run the AND/OR/XOR reductions per group, including groups with nulls.

    Group 3 holds only a null and is expected to reduce to null; group 4
    mixes values with a null.
    """
    keys = pl.Series("g", [4, 1, 1, 2, 3, 2, 4, 4], pl.Int8)
    values = pl.Series(
        "a", [0x03, 0x74, 0x1C, 0x05, None, 0x70, 0x01, None], dtype
    )
    col_a = pl.col("a")

    result = (
        pl.DataFrame([keys, values])
        .group_by("g")
        .agg(
            AND=col_a.bitwise_and(),
            OR=col_a.bitwise_or(),
            XOR=col_a.bitwise_xor(),
        )
    )

    expected = pl.DataFrame(
        [
            pl.Series("g", [1, 2, 3, 4], pl.Int8),
            pl.Series("AND", [0x74 & 0x1C, 0x05 & 0x70, None, 0x01], dtype),
            pl.Series("OR", [0x74 | 0x1C, 0x05 | 0x70, None, 0x03], dtype),
            pl.Series("XOR", [0x74 ^ 0x1C, 0x05 ^ 0x70, None, 0x02], dtype),
        ]
    )
    # The comparison ignores row order.
    assert_frame_equal(result, expected, check_row_order=False)
259
260
261
def test_scalar_bitwise_xor() -> None:
    """XOR-reduce 0 to 4 repetitions of the UInt8 literal ``0x80``.

    Zero repetitions is expected to give null; after that the result
    alternates between ``0x80`` and ``0x00``.
    """
    xor_of_repeats = [
        pl.repeat(pl.lit(0x80, pl.UInt8), n).bitwise_xor().alias(f"l{n}")
        for n in range(5)
    ]

    result = pl.select(xor_of_repeats).transpose().to_series()

    expected = pl.Series("x", [None, 0x80, 0x00, 0x80, 0x00], pl.UInt8)
    assert_series_equal(result, expected, check_names=False)
272
273
274
@pytest.mark.parametrize(
    ("expr", "result"),
    [
        (pl.all().bitwise_and(), [True, False, False, True, False, None]),
        (pl.all().bitwise_or(), [True, True, False, True, False, None]),
        (pl.all().bitwise_xor(), [False, True, False, True, False, None]),
    ],
)
def test_bool_bitwise_with_nulls_23314(expr: pl.Expr, result: list[bool]) -> None:
    """Reduce boolean columns that contain nulls with each bitwise aggregation.

    ``result`` lists the expected value per column, in column order; the
    all-null column ``f`` is expected to stay null.
    """
    column_names = list("abcdef")
    data = {
        "a": [True, True, None],
        "b": [True, False, None],
        "c": [False, False, None],
        "d": [True, None, None],
        "e": [False, None, None],
        "f": [None, None, None],
    }
    # Column "f" has no values to infer a dtype from, so it is pinned to Boolean.
    frame = pl.DataFrame(data, schema_overrides={"f": pl.Boolean})

    actual = frame.select(expr)

    expected = pl.DataFrame(
        [result],
        orient="row",
        schema=column_names,
        schema_overrides={"f": pl.Boolean},
    )
    assert_frame_equal(actual, expected)
300
301
302
@pytest.mark.parametrize(
    ("expr", "result"),
    [
        (pl.all().bitwise_and(), [True, False, False, False, False, None]),
        (pl.all().bitwise_or(), [True, True, True, False, True, None]),
        (pl.all().bitwise_xor(), [True, False, True, False, True, None]),
    ],
)
def test_bitwise_boolean(expr: pl.Expr, result: list[bool]) -> None:
    """Reduce boolean columns of a LazyFrame with each bitwise aggregation.

    ``result`` lists the expected value per column, in column order; the
    all-null column ``z`` is expected to stay null.
    """
    column_names = list("abcdxz")
    data = {
        "a": [True, True, True],
        "b": [True, False, True],
        "c": [False, True, False],
        "d": [False, False, False],
        "x": [True, False, None],
        "z": [None, None, None],
    }
    # Column "z" has no values to infer a dtype from, so it is pinned to Boolean.
    lazy_frame = pl.LazyFrame(data, schema_overrides={"z": pl.Boolean})

    expected = pl.DataFrame(
        [result],
        orient="row",
        schema=column_names,
        schema_overrides={"z": pl.Boolean},
    )

    actual = lazy_frame.select(expr).collect()
    assert_frame_equal(actual, expected)
329
330
331
# The `evict` path in the code cannot be triggered deterministically, but
# this test is highly likely to hit it.
# Note: POLARS_MAX_THREADS is only honored when the test runs in isolation,
# see issue #22070.
def test_bitwise_boolean_evict_path(plmonkeypatch: PlMonkeyPatch) -> None:
    """Group-by bitwise aggregation of booleans with a tiny hot table."""
    plmonkeypatch.setenv("POLARS_MAX_THREADS", "1")
    plmonkeypatch.setenv("POLARS_HOT_TABLE_SIZE", "2")

    n_groups = 100
    group_size_pairs = 10
    group_size = group_size_pairs * 2

    # Every key 0..group_size-1 occurs n_groups times; even keys always carry
    # True and odd keys always carry False.
    keys = list(range(group_size)) * n_groups
    flags = [True, False] * group_size_pairs * n_groups
    frame = pl.DataFrame({"a": pl.Series(keys), "b": pl.Series(flags)}).sort("a")

    col_b = pl.col("b")
    aggregations = [
        col_b.bitwise_and().alias("bitwise_and"),
        col_b.bitwise_or().alias("bitwise_or"),
        col_b.bitwise_xor().alias("bitwise_xor"),
    ]
    result = frame.lazy().group_by("a").agg(aggregations).sort("a").collect()

    # XOR over n_groups copies of True is True only for an odd n_groups.
    xor_of_trues = n_groups % 2 == 1
    expected = pl.DataFrame(
        {
            "a": list(range(group_size)),
            "bitwise_and": [True, False] * group_size_pairs,
            "bitwise_or": [True, False] * group_size_pairs,
            "bitwise_xor": [xor_of_trues, False] * group_size_pairs,
        }
    )
    assert_frame_equal(result, expected)
367
368
369
def test_bitwise_in_group_by() -> None:
    """Grouping by a constant must match the plain ``select`` aggregations."""
    frame = pl.DataFrame(
        {"a": [111, 222, 111, 222, 333, 333, 999, 888, 999]}
    )

    col_a = pl.col("a")
    aggregations = {
        "bwand": col_a.bitwise_and(),
        "bwor": col_a.bitwise_or(),
        "bwxor": col_a.bitwise_xor(),
    }

    # Grouping by a literal puts every row in one group; the key column
    # ("literal") is dropped before the comparison.
    grouped = frame.group_by(pl.lit(1)).agg(**aggregations).drop("literal")
    ungrouped = frame.select(**aggregations)

    assert_frame_equal(grouped, ungrouped)
400
401